use std::convert::TryFrom; use std::io::{self, Write}; use crate::error::IntoIOError; macro_rules! guard { ($cond:expr) => { if !$cond { return None; } } } pub fn from_latin_1(bytes: &[u8]) -> Option { guard!(bytes.len() >= 1); Some(bytes.iter().map(|&b| char::from(b)).collect()) } pub fn from_ucs_2_bom(bytes: &[u8]) -> Option { let len = bytes.len(); guard!(len % 2 == 0); guard!(len >= 2); let bom = read_big_endian(&bytes[0..2], 8); guard!(bom == 0xfeff || bom == 0xfffe); let mut res = String::with_capacity(len/2 - 1); for i in 1..len/2 { let val = if bom == 0xfeff { 256 * bytes[2*i] as u16 + bytes[2*i+1] as u16 } else { 256 * bytes[2*i+1] as u16 + bytes[2*i] as u16 }; match char::try_from(u32::from(val)) { Ok(c) => res.push(c), Err(_) => return None, } } Some(res) } /// 'bytes' should NOT contain the BOM! pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option { let len = bytes.len(); guard!(len % 2 == 0); let mut nibbles = Vec::with_capacity(len/2); match bom { 0xfeff => { for i in 0..len/2 { nibbles.push((u16::from(bytes[2*i]) << 8) | u16::from(bytes[2*i+1])); } } 0xfffe => { for i in 0..len/2 { nibbles.push((u16::from(bytes[2*i+1]) << 8) | u16::from(bytes[2*i])); } } _ => { // Invalid BOM return None } } String::from_utf16(&nibbles).ok() } pub fn from_utf16_bom(bytes: &[u8]) -> Option { guard!(bytes.len() >= 2); from_utf16_genericbom(&bytes[2..], read_big_endian(&bytes[0..2], 8) as u16) } pub fn from_utf16_nobom(bytes: &[u8]) -> Option { from_utf16_genericbom(bytes, 0xfeff) } pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> io::Result { latin1 .chars() .map(|c| u8::try_from(u32::from(c))) .collect::, _>>() .map_err(|e| e.ioerr()) .and_then(|v| std::str::from_utf8(&v) .map(|s| s.to_string()) .map_err(|e| e.ioerr())) } pub fn read_big_endian(bytes: &[u8], bits: usize) -> usize { if bits > 8 { panic!("Invalid number of bits in encoding::read_big_endian()"); } bytes .iter() .enumerate() .map(|(i, &b)| (b as usize) << (bits * (bytes.len() - 1 - i))) .sum() } pub fn write_big_endian(mut stream: W, mut value: usize, num_bytes: usize, bits: usize) -> io::Result<()> { if num_bytes > 8 { panic!("Invalid number of bytes in encoding::write_big_endian()"); } if bits > 8 || bits == 0 { panic!("Invalid number of bits in encoding::write_big_endian()"); } let mask = (1 << bits) - 1; let mut bytes = [0u8; 8]; for i in (0..num_bytes).rev() { bytes[i] = (value & mask) as u8; value >>= bits; } if value != 0 { panic!("Value doesn't fit in encoding::write_big_endian()"); } stream.write_all(&bytes[0..num_bytes]) }