diff options
Diffstat (limited to 'src/encoding.rs')
-rw-r--r-- | src/encoding.rs | 40 |
1 files changed, 38 insertions, 2 deletions
// UTF-16 decoding helpers introduced by hunk `@@ -34,15 +34,51 @@` of
// src/encoding.rs (9bc7290..598f993). The hunk's surrounding context (the tail
// of `from_ucs_2_bom` and the head of `from_utf8_mistaken_as_latin1`) is only
// partially visible in this view and is therefore not reproduced here.

/// Decodes UTF-16 payload bytes using the byte order named by `bom`.
///
/// `bytes` must contain only the encoded code units: any byte-order mark has
/// to be removed by the caller beforehand, so decoding starts at the very
/// first byte pair.
///
/// * `bom == 0xfeff` - every byte pair is read big-endian.
/// * `bom == 0xfffe` - every byte pair is read little-endian.
///
/// Returns `None` when `bytes` has an odd length, when `bom` is neither of the
/// two values above, or when the decoded code units are not valid UTF-16
/// (for example an unpaired surrogate).
pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
    // A trailing half code unit can never be decoded.
    if bytes.len() % 2 != 0 {
        return None;
    }

    // Pick the byte-pair decoder once instead of branching inside the loop.
    let decode: fn([u8; 2]) -> u16 = match bom {
        0xfeff => u16::from_be_bytes,
        0xfffe => u16::from_le_bytes,
        _ => return None, // Invalid BOM
    };

    // Start at the first pair: the BOM is not part of `bytes`, so skipping a
    // pair here would silently drop the first character of the text.
    let units: Vec<u16> = bytes
        .chunks_exact(2)
        .map(|pair| decode([pair[0], pair[1]]))
        .collect();

    String::from_utf16(&units).ok()
}

/// Decodes UTF-16 text that starts with a two-byte byte-order mark.
///
/// The first two bytes are interpreted as a big-endian `u16` and select the
/// byte order of the remaining bytes (`FE FF` selects big-endian, `FF FE`
/// selects little-endian). The mark itself is not part of the returned string.
///
/// Returns `None` when fewer than two bytes are supplied, when the first two
/// bytes are not a recognised byte-order mark, or when the remaining bytes are
/// not valid UTF-16 of even length.
pub fn from_utf16_bom(bytes: &[u8]) -> Option<String> {
    if bytes.len() < 2 {
        return None;
    }

    let (mark, payload) = bytes.split_at(2);
    from_utf16_genericbom(payload, u16::from_be_bytes([mark[0], mark[1]]))
}

/// Decodes UTF-16 text that carries no byte-order mark, reading it as
/// big-endian.
///
/// Returns `None` when `bytes` has an odd length or is not valid UTF-16.
pub fn from_utf16_nobom(bytes: &[u8]) -> Option<String> {
    from_utf16_genericbom(bytes, 0xfeff)
}