// Origin: commit 8421f2c03d6f905b58b5447a6e0469519c7f8fa6 ("Initial"),
// tomsmeding, Sun, 5 Jan 2020 20:44:27 +0100 -- new file src/encoding.rs.
//
// Decoders that turn raw byte strings into `String`s. Each decoder returns
// `None` for input it does not accept instead of panicking.

use std::convert::TryFrom;

// Early-exit helper: makes the enclosing `Option`-returning function return
// `None` unless `$cond` evaluates to `true`.
macro_rules! guard {
    ($cond:expr) => {
        if !$cond {
            return None;
        }
    };
}

/// Decodes `bytes` as Latin-1, mapping every byte to the Unicode scalar value
/// with the same number.
///
/// Returns `None` when `bytes` is empty; any non-empty input decodes
/// successfully.
pub fn from_latin_1(bytes: &[u8]) -> Option<String> {
    guard!(!bytes.is_empty());

    Some(bytes.iter().copied().map(char::from).collect())
}

/// Decodes `bytes` as UCS-2 text that starts with a byte-order mark.
///
/// The first two bytes select the byte order of every following 16-bit unit:
/// `FE FF` means big-endian, `FF FE` means little-endian. The mark itself is
/// not part of the returned string.
///
/// Returns `None` when
/// * the length is odd or shorter than the two-byte mark,
/// * the first two bytes are not one of the two marks, or
/// * a 16-bit unit is not a valid `char` (i.e. it is a surrogate, which plain
///   UCS-2 cannot represent).
pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
    let len = bytes.len();

    guard!(len % 2 == 0);
    guard!(len >= 2);

    // Pick the unit decoder once, based on the mark, rather than re-testing
    // the byte order for every unit.
    let decode_unit: fn([u8; 2]) -> u16 = match (bytes[0], bytes[1]) {
        (0xfe, 0xff) => u16::from_be_bytes,
        (0xff, 0xfe) => u16::from_le_bytes,
        _ => return None,
    };

    // `len` is even, so the payload splits into whole two-byte units.
    // Collecting into `Option<String>` stops at the first invalid unit and
    // yields `None`.
    bytes[2..]
        .chunks_exact(2)
        .map(|unit| char::try_from(u32::from(decode_unit([unit[0], unit[1]]))).ok())
        .collect()
}

// Disabled in the original commit; kept for reference.
//
// pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> Option<String> {
//     guard!(latin1.chars().all(|c| (c as usize) < 256));
//     match std::str::from_utf8(latin1.as_bytes()) {
//         Ok(res) => Some(res.to_string()),
//         Err(_) => None,
//     }
// }