summary refs log tree commit diff
path: root/src/encoding.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/encoding.rs')
-rw-r--r-- src/encoding.rs 51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
new file mode 100644
index 0000000..a3851aa
--- /dev/null
+++ b/src/encoding.rs
@@ -0,0 +1,51 @@
+use std::convert::TryFrom;
+use crate::util::read_big_endian;
+
+/// Early-exit helper for functions that return `Option`.
+///
+/// Evaluates `$cond`; when it is `false`, the enclosing function returns
+/// `None` immediately. When it is `true`, execution continues after the
+/// macro invocation.
+macro_rules! guard {
+    ($cond:expr) => {
+        match $cond {
+            true => {}
+            false => return None,
+        }
+    };
+}
+
+/// Decodes `bytes` as Latin-1 text: every byte becomes the `char` whose
+/// code point equals the byte value.
+///
+/// Returns `None` when `bytes` is empty, otherwise the decoded string.
+pub fn from_latin_1(bytes: &[u8]) -> Option<String> {
+    if bytes.is_empty() {
+        return None;
+    }
+
+    let mut decoded = String::with_capacity(bytes.len());
+    for &byte in bytes {
+        decoded.push(char::from(byte));
+    }
+
+    Some(decoded)
+}
+
+/// Decodes BOM-prefixed UCS-2 data into a `String`.
+///
+/// The first two bytes are read as a byte-order mark: a value of `0xfeff`
+/// selects high-byte-first code units, `0xfffe` selects low-byte-first.
+/// Every following two-byte unit is converted to exactly one `char`.
+///
+/// Returns `None` when the input is shorter than two bytes, has an odd
+/// length, does not start with one of the two marks, or contains a unit
+/// that is not a valid `char` (a surrogate value in `0xd800..=0xdfff`).
+pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
+    if bytes.len() < 2 || bytes.len() % 2 != 0 {
+        return None;
+    }
+
+    let (mark, payload) = bytes.split_at(2);
+
+    // NOTE(review): presumably `read_big_endian(_, 8)` combines the bytes
+    // high byte first, 8 bits each -- confirm against `crate::util`.
+    let bom = read_big_endian(mark, 8);
+    let high_byte_first = if bom == 0xfeff {
+        true
+    } else if bom == 0xfffe {
+        false
+    } else {
+        return None;
+    };
+
+    // One output char per two-byte unit after the mark.
+    let mut decoded = String::with_capacity(payload.len() / 2);
+
+    for unit in payload.chunks_exact(2) {
+        let (hi, lo) = if high_byte_first {
+            (unit[0], unit[1])
+        } else {
+            (unit[1], unit[0])
+        };
+        let code = (u32::from(hi) << 8) | u32::from(lo);
+
+        // Surrogate values are not valid chars; they abort the decode.
+        decoded.push(char::try_from(code).ok()?);
+    }
+
+    Some(decoded)
+}
+
+// pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> Option<String> {
+// guard!(latin1.chars().all(|c| (c as usize) < 256));
+// match std::str::from_utf8(latin1.as_bytes()) {
+// Ok(res) => Some(res.to_string()),
+// Err(_) => None,
+// }
+// }