summaryrefslogtreecommitdiff
path: root/src/encoding.rs
blob: a3851aa9a14c90cfa227429d170b55f7c229c6c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
use std::convert::TryFrom;
use crate::util::read_big_endian;

/// Early-exit helper for functions that return `Option<_>`.
///
/// Evaluates the boolean expression once; when it is `false`, the enclosing
/// function returns `None` immediately. When it is `true`, execution simply
/// continues with the statement after the macro invocation.
macro_rules! guard {
    ($cond:expr) => {
        if $cond {
            // Condition holds: fall through to the code after the guard.
        } else {
            return None;
        }
    };
}

/// Decodes a Latin-1 (ISO 8859-1) byte sequence into a `String`.
///
/// Every byte is converted with `char::from`, i.e. it becomes the Unicode
/// scalar value with the same numeric value (U+0000 through U+00FF), so any
/// non-empty input decodes successfully.
///
/// Returns `None` when `bytes` is empty.
pub fn from_latin_1(bytes: &[u8]) -> Option<String> {
    guard!(!bytes.is_empty());

    let decoded: String = bytes.iter().copied().map(char::from).collect();
    Some(decoded)
}

/// Decodes a byte-order-mark-prefixed sequence of 16-bit code units into a
/// `String`.
///
/// The first two bytes are handed to `read_big_endian` and the result must be
/// `0xfeff` or `0xfffe`. A value of `0xfeff` means the remaining code units
/// are read high byte first; `0xfffe` means they are read low byte first.
/// The byte-order mark itself is not included in the output.
///
/// Returns `None` when:
/// * the input length is odd or shorter than two bytes,
/// * the leading two bytes are not one of the two accepted marks, or
/// * a code unit is not a valid `char` on its own (for a 16-bit value this
///   means a surrogate, `0xD800..=0xDFFF`) — surrogate pairs are not combined.
pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
    let byte_count = bytes.len();

    guard!(byte_count % 2 == 0);
    guard!(byte_count >= 2);

    // NOTE(review): the meaning of the second argument (8) is defined by
    // `crate::util::read_big_endian`, which is not visible here — confirm.
    let bom = read_big_endian(&bytes[0..2], 8);
    guard!(bom == 0xfeff || bom == 0xfffe);
    let big_endian = bom == 0xfeff;

    // One slot per code unit after the mark. Capacity is counted in bytes, so
    // this is only a starting hint: non-ASCII characters make the string grow.
    let mut decoded = String::with_capacity(byte_count / 2 - 1);

    // The length is even and at least two, so everything after the mark splits
    // into exact two-byte chunks with nothing left over.
    for pair in bytes[2..].chunks_exact(2) {
        let unit = if big_endian {
            u16::from_be_bytes([pair[0], pair[1]])
        } else {
            u16::from_le_bytes([pair[0], pair[1]])
        };

        decoded.push(char::try_from(u32::from(unit)).ok()?);
    }

    Some(decoded)
}

// pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> Option<String> {
//     guard!(latin1.chars().all(|c| (c as usize) < 256));
//     match std::str::from_utf8(latin1.as_bytes()) {
//         Ok(res) => Some(res.to_string()),
//         Err(_) => None,
//     }
// }