summary refs log tree commit diff
path: root/src/encoding.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/encoding.rs')
-rw-r--r-- src/encoding.rs 40
1 files changed, 38 insertions, 2 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
index 9bc7290..598f993 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -34,15 +34,51 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
256 * bytes[2*i+1] as u16 + bytes[2*i] as u16
};
- match char::try_from(val as u32) {
+ match char::try_from(u32::from(val)) {
Ok(c) => res.push(c),
Err(_) => return None,
- };
+ }
}
Some(res)
}
/// Decodes UTF-16 `bytes` whose byte order is selected by `bom`.
///
/// `bytes` holds only the encoded text: the byte-order mark is not expected
/// to be part of it and is passed separately as `bom`.
///
/// * `0xfeff` — every byte pair is combined high byte first (big-endian).
/// * `0xfffe` — every byte pair is combined low byte first (little-endian).
///
/// Returns `None` when `bytes` has an odd length, when `bom` is neither of
/// the two values above, or when the resulting code units are not valid
/// UTF-16 (for example an unpaired surrogate).
pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
    let pairs = bytes.chunks_exact(2);
    // A trailing odd byte cannot form a 16-bit code unit.
    if !pairs.remainder().is_empty() {
        return None;
    }

    // Decode every pair, including the first one: the mark has already been
    // removed by the caller, so skipping a pair here would drop the first
    // character of the text.
    let units: Vec<u16> = match bom {
        0xfeff => pairs.map(|p| u16::from_be_bytes([p[0], p[1]])).collect(),
        0xfffe => pairs.map(|p| u16::from_le_bytes([p[0], p[1]])).collect(),
        // Invalid BOM
        _ => return None,
    };

    String::from_utf16(&units).ok()
}
+
+pub fn from_utf16_bom(bytes: &[u8]) -> Option<String> {
+ guard!(bytes.len() >= 2);
+ from_utf16_genericbom(&bytes[2..], read_big_endian(&bytes[0..2], 8) as u16)
+}
+
+pub fn from_utf16_nobom(bytes: &[u8]) -> Option<String> {
+ from_utf16_genericbom(bytes, 0xfeff)
+}
+
pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> io::Result<String> {
latin1
.chars()