diff options
author | Tom Smeding <tom.smeding@gmail.com> | 2020-03-24 21:23:46 +0100 |
---|---|---|
committer | Tom Smeding <tom.smeding@gmail.com> | 2020-03-24 21:23:46 +0100 |
commit | 18de6875fa22506a0769ab759f5ef3b3d92f27e8 (patch) | |
tree | 7bc8610256ab0e6dde2c1fce76e170e5642942b3 | |
parent | dddf79cd0e1ac1c79d10d9802bf90ee1600e16aa (diff) |
Fix decoding UTF-16 text with BOM
Previously, the first byte pair was skipped because the BOM was, in
effect, stripped off twice.
-rw-r--r-- | src/encoding.rs | 7 |
1 file changed, 4 insertions, 3 deletions
```diff
diff --git a/src/encoding.rs b/src/encoding.rs
index 598f993..43105ff 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -27,7 +27,7 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
 
     let mut res = String::with_capacity(len/2 - 1);
 
-    for i in 1 .. len/2 {
+    for i in 1..len/2 {
         let val = if bom == 0xfeff {
             256 * bytes[2*i] as u16 + bytes[2*i+1] as u16
         } else {
@@ -43,6 +43,7 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
     Some(res)
 }
 
+/// 'bytes' should NOT contain the BOM!
 pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
     let len = bytes.len();
     guard!(len % 2 == 0);
@@ -51,13 +52,13 @@ pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
 
     match bom {
         0xfeff => {
-            for i in 1..len/2 {
+            for i in 0..len/2 {
                 nibbles.push((u16::from(bytes[2*i]) << 8) | u16::from(bytes[2*i+1]));
             }
         }
 
         0xfffe => {
-            for i in 1..len/2 {
+            for i in 0..len/2 {
                 nibbles.push((u16::from(bytes[2*i+1]) << 8) | u16::from(bytes[2*i]));
             }
         }
```