From 18de6875fa22506a0769ab759f5ef3b3d92f27e8 Mon Sep 17 00:00:00 2001
From: Tom Smeding
Date: Tue, 24 Mar 2020 21:23:46 +0100
Subject: Fix decoding UTF-16 text with BOM

Previously, the first byte pair was skipped because the BOM was, in
effect, stripped off twice.
---
 src/encoding.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'src/encoding.rs')

diff --git a/src/encoding.rs b/src/encoding.rs
index 598f993..43105ff 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -27,7 +27,7 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
 
     let mut res = String::with_capacity(len/2 - 1);
 
-    for i in 1 .. len/2 {
+    for i in 1..len/2 {
         let val = if bom == 0xfeff {
             256 * bytes[2*i] as u16 + bytes[2*i+1] as u16
         } else {
@@ -43,6 +43,7 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
     Some(res)
 }
 
+/// 'bytes' should NOT contain the BOM!
 pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option {
     let len = bytes.len();
     guard!(len % 2 == 0);
@@ -51,13 +52,13 @@ pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option {
 
     match bom {
         0xfeff => {
-            for i in 1..len/2 {
+            for i in 0..len/2 {
                 nibbles.push((u16::from(bytes[2*i]) << 8) | u16::from(bytes[2*i+1]));
             }
         }
 
         0xfffe => {
-            for i in 1..len/2 {
+            for i in 0..len/2 {
                 nibbles.push((u16::from(bytes[2*i+1]) << 8) | u16::from(bytes[2*i]));
             }
         }
--
cgit v1.2.3