summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Smeding <tom.smeding@gmail.com> 2020-03-24 21:23:46 +0100
committer Tom Smeding <tom.smeding@gmail.com> 2020-03-24 21:23:46 +0100
commit 18de6875fa22506a0769ab759f5ef3b3d92f27e8 (patch)
tree 7bc8610256ab0e6dde2c1fce76e170e5642942b3
parent dddf79cd0e1ac1c79d10d9802bf90ee1600e16aa (diff)
Fix decoding UTF-16 text with BOM
Previously, the first byte pair was skipped because the BOM was, in effect, stripped off twice.
-rw-r--r-- src/encoding.rs 7
1 files changed, 4 insertions, 3 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
index 598f993..43105ff 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -27,7 +27,7 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
let mut res = String::with_capacity(len/2 - 1);
- for i in 1 .. len/2 {
+ for i in 1..len/2 {
let val = if bom == 0xfeff {
256 * bytes[2*i] as u16 + bytes[2*i+1] as u16
} else {
@@ -43,6 +43,7 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
Some(res)
}
+/// 'bytes' should NOT contain the BOM!
pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
let len = bytes.len();
guard!(len % 2 == 0);
@@ -51,13 +52,13 @@ pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
match bom {
0xfeff => {
- for i in 1..len/2 {
+ for i in 0..len/2 {
nibbles.push((u16::from(bytes[2*i]) << 8) | u16::from(bytes[2*i+1]));
}
}
0xfffe => {
- for i in 1..len/2 {
+ for i in 0..len/2 {
nibbles.push((u16::from(bytes[2*i+1]) << 8) | u16::from(bytes[2*i]));
}
}