/// Decodes UTF-16 text whose byte order is supplied separately as a BOM value.
///
/// `bytes` is the payload only: any byte-order mark has already been removed
/// by the caller, so decoding starts at the very first code unit.
/// `bom` selects the byte order: `0xfeff` means big-endian and `0xfffe` means
/// little-endian.
///
/// Returns `None` when `bytes` has an odd length, when `bom` is neither of the
/// two values above, or when the code units do not form valid UTF-16 (for
/// example an unpaired surrogate).
pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
    // UTF-16 code units are two bytes wide; a trailing odd byte is malformed.
    if bytes.len() % 2 != 0 {
        return None;
    }

    let decode_unit: fn([u8; 2]) -> u16 = match bom {
        0xfeff => u16::from_be_bytes,
        0xfffe => u16::from_le_bytes,
        _ => return None, // Invalid BOM
    };

    // Every pair is decoded, including the first one: the BOM is not part of
    // `bytes`, so skipping a leading unit would drop a real character.
    let units: Vec<u16> = bytes
        .chunks_exact(2)
        .map(|pair| decode_unit([pair[0], pair[1]]))
        .collect();

    String::from_utf16(&units).ok()
}

/// Decodes UTF-16 text that starts with a two-byte byte-order mark.
///
/// The first two bytes are read as a big-endian `u16` and used as the BOM; the
/// remaining bytes are decoded by [`from_utf16_genericbom`].
///
/// Returns `None` when fewer than two bytes are given, or for any reason
/// [`from_utf16_genericbom`] would return `None`.
pub fn from_utf16_bom(bytes: &[u8]) -> Option<String> {
    if bytes.len() < 2 {
        return None;
    }

    let (mark, payload) = bytes.split_at(2);
    from_utf16_genericbom(payload, u16::from_be_bytes([mark[0], mark[1]]))
}

/// Decodes UTF-16 text that carries no byte-order mark, treating it as
/// big-endian.
///
/// Returns `None` for any reason [`from_utf16_genericbom`] would.
pub fn from_utf16_nobom(bytes: &[u8]) -> Option<String> {
    from_utf16_genericbom(bytes, 0xfeff)
}