From 2d9bca9a1eb75771a308668e2b365e82e5039e5a Mon Sep 17 00:00:00 2001 From: Tom Smeding Date: Fri, 17 Jan 2020 10:50:47 +0100 Subject: Partial support for ID3v2.4 --- src/encoding.rs | 40 +++++++++++++++++++++++-- src/id3v2.rs | 93 ++++++++++++++++++++++++++++++++++++++------------------- src/main.rs | 6 ++-- 3 files changed, 103 insertions(+), 36 deletions(-) diff --git a/src/encoding.rs b/src/encoding.rs index 9bc7290..598f993 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -34,15 +34,51 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option { 256 * bytes[2*i+1] as u16 + bytes[2*i] as u16 }; - match char::try_from(val as u32) { + match char::try_from(u32::from(val)) { Ok(c) => res.push(c), Err(_) => return None, - }; + } } Some(res) } +pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option { + let len = bytes.len(); + guard!(len % 2 == 0); + + let mut nibbles = Vec::with_capacity(len/2); + + match bom { + 0xfeff => { + for i in 1..len/2 { + nibbles.push((u16::from(bytes[2*i]) << 8) | u16::from(bytes[2*i+1])); + } + } + + 0xfffe => { + for i in 1..len/2 { + nibbles.push((u16::from(bytes[2*i+1]) << 8) | u16::from(bytes[2*i])); + } + } + + _ => { // Invalid BOM + return None + } + } + + String::from_utf16(&nibbles).ok() +} + +pub fn from_utf16_bom(bytes: &[u8]) -> Option { + guard!(bytes.len() >= 2); + from_utf16_genericbom(&bytes[2..], read_big_endian(&bytes[0..2], 8) as u16) +} + +pub fn from_utf16_nobom(bytes: &[u8]) -> Option { + from_utf16_genericbom(bytes, 0xfeff) +} + pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> io::Result { latin1 .chars() diff --git a/src/id3v2.rs b/src/id3v2.rs index 10a25a8..19f5e89 100644 --- a/src/id3v2.rs +++ b/src/id3v2.rs @@ -4,7 +4,7 @@ use std::convert::TryFrom; use std::io::{self, Read, Write}; use std::iter; use std::num::TryFromIntError; -use crate::encoding::{from_latin_1, from_ucs_2_bom, read_big_endian, write_big_endian}; +use crate::encoding::{from_latin_1, from_ucs_2_bom, from_utf16_bom, from_utf16_nobom, read_big_endian, write_big_endian}; use crate::error::IntoIOError; fn parse_id3v2_header(bytes: &[u8]) -> Option<(u16, u8, usize)> { @@ -58,6 +58,7 @@ pub struct ID3v2 { pub struct RawFrame { id: String, flags: u16, + tag_version_sub: u8, // ID3v2.{} body: Vec, } @@ -95,7 +96,7 @@ impl Iterator for ArrayIter2 { } impl RawFrame { - fn parse(data: &[u8]) -> Result, String> { + fn parse(data: &[u8], tag_version_sub: u8) -> Result, String> { if data.len() < 10 { return Err(String::from("Frame buffer too short")); } @@ -114,7 +115,7 @@ impl RawFrame { let body = data[10..10+size].to_vec(); - Ok(Some((RawFrame { id, flags, body }, 10 + size))) + Ok(Some((RawFrame { id, flags, tag_version_sub, body }, 10 + size))) } fn encode(&self, mut stream: W) -> io::Result<()> { @@ -126,29 +127,50 @@ impl RawFrame { } fn interpret_encoded_string(&self) -> io::Result { - match self.body.get(0).ok_or("String field too small".ioerr())? { - 0 => { // Latin-1 - let mut i = self.body.len(); - while i > 0 && self.body[i-1] == 0 { i -= 1; } - from_latin_1(&self.body[1..i]).ok_or("Invalid Latin-1 string field".ioerr()) - } + enum Encoding { + Latin1, + UCS2, + UTF16BOM, + UTF16BE, + UTF8, + } + let encoding = match (self.body.get(0).ok_or("String field too small".ioerr())?, self.tag_version_sub) { + (0, _) => Encoding::Latin1, + (1, 3) => Encoding::UCS2, + (1, 4) => Encoding::UTF16BOM, + (2, 3) => return Err("UTF-16BE-encoded strings unsupported in ID3v2.3".ioerr()), + (2, 4) => Encoding::UTF16BE, + (3, 3) => return Err("UTF8-encoded strings unsupported in ID3v2.3".ioerr()), + (3, 4) => Encoding::UTF8, + (enc, _) => return Err(format!("Unknown string encoding {}", enc).ioerr()), + }; - // TODOv2.4: in 2.4 this is UTF-16 - 1 => { // UCS-2 - let mut i = self.body.len(); - while i > 1 && self.body[i-2] == 0 && self.body[i-1] == 0 { i -= 2; } - from_ucs_2_bom(&self.body[1..i]).ok_or("Invalid UCS-2 string field".ioerr()) - } + let contents = &self.body[1..]; // after the encoding byte - // TODOv2.4: UTF-16BE - // 2 => {} + macro_rules! trunc_zeros_1 { + ($v:expr) => {{ + let v = $v; + let mut i = 0; + while i < v.len() && v[i] != 0 { i += 1; } + &v[..i] + }} + } - // TODOv2.4: UTF-8 - // 3 => {} + macro_rules! trunc_zeros_2 { + ($v:expr) => {{ + let v = $v; + let mut i = 0; + while i <= v.len() - 2 && v[i] != 0 && v[i+1] != 0 { i += 2; } + &v[..i] + }} + } - enc => { - Err(format!("Unknown string encoding {}", enc).ioerr()) - } + match encoding { + Encoding::Latin1 => from_latin_1(trunc_zeros_1!(contents)).ok_or("Invalid Latin-1 string field".ioerr()), + Encoding::UCS2 => from_ucs_2_bom(trunc_zeros_2!(contents)).ok_or("Invalid UCS-2 string field".ioerr()), + Encoding::UTF16BOM => from_utf16_bom(trunc_zeros_2!(contents)).ok_or("Invalid UTF-16 string field".ioerr()), + Encoding::UTF16BE => from_utf16_nobom(trunc_zeros_2!(contents)).ok_or("Invalid UTF-16BE string field".ioerr()), + Encoding::UTF8 => String::from_utf8(trunc_zeros_1!(contents).to_vec()).map_err(|e| e.ioerr()), } } @@ -170,6 +192,7 @@ impl RawFrame { Ok(Self { id: id.to_string(), flags: 0, + tag_version_sub: self.tag_version_sub, body: encode_string(&body)?, }) }; @@ -190,8 +213,13 @@ impl RawFrame { } impl Frame { - pub fn to_raw(self) -> io::Result { - let type_t = |typ: &str, body: String| Ok(RawFrame { id: typ.to_string(), flags: 0, body: encode_string(&body)? }); + fn to_raw(self, tag_version_sub: u8) -> io::Result { + let type_t = |typ: &str, body: String| Ok(RawFrame { + id: typ.to_string(), + flags: 0, + tag_version_sub, + body: encode_string(&body)? + }); match self { Self::TIT2(s) => type_t("TIT2", s), @@ -222,11 +250,10 @@ impl ID3v2 { let version_sub = match id3version { 0x0300 => 3, - // TODOv2.4: uncomment this - // 0x0400 => { - // eprintln!("WARNING: ID3v2.4 tags only partially supported!"); - // 4 - // } + 0x0400 => { + eprintln!("WARNING: ID3v2.4 tags only partially supported!"); + 4 + } _ => { return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr()) } @@ -252,7 +279,7 @@ impl ID3v2 { if tag.iter().all(|&b| b == 0) { break; } // zero tag indicates end of ID3 header - match RawFrame::parse(&body[cursor..]).map_err(|e| e.ioerr())? { + match RawFrame::parse(&body[cursor..], version_sub).map_err(|e| e.ioerr())? { Some((frame, consumed)) => { frames.push(frame); cursor += consumed; @@ -271,7 +298,7 @@ impl ID3v2 { let mut result = Vec::new(); result.push(b'I'); result.push(b'D'); result.push(b'3'); // magic tag - result.push(0x03); result.push(0x00); // version + result.push(self.version_sub); result.push(0x00); // version result.push(0); // flags write_big_endian(&mut result, self.header_size, 4, 7).unwrap(); // header size @@ -294,4 +321,8 @@ impl ID3v2 { Ok(result) } + + pub fn to_raw(&self, frame: Frame) -> io::Result { + frame.to_raw(self.version_sub) + } } diff --git a/src/main.rs b/src/main.rs index ab070ee..6c15834 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,7 @@ mod options; fn parse_options_into(opt: &mut Options) { let mut ap = ArgumentParser::new(); - ap.set_description("ID3v2 tag editor/fixer. Incomplete/work-in-progress."); + ap.set_description("ID3v2 tag editor/fixer. Incomplete/work-in-progress. Support for ID3v2.3, with partial support of ID3v2.4 (no footer tags supported)."); ap.refer(&mut opt.write) .add_option(&["-w", "--write"], StoreTrue, @@ -68,11 +68,11 @@ fn modify_tag(tag: &mut ID3v2, new_frame: Frame) -> io::Result<()> { match indices.len() { 0 => { - tag.frames.push(new_frame.to_raw()?); + tag.frames.push(tag.to_raw(new_frame)?); } 1 => { - tag.frames[indices[0]] = new_frame.to_raw()?; + tag.frames[indices[0]] = tag.to_raw(new_frame)?; } _ => { -- cgit v1.2.3