From 771ff47545cea2b3e9a12aceb305ffcb53aa40b6 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Sun, 12 Jan 2020 21:08:12 +0100 Subject: Correctly write tags It's now able to fix fake-utf8 v2.3 tags generated with the id3v2 tool! --- src/encoding.rs | 37 +++++++++++++++++++++++++++-- src/id3v2.rs | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++------- src/main.rs | 12 ++++++---- src/util.rs | 7 ------ 4 files changed, 108 insertions(+), 21 deletions(-) delete mode 100644 src/util.rs diff --git a/src/encoding.rs b/src/encoding.rs index fc0c335..9bc7290 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -1,7 +1,6 @@ use std::convert::TryFrom; -use std::io; +use std::io::{self, Write}; use crate::error::IntoIOError; -use crate::util::read_big_endian; macro_rules! guard { ($cond:expr) => { @@ -54,3 +53,37 @@ pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> io::Result<String> { .map(|s| s.to_string()) .map_err(|e| e.ioerr())) } + +pub fn read_big_endian(bytes: &[u8], bits: usize) -> usize { + if bits > 8 { + panic!("Invalid number of bits in encoding::read_big_endian()"); + } + + bytes + .iter() + .enumerate() + .map(|(i, &b)| (b as usize) << (bits * (bytes.len() - 1 - i))) + .sum() +} + +pub fn write_big_endian<W: Write>(mut stream: W, mut value: usize, num_bytes: usize, bits: usize) -> io::Result<()> { + if num_bytes > 8 { + panic!("Invalid number of bytes in encoding::write_big_endian()"); + } + if bits > 8 || bits == 0 { + panic!("Invalid number of bits in encoding::write_big_endian()"); + } + + let mask = (1 << bits) - 1; + let mut bytes = [0u8; 8]; + for i in (0..num_bytes).rev() { + bytes[i] = (value & mask) as u8; + value >>= bits; + } + + if value != 0 { + panic!("Value doesn't fit in encoding::write_big_endian()"); + } + + stream.write_all(&bytes[0..num_bytes]) +} diff --git a/src/id3v2.rs b/src/id3v2.rs index 9e07cd3..10a25a8 100644 --- a/src/id3v2.rs +++ b/src/id3v2.rs @@ -1,12 +1,11 @@ // http://id3.org/id3v2.3.0 use std::convert::TryFrom; -use std::io::{self, 
Read}; +use std::io::{self, Read, Write}; use std::iter; use std::num::TryFromIntError; -use crate::encoding::{from_latin_1, from_ucs_2_bom}; +use crate::encoding::{from_latin_1, from_ucs_2_bom, read_big_endian, write_big_endian}; use crate::error::IntoIOError; -use crate::util::read_big_endian; fn parse_id3v2_header(bytes: &[u8]) -> Option<(u16, u8, usize)> { if bytes.len() == 10 && @@ -51,6 +50,7 @@ fn encode_string(s: &str) -> io::Result<Vec<u8>> { #[derive(Debug)] pub struct ID3v2 { header_size: usize, + version_sub: u8, // ID3v2.{} pub frames: Vec<RawFrame>, } @@ -117,6 +117,14 @@ impl RawFrame { Ok(Some((RawFrame { id, flags, body }, 10 + size))) } + fn encode<W: Write>(&self, mut stream: W) -> io::Result<()> { + stream.write_all(self.id.as_bytes())?; + write_big_endian(&mut stream, self.body.len(), 4, 8)?; + write_big_endian(&mut stream, self.flags as usize, 2, 8)?; + stream.write_all(&self.body)?; + Ok(()) + } + fn interpret_encoded_string(&self) -> io::Result<String> { match self.body.get(0).ok_or("String field too small".ioerr())? 
{ 0 => { // Latin-1 @@ -125,12 +133,19 @@ impl RawFrame { from_latin_1(&self.body[1..i]).ok_or("Invalid Latin-1 string field".ioerr()) } + // TODOv2.4: in 2.4 this is UTF-16 1 => { // UCS-2 let mut i = self.body.len(); while i > 1 && self.body[i-2] == 0 && self.body[i-1] == 0 { i -= 2; } from_ucs_2_bom(&self.body[1..i]).ok_or("Invalid UCS-2 string field".ioerr()) } + // TODOv2.4: UTF-16BE + // 2 => {} + + // TODOv2.4: UTF-8 + // 3 => {} + enc => { Err(format!("Unknown string encoding {}", enc).ioerr()) } @@ -204,9 +219,18 @@ impl ID3v2 { stream.read_exact(&mut header)?; let (id3version, flags, header_size) = parse_id3v2_header(&header).ok_or("Invalid ID3 header".ioerr())?; - if id3version != 0x0300 { - return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr()) - } + + let version_sub = match id3version { + 0x0300 => 3, + // TODOv2.4: uncomment this + // 0x0400 => { + // eprintln!("WARNING: ID3v2.4 tags only partially supported!"); + // 4 + // } + _ => { + return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr()) + } + }; if flags != 0 { return Err(format!("No ID3 header flags supported ({:x})", flags).ioerr()); @@ -223,6 +247,11 @@ impl ID3v2 { let mut cursor = 0; while cursor < body.len() { + let tag = &body[cursor..cursor+4]; + if tag.len() < 4 { break; } // not even enough bytes anymore + + if tag.iter().all(|&b| b == 0) { break; } // zero tag indicates end of ID3 header + match RawFrame::parse(&body[cursor..]).map_err(|e| e.ioerr())? 
{ Some((frame, consumed)) => { frames.push(frame); @@ -230,11 +259,39 @@ impl ID3v2 { } None => { - break; + return Err(format!("Failed parsing frame in header starting at offset {}", cursor).ioerr()) } } } - Ok(ID3v2 { frames, header_size }) + Ok(ID3v2 { frames, version_sub, header_size }) + } + + pub fn encode(&self) -> io::Result<Vec<u8>> { + let mut result = Vec::new(); + + result.push(b'I'); result.push(b'D'); result.push(b'3'); // magic tag + result.push(0x03); result.push(0x00); // version + result.push(0); // flags + write_big_endian(&mut result, self.header_size, 4, 7).unwrap(); // header size + + for frame in &self.frames { + frame.encode(&mut result).unwrap(); + } + + // Zero out the rest of the header to ensure it does not get read as more frames + if result.len() < self.header_size { + result.resize(self.header_size, 0u8); + } + + if result.len() > self.header_size { + return Err( + format!("New tag grew larger ({} bytes) than space allocated for original tag ({} bytes), dare not encode", + result.len(), self.header_size) + .ioerr() + ); + } + + Ok(result) } } diff --git a/src/main.rs b/src/main.rs index c2b4c5a..ab070ee 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ -use std::io; -use std::fs::File; +use std::io::{self, Write}; +use std::fs::{File, OpenOptions}; use argparse::{ArgumentParser, Store, StoreTrue, StoreOption}; use crate::error::IntoIOError; use crate::id3v2::{ID3v2, Frame}; @@ -9,7 +9,6 @@ mod encoding; mod error; mod id3v2; mod options; -mod util; fn parse_options_into(opt: &mut Options) { let mut ap = ArgumentParser::new(); @@ -87,7 +86,7 @@ fn modify_tag(tag: &mut ID3v2, new_frame: Frame) -> io::Result<()> { fn main() -> io::Result<()> { let options = parse_options(); - let mut tag = ID3v2::from_stream(&mut File::open(options.file)?)?; + let mut tag = ID3v2::from_stream(&mut File::open(&options.file)?)?; // println!("{:?}", tag); if options.latin1_as_utf8 { @@ -109,6 +108,11 @@ fn main() -> io::Result<()> { print_tag(&tag)?; // 
TODO: if -w, then write tags to file (if it fits) + if options.write { + let encoded = tag.encode()?; + let mut f = OpenOptions::new().write(true).open(&options.file)?; + f.write_all(&encoded)?; + } Ok(()) } diff --git a/src/util.rs b/src/util.rs deleted file mode 100644 index b17e304..0000000 --- a/src/util.rs +++ /dev/null @@ -1,7 +0,0 @@ -pub fn read_big_endian(bytes: &[u8], bits: usize) -> usize { - bytes - .iter() - .enumerate() - .map(|(i, &b)| (b as usize) << (bits * (bytes.len() - 1 - i))) - .sum() -} -- cgit v1.2.3