// http://id3.org/id3v2.3.0 use std::convert::TryFrom; use std::io::{self, Read, Write}; use std::iter; use std::num::TryFromIntError; use crate::encoding::{from_latin_1, from_ucs_2_bom, from_utf16_bom, from_utf16_nobom, read_big_endian, write_big_endian}; use crate::error::IntoIOError; fn parse_id3v2_header(bytes: &[u8]) -> Option<(u16, u8, usize)> { if bytes.len() == 10 && bytes[0] == b'I' && bytes[1] == b'D' && bytes[2] == b'3' && bytes[3] != 0xff && bytes[4] != 0xff && bytes[6..10].iter().all(|b| (b & 0x80) == 0) { Some(( read_big_endian(&bytes[3..5], 8) as u16, bytes[5], read_big_endian(&bytes[6..10], 7) )) } else { None } } /// Returns the number of bytes consumed of the body. fn parse_extended_header(body: &[u8], version_sub: u8) -> io::Result { let extended_size = match version_sub { 3 => 4 + read_big_endian(&body[0..4], 8), 4 => read_big_endian(&body[0..4], 7), _ => panic!("") }; if body.len() < extended_size { return Err("Header too small for extended header size field".ioerr()); } match version_sub { 3 => if extended_size != 10 && extended_size != 14 { // Error message uses the v2.3 size format, which does not include the size // number itself. return Err( format!("Extended header has unrecognised length {} (not 6 or 10)", extended_size) .ioerr() ); } 4 => if extended_size < 6 { return Err(format!("Extended header size too small ({})", extended_size).ioerr()); } _ => panic!("") } match version_sub { 3 => { let flags = read_big_endian(&body[4..6], 8); // First bit is "CRC present", which we do not care about (it's contained in // the extended header, which we otherwise ignore anyhow). if (flags & 0x7fff) != 0 { return Err( format!("Unknown extended header flags set (flags bytes 0x{:04x})", flags) .ioerr() ); } } 4 => { let num_flag_bytes = usize::from(body[4]); if num_flag_bytes != 1 { return Err( format!("Unknown number of extended flag bytes {}", num_flag_bytes) .ioerr() ); } let flags = body[5]; // 0x40: "update tag"; ignored // 0x20: "CRC present"; ignored // 0x10: tag restructions; ignored if (flags & 0x8f) != 0 { return Err( format!("Unknown extended flags (flags byte 0x{:02x})", flags) .ioerr() ); } } _ => panic!("") } // Ignore the rest of the extended header Ok(extended_size) } /// Encodes native string to ID3v2 string encoding (either Latin-1 or UCS-2 according to the /// characters used). fn encode_string(s: &str) -> io::Result> { let as_latin1 = || iter::once(Ok(0)) .chain(s.chars().map(|c| u8::try_from(u32::from(c)))) .collect::, _>>(); let as_ucs2 = || { let nibbles = s.chars() .map(|c| u16::try_from(u32::from(c))) .collect::, _>>()?; Ok(iter::once(1) .chain(ArrayIter2::new([0xfe, 0xff])) // BOM .chain(nibbles.iter().flat_map(|&n| ArrayIter2::new([(n >> 8) as u8, n as u8]))) .collect::>()) }; as_latin1() .or_else(|_| as_ucs2()) .map_err(|e: TryFromIntError| e.ioerr()) } #[derive(Debug)] pub struct ID3v2 { header_size: usize, pub version_sub: u8, // ID3v2.{} pub frames: Vec, } #[derive(Debug)] pub struct RawFrame { id: String, flags: u16, tag_version_sub: u8, // ID3v2.{} body: Vec, } #[derive(Debug)] pub enum Frame { TIT2(String), TYER(String), TPE1(String), TALB(String), TRCK(String), } struct ArrayIter2 { arr: [T; 2], cursor: u8, } impl ArrayIter2 { fn new(arr: [T; 2]) -> Self { Self { arr, cursor: 0 } } } impl Iterator for ArrayIter2 { type Item = T; fn next(&mut self) -> Option { if (self.cursor as usize) < self.arr.len() { let i = self.cursor; self.cursor += 1; Some(self.arr[i as usize].clone()) } else { None } } } impl RawFrame { fn parse(data: &[u8], tag_version_sub: u8) -> Result, String> { if data.len() < 10 { return Err(String::from("Frame buffer too short")); } if data[0..4].iter().all(|&b| b == 0) { return Ok(None) } if !data[0..4].iter().all(|&b| (b'A' <= b && b <= b'Z') || (b'0' <= b && b <= b'9')) { return Err(format!("Invalid frame type {:?}", &data[0..4])); } let id = String::from_utf8(data[0..4].to_vec()).unwrap(); let size = match tag_version_sub { 3 => read_big_endian(&data[4..8], 8), 4 => read_big_endian(&data[4..8], 7), _ => panic!("") }; let flags = read_big_endian(&data[8..10], 8) as u16; if flags != 0 { return Err(format!("Frame flags not supported (flags bytes {:04x})", flags)); } let body = data[10..10+size].to_vec(); Ok(Some((RawFrame { id, flags, tag_version_sub, body }, 10 + size))) } fn encode(&self, mut stream: W) -> io::Result<()> { stream.write_all(self.id.as_bytes())?; write_big_endian(&mut stream, self.body.len(), 4, 8)?; write_big_endian(&mut stream, self.flags as usize, 2, 8)?; stream.write_all(&self.body)?; Ok(()) } fn interpret_encoded_string(&self) -> io::Result { enum Encoding { Latin1, UCS2, UTF16BOM, UTF16BE, UTF8, } let encoding = match (self.body.get(0).ok_or("String field too small".ioerr())?, self.tag_version_sub) { (0, _) => Encoding::Latin1, (1, 3) => Encoding::UCS2, (1, 4) => Encoding::UTF16BOM, (2, 3) => return Err("UTF-16BE-encoded strings unsupported in ID3v2.3".ioerr()), (2, 4) => Encoding::UTF16BE, (3, 3) => return Err("UTF8-encoded strings unsupported in ID3v2.3".ioerr()), (3, 4) => Encoding::UTF8, (enc, _) => return Err(format!("Unknown string encoding {}", enc).ioerr()), }; let contents = &self.body[1..]; // after the encoding byte macro_rules! trunc_zeros_1 { ($v:expr) => {{ let v = $v; let mut i = 0; while i < v.len() && v[i] != 0 { i += 1; } &v[..i] }} } macro_rules! trunc_zeros_2 { ($v:expr) => {{ let v = $v; let mut i = 0; while i <= v.len() - 2 && (v[i] != 0 || v[i+1] != 0) { i += 2; } &v[..i] }} } match encoding { Encoding::Latin1 => from_latin_1(trunc_zeros_1!(contents)).ok_or("Invalid Latin-1 string field".ioerr()), Encoding::UCS2 => from_ucs_2_bom(trunc_zeros_2!(contents)).ok_or("Invalid UCS-2 string field".ioerr()), Encoding::UTF16BOM => from_utf16_bom(trunc_zeros_2!(contents)).ok_or("Invalid UTF-16 string field".ioerr()), Encoding::UTF16BE => from_utf16_nobom(trunc_zeros_2!(contents)).ok_or("Invalid UTF-16BE string field".ioerr()), Encoding::UTF8 => String::from_utf8(trunc_zeros_1!(contents).to_vec()).map_err(|e| e.ioerr()), } } pub fn interpret(&self) -> io::Result> { let type_t = |typ: fn(String) -> Frame| self.interpret_encoded_string().map(typ).map(Some); if self.id == "TIT2" { type_t(Frame::TIT2) } else if self.id == "TYER" { type_t(Frame::TYER) } else if self.id == "TPE1" { type_t(Frame::TPE1) } else if self.id == "TALB" { type_t(Frame::TALB) } else if self.id == "TRCK" { type_t(Frame::TRCK) } else { Ok(None) } } pub fn map_string String>(&self, f: F) -> io::Result> { let type_t = |id: &str, body: String| -> io::Result { Ok(Self { id: id.to_string(), flags: 0, tag_version_sub: self.tag_version_sub, body: encode_string(&body)?, }) }; match self.interpret()? { Some(Frame::TIT2(s)) => Ok(Some(type_t("TIT2", f(s))?)), Some(Frame::TYER(s)) => Ok(Some(type_t("TYER", f(s))?)), Some(Frame::TPE1(s)) => Ok(Some(type_t("TPE1", f(s))?)), Some(Frame::TALB(s)) => Ok(Some(type_t("TALB", f(s))?)), Some(Frame::TRCK(s)) => Ok(Some(type_t("TRCK", f(s))?)), None => Ok(None), } } pub fn get_id(&self) -> &str { &self.id } } impl Frame { fn to_raw(self, tag_version_sub: u8) -> io::Result { let type_t = |typ: &str, body: String| Ok(RawFrame { id: typ.to_string(), flags: 0, tag_version_sub, body: encode_string(&body)? }); match self { Self::TIT2(s) => type_t("TIT2", s), Self::TYER(s) => type_t("TYER", s), Self::TPE1(s) => type_t("TPE1", s), Self::TALB(s) => type_t("TALB", s), Self::TRCK(s) => type_t("TRCK", s), } } pub fn id(&self) -> &str { match self { Frame::TIT2(_) => "TIT2", Frame::TYER(_) => "TYER", Frame::TPE1(_) => "TPE1", Frame::TALB(_) => "TALB", Frame::TRCK(_) => "TRCK", } } } impl ID3v2 { pub fn from_stream(stream: &mut R) -> io::Result { let mut header = [0u8; 10]; stream.read_exact(&mut header)?; let (id3version, flags, header_size) = parse_id3v2_header(&header).ok_or("Invalid ID3v2 header or no ID3v2 tag found".ioerr())?; let version_sub = match id3version { 0x0300 => 3, 0x0400 => { eprintln!("WARNING: ID3v2.4 tags only partially supported!"); 4 } _ => { return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr()) } }; if (flags & 0x80) != 0 { return Err(format!("ID3 unsynchronisation not supported").ioerr()); } let extended_header = (flags & 0x40) != 0; if (flags & 0x20) != 0 { return Err( format!("Refusing to read ID3 tag in \"experimental\" stage, whatever that may mean") .ioerr() ); } // ID3v2.4 only if (flags & 0x10) != 0 { return Err(format!("3DI footer unsupported (ID3v2.4 section 3.4)").ioerr()); } if (flags & 0x0f) != 0 { return Err( format!("Unknown ID3 header flags found (flags byte: 0x{:02x})", flags) .ioerr() ); } let body = { let mut body = Vec::new(); body.resize(header_size, 0u8); stream.read_exact(&mut body)?; body }; let mut frames = Vec::new(); let mut cursor = 0; if extended_header { cursor += parse_extended_header(&body, version_sub)?; } while cursor < body.len() { let tag = &body[cursor..cursor+4]; if tag.len() < 4 { break; } // not even enough bytes anymore if tag.iter().all(|&b| b == 0) { break; } // zero tag indicates end of ID3 header match RawFrame::parse(&body[cursor..], version_sub).map_err(|e| e.ioerr())? { Some((frame, consumed)) => { frames.push(frame); cursor += consumed; } None => { return Err(format!("Failed parsing frame in header starting at offset {}", cursor).ioerr()) } } } Ok(ID3v2 { frames, version_sub, header_size }) } pub fn encode(&self) -> io::Result> { let mut result = Vec::new(); result.push(b'I'); result.push(b'D'); result.push(b'3'); // magic tag result.push(self.version_sub); result.push(0x00); // version result.push(0); // flags write_big_endian(&mut result, self.header_size, 4, 7).unwrap(); // header size for frame in &self.frames { frame.encode(&mut result).unwrap(); } // Zero out the rest of the header to ensure it does not get read as more frames if result.len() < self.header_size { result.resize(self.header_size, 0u8); } if result.len() > self.header_size { return Err( format!("New tag grew larger ({} bytes) than space allocated for original tag ({} bytes), dare not encode", result.len(), self.header_size) .ioerr() ); } Ok(result) } pub fn to_raw(&self, frame: Frame) -> io::Result { frame.to_raw(self.version_sub) } }