From 8421f2c03d6f905b58b5447a6e0469519c7f8fa6 Mon Sep 17 00:00:00 2001 From: tomsmeding Date: Sun, 5 Jan 2020 20:44:27 +0100 Subject: Initial --- .gitignore | 2 + Cargo.lock | 16 +++++++ Cargo.toml | 10 ++++ src/encoding.rs | 51 ++++++++++++++++++++ src/error.rs | 23 +++++++++ src/id3v2.rs | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 41 ++++++++++++++++ src/options.rs | 17 +++++++ src/util.rs | 7 +++ 9 files changed, 313 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/encoding.rs create mode 100644 src/error.rs create mode 100644 src/id3v2.rs create mode 100644 src/main.rs create mode 100644 src/options.rs create mode 100644 src/util.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..53eaa21 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..5ec3dea --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,16 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "argparse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rid3v2" +version = "0.1.0" +dependencies = [ + "argparse 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[metadata] +"checksum argparse 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "3f8ebf5827e4ac4fd5946560e6a99776ea73b596d80898f357007317a7141e47" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..3974114 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "rid3v2" +version = "0.1.0" +authors = ["tomsmeding "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +argparse = "^0.2" diff --git a/src/encoding.rs b/src/encoding.rs new file mode 100644 index 0000000..a3851aa --- /dev/null +++ b/src/encoding.rs @@ -0,0 +1,51 @@ +use std::convert::TryFrom; +use crate::util::read_big_endian; + +macro_rules! guard { + ($cond:expr) => { + if !$cond { + return None; + } + } +} + +pub fn from_latin_1(bytes: &[u8]) -> Option { + guard!(bytes.len() >= 1); + + Some(bytes.iter().map(|&b| char::from(b)).collect()) +} + +pub fn from_ucs_2_bom(bytes: &[u8]) -> Option { + let len = bytes.len(); + + guard!(len % 2 == 0); + guard!(len >= 2); + + let bom = read_big_endian(&bytes[0..2], 8); + guard!(bom == 0xfeff || bom == 0xfffe); + + let mut res = String::with_capacity(len/2 - 1); + + for i in 1 .. len/2 { + let val = if bom == 0xfeff { + 256 * bytes[2*i] as u16 + bytes[2*i+1] as u16 + } else { + 256 * bytes[2*i+1] as u16 + bytes[2*i] as u16 + }; + + match char::try_from(val as u32) { + Ok(c) => res.push(c), + Err(_) => return None, + }; + } + + Some(res) +} + +// pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> Option { +// guard!(latin1.chars().all(|c| (c as usize) < 256)); +// match std::str::from_utf8(latin1.as_bytes()) { +// Ok(res) => Some(res.to_string()), +// Err(_) => None, +// } +// } diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..6358bfe --- /dev/null +++ b/src/error.rs @@ -0,0 +1,23 @@ +use std::io; + +pub trait IntoIOError { + fn ioerr(self) -> io::Error; +} + +impl IntoIOError for String { + fn ioerr(self) -> io::Error { + io::Error::new(io::ErrorKind::Other, self) + } +} + +impl IntoIOError for &str { + fn ioerr(self) -> io::Error { + io::Error::new(io::ErrorKind::Other, self) + } +} + +impl IntoIOError for std::string::FromUtf8Error { + fn ioerr(self) -> io::Error { + io::Error::new(io::ErrorKind::Other, self) + } +} diff --git a/src/id3v2.rs b/src/id3v2.rs new file mode 100644 index 0000000..c16c573 --- /dev/null +++ b/src/id3v2.rs @@ -0,0 +1,146 @@ +// http://id3.org/id3v2.3.0 + +use std::io::{self, Read}; +use crate::encoding::{from_latin_1, from_ucs_2_bom}; +use crate::error::IntoIOError; +use crate::options::EncodingOptions; +use crate::util::read_big_endian; + +fn parse_id3v2_header(bytes: &[u8]) -> Option<(u16, u8, usize)> { + if bytes.len() == 10 && + bytes[0] == b'I' && + bytes[1] == b'D' && + bytes[2] == b'3' && + bytes[3] != 0xff && + bytes[4] != 0xff && + bytes[6..10].iter().all(|b| (b & 0x80) == 0) { + Some(( + read_big_endian(&bytes[3..5], 8) as u16, + bytes[5], + read_big_endian(&bytes[6..10], 7) + )) + } else { + None + } +} + +#[derive(Debug)] +pub struct ID3v2 { + header_size: usize, + pub frames: Vec, +} + +#[derive(Debug)] +pub struct RawFrame { + id: String, + flags: u16, + body: Vec, +} + +#[derive(Debug)] +pub enum Frame { + TIT2(String), + TYER(String), + TPE1(String), +} + +impl RawFrame { + fn parse(data: &[u8]) -> Result, String> { + if data.len() < 10 { + return Err(String::from("Frame buffer too short")); + } + + if data[0..4].iter().all(|&b| b == 0) { + return Ok(None) + } + + if !data[0..4].iter().all(|&b| (b'A' <= b && b <= b'Z') || (b'0' <= b && b <= b'9')) { + return Err(format!("Invalid frame type {:?}", &data[0..4])); + } + + let id = String::from_utf8(data[0..4].to_vec()).unwrap(); + let size = read_big_endian(&data[4..8], 8); + let flags = read_big_endian(&data[8..10], 8) as u16; + + let body = data[10..10+size].to_vec(); + + Ok(Some((RawFrame { id, flags, body }, 10 + size))) + } + + fn interpret_encoded_string(&self, encopts: &EncodingOptions) -> io::Result { + match self.body.get(0).ok_or("String field too small".ioerr())? { + 0 => { // Latin-1 + let mut i = self.body.len(); + while i > 0 && self.body[i-1] == 0 { i -= 1; } + if encopts.latin1_as_utf8 { + String::from_utf8(self.body[1..i].to_vec()).map_err(|e| e.ioerr()) + } else { + from_latin_1(&self.body[1..i]).ok_or("Invalid Latin-1 string field".ioerr()) + } + } + + 1 => { // UCS-2 + let mut i = self.body.len(); + while i > 1 && self.body[i-2] == 0 && self.body[i-1] == 0 { i -= 2; } + from_ucs_2_bom(&self.body[1..i]).ok_or("Invalid UCS-2 string field".ioerr()) + } + + enc => { + Err(format!("Unknown string encoding {}", enc).ioerr()) + } + } + } + + pub fn interpret(&self, encopts: &EncodingOptions) -> io::Result> { + if self.id == "TIT2" { + self.interpret_encoded_string(encopts).map(Frame::TIT2).map(Some) + } else if self.id == "TYER" { + self.interpret_encoded_string(encopts).map(Frame::TYER).map(Some) + } else if self.id == "TPE1" { + self.interpret_encoded_string(encopts).map(Frame::TPE1).map(Some) + } else { + Ok(None) + } + } +} + +impl ID3v2 { + pub fn from_stream(stream: &mut R) -> io::Result { + let mut header = [0u8; 10]; + stream.read_exact(&mut header)?; + + let (id3version, flags, header_size) = parse_id3v2_header(&header).ok_or("Invalid ID3 header".ioerr())?; + if id3version != 0x0300 { + return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr()) + } + + if flags != 0 { + return Err(format!("No ID3 header flags supported ({:x})", flags).ioerr()); + } + + let body = { + let mut body = Vec::new(); + body.resize(header_size, 0u8); + stream.read_exact(&mut body)?; + body + }; + + let mut frames = Vec::new(); + let mut cursor = 0; + + while cursor < body.len() { + match RawFrame::parse(&body[cursor..]).map_err(|e| e.ioerr())? { + Some((frame, consumed)) => { + frames.push(frame); + cursor += consumed; + } + + None => { + break; + } + } + } + + Ok(ID3v2 { frames, header_size }) + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..6760ee7 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,41 @@ +use std::io; +use std::fs::File; +use argparse::{ArgumentParser, StoreTrue, Store}; +use crate::options::{EncodingOptions, Options}; + +mod encoding; +mod error; +mod id3v2; +mod options; +mod util; + +fn main() -> io::Result<()> { + let options = { + let mut options: Options = Default::default(); + + { + let mut ap = ArgumentParser::new(); + ap.set_description("ID3v2 tag editor/fixer. Incomplete/work-in-progress."); + ap.refer(&mut options.latin1_as_utf8) + .add_option(&["--assume-utf8"], StoreTrue, + "Assume that all strings specified as Latin-1 are really UTF-8."); + ap.refer(&mut options.file) + .required() + .add_argument("file", Store, + "File to operate on (probably a .mp3)"); + ap.parse_args_or_exit(); + } + + options + }; + + let mut f = File::open(options.file)?; + let tag = id3v2::ID3v2::from_stream(&mut f)?; + println!("{:?}", tag); + + for frame in tag.frames { + println!("{:?}", frame.interpret(&EncodingOptions { latin1_as_utf8: options.latin1_as_utf8 })); + } + + Ok(()) +} diff --git a/src/options.rs b/src/options.rs new file mode 100644 index 0000000..3b36e52 --- /dev/null +++ b/src/options.rs @@ -0,0 +1,17 @@ +pub struct Options { + pub latin1_as_utf8: bool, + pub file: String, +} + +impl Default for Options { + fn default() -> Self { + Options { + latin1_as_utf8: false, + file: String::new(), + } + } +} + +pub struct EncodingOptions { + pub latin1_as_utf8: bool, +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..b17e304 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,7 @@ +pub fn read_big_endian(bytes: &[u8], bits: usize) -> usize { + bytes + .iter() + .enumerate() + .map(|(i, &b)| (b as usize) << (bits * (bytes.len() - 1 - i))) + .sum() +} -- cgit v1.2.3