summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author tomsmeding <tom.smeding@gmail.com> 2020-01-05 20:44:27 +0100
committer tomsmeding <tom.smeding@gmail.com> 2020-01-05 20:44:27 +0100
commit 8421f2c03d6f905b58b5447a6e0469519c7f8fa6 (patch)
tree 6258db477e638c43a6c51cb0549c070fb0f8f9b2 /src
Initial
Diffstat (limited to 'src')
-rw-r--r-- src/encoding.rs 51
-rw-r--r-- src/error.rs 23
-rw-r--r-- src/id3v2.rs 146
-rw-r--r-- src/main.rs 41
-rw-r--r-- src/options.rs 17
-rw-r--r-- src/util.rs 7
6 files changed, 285 insertions, 0 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
new file mode 100644
index 0000000..a3851aa
--- /dev/null
+++ b/src/encoding.rs
@@ -0,0 +1,51 @@
+use std::convert::TryFrom;
+use crate::util::read_big_endian;
+
/// Leaves the enclosing function with `None` unless the condition holds.
///
/// Because the expansion contains `return None`, the macro can only be used
/// inside functions (or closures) whose return type is an `Option`.
macro_rules! guard {
    ($cond:expr) => {
        match $cond {
            true => {}
            false => return None,
        }
    };
}
+
/// Decodes a non-empty Latin-1 byte string.
///
/// Each byte becomes the character whose code point has the same numeric
/// value (`0x00..=0xFF`), so the conversion itself cannot fail.
///
/// Returns `None` only when `bytes` is empty.
pub fn from_latin_1(bytes: &[u8]) -> Option<String> {
    if bytes.is_empty() {
        return None;
    }

    let mut decoded = String::with_capacity(bytes.len());
    for &byte in bytes {
        decoded.push(char::from(byte));
    }

    Some(decoded)
}
+
/// Decodes BOM-prefixed 16-bit Unicode text.
///
/// The first two bytes must be a byte-order mark: `FE FF` selects big-endian
/// code units, `FF FE` little-endian ones. The remaining code units are
/// decoded as UTF-16. For plain UCS-2 input (no surrogates) this is exactly
/// one character per code unit; in addition, a valid surrogate pair is
/// combined into the supplementary-plane character it encodes instead of
/// being rejected, because tag writers commonly emit UTF-16 here.
///
/// Returns `None` when the input is shorter than two bytes, has an odd
/// length, does not start with a byte-order mark, or contains an unpaired
/// surrogate. Input consisting of only the BOM yields an empty string.
pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
    if bytes.len() < 2 || bytes.len() % 2 != 0 {
        return None;
    }

    let (bom, payload) = bytes.split_at(2);
    let big_endian = match [bom[0], bom[1]] {
        [0xfe, 0xff] => true,
        [0xff, 0xfe] => false,
        _ => return None,
    };

    // The length checks above guarantee that `payload` splits into whole
    // two-byte code units.
    let units = payload.chunks_exact(2).map(|pair| {
        let pair = [pair[0], pair[1]];
        if big_endian {
            u16::from_be_bytes(pair)
        } else {
            u16::from_le_bytes(pair)
        }
    });

    std::char::decode_utf16(units)
        .collect::<Result<String, _>>()
        .ok()
}
+
+// pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> Option<String> {
+// guard!(latin1.chars().all(|c| (c as usize) < 256));
+// match std::str::from_utf8(latin1.as_bytes()) {
+// Ok(res) => Some(res.to_string()),
+// Err(_) => None,
+// }
+// }
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..6358bfe
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,23 @@
+use std::io;
+
/// Conversion of simple error values into an [`io::Error`].
pub trait IntoIOError {
    /// Consumes `self` and returns it wrapped in an [`io::Error`].
    fn ioerr(self) -> io::Error;
}

/// Implements [`IntoIOError`] for every listed type by passing the value to
/// `io::Error::new` with kind `io::ErrorKind::Other`.
macro_rules! impl_into_io_error {
    ($($source:ty),+ $(,)?) => {
        $(
            impl IntoIOError for $source {
                fn ioerr(self) -> io::Error {
                    io::Error::new(io::ErrorKind::Other, self)
                }
            }
        )+
    };
}

impl_into_io_error!(String, &str, std::string::FromUtf8Error);
diff --git a/src/id3v2.rs b/src/id3v2.rs
new file mode 100644
index 0000000..c16c573
--- /dev/null
+++ b/src/id3v2.rs
@@ -0,0 +1,146 @@
+// http://id3.org/id3v2.3.0
+
+use std::io::{self, Read};
+use crate::encoding::{from_latin_1, from_ucs_2_bom};
+use crate::error::IntoIOError;
+use crate::options::EncodingOptions;
+use crate::util::read_big_endian;
+
/// Parses the fixed-size header found at the start of an ID3v2 tag.
///
/// On success returns `(version, flags, size)`:
/// * `version` - the two version bytes combined big-endian (first byte in the
///   high half), e.g. `0x0300`;
/// * `flags` - the flag byte, unmodified;
/// * `size` - the four size bytes combined big-endian with 7 significant
///   bits each.
///
/// Returns `None` unless `bytes` is exactly 10 bytes long, starts with
/// `"ID3"`, neither version byte is `0xff`, and none of the size bytes has
/// its top bit set.
fn parse_id3v2_header(bytes: &[u8]) -> Option<(u16, u8, usize)> {
    if bytes.len() != 10 || !bytes.starts_with(b"ID3") {
        return None;
    }

    let version = &bytes[3..5];
    let flags = bytes[5];
    let size = &bytes[6..10];

    if version.contains(&0xff) || size.iter().any(|&byte| (byte & 0x80) != 0) {
        return None;
    }

    let version = u16::from_be_bytes([version[0], version[1]]);
    // Every size byte was checked to fit in 7 bits, so the groups never overlap.
    let size = size
        .iter()
        .fold(0usize, |acc, &byte| (acc << 7) | usize::from(byte));

    Some((version, flags, size))
}
+
/// An ID3v2 tag: the declared tag size plus the frames read from its body.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ID3v2 {
    /// Size in bytes of the tag body, as declared in the tag header.
    header_size: usize,
    /// Frames in the order they appear in the tag.
    pub frames: Vec<RawFrame>,
}

/// A single frame whose body has not been interpreted yet.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RawFrame {
    /// Four-character frame identifier, e.g. `"TIT2"`.
    id: String,
    /// The two frame flag bytes, combined big-endian.
    flags: u16,
    /// Frame payload, excluding the frame header.
    body: Vec<u8>,
}

/// A frame whose body has been decoded.
///
/// Variants are named after the ID3v2.3 frame identifier they come from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Frame {
    /// Title / song name.
    TIT2(String),
    /// Year.
    TYER(String),
    /// Lead performer(s).
    TPE1(String),
}
+
+impl RawFrame {
+ fn parse(data: &[u8]) -> Result<Option<(Self, usize)>, String> {
+ if data.len() < 10 {
+ return Err(String::from("Frame buffer too short"));
+ }
+
+ if data[0..4].iter().all(|&b| b == 0) {
+ return Ok(None)
+ }
+
+ if !data[0..4].iter().all(|&b| (b'A' <= b && b <= b'Z') || (b'0' <= b && b <= b'9')) {
+ return Err(format!("Invalid frame type {:?}", &data[0..4]));
+ }
+
+ let id = String::from_utf8(data[0..4].to_vec()).unwrap();
+ let size = read_big_endian(&data[4..8], 8);
+ let flags = read_big_endian(&data[8..10], 8) as u16;
+
+ let body = data[10..10+size].to_vec();
+
+ Ok(Some((RawFrame { id, flags, body }, 10 + size)))
+ }
+
+ fn interpret_encoded_string(&self, encopts: &EncodingOptions) -> io::Result<String> {
+ match self.body.get(0).ok_or("String field too small".ioerr())? {
+ 0 => { // Latin-1
+ let mut i = self.body.len();
+ while i > 0 && self.body[i-1] == 0 { i -= 1; }
+ if encopts.latin1_as_utf8 {
+ String::from_utf8(self.body[1..i].to_vec()).map_err(|e| e.ioerr())
+ } else {
+ from_latin_1(&self.body[1..i]).ok_or("Invalid Latin-1 string field".ioerr())
+ }
+ }
+
+ 1 => { // UCS-2
+ let mut i = self.body.len();
+ while i > 1 && self.body[i-2] == 0 && self.body[i-1] == 0 { i -= 2; }
+ from_ucs_2_bom(&self.body[1..i]).ok_or("Invalid UCS-2 string field".ioerr())
+ }
+
+ enc => {
+ Err(format!("Unknown string encoding {}", enc).ioerr())
+ }
+ }
+ }
+
+ pub fn interpret(&self, encopts: &EncodingOptions) -> io::Result<Option<Frame>> {
+ if self.id == "TIT2" {
+ self.interpret_encoded_string(encopts).map(Frame::TIT2).map(Some)
+ } else if self.id == "TYER" {
+ self.interpret_encoded_string(encopts).map(Frame::TYER).map(Some)
+ } else if self.id == "TPE1" {
+ self.interpret_encoded_string(encopts).map(Frame::TPE1).map(Some)
+ } else {
+ Ok(None)
+ }
+ }
+}
+
+impl ID3v2 {
+ pub fn from_stream<R: Read>(stream: &mut R) -> io::Result<Self> {
+ let mut header = [0u8; 10];
+ stream.read_exact(&mut header)?;
+
+ let (id3version, flags, header_size) = parse_id3v2_header(&header).ok_or("Invalid ID3 header".ioerr())?;
+ if id3version != 0x0300 {
+ return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr())
+ }
+
+ if flags != 0 {
+ return Err(format!("No ID3 header flags supported ({:x})", flags).ioerr());
+ }
+
+ let body = {
+ let mut body = Vec::new();
+ body.resize(header_size, 0u8);
+ stream.read_exact(&mut body)?;
+ body
+ };
+
+ let mut frames = Vec::new();
+ let mut cursor = 0;
+
+ while cursor < body.len() {
+ match RawFrame::parse(&body[cursor..]).map_err(|e| e.ioerr())? {
+ Some((frame, consumed)) => {
+ frames.push(frame);
+ cursor += consumed;
+ }
+
+ None => {
+ break;
+ }
+ }
+ }
+
+ Ok(ID3v2 { frames, header_size })
+ }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..6760ee7
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,41 @@
+use std::io;
+use std::fs::File;
+use argparse::{ArgumentParser, StoreTrue, Store};
+use crate::options::{EncodingOptions, Options};
+
+mod encoding;
+mod error;
+mod id3v2;
+mod options;
+mod util;
+
+fn main() -> io::Result<()> {
+ let options = {
+ let mut options: Options = Default::default();
+
+ {
+ let mut ap = ArgumentParser::new();
+ ap.set_description("ID3v2 tag editor/fixer. Incomplete/work-in-progress.");
+ ap.refer(&mut options.latin1_as_utf8)
+ .add_option(&["--assume-utf8"], StoreTrue,
+ "Assume that all strings specified as Latin-1 are really UTF-8.");
+ ap.refer(&mut options.file)
+ .required()
+ .add_argument("file", Store,
+ "File to operate on (probably a .mp3)");
+ ap.parse_args_or_exit();
+ }
+
+ options
+ };
+
+ let mut f = File::open(options.file)?;
+ let tag = id3v2::ID3v2::from_stream(&mut f)?;
+ println!("{:?}", tag);
+
+ for frame in tag.frames {
+ println!("{:?}", frame.interpret(&EncodingOptions { latin1_as_utf8: options.latin1_as_utf8 }));
+ }
+
+ Ok(())
+}
diff --git a/src/options.rs b/src/options.rs
new file mode 100644
index 0000000..3b36e52
--- /dev/null
+++ b/src/options.rs
@@ -0,0 +1,17 @@
/// Command-line options of the program.
///
/// The default value has `latin1_as_utf8` unset and an empty `file`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Options {
    /// Treat text that is marked as Latin-1 as UTF-8 (`--assume-utf8`).
    pub latin1_as_utf8: bool,
    /// Path of the file to operate on.
    pub file: String,
}

/// The subset of [`Options`] that influences how text fields are decoded.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct EncodingOptions {
    /// Decode text that is marked as Latin-1 as UTF-8 instead.
    pub latin1_as_utf8: bool,
}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..b17e304
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,7 @@
/// Combines `bytes` into one integer, most significant byte first, with each
/// byte positioned `bits` bits above the byte that follows it.
///
/// With `bits == 8` this is an ordinary big-endian read; with `bits == 7` it
/// decodes a size stored as 7-bit groups, provided every byte has its top bit
/// clear. An empty slice yields `0`.
pub fn read_big_endian(bytes: &[u8], bits: usize) -> usize {
    let mut value = 0usize;
    let mut shift = 0usize;

    // Walk from the least significant (last) byte upwards.
    for &byte in bytes.iter().rev() {
        value += usize::from(byte) << shift;
        shift += bits;
    }

    value
}