summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Tom Smeding <tom.smeding@gmail.com> 2020-01-17 10:50:47 +0100
committer Tom Smeding <tom.smeding@gmail.com> 2020-01-17 10:50:47 +0100
commit 2d9bca9a1eb75771a308668e2b365e82e5039e5a (patch)
tree b8d54909d8396f1c583ddb33268569a1998ef280 /src
parent 771ff47545cea2b3e9a12aceb305ffcb53aa40b6 (diff)
Partial support for ID3v2.4
Diffstat (limited to 'src')
-rw-r--r-- src/encoding.rs 40
-rw-r--r-- src/id3v2.rs 93
-rw-r--r-- src/main.rs 6
3 files changed, 103 insertions, 36 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
index 9bc7290..598f993 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -34,15 +34,51 @@ pub fn from_ucs_2_bom(bytes: &[u8]) -> Option<String> {
256 * bytes[2*i+1] as u16 + bytes[2*i] as u16
};
- match char::try_from(val as u32) {
+ match char::try_from(u32::from(val)) {
Ok(c) => res.push(c),
Err(_) => return None,
- };
+ }
}
Some(res)
}
+/// Decodes UTF-16 text whose byte order is supplied by the caller.
+///
+/// `bytes` is the payload only (any byte-order mark already removed), so decoding starts at
+/// its first 16-bit unit. `bom` selects the byte order: `0xfeff` means big-endian units,
+/// `0xfffe` means little-endian units.
+///
+/// Yields `None` for any other `bom` value and for unit sequences that are not valid UTF-16.
+/// Odd-length input is rejected up front by the `guard!` check.
+pub fn from_utf16_genericbom(bytes: &[u8], bom: u16) -> Option<String> {
+    guard!(bytes.len() % 2 == 0);
+
+    // `chunks_exact(2)` visits every unit, including the first one: the callers in this file
+    // (`from_utf16_bom`, `from_utf16_nobom`) pass data that no longer contains the BOM, so
+    // there is nothing to skip here.
+    let units: Vec<u16> = match bom {
+        0xfeff => bytes.chunks_exact(2).map(|pair| u16::from_be_bytes([pair[0], pair[1]])).collect(),
+        0xfffe => bytes.chunks_exact(2).map(|pair| u16::from_le_bytes([pair[0], pair[1]])).collect(),
+        _ => return None, // Invalid BOM
+    };
+
+    String::from_utf16(&units).ok()
+}
+
+/// Decodes UTF-16 text that starts with a two-byte byte-order mark.
+///
+/// The first two bytes are combined by `read_big_endian` into the BOM value; everything after
+/// them is handed to `from_utf16_genericbom` as the payload.
+pub fn from_utf16_bom(bytes: &[u8]) -> Option<String> {
+    guard!(bytes.len() >= 2);
+    let (mark, payload) = bytes.split_at(2);
+    let bom = read_big_endian(mark, 8) as u16;
+    from_utf16_genericbom(payload, bom)
+}
+
+/// Decodes UTF-16 text that carries no byte-order mark, reading it as big-endian.
+pub fn from_utf16_nobom(bytes: &[u8]) -> Option<String> {
+    // 0xfeff is the BOM value that selects big-endian decoding in `from_utf16_genericbom`.
+    const BIG_ENDIAN_BOM: u16 = 0xfeff;
+    from_utf16_genericbom(bytes, BIG_ENDIAN_BOM)
+}
+
pub fn from_utf8_mistaken_as_latin1(latin1: &str) -> io::Result<String> {
latin1
.chars()
diff --git a/src/id3v2.rs b/src/id3v2.rs
index 10a25a8..19f5e89 100644
--- a/src/id3v2.rs
+++ b/src/id3v2.rs
@@ -4,7 +4,7 @@ use std::convert::TryFrom;
use std::io::{self, Read, Write};
use std::iter;
use std::num::TryFromIntError;
-use crate::encoding::{from_latin_1, from_ucs_2_bom, read_big_endian, write_big_endian};
+use crate::encoding::{from_latin_1, from_ucs_2_bom, from_utf16_bom, from_utf16_nobom, read_big_endian, write_big_endian};
use crate::error::IntoIOError;
fn parse_id3v2_header(bytes: &[u8]) -> Option<(u16, u8, usize)> {
@@ -58,6 +58,7 @@ pub struct ID3v2 {
pub struct RawFrame {
id: String,
flags: u16,
+ tag_version_sub: u8, // N of the ID3v2.N tag this frame belongs to (3 or 4); selects how the text-encoding byte is read
body: Vec<u8>,
}
@@ -95,7 +96,7 @@ impl<T: Clone> Iterator for ArrayIter2<T> {
}
impl RawFrame {
- fn parse(data: &[u8]) -> Result<Option<(Self, usize)>, String> {
+ fn parse(data: &[u8], tag_version_sub: u8) -> Result<Option<(Self, usize)>, String> {
if data.len() < 10 {
return Err(String::from("Frame buffer too short"));
}
@@ -114,7 +115,7 @@ impl RawFrame {
let body = data[10..10+size].to_vec();
- Ok(Some((RawFrame { id, flags, body }, 10 + size)))
+ Ok(Some((RawFrame { id, flags, tag_version_sub, body }, 10 + size)))
}
fn encode<W: Write>(&self, mut stream: W) -> io::Result<()> {
@@ -126,29 +127,50 @@ impl RawFrame {
}
+    /// Decodes this frame's body as a text string.
+    ///
+    /// The first body byte names the encoding and is interpreted according to the tag's minor
+    /// version: 0 is Latin-1 in both versions, 1 is UCS-2 with BOM in ID3v2.3 and UTF-16 with
+    /// BOM in ID3v2.4, and 2 (UTF-16BE) and 3 (UTF-8) are accepted in ID3v2.4 only. The text
+    /// runs up to the first terminator or, if there is none, to the end of the body.
+    ///
+    /// Fails on an empty body, on an encoding byte that is unknown or not allowed for the tag
+    /// version, and on text that is invalid in the selected encoding.
     fn interpret_encoded_string(&self) -> io::Result<String> {
-        match self.body.get(0).ok_or("String field too small".ioerr())? {
-            0 => { // Latin-1
-                let mut i = self.body.len();
-                while i > 0 && self.body[i-1] == 0 { i -= 1; }
-                from_latin_1(&self.body[1..i]).ok_or("Invalid Latin-1 string field".ioerr())
-            }
+        enum Encoding {
+            Latin1,
+            UCS2,
+            UTF16BOM,
+            UTF16BE,
+            UTF8,
+        }
+        let encoding = match (self.body.get(0).ok_or("String field too small".ioerr())?, self.tag_version_sub) {
+            (0, _) => Encoding::Latin1,
+            (1, 3) => Encoding::UCS2,
+            (1, 4) => Encoding::UTF16BOM,
+            (2, 3) => return Err("UTF-16BE-encoded strings unsupported in ID3v2.3".ioerr()),
+            (2, 4) => Encoding::UTF16BE,
+            (3, 3) => return Err("UTF8-encoded strings unsupported in ID3v2.3".ioerr()),
+            (3, 4) => Encoding::UTF8,
+            (enc, _) => return Err(format!("Unknown string encoding {}", enc).ioerr()),
+        };
-        // TODOv2.4: in 2.4 this is UTF-16
-            1 => { // UCS-2
-                let mut i = self.body.len();
-                while i > 1 && self.body[i-2] == 0 && self.body[i-1] == 0 { i -= 2; }
-                from_ucs_2_bom(&self.body[1..i]).ok_or("Invalid UCS-2 string field".ioerr())
-            }
+        let contents = &self.body[1..]; // after the encoding byte
-            // TODOv2.4: UTF-16BE
-            // 2 => {}
+        // Cuts a byte string at its first zero byte (terminator for Latin-1 and UTF-8).
+        macro_rules! trunc_zeros_1 {
+            ($v:expr) => {{
+                let v = $v;
+                let mut i = 0;
+                while i < v.len() && v[i] != 0 { i += 1; }
+                &v[..i]
+            }}
+        }
-            // TODOv2.4: UTF-8
-            // 3 => {}
+        // Cuts a string of 16-bit units at its first all-zero unit (terminator for UCS-2 and
+        // UTF-16). A trailing odd byte is dropped.
+        macro_rules! trunc_zeros_2 {
+            ($v:expr) => {{
+                let v = $v;
+                let mut i = 0;
+                // `i + 2 <= v.len()` cannot underflow when fewer than two bytes are present.
+                // Only a unit with BOTH bytes zero terminates the text: a unit with a single
+                // zero byte is ordinary data (every ASCII character looks like that).
+                while i + 2 <= v.len() && (v[i] != 0 || v[i+1] != 0) { i += 2; }
+                &v[..i]
+            }}
+        }
-            enc => {
-                Err(format!("Unknown string encoding {}", enc).ioerr())
-            }
+        match encoding {
+            Encoding::Latin1 => from_latin_1(trunc_zeros_1!(contents)).ok_or("Invalid Latin-1 string field".ioerr()),
+            Encoding::UCS2 => from_ucs_2_bom(trunc_zeros_2!(contents)).ok_or("Invalid UCS-2 string field".ioerr()),
+            Encoding::UTF16BOM => from_utf16_bom(trunc_zeros_2!(contents)).ok_or("Invalid UTF-16 string field".ioerr()),
+            Encoding::UTF16BE => from_utf16_nobom(trunc_zeros_2!(contents)).ok_or("Invalid UTF-16BE string field".ioerr()),
+            Encoding::UTF8 => String::from_utf8(trunc_zeros_1!(contents).to_vec()).map_err(|e| e.ioerr()),
         }
     }
@@ -170,6 +192,7 @@ impl RawFrame {
Ok(Self {
id: id.to_string(),
flags: 0,
+ tag_version_sub: self.tag_version_sub,
body: encode_string(&body)?,
})
};
@@ -190,8 +213,13 @@ impl RawFrame {
}
impl Frame {
- pub fn to_raw(self) -> io::Result<RawFrame> {
- let type_t = |typ: &str, body: String| Ok(RawFrame { id: typ.to_string(), flags: 0, body: encode_string(&body)? });
+ fn to_raw(self, tag_version_sub: u8) -> io::Result<RawFrame> {
+ let type_t = |typ: &str, body: String| Ok(RawFrame {
+ id: typ.to_string(),
+ flags: 0,
+ tag_version_sub,
+ body: encode_string(&body)?
+ });
match self {
Self::TIT2(s) => type_t("TIT2", s),
@@ -222,11 +250,10 @@ impl ID3v2 {
let version_sub = match id3version {
0x0300 => 3,
- // TODOv2.4: uncomment this
- // 0x0400 => {
- // eprintln!("WARNING: ID3v2.4 tags only partially supported!");
- // 4
- // }
+ 0x0400 => {
+ eprintln!("WARNING: ID3v2.4 tags only partially supported!");
+ 4
+ }
_ => {
return Err(format!("ID3 header version {}.{} not supported", id3version / 256, id3version % 256).ioerr())
}
@@ -252,7 +279,7 @@ impl ID3v2 {
if tag.iter().all(|&b| b == 0) { break; } // zero tag indicates end of ID3 header
- match RawFrame::parse(&body[cursor..]).map_err(|e| e.ioerr())? {
+ match RawFrame::parse(&body[cursor..], version_sub).map_err(|e| e.ioerr())? {
Some((frame, consumed)) => {
frames.push(frame);
cursor += consumed;
@@ -271,7 +298,7 @@ impl ID3v2 {
let mut result = Vec::new();
result.push(b'I'); result.push(b'D'); result.push(b'3'); // magic tag
- result.push(0x03); result.push(0x00); // version
+ result.push(self.version_sub); result.push(0x00); // version
result.push(0); // flags
write_big_endian(&mut result, self.header_size, 4, 7).unwrap(); // header size
@@ -294,4 +321,8 @@ impl ID3v2 {
Ok(result)
}
+
+    /// Converts `frame` into a raw frame stamped with this tag's minor version.
+    pub fn to_raw(&self, frame: Frame) -> io::Result<RawFrame> {
+        let minor_version = self.version_sub;
+        frame.to_raw(minor_version)
+    }
}
diff --git a/src/main.rs b/src/main.rs
index ab070ee..6c15834 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -12,7 +12,7 @@ mod options;
fn parse_options_into(opt: &mut Options) {
let mut ap = ArgumentParser::new();
- ap.set_description("ID3v2 tag editor/fixer. Incomplete/work-in-progress.");
+ ap.set_description("ID3v2 tag editor/fixer. Incomplete/work-in-progress. Support for ID3v2.3, with partial support of ID3v2.4 (no footer tags supported).");
ap.refer(&mut opt.write)
.add_option(&["-w", "--write"], StoreTrue,
@@ -68,11 +68,11 @@ fn modify_tag(tag: &mut ID3v2, new_frame: Frame) -> io::Result<()> {
match indices.len() {
0 => {
- tag.frames.push(new_frame.to_raw()?);
+ tag.frames.push(tag.to_raw(new_frame)?);
}
1 => {
- tag.frames[indices[0]] = new_frame.to_raw()?;
+ tag.frames[indices[0]] = tag.to_raw(new_frame)?;
}
_ => {