Skip to content

Opus tag reading very slow #588

@milesegan

Description

@milesegan

Reproducer

Running this benchmark on a directory of tagged files:

use clap::Parser;
use lofty::probe::Probe;
use lofty::tag::{ItemValue, TagExt};
use lofty::file::TaggedFileExt;
use std::path::PathBuf;
use std::time::Instant;
use walkdir::WalkDir;
use crc32fast::Hasher;

// Command-line arguments for the benchmark; the `Parser` derive (imported from
// `clap` above) generates the parsing code that `Cli::parse()` invokes.
// NOTE(review): the `///` comments on the fields are presumably picked up by
// clap as help text, so they are intentionally left unchanged — confirm.
#[derive(Parser)]
#[command(name = "lofty-benchmark")]
struct Cli {
    /// Directory to scan for audio files
    path: PathBuf,

    /// Print each tag as it is read
    // Declared with both `short` and `long` so the flag has a short and a long form.
    #[arg(short, long)]
    debug: bool,
}

/// Benchmark entry point.
///
/// Walks the directory given on the command line, reads the tags of every
/// regular file with lofty, and folds every tag item's key and value into a
/// CRC32 checksum. Prints the number of files that parsed successfully, the
/// elapsed wall-clock time, and the checksum.
///
/// Files and tag items are both visited in sorted order so that the checksum
/// is reproducible between runs. Files that cannot be probed or parsed are
/// skipped silently, since the directory may contain non-audio files.
fn main() {
    let args = Cli::parse();
    // The timer starts before the directory walk, so the reported duration
    // includes traversal and sorting as well as tag reading.
    let start = Instant::now();
    let mut hasher = Hasher::new();
    let mut file_count = 0;

    // Collect all regular files, dropping entries that could not be read.
    let mut files: Vec<_> = WalkDir::new(&args.path)
        .into_iter()
        .filter_map(|e| e.ok())
        .filter(|e| e.file_type().is_file())
        .collect();

    // Sort alphabetically to ensure reproducible order
    files.sort_by(|a, b| a.path().cmp(b.path()));

    for entry in files {
        // Probe::open guesses the format, read() parses it.
        // Errors are ignored because the directory may contain non-audio files.
        let Ok(results) = Probe::open(entry.path()).and_then(|p| p.read()) else {
            continue;
        };
        file_count += 1;

        // Iterate over all tags found in the file (e.g. ID3v2, Vorbis, etc.)
        for tag in results.tags() {
            if args.debug {
                println!("File: {:?}, Tag Type: {:?}, Items: {}", entry.path(), tag.tag_type(), tag.len());
            }

            // Format each key's Debug representation exactly once and keep it
            // next to its item. The string serves as both the sort key and
            // the checksum input, so nothing is re-formatted inside the sort
            // comparator or the hashing loop.
            let mut items: Vec<_> = tag
                .items()
                .map(|item| (format!("{:?}", item.key()), item))
                .collect();
            // Stable sort on the cached key string: same ordering as comparing
            // freshly formatted keys, without the per-comparison allocations.
            items.sort_by(|a, b| a.0.cmp(&b.0));

            for (key, item) in &items {
                if args.debug {
                    println!("\t{:?}: {:?}", item.key(), item.value());
                }
                // Update checksum with key
                hasher.update(key.as_bytes());

                // Update checksum with value
                match item.value() {
                    ItemValue::Text(s) => hasher.update(s.as_bytes()),
                    ItemValue::Locator(s) => hasher.update(s.as_bytes()),
                    ItemValue::Binary(b) => hasher.update(b),
                }
            }
        }
    }

    let duration = start.elapsed();
    println!("Scanned {} files in {:.3?}", file_count, duration);
    println!("Checksum: {:08x}", hasher.finalize());
}

Summary

➜  lofty-benchmark git:(master) cargo run ~/Desktop/music/opus/autechre/
   Compiling lofty v0.22.4 (/Users/miles/c/lofty-rs/lofty)
   Compiling lofty-benchmark v0.1.0 (/Users/miles/c/lofty-benchmark)
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 1.65s
     Running `target/debug/lofty-benchmark /Users/miles/Desktop/music/opus/autechre/`
Scanned 262 files in 6.362s
Checksum: d8b76029
➜  lofty-benchmark git:(master) cargo run ~/Desktop/music/aac/autechre/
   Compiling lofty v0.22.4 (/Users/miles/c/lofty-rs/lofty)
   Compiling lofty-benchmark v0.1.0 (/Users/miles/c/lofty-benchmark)
    Finished `dev` profile [unoptimized + debuginfo] target(s) in 1.53s
     Running `target/debug/lofty-benchmark /Users/miles/Desktop/music/aac/autechre/`
Scanned 262 files in 150.477ms
Checksum: 6f172600

I also noticed TagLib is massively faster than lofty for this case.

I have an experimental branch that changes the code below to read backwards from the end of the file, which seems to bring Opus speed in line with AAC. If you're interested, I could put up a PR or just paste it here.

/// Locates and reads the final page reachable from the reader's current position.
///
/// Starting at the current position, reads a page header, seeks past that
/// page's content, and repeats until a header can no longer be read. The
/// reader is then moved back to the `start` offset of the last header that
/// was read successfully, and that page is read in full and returned.
///
/// # Errors
///
/// Returns an error if the first header cannot be read, if any seek fails,
/// or if reading the final page fails. A failed header read after the first
/// one is treated as the end of the scan rather than as an error.
fn find_last_page<R>(data: &mut R) -> Result<Page>
where
    R: Read + Seek,
{
    let mut latest = PageHeader::read(data)?;

    loop {
        // Skip over the body of the most recently read page.
        data.seek(SeekFrom::Current(latest.content_size() as i64))?;

        match PageHeader::read(data) {
            Ok(next) => latest = next,
            // No further header could be read: `latest` is the last page.
            Err(_) => break,
        }
    }

    // Rewind to the beginning of the last page and read it in full.
    data.seek(SeekFrom::Start(latest.start))?;
    Ok(Page::read(data)?)
}

Expected behavior

Opus tag reading shouldn't be this much slower than m4a.

Assets

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions