Skip to content

Commit e00ff6b

Browse files
committed
cmp: stop allocating for byte printing
This makes verbose comparison of two completely different 37 MB files 2.34x faster than our own baseline, putting our cmp at almost 6x faster than GNU cmp (/opt/homebrew/bin/cmp) on my M4 Pro Mac. The output remains identical to that of GNU cmp. Mostly equal and smaller files do not regress.

Benchmark 1: ./bin/baseline/diffutils cmp -lb t/huge t/eguh
  Time (mean ± σ): 1.669 s ± 0.011 s [User: 1.594 s, System: 0.073 s]
  Range (min … max): 1.654 s … 1.689 s 10 runs
  Warning: Ignoring non-zero exit code.

Benchmark 2: ./target/release/diffutils cmp -lb t/huge t/eguh
  Time (mean ± σ): 714.2 ms ± 4.1 ms [User: 629.3 ms, System: 82.7 ms]
  Range (min … max): 707.2 ms … 721.5 ms 10 runs
  Warning: Ignoring non-zero exit code.

Benchmark 3: /opt/homebrew/bin/cmp -lb t/huge t/eguh
  Time (mean ± σ): 4.213 s ± 0.050 s [User: 4.128 s, System: 0.081 s]
  Range (min … max): 4.160 s … 4.316 s 10 runs
  Warning: Ignoring non-zero exit code.

Benchmark 4: /usr/bin/cmp -lb t/huge t/eguh
  Time (mean ± σ): 3.892 s ± 0.048 s [User: 3.819 s, System: 0.070 s]
  Range (min … max): 3.808 s … 3.976 s 10 runs
  Warning: Ignoring non-zero exit code.

Summary
  ./target/release/diffutils cmp -lb t/huge t/eguh ran
    2.34 ± 0.02 times faster than ./bin/baseline/diffutils cmp -lb t/huge t/eguh
    5.45 ± 0.07 times faster than /usr/bin/cmp -lb t/huge t/eguh
    5.90 ± 0.08 times faster than /opt/homebrew/bin/cmp -lb t/huge t/eguh
1 parent c38fe5f commit e00ff6b

File tree

1 file changed

+60
-37
lines changed

1 file changed

+60
-37
lines changed

src/cmp.rs

Lines changed: 60 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -500,12 +500,6 @@ pub fn main(opts: Peekable<ArgsOs>) -> ExitCode {
500500
}
501501
}
502502

503-
#[inline]
504-
fn is_ascii_printable(byte: u8) -> bool {
505-
let c = byte as char;
506-
c.is_ascii() && !c.is_ascii_control()
507-
}
508-
509503
#[inline]
510504
fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
511505
*buf = [b' ', b' ', b'0'];
@@ -525,32 +519,68 @@ fn format_octal(byte: u8, buf: &mut [u8; 3]) -> &str {
525519
}
526520

527521
#[inline]
528-
fn format_byte(byte: u8) -> String {
529-
let mut byte = byte;
530-
let mut quoted = vec![];
531-
532-
if !is_ascii_printable(byte) {
533-
if byte >= 128 {
534-
quoted.push(b'M');
535-
quoted.push(b'-');
536-
byte -= 128;
522+
fn write_visible_byte(output: &mut Vec<u8>, byte: u8) -> usize {
523+
match byte {
524+
// Control characters: ^@, ^A, ..., ^_
525+
0..=31 => {
526+
output.push(b'^');
527+
output.push(byte + 64);
528+
2
537529
}
538-
539-
if byte < 32 {
540-
quoted.push(b'^');
541-
byte += 64;
542-
} else if byte == 127 {
543-
quoted.push(b'^');
544-
byte = b'?';
530+
// Printable ASCII (space through ~)
531+
32..=126 => {
532+
output.push(byte);
533+
1
534+
}
535+
// DEL: ^?
536+
127 => {
537+
output.extend_from_slice(b"^?");
538+
2
539+
}
540+
// High bytes with control equivalents: M-^@, M-^A, ..., M-^_
541+
128..=159 => {
542+
output.push(b'M');
543+
output.push(b'-');
544+
output.push(b'^');
545+
output.push(byte - 64);
546+
4
547+
}
548+
// High bytes: M-<space>, M-!, ..., M-~
549+
160..=254 => {
550+
output.push(b'M');
551+
output.push(b'-');
552+
output.push(byte - 128);
553+
3
554+
}
555+
// Byte 255: M-^?
556+
255 => {
557+
output.extend_from_slice(b"M-^?");
558+
4
545559
}
546-
assert!((byte as char).is_ascii());
547560
}
561+
}
548562

549-
quoted.push(byte);
563+
/// Writes a byte in visible form with right-padding to 4 spaces.
564+
#[inline]
565+
fn write_visible_byte_padded(output: &mut Vec<u8>, byte: u8) {
566+
const SPACES: &[u8] = b"    ";
567+
const WIDTH: usize = SPACES.len();
568+
569+
let display_width = write_visible_byte(output, byte);
550570

551-
// SAFETY: the checks and shifts we do above match what cat and GNU
571+
// Add right-padding spaces
572+
let padding = WIDTH.saturating_sub(display_width);
573+
output.extend_from_slice(&SPACES[..padding]);
574+
}
575+
576+
/// Formats a byte as a visible string (for non-performance-critical path)
577+
#[inline]
578+
fn format_visible_byte(byte: u8) -> String {
579+
let mut result = Vec::with_capacity(4);
580+
write_visible_byte(&mut result, byte);
581+
// SAFETY: the checks and shifts in write_visible_byte match what cat and GNU
552582
// cmp do to ensure characters fall inside the ascii range.
553-
unsafe { String::from_utf8_unchecked(quoted) }
583+
unsafe { String::from_utf8_unchecked(result) }
554584
}
555585

556586
// This function has been optimized to not use the Rust fmt system, which
@@ -588,22 +618,15 @@ fn format_verbose_difference(
588618

589619
output.push(b' ');
590620

591-
let from_byte_str = format_byte(from_byte);
592-
let from_byte_padding = 4 - from_byte_str.len();
593-
594-
output.extend_from_slice(from_byte_str.as_bytes());
595-
596-
for _ in 0..from_byte_padding {
597-
output.push(b' ')
598-
}
621+
write_visible_byte_padded(output, from_byte);
599622

600623
output.push(b' ');
601624

602625
output.extend_from_slice(format_octal(to_byte, &mut to_oct).as_bytes());
603626

604627
output.push(b' ');
605628

606-
output.extend_from_slice(format_byte(to_byte).as_bytes());
629+
write_visible_byte(output, to_byte);
607630

608631
output.push(b'\n');
609632
} else {
@@ -706,9 +729,9 @@ fn report_difference(from_byte: u8, to_byte: u8, at_byte: usize, at_line: usize,
706729
print!(
707730
" is {:>3o} {:char_width$} {:>3o} {:char_width$}",
708731
from_byte,
709-
format_byte(from_byte),
732+
format_visible_byte(from_byte),
710733
to_byte,
711-
format_byte(to_byte)
734+
format_visible_byte(to_byte)
712735
);
713736
}
714737
println!();

0 commit comments

Comments
 (0)