Skip to content

Commit 6b18317

Browse files
tac: support non-UTF-8 separator
1 parent 54566dd commit 6b18317

File tree

2 files changed

+25
-7
lines changed

2 files changed

+25
-7
lines changed

src/uu/tac/src/tac.rs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,14 @@ mod error;
1010
use clap::{Arg, ArgAction, Command};
1111
use memchr::memmem;
1212
use memmap2::Mmap;
13-
use std::ffi::OsString;
13+
use std::ffi::{OsStr, OsString};
1414
use std::io::{BufWriter, Read, Write, stdin, stdout};
1515
use std::{
1616
fs::{File, read},
1717
io::copy,
1818
path::Path,
1919
};
20+
use uucore::error::USimpleError;
2021
#[cfg(unix)]
2122
use uucore::error::set_exit_code;
2223
use uucore::error::{UError, UResult};
@@ -40,10 +41,11 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
4041
let before = matches.get_flag(options::BEFORE);
4142
let regex = matches.get_flag(options::REGEX);
4243
let raw_separator = matches
43-
.get_one::<String>(options::SEPARATOR)
44-
.map_or("\n", |s| s.as_str());
44+
.get_one::<OsString>(options::SEPARATOR)
45+
.map_or(OsStr::new("\n"), |s| s.as_os_str());
46+
4547
let separator = if raw_separator.is_empty() {
46-
"\0"
48+
OsStr::new("\0")
4749
} else {
4850
raw_separator
4951
};
@@ -82,6 +84,7 @@ pub fn uu_app() -> Command {
8284
.short('s')
8385
.long(options::SEPARATOR)
8486
.help(translate!("tac-help-separator"))
87+
.value_parser(clap::value_parser!(OsString))
8588
.value_name("STRING"),
8689
)
8790
.arg(
@@ -183,7 +186,7 @@ fn buffer_tac_regex(
183186
/// If `before` is `true`, then this function assumes that the
184187
/// `separator` appears at the beginning of each line, as in
185188
/// `"/abc/def"`.
186-
fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()> {
189+
fn buffer_tac(data: &[u8], before: bool, separator: &OsStr) -> std::io::Result<()> {
187190
let out = stdout();
188191
let mut out = BufWriter::new(out.lock());
189192

@@ -206,7 +209,7 @@ fn buffer_tac(data: &[u8], before: bool, separator: &str) -> std::io::Result<()>
206209
// The `before` flag controls whether the line separator appears at
207210
// the end of the line (as in "abc\ndef\n") or at the beginning of
208211
// the line (as in "/abc/def").
209-
for i in memmem::rfind_iter(data, separator) {
212+
for i in memmem::rfind_iter(data, separator.as_encoded_bytes()) {
210213
if before {
211214
out.write_all(&data[i..following_line_start])?;
212215
following_line_start = i;
@@ -309,9 +312,12 @@ fn translate_regex_flavor(regex: &str) -> String {
309312
}
310313

311314
#[allow(clippy::cognitive_complexity)]
312-
fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &str) -> UResult<()> {
315+
fn tac(filenames: &[OsString], before: bool, regex: bool, separator: &OsStr) -> UResult<()> {
313316
// Compile the regular expression pattern if it is provided.
314317
let maybe_pattern = if regex {
318+
let Some(separator) = separator.to_str() else {
319+
return Err(USimpleError::new(1, "non-UTF-8 separator is not supported"));
320+
};
315321
match regex::bytes::RegexBuilder::new(&translate_regex_flavor(separator))
316322
.multi_line(true)
317323
.build()

tests/by-util/test_tac.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,18 @@ fn test_null_separator() {
235235
.stdout_is("b\0a\0");
236236
}
237237

238+
/// Checks that `-s` accepts a separator that is not valid UTF-8.
///
/// The separator is the single byte `0xE9`, which is not a valid UTF-8
/// sequence on its own. The input `1\xe92` is piped in, and the test
/// expects `21\xe9` on stdout with nothing on stderr.
///
/// Restricted to Unix because the separator argument is built with
/// `std::os::unix::ffi::OsStringExt::from_vec`, which is only available
/// on Unix targets; without the gate the test file would not compile on
/// other platforms.
#[test]
#[cfg(unix)]
fn test_non_utf8_separator() {
    use std::os::unix::ffi::OsStringExt;

    new_ucmd!()
        .arg("-s")
        // Raw byte 0xE9 passed as an `OsString`, bypassing UTF-8 validation.
        .arg(std::ffi::OsString::from_vec(b"\xe9".to_vec()))
        .pipe_in(b"1\xe92".to_vec())
        .succeeds()
        .no_stderr()
        .stdout_is_bytes(b"21\xe9");
}
249+
238250
#[test]
239251
fn test_regex() {
240252
new_ucmd!()

0 commit comments

Comments
 (0)