diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 0c807c8c481..1a3c51b3e70 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -4,8 +4,9 @@ // file that was distributed with this source code. // spell-checker:ignore (ToDO) fname, algo +use clap::builder::ValueParser; use clap::{crate_version, value_parser, Arg, ArgAction, Command}; -use std::ffi::OsStr; +use std::ffi::{OsStr, OsString}; use std::fs::File; use std::io::{self, stdin, stdout, BufReader, Read, Write}; use std::iter; @@ -18,7 +19,7 @@ use uucore::checksum::{ use uucore::{ encoding, error::{FromIo, UResult, USimpleError}, - format_usage, help_about, help_section, help_usage, show, + format_usage, help_about, help_section, help_usage, os_str_as_bytes, show, sum::{div_ceil, Digest}, }; @@ -116,52 +117,64 @@ where }; // The BSD checksum output is 5 digit integer let bsd_width = 5; - match (options.algo_name, not_file) { - (ALGORITHM_OPTIONS_SYSV, true) => println!( - "{} {}", - sum.parse::().unwrap(), - div_ceil(sz, options.output_bits) + let (before_filename, should_print_filename, after_filename) = match options.algo_name { + ALGORITHM_OPTIONS_SYSV => ( + format!( + "{} {}{}", + sum.parse::().unwrap(), + div_ceil(sz, options.output_bits), + if not_file { "" } else { " " } + ), + !not_file, + String::new(), ), - (ALGORITHM_OPTIONS_SYSV, false) => println!( - "{} {} {}", - sum.parse::().unwrap(), - div_ceil(sz, options.output_bits), - filename.display() + ALGORITHM_OPTIONS_BSD => ( + format!( + "{:0bsd_width$} {:bsd_width$}{}", + sum.parse::().unwrap(), + div_ceil(sz, options.output_bits), + if not_file { "" } else { " " } + ), + !not_file, + String::new(), ), - (ALGORITHM_OPTIONS_BSD, true) => println!( - "{:0bsd_width$} {:bsd_width$}", - sum.parse::().unwrap(), - div_ceil(sz, options.output_bits) + ALGORITHM_OPTIONS_CRC => ( + format!("{sum} {sz}{}", if not_file { "" } else { " " }), + !not_file, + String::new(), ), - (ALGORITHM_OPTIONS_BSD, false) => println!( - "{:0bsd_width$} {:bsd_width$} {}", - sum.parse::().unwrap(), - div_ceil(sz, options.output_bits), - filename.display() - ), - (ALGORITHM_OPTIONS_CRC, true) => println!("{sum} {sz}"), - (ALGORITHM_OPTIONS_CRC, false) => println!("{sum} {sz} {}", filename.display()), - (ALGORITHM_OPTIONS_BLAKE2B, _) if options.tag => { - if let Some(length) = options.length { - // Multiply by 8 here, as we want to print the length in bits. - println!("BLAKE2b-{} ({}) = {sum}", length * 8, filename.display()); - } else { - println!("BLAKE2b ({}) = {sum}", filename.display()); - } + ALGORITHM_OPTIONS_BLAKE2B if options.tag => { + ( + if let Some(length) = options.length { + // Multiply by 8 here, as we want to print the length in bits. + format!("BLAKE2b-{} (", length * 8) + } else { + "BLAKE2b (".to_owned() + }, + true, + format!(") = {sum}"), + ) } _ => { if options.tag { - println!( - "{} ({}) = {sum}", - options.algo_name.to_ascii_uppercase(), - filename.display() - ); + ( + format!("{} (", options.algo_name.to_ascii_uppercase()), + true, + format!(") = {sum}"), + ) } else { let prefix = if options.asterisk { "*" } else { " " }; - println!("{sum} {prefix}{}", filename.display()); + (format!("{sum} {prefix}"), true, String::new()) } } + }; + print!("{}", before_filename); + if should_print_filename { + // The filename might not be valid UTF-8, and filename.display() would mangle the names. + // Therefore, emit the bytes directly to stdout, without any attempt at encoding them. + let _dropped_result = stdout().write_all(os_str_as_bytes(filename.as_os_str())?); } + println!("{}", after_filename); } Ok(()) @@ -209,7 +222,7 @@ fn prompt_asterisk(tag: bool, binary: bool, had_reset: bool) -> bool { * Don't do it with clap because if it struggling with the --overrides_with * marking the value as set even if not present */ -fn had_reset(args: &[String]) -> bool { +fn had_reset(args: &[OsString]) -> bool { // Indices where "--binary" or "-b", "--tag", and "--untagged" are found let binary_index = args.iter().position(|x| x == "--binary" || x == "-b"); let tag_index = args.iter().position(|x| x == "--tag"); @@ -234,7 +247,7 @@ fn handle_tag_text_binary_flags(matches: &clap::ArgMatches) -> UResult<(bool, bo let binary_flag: bool = matches.get_flag(options::BINARY); - let args: Vec = std::env::args().collect(); + let args: Vec = std::env::args_os().collect(); let had_reset = had_reset(&args); let asterisk: bool = prompt_asterisk(tag, binary_flag, had_reset); @@ -298,7 +311,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { // Execute the checksum validation based on the presence of files or the use of stdin - let files = matches.get_many::(options::FILE).map_or_else( + let files = matches.get_many::(options::FILE).map_or_else( || iter::once(OsStr::new("-")).collect::>(), |files| files.map(OsStr::new).collect::>(), ); @@ -337,7 +350,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { asterisk, }; - match matches.get_many::(options::FILE) { + match matches.get_many::(options::FILE) { Some(files) => cksum(opts, files.map(OsStr::new))?, None => cksum(opts, iter::once(OsStr::new("-")))?, }; @@ -356,6 +369,7 @@ pub fn uu_app() -> Command { Arg::new(options::FILE) .hide(true) .action(clap::ArgAction::Append) + .value_parser(ValueParser::os_string()) .value_hint(clap::ValueHint::FilePath), ) .arg( @@ -469,61 +483,62 @@ mod tests { use super::had_reset; use crate::calculate_blake2b_length; use crate::prompt_asterisk; + use std::ffi::OsString; #[test] fn test_had_reset() { let args = ["--binary", "--tag", "--untagged"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(had_reset(&args)); let args = ["-b", "--tag", "--untagged"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(had_reset(&args)); let args = ["-b", "--binary", "--tag", "--untagged"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(had_reset(&args)); let args = ["--untagged", "--tag", "--binary"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(!had_reset(&args)); let args = ["--untagged", "--tag", "-b"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(!had_reset(&args)); let args = ["--binary", "--tag"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(!had_reset(&args)); let args = ["--tag", "--untagged"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(!had_reset(&args)); let args = ["--text", "--untagged"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(!had_reset(&args)); let args = ["--binary", "--untagged"] .iter() - .map(|&s| s.to_string()) - .collect::>(); + .map(|&s| s.into()) + .collect::>(); assert!(!had_reset(&args)); } diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 044267d25ea..b4b353e3e96 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -222,9 +222,9 @@ pub fn read_yes() -> bool { } } -// Helper function for processing delimiter values (which could be non UTF-8) -// It converts OsString to &[u8] for unix targets only -// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 +/// Helper function for processing delimiter values (which could be non UTF-8) +/// It converts OsString to &[u8] for unix targets only +/// On non-unix (i.e. Windows) it will just return an error if delimiter value is not UTF-8 pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> { #[cfg(unix)] let bytes = os_string.as_bytes(); diff --git a/tests/by-util/test_cksum.rs b/tests/by-util/test_cksum.rs index b2aafc2cbd7..117e54e1ef8 100644 --- a/tests/by-util/test_cksum.rs +++ b/tests/by-util/test_cksum.rs @@ -2,7 +2,7 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (words) asdf algo algos mgmt +// spell-checker:ignore (words) asdf algo algos asha mgmt xffname use crate::common::util::TestScenario; @@ -1250,3 +1250,30 @@ fn test_several_files_error_mgmt() { .stderr_contains("empty: no properly ") .stderr_contains("incorrect: no properly "); } + +#[cfg(target_os = "linux")] +#[test] +fn test_non_utf8_filename() { + use std::ffi::OsString; + use std::os::unix::ffi::OsStringExt; + + let scene = TestScenario::new(util_name!()); + let at = &scene.fixtures; + let filename: OsString = OsStringExt::from_vec(b"funky\xffname".to_vec()); + + at.touch(&filename); + + scene + .ucmd() + .arg(&filename) + .succeeds() + .stdout_is_bytes(b"4294967295 0 funky\xffname\n") + .no_stderr(); + scene + .ucmd() + .arg("-asha256") + .arg(filename) + .succeeds() + .stdout_is_bytes(b"SHA256 (funky\xffname) = e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n") + .no_stderr(); +}