From c3b06e10a65da393ae7d82153f7acaca6ad1eef5 Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Fri, 31 Mar 2023 19:18:45 -0700 Subject: [PATCH 1/6] wc: clean up of settings The Settings object did not need a QuotingStyle member, it was basically a static. --- src/uu/wc/src/wc.rs | 134 +++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 65 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 9d7a23d1fbc..c697cb33b7d 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -42,23 +42,36 @@ struct Settings { show_words: bool, show_max_line_length: bool, files0_from_stdin_mode: bool, - title_quoting_style: QuotingStyle, total_when: TotalWhen, } +impl Default for Settings { + fn default() -> Self { + // Defaults if none of -c, -m, -l, -w, nor -L are specified. + Self { + show_bytes: true, + show_chars: false, + show_lines: true, + show_words: true, + show_max_line_length: false, + files0_from_stdin_mode: false, + total_when: TotalWhen::default(), + } + } +} + impl Settings { fn new(matches: &ArgMatches) -> Self { - let title_quoting_style = QuotingStyle::Shell { - escape: true, - always_quote: false, - show_control: false, - }; - let files0_from_stdin_mode = match matches.get_one::(options::FILES0_FROM) { Some(files_0_from) => files_0_from == STDIN_REPR, None => false, }; + let total_when = matches + .get_one::(options::TOTAL) + .map(Into::into) + .unwrap_or_default(); + let settings = Self { show_bytes: matches.get_flag(options::BYTES), show_chars: matches.get_flag(options::CHAR), @@ -66,46 +79,38 @@ impl Settings { show_words: matches.get_flag(options::WORDS), show_max_line_length: matches.get_flag(options::MAX_LINE_LENGTH), files0_from_stdin_mode, - title_quoting_style, - total_when: matches.get_one::(options::TOTAL).unwrap().into(), + total_when, }; - if settings.show_bytes - || settings.show_chars - || settings.show_lines - || settings.show_words - || settings.show_max_line_length - { - return settings; - } - - Self { - show_bytes: true, - show_chars: false, - show_lines: true, - show_words: true, - show_max_line_length: false, - files0_from_stdin_mode, - title_quoting_style: settings.title_quoting_style, - total_when: settings.total_when, + if settings.number_enabled() > 0 { + settings + } else { + Self { + files0_from_stdin_mode, + total_when, + ..Default::default() + } } } fn number_enabled(&self) -> u32 { - let mut result = 0; - result += self.show_bytes as u32; - result += self.show_chars as u32; - result += self.show_lines as u32; - result += self.show_max_line_length as u32; - result += self.show_words as u32; - result + [ + self.show_bytes, + self.show_chars, + self.show_lines, + self.show_max_line_length, + self.show_words, + ] + .into_iter() + .map(Into::::into) + .sum() } } const ABOUT: &str = help_about!("wc.md"); const USAGE: &str = help_usage!("wc.md"); -pub mod options { +mod options { pub static BYTES: &str = "bytes"; pub static CHAR: &str = "chars"; pub static FILES0_FROM: &str = "files0-from"; @@ -118,6 +123,12 @@ pub mod options { static ARG_FILES: &str = "files"; static STDIN_REPR: &str = "-"; +static QS_ESCAPE: &QuotingStyle = &QuotingStyle::Shell { + escape: true, + always_quote: false, + show_control: false, +}; + enum StdinKind { /// Stdin specified on command-line with "-". Explicit, @@ -147,35 +158,34 @@ impl From<&OsStr> for Input { impl Input { /// Converts input to title that appears in stats. - fn to_title(&self, quoting_style: &QuotingStyle) -> Option { + fn to_title(&self) -> Option { match self { - Self::Path(path) => Some(escape_name(&path.clone().into_os_string(), quoting_style)), - Self::Stdin(StdinKind::Explicit) => { - Some(escape_name(OsStr::new(STDIN_REPR), quoting_style)) - } + Self::Path(path) => Some(escape_name(path.as_os_str(), QS_ESCAPE)), + Self::Stdin(StdinKind::Explicit) => Some(STDIN_REPR.into()), Self::Stdin(StdinKind::Implicit) => None, } } - fn path_display(&self, quoting_style: &QuotingStyle) -> String { + fn path_display(&self) -> String { match self { - Self::Path(path) => escape_name(&path.clone().into_os_string(), quoting_style), - Self::Stdin(_) => escape_name(OsStr::new("standard input"), quoting_style), + Self::Path(path) => escape_name(OsStr::new(&path), QS_ESCAPE), + Self::Stdin(_) => escape_name(OsStr::new("standard input"), QS_ESCAPE), } } } /// When to show the "total" line -#[derive(PartialEq)] +#[derive(Clone, Copy, Default, PartialEq)] enum TotalWhen { + #[default] Auto, Always, Only, Never, } -impl From<&String> for TotalWhen { - fn from(s: &String) -> Self { +impl> From for TotalWhen { + fn from(s: T) -> Self { match s.as_ref() { "auto" => Self::Auto, "always" => Self::Always, @@ -261,11 +271,11 @@ pub fn uu_app() -> Command { Arg::new(options::FILES0_FROM) .long(options::FILES0_FROM) .value_name("F") - .help( - "read input from the files specified by - NUL-terminated names in file F; - If F is - then read names from standard input", - ) + .help(concat!( + "read input from the files specified by\n", + " NUL-terminated names in file F;\n", + " If F is - then read names from standard input" + )) .value_hint(clap::ValueHint::FilePath), ) .arg( @@ -286,10 +296,12 @@ pub fn uu_app() -> Command { Arg::new(options::TOTAL) .long(options::TOTAL) .value_parser(["auto", "always", "only", "never"]) - .default_value("auto") - .hide_default_value(true) .value_name("WHEN") - .help("when to print a line with total counts"), + .hide_possible_values(true) + .help(concat!( + "when to print a line with total counts;\n", + " WHEN can be: auto, always, only, never" + )), ) .arg( Arg::new(options::WORDS) @@ -627,28 +639,20 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { CountResult::Interrupted(word_count, error) => { show!(USimpleError::new( 1, - format!( - "{}: {}", - input.path_display(&settings.title_quoting_style), - error - ) + format!("{}: {}", input.path_display(), error) )); word_count } CountResult::Failure(error) => { show!(USimpleError::new( 1, - format!( - "{}: {}", - input.path_display(&settings.title_quoting_style), - error - ) + format!("{}: {}", input.path_display(), error) )); continue; } }; total_word_count += word_count; - let result = word_count.with_title(input.to_title(&settings.title_quoting_style)); + let result = word_count.with_title(input.to_title()); if are_stats_visible { if let Err(err) = print_stats(settings, &result, number_width) { From 38b4825e7f484683ea54da995f1a7529e6b2aced Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Fri, 31 Mar 2023 19:52:52 -0700 Subject: [PATCH 2/6] wc: avoid excess String allocations print_stats will now take advantage of the buffer built into io::stdout(). We can also waste fewer lines on show! by making a helper macro. --- src/uu/wc/src/wc.rs | 103 ++++++++++++++---------------------- src/uu/wc/src/word_count.rs | 16 ------ tests/by-util/test_wc.rs | 5 +- 3 files changed, 42 insertions(+), 82 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index c697cb33b7d..60f2a5800c7 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -17,7 +17,7 @@ use countable::WordCountable; use unicode_width::UnicodeWidthChar; use utf8::{BufReadDecoder, BufReadDecoderError}; use uucore::{format_usage, help_about, help_usage, show}; -use word_count::{TitledWordCount, WordCount}; +use word_count::WordCount; use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; @@ -29,7 +29,7 @@ use std::fs::{self, File}; use std::io::{self, Read, Write}; use std::path::PathBuf; -use uucore::error::{UError, UResult, USimpleError}; +use uucore::error::{FromIo, UError, UResult}; use uucore::quoting_style::{escape_name, QuotingStyle}; /// The minimum character width for formatting counts when reading from stdin. @@ -621,93 +621,72 @@ fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize { fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { let mut total_word_count = WordCount::default(); - let (number_width, are_stats_visible, total_row_title) = - if settings.total_when == TotalWhen::Only { - (1, false, None) - } else { - let number_width = compute_number_width(inputs, settings); - let title = Some(String::from("total")); - - (number_width, true, title) - }; - + let (number_width, are_stats_visible) = match settings.total_when { + TotalWhen::Only => (1, false), + _ => (compute_number_width(inputs, settings), true), + }; let is_total_row_visible = settings.total_when.is_total_row_visible(inputs.len()); for input in inputs { let word_count = match word_count_from_input(input, settings) { CountResult::Success(word_count) => word_count, - CountResult::Interrupted(word_count, error) => { - show!(USimpleError::new( - 1, - format!("{}: {}", input.path_display(), error) - )); + CountResult::Interrupted(word_count, err) => { + show!(err.map_err_context(|| input.path_display())); word_count } - CountResult::Failure(error) => { - show!(USimpleError::new( - 1, - format!("{}: {}", input.path_display(), error) - )); + CountResult::Failure(err) => { + show!(err.map_err_context(|| input.path_display())); continue; } }; total_word_count += word_count; - let result = word_count.with_title(input.to_title()); - if are_stats_visible { - if let Err(err) = print_stats(settings, &result, number_width) { - show!(USimpleError::new( - 1, - format!( - "failed to print result for {}: {}", - &result.title.unwrap_or_else(|| String::from("")), - err, - ), - )); + let maybe_title = input.to_title(); + let maybe_title_str = maybe_title.as_deref(); + if let Err(err) = print_stats(settings, &word_count, maybe_title_str, number_width) { + let title = maybe_title_str.unwrap_or(""); + show!(err.map_err_context(|| format!("failed to print result for {title}"))); } } } if is_total_row_visible { - let total_result = total_word_count.with_title(total_row_title); - if let Err(err) = print_stats(settings, &total_result, number_width) { - show!(USimpleError::new( - 1, - format!("failed to print total: {err}") - )); + let title = are_stats_visible.then_some("total"); + if let Err(err) = print_stats(settings, &total_word_count, title, number_width) { + show!(err.map_err_context(|| "failed to print total".into())); } } - // Although this appears to be returning `Ok`, the exit code may - // have been set to a non-zero value by a call to `show!()` above. + // Although this appears to be returning `Ok`, the exit code may have been set to a non-zero + // value by a call to `record_error!()` above. Ok(()) } fn print_stats( settings: &Settings, - result: &TitledWordCount, + result: &WordCount, + title: Option<&str>, number_width: usize, ) -> io::Result<()> { - let mut columns = Vec::new(); - - if settings.show_lines { - columns.push(format!("{:1$}", result.count.lines, number_width)); - } - if settings.show_words { - columns.push(format!("{:1$}", result.count.words, number_width)); - } - if settings.show_chars { - columns.push(format!("{:1$}", result.count.chars, number_width)); - } - if settings.show_bytes { - columns.push(format!("{:1$}", result.count.bytes, number_width)); - } - if settings.show_max_line_length { - columns.push(format!("{:1$}", result.count.max_line_length, number_width)); - } - if let Some(title) = &result.title { - columns.push(title.clone()); + let mut stdout = io::stdout().lock(); + + let maybe_cols = [ + (settings.show_lines, result.lines), + (settings.show_words, result.words), + (settings.show_chars, result.chars), + (settings.show_bytes, result.bytes), + (settings.show_max_line_length, result.max_line_length), + ]; + + let mut space = ""; + for (_, num) in maybe_cols.iter().filter(|(show, _)| *show) { + write!(stdout, "{space}{num:number_width$}")?; + space = " "; } - writeln!(io::stdout().lock(), "{}", columns.join(" ")) + if let Some(title) = title { + writeln!(stdout, "{space}{title}") + } else { + writeln!(stdout) + } } diff --git a/src/uu/wc/src/word_count.rs b/src/uu/wc/src/word_count.rs index b2dd7045024..cf839175f96 100644 --- a/src/uu/wc/src/word_count.rs +++ b/src/uu/wc/src/word_count.rs @@ -29,19 +29,3 @@ impl AddAssign for WordCount { *self = *self + other; } } - -impl WordCount { - pub fn with_title(self, title: Option) -> TitledWordCount { - TitledWordCount { title, count: self } - } -} - -/// This struct supplements the actual word count with an optional title that is -/// displayed to the user at the end of the program. -/// The reason we don't simply include title in the `WordCount` struct is that -/// it would result in unnecessary copying of `String`. -#[derive(Debug, Default, Clone)] -pub struct TitledWordCount { - pub title: Option, - pub count: WordCount, -} diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 64e3b70da83..1ce27f18d8b 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -375,7 +375,7 @@ fn test_read_from_directory_error() { #[cfg(not(windows))] const STDERR: &str = ".: Is a directory"; #[cfg(windows)] - const STDERR: &str = ".: Access is denied"; + const STDERR: &str = ".: Permission denied"; #[cfg(not(windows))] const STDOUT: &str = " 0 0 0 .\n"; @@ -392,10 +392,7 @@ fn test_read_from_directory_error() { /// Test that getting counts from nonexistent file is an error. #[test] fn test_read_from_nonexistent_file() { - #[cfg(not(windows))] const MSG: &str = "bogusfile: No such file or directory"; - #[cfg(windows)] - const MSG: &str = "bogusfile: The system cannot find the file specified"; new_ucmd!() .args(&["bogusfile"]) .fails() From 36b45e2249dcd6e9ec9d60dc7131147a888c6e57 Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Fri, 31 Mar 2023 22:58:49 -0700 Subject: [PATCH 3/6] wc: touch up errors, quote more like GNU WcError should not hold Strings which are clones of `&'static str`s. thiserror provides a convenient API to declare errors with their Display messages. wc quotes the --files0-from source slightly differently when reporting errors about empty file names. --- Cargo.lock | 1 + src/uu/wc/Cargo.toml | 1 + src/uu/wc/src/wc.rs | 160 ++++++++++++++++++++++++------------------- 3 files changed, 93 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a9f9532b65a..c219fa3bccf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3352,6 +3352,7 @@ dependencies = [ "clap", "libc", "nix", + "thiserror", "unicode-width", "uucore", ] diff --git a/src/uu/wc/Cargo.toml b/src/uu/wc/Cargo.toml index 1b1bd41d310..2b42affb58e 100644 --- a/src/uu/wc/Cargo.toml +++ b/src/uu/wc/Cargo.toml @@ -18,6 +18,7 @@ path = "src/wc.rs" clap = { workspace=true } uucore = { workspace=true, features=["pipes"] } bytecount = { workspace=true } +thiserror = { workspace=true } unicode-width = { workspace=true } [target.'cfg(unix)'.dependencies] diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 60f2a5800c7..00f8988683c 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -11,26 +11,33 @@ mod count_fast; mod countable; mod utf8; mod word_count; -use clap::builder::ValueParser; -use count_fast::{count_bytes_chars_and_lines_fast, count_bytes_fast}; -use countable::WordCountable; + +use std::{ + borrow::Cow, + cmp::max, + ffi::{OsStr, OsString}, + fs::{self, File}, + io::{self, Read, Write}, + path::{Path, PathBuf}, +}; + +use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgMatches, Command}; +use thiserror::Error; use unicode_width::UnicodeWidthChar; use utf8::{BufReadDecoder, BufReadDecoderError}; -use uucore::{format_usage, help_about, help_usage, show}; -use word_count::WordCount; - -use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; -use std::cmp::max; -use std::error::Error; -use std::ffi::{OsStr, OsString}; -use std::fmt::Display; -use std::fs::{self, File}; -use std::io::{self, Read, Write}; -use std::path::PathBuf; +use uucore::{ + error::{FromIo, UError, UResult}, + format_usage, help_about, help_usage, + quoting_style::{escape_name, QuotingStyle}, + show, +}; -use uucore::error::{FromIo, UError, UResult}; -use uucore::quoting_style::{escape_name, QuotingStyle}; +use crate::{ + count_fast::{count_bytes_chars_and_lines_fast, count_bytes_fast}, + countable::WordCountable, + word_count::WordCount, +}; /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; @@ -41,7 +48,7 @@ struct Settings { show_lines: bool, show_words: bool, show_max_line_length: bool, - files0_from_stdin_mode: bool, + files0_from_path: Option, total_when: TotalWhen, } @@ -54,7 +61,7 @@ impl Default for Settings { show_lines: true, show_words: true, show_max_line_length: false, - files0_from_stdin_mode: false, + files0_from_path: None, total_when: TotalWhen::default(), } } @@ -62,10 +69,9 @@ impl Default for Settings { impl Settings { fn new(matches: &ArgMatches) -> Self { - let files0_from_stdin_mode = match matches.get_one::(options::FILES0_FROM) { - Some(files_0_from) => files_0_from == STDIN_REPR, - None => false, - }; + let files0_from_path = matches + .get_one::(options::FILES0_FROM) + .map(Into::into); let total_when = matches .get_one::(options::TOTAL) @@ -78,7 +84,7 @@ impl Settings { show_lines: matches.get_flag(options::LINES), show_words: matches.get_flag(options::WORDS), show_max_line_length: matches.get_flag(options::MAX_LINE_LENGTH), - files0_from_stdin_mode, + files0_from_path, total_when, }; @@ -86,7 +92,7 @@ impl Settings { settings } else { Self { - files0_from_stdin_mode, + files0_from_path: settings.files0_from_path, total_when, ..Default::default() } @@ -119,7 +125,6 @@ mod options { pub static TOTAL: &str = "total"; pub static WORDS: &str = "words"; } - static ARG_FILES: &str = "files"; static STDIN_REPR: &str = "-"; @@ -128,12 +133,16 @@ static QS_ESCAPE: &QuotingStyle = &QuotingStyle::Shell { always_quote: false, show_control: false, }; +static QS_QUOTE_ESCAPE: &QuotingStyle = &QuotingStyle::Shell { + escape: true, + always_quote: true, + show_control: false, +}; enum StdinKind { - /// Stdin specified on command-line with "-". + /// Specified on command-line with "-" (STDIN_REPR) Explicit, - - /// Stdin implicitly specified on command-line by not passing any positional argument. + /// Implied by the lack of any arguments Implicit, } @@ -158,18 +167,22 @@ impl From<&OsStr> for Input { impl Input { /// Converts input to title that appears in stats. - fn to_title(&self) -> Option { + fn to_title(&self) -> Option> { match self { - Self::Path(path) => Some(escape_name(path.as_os_str(), QS_ESCAPE)), - Self::Stdin(StdinKind::Explicit) => Some(STDIN_REPR.into()), + Self::Path(path) => Some(match path.to_str() { + Some(s) if !s.contains('\n') => Cow::Borrowed(s), + _ => Cow::Owned(escape_name(path.as_os_str(), QS_ESCAPE)), + }), + Self::Stdin(StdinKind::Explicit) => Some(Cow::Borrowed(STDIN_REPR)), Self::Stdin(StdinKind::Implicit) => None, } } + /// Converts input into the form that appears in errors. fn path_display(&self) -> String { match self { - Self::Path(path) => escape_name(OsStr::new(&path), QS_ESCAPE), - Self::Stdin(_) => escape_name(OsStr::new("standard input"), QS_ESCAPE), + Self::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE), + Self::Stdin(_) => String::from("standard input"), } } } @@ -206,33 +219,33 @@ impl TotalWhen { } } -#[derive(Debug)] +#[derive(Debug, Error)] enum WcError { - FilesDisabled(String), - StdinReprNotAllowed(String), + #[error("file operands cannot be combined with --files0-from")] + FilesDisabled, + #[error("when reading file names from stdin, no file name of '-' allowed")] + StdinReprNotAllowed, + #[error("invalid zero-length file name")] + ZeroLengthFileName, + #[error("{path}:{idx}: invalid zero-length file name")] + ZeroLengthFileNameCtx { path: String, idx: usize }, } -impl UError for WcError { - fn code(&self) -> i32 { - match self { - Self::FilesDisabled(_) | Self::StdinReprNotAllowed(_) => 1, +impl WcError { + fn zero_len(ctx: Option<(&Path, usize)>) -> Self { + match ctx { + Some((path, idx)) => { + let path = escape_name(path.as_os_str(), QS_ESCAPE); + Self::ZeroLengthFileNameCtx { path, idx } + } + None => Self::ZeroLengthFileName, } } - - fn usage(&self) -> bool { - matches!(self, Self::FilesDisabled(_)) - } } -impl Error for WcError {} - -impl Display for WcError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::FilesDisabled(message) | Self::StdinReprNotAllowed(message) => { - write!(f, "{message}") - } - } +impl UError for WcError { + fn usage(&self) -> bool { + matches!(self, Self::FilesDisabled) } } @@ -276,6 +289,7 @@ pub fn uu_app() -> Command { " NUL-terminated names in file F;\n", " If F is - then read names from standard input" )) + .value_parser(ValueParser::os_string()) .value_hint(clap::ValueHint::FilePath), ) .arg( @@ -322,38 +336,39 @@ fn inputs(matches: &ArgMatches) -> UResult> { match matches.get_many::(ARG_FILES) { Some(os_values) => { if matches.contains_id(options::FILES0_FROM) { - return Err(WcError::FilesDisabled( - "file operands cannot be combined with --files0-from".into(), - ) - .into()); + return Err(WcError::FilesDisabled.into()); } Ok(os_values.map(|s| Input::from(s.as_os_str())).collect()) } - None => match matches.get_one::(options::FILES0_FROM) { - Some(files_0_from) => create_paths_from_files0(files_0_from), + None => match matches.get_one::(options::FILES0_FROM) { + Some(files_0_from) => create_paths_from_files0(Path::new(files_0_from)), None => Ok(vec![Input::Stdin(StdinKind::Implicit)]), }, } } -fn create_paths_from_files0(files_0_from: &str) -> UResult> { +fn create_paths_from_files0(files_0_from: &Path) -> UResult> { let mut paths = String::new(); - let read_from_stdin = files_0_from == STDIN_REPR; + let read_from_stdin = files_0_from.as_os_str() == STDIN_REPR; if read_from_stdin { io::stdin().lock().read_to_string(&mut paths)?; } else { - File::open(files_0_from)?.read_to_string(&mut paths)?; + File::open(files_0_from) + .map_err_context(|| { + format!( + "cannot open {} for reading", + escape_name(files_0_from.as_os_str(), QS_QUOTE_ESCAPE) + ) + })? + .read_to_string(&mut paths)?; } let paths: Vec<&str> = paths.split_terminator('\0').collect(); if read_from_stdin && paths.contains(&STDIN_REPR) { - return Err(WcError::StdinReprNotAllowed( - "when reading file names from stdin, no file name of '-' allowed".into(), - ) - .into()); + return Err(WcError::StdinReprNotAllowed.into()); } Ok(paths.iter().map(OsStr::new).map(Input::from).collect()) @@ -589,8 +604,10 @@ fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult { /// is close enough to pass the GNU test suite. fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize { if inputs.is_empty() - || (inputs.len() == 1 && settings.number_enabled() == 1) - || (settings.files0_from_stdin_mode && settings.number_enabled() == 1) + || settings.number_enabled() == 1 + && (inputs.len() == 1 + || settings.files0_from_path.as_ref().map(|p| p.as_os_str()) + == Some(OsStr::new(STDIN_REPR))) { return 1; } @@ -627,7 +644,12 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { }; let is_total_row_visible = settings.total_when.is_total_row_visible(inputs.len()); - for input in inputs { + for (idx, input) in inputs.iter().enumerate() { + if matches!(input, Input::Path(p) if p.as_os_str().is_empty()) { + let err = WcError::zero_len(settings.files0_from_path.as_deref().map(|s| (s, idx))); + show!(err); + continue; + } let word_count = match word_count_from_input(input, settings) { CountResult::Success(word_count) => word_count, CountResult::Interrupted(word_count, err) => { From d58ee5a28a5ffbf7da142a233104d75fe48d96ff Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Mon, 24 Apr 2023 23:41:59 -0700 Subject: [PATCH 4/6] wc: skip String to measure number length Sadly ilog10 isn't available until we use 1.67, but we can get close in the meantime. --- src/uu/wc/src/wc.rs | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 00f8988683c..2ed43e16ae9 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -5,7 +5,7 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// cSpell:ignore wc wc's +// cSpell:ignore ilog wc wc's mod count_fast; mod countable; @@ -632,7 +632,14 @@ fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize { } } - max(minimum_width, total.to_string().len()) + if total == 0 { + minimum_width + } else { + let total_width = (1 + ilog10_u64(total)) + .try_into() + .expect("ilog of a u64 should fit into a usize"); + max(total_width, minimum_width) + } } fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { @@ -712,3 +719,29 @@ fn print_stats( writeln!(stdout) } } + +// TODO: remove and just use usize::ilog10 once the MSRV is >= 1.67. +fn ilog10_u64(mut u: u64) -> u32 { + if u == 0 { + panic!("cannot compute log of 0") + } + let mut log = 0; + if u >= 10_000_000_000 { + log += 10; + u /= 10_000_000_000; + } + if u >= 100_000 { + log += 5; + u /= 100_000; + } + // Rust's standard library in versions >= 1.67 does something even more clever than this, but + // this should work just fine for the time being. + log + match u { + 1..=9 => 0, + 10..=99 => 1, + 100..=999 => 2, + 1000..=9999 => 3, + 10000..=99999 => 4, + _ => unreachable!(), + } +} From c4b53a44b5629b17fb85a0818c803a475a6071ea Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Fri, 31 Mar 2023 23:57:53 -0700 Subject: [PATCH 5/6] wc: make --files0-from work with streams --- src/uu/wc/src/countable.rs | 1 + src/uu/wc/src/wc.rs | 444 +++++++++++++++++++++++++------------ tests/by-util/test_wc.rs | 22 +- 3 files changed, 313 insertions(+), 154 deletions(-) diff --git a/src/uu/wc/src/countable.rs b/src/uu/wc/src/countable.rs index 5596decb3e4..b86b96fa2f1 100644 --- a/src/uu/wc/src/countable.rs +++ b/src/uu/wc/src/countable.rs @@ -28,6 +28,7 @@ impl WordCountable for StdinLock<'_> { self } } + impl WordCountable for File { type Buffered = BufReader; diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 2ed43e16ae9..08d00db188f 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -13,11 +13,12 @@ mod utf8; mod word_count; use std::{ - borrow::Cow, + borrow::{Borrow, Cow}, cmp::max, - ffi::{OsStr, OsString}, + ffi::OsString, fs::{self, File}, - io::{self, Read, Write}, + io::{self, Write}, + iter, path::{Path, PathBuf}, }; @@ -42,17 +43,17 @@ use crate::{ /// The minimum character width for formatting counts when reading from stdin. const MINIMUM_WIDTH: usize = 7; -struct Settings { +struct Settings<'a> { show_bytes: bool, show_chars: bool, show_lines: bool, show_words: bool, show_max_line_length: bool, - files0_from_path: Option, + files0_from: Option>, total_when: TotalWhen, } -impl Default for Settings { +impl Default for Settings<'_> { fn default() -> Self { // Defaults if none of -c, -m, -l, -w, nor -L are specified. Self { @@ -61,15 +62,15 @@ impl Default for Settings { show_lines: true, show_words: true, show_max_line_length: false, - files0_from_path: None, + files0_from: None, total_when: TotalWhen::default(), } } } -impl Settings { - fn new(matches: &ArgMatches) -> Self { - let files0_from_path = matches +impl<'a> Settings<'a> { + fn new(matches: &'a ArgMatches) -> Self { + let files0_from = matches .get_one::(options::FILES0_FROM) .map(Into::into); @@ -84,7 +85,7 @@ impl Settings { show_lines: matches.get_flag(options::LINES), show_words: matches.get_flag(options::WORDS), show_max_line_length: matches.get_flag(options::MAX_LINE_LENGTH), - files0_from_path, + files0_from, total_when, }; @@ -92,7 +93,7 @@ impl Settings { settings } else { Self { - files0_from_path: settings.files0_from_path, + files0_from: settings.files0_from, total_when, ..Default::default() } @@ -139,6 +140,74 @@ static QS_QUOTE_ESCAPE: &QuotingStyle = &QuotingStyle::Shell { show_control: false, }; +/// Supported inputs. +#[derive(Debug)] +enum Inputs<'a> { + /// Default Standard input, i.e. no arguments. + Stdin, + /// Files; "-" means stdin, possibly multiple times! + Paths(Vec>), + /// --files0-from; "-" means stdin. + Files0From(Input<'a>), +} + +impl<'a> Inputs<'a> { + fn new(matches: &'a ArgMatches) -> UResult { + let arg_files = matches.get_many::(ARG_FILES); + let files0_from = matches.get_one::(options::FILES0_FROM); + + match (arg_files, files0_from) { + (None, None) => Ok(Self::Stdin), + (Some(files), None) => Ok(Self::Paths(files.map(Into::into).collect())), + (None, Some(path)) => { + // If path is a file, and the file isn't too large, we'll load it ahead + // of time. Every path within the file will have its length checked to + // hopefully better align the output columns. + let input = Input::from(path); + match input.try_as_files0()? { + Some(paths) => Ok(Self::Paths(paths)), + None => Ok(Self::Files0From(input)), + } + } + (Some(_), Some(_)) => Err(WcError::FilesDisabled.into()), + } + } + + // Creates an iterator which yields values borrowed from the command line arguments. + // Returns an error if the file specified in --files0-from cannot be opened. + fn try_iter( + &'a self, + settings: &'a Settings<'a>, + ) -> UResult>> { + let base: Box> = match self { + Self::Stdin => Box::new(iter::once(Ok(Input::Stdin(StdinKind::Implicit)))), + Self::Paths(inputs) => Box::new(inputs.iter().map(|i| Ok(i.as_borrowed()))), + Self::Files0From(input) => match input { + Input::Path(path) => Box::new(files0_iter_file(path)?), + Input::Stdin(_) => Box::new(files0_iter_stdin()), + }, + }; + + // The index of each yielded item must be tracked for error reporting. + let mut with_idx = base.enumerate(); + let files0_from_path = settings.files0_from.as_ref().map(|i| i.as_borrowed()); + + let iter = iter::from_fn(move || { + let (idx, next) = with_idx.next()?; + match next { + // filter zero length file names... + Ok(Input::Path(p)) if p.as_os_str().is_empty() => Some(Err({ + let maybe_ctx = files0_from_path.as_ref().map(|p| (p, idx)); + WcError::zero_len(maybe_ctx).into() + })), + _ => Some(next), + } + }); + Ok(iter) + } +} + +#[derive(Clone, Copy, Debug)] enum StdinKind { /// Specified on command-line with "-" (STDIN_REPR) Explicit, @@ -146,26 +215,44 @@ enum StdinKind { Implicit, } -/// Supported inputs. -enum Input { - /// A regular file. - Path(PathBuf), - - /// Standard input. +/// Represents a single input, either to be counted or processed for other files names via +/// --files0-from. +#[derive(Debug)] +enum Input<'a> { + Path(Cow<'a, Path>), Stdin(StdinKind), } -impl From<&OsStr> for Input { - fn from(input: &OsStr) -> Self { - if input == STDIN_REPR { +impl From for Input<'_> { + fn from(p: PathBuf) -> Self { + if p.as_os_str() == STDIN_REPR { + Self::Stdin(StdinKind::Explicit) + } else { + Self::Path(Cow::Owned(p)) + } + } +} + +impl<'a, T: AsRef + ?Sized> From<&'a T> for Input<'a> { + fn from(p: &'a T) -> Self { + let p = p.as_ref(); + if p.as_os_str() == STDIN_REPR { Self::Stdin(StdinKind::Explicit) } else { - Self::Path(input.into()) + Self::Path(Cow::Borrowed(p)) } } } -impl Input { +impl<'a> Input<'a> { + /// Translates Path(Cow::Owned(_)) to Path(Cow::Borrowed(_)). + fn as_borrowed(&'a self) -> Self { + match self { + Self::Path(p) => Self::Path(Cow::Borrowed(p.borrow())), + Self::Stdin(k) => Self::Stdin(*k), + } + } + /// Converts input to title that appears in stats. fn to_title(&self) -> Option> { match self { @@ -185,6 +272,56 @@ impl Input { Self::Stdin(_) => String::from("standard input"), } } + + /// When given --files0-from, we may be given a path or stdin. Either may be a stream or + /// a regular file. If given a file less than 10 MiB, it will be consumed and turned into + /// a Vec of Input::Paths which can be scanned to determine the widths of the columns that + /// will ultimately be printed. + fn try_as_files0(&self) -> UResult>>> { + match self { + Self::Path(path) => match fs::metadata(path) { + Ok(meta) if meta.is_file() && meta.len() <= (10 << 20) => Ok(Some( + files0_iter_file(path)?.collect::, _>>()?, + )), + _ => Ok(None), + }, + Self::Stdin(_) if is_stdin_small_file() => { + Ok(Some(files0_iter_stdin().collect::, _>>()?)) + } + Self::Stdin(_) => Ok(None), + } + } +} + +#[cfg(unix)] +fn is_stdin_small_file() -> bool { + use std::os::unix::io::{AsRawFd, FromRawFd}; + // Safety: we'll rely on Rust to give us a valid RawFd for stdin with which we can attempt to + // open a File, but only for the sake of fetching .metadata(). ManuallyDrop will ensure we + // don't do anything else to the FD if anything unexpected happens. + let f = std::mem::ManuallyDrop::new(unsafe { File::from_raw_fd(io::stdin().as_raw_fd()) }); + matches!(f.metadata(), Ok(meta) if meta.is_file() && meta.len() <= (10 << 20)) +} + +#[cfg(windows)] +fn is_stdin_small_file() -> bool { + use std::os::windows::io::{AsRawHandle, FromRawHandle}; + // Safety: we'll rely on Rust to give us a valid RawHandle for stdin with which we can attempt + // to open a File, but only for the sake of fetching .metadata(). ManuallyDrop will ensure we + // don't do anything else to the FD if anything unexpected happens. + let f = std::mem::ManuallyDrop::new(unsafe { + let h = io::stdin().as_raw_handle(); + if h.is_null() { + return false; + } + File::from_raw_handle(h) + }); + matches!(f.metadata(), Ok(meta) if meta.is_file() && meta.len() <= (10 << 20)) +} + +#[cfg(not(any(unix, windows)))] +fn is_stdin_small_file() -> bool { + false } /// When to show the "total" line @@ -228,14 +365,17 @@ enum WcError { #[error("invalid zero-length file name")] ZeroLengthFileName, #[error("{path}:{idx}: invalid zero-length file name")] - ZeroLengthFileNameCtx { path: String, idx: usize }, + ZeroLengthFileNameCtx { path: Cow<'static, str>, idx: usize }, } impl WcError { - fn zero_len(ctx: Option<(&Path, usize)>) -> Self { + fn zero_len(ctx: Option<(&Input, usize)>) -> Self { match ctx { - Some((path, idx)) => { - let path = escape_name(path.as_os_str(), QS_ESCAPE); + Some((input, idx)) => { + let path = match input { + Input::Stdin(_) => STDIN_REPR.into(), + Input::Path(path) => escape_name(path.as_os_str(), QS_ESCAPE).into(), + }; Self::ZeroLengthFileNameCtx { path, idx } } None => Self::ZeroLengthFileName, @@ -253,9 +393,8 @@ impl UError for WcError { pub fn uumain(args: impl uucore::Args) -> UResult<()> { let matches = uu_app().try_get_matches_from(args)?; - let inputs = inputs(&matches)?; - let settings = Settings::new(&matches); + let inputs = Inputs::new(&matches)?; wc(&inputs, &settings) } @@ -332,48 +471,6 @@ pub fn uu_app() -> Command { ) } -fn inputs(matches: &ArgMatches) -> UResult> { - match matches.get_many::(ARG_FILES) { - Some(os_values) => { - if matches.contains_id(options::FILES0_FROM) { - return Err(WcError::FilesDisabled.into()); - } - - Ok(os_values.map(|s| Input::from(s.as_os_str())).collect()) - } - None => match matches.get_one::(options::FILES0_FROM) { - Some(files_0_from) => create_paths_from_files0(Path::new(files_0_from)), - None => Ok(vec![Input::Stdin(StdinKind::Implicit)]), - }, - } -} - -fn create_paths_from_files0(files_0_from: &Path) -> UResult> { - let mut paths = String::new(); - let read_from_stdin = files_0_from.as_os_str() == STDIN_REPR; - - if read_from_stdin { - io::stdin().lock().read_to_string(&mut paths)?; - } else { - File::open(files_0_from) - .map_err_context(|| { - format!( - "cannot open {} for reading", - escape_name(files_0_from.as_os_str(), QS_QUOTE_ESCAPE) - ) - })? - .read_to_string(&mut paths)?; - } - - let paths: Vec<&str> = paths.split_terminator('\0').collect(); - - if read_from_stdin && paths.contains(&STDIN_REPR) { - return Err(WcError::StdinReprNotAllowed.into()); - } - - Ok(paths.iter().map(OsStr::new).map(Input::from).collect()) -} - fn word_count_from_reader( mut reader: T, settings: &Settings, @@ -555,109 +652,166 @@ enum CountResult { Failure(io::Error), } -/// If we fail opening a file we only show the error. If we fail reading it -/// we show a count for what we managed to read. +/// If we fail opening a file, we only show the error. If we fail reading the +/// file, we show a count for what we managed to read. /// -/// Therefore the reading implementations always return a total and sometimes +/// Therefore, the reading implementations always return a total and sometimes /// return an error: (WordCount, Option). -fn word_count_from_input(input: &Input, settings: &Settings) -> CountResult { - match input { - Input::Stdin(_) => { - let stdin = io::stdin(); - let stdin_lock = stdin.lock(); - let count = word_count_from_reader(stdin_lock, settings); - match count { - (total, Some(error)) => CountResult::Interrupted(total, error), - (total, None) => CountResult::Success(total), - } - } +fn word_count_from_input(input: &Input<'_>, settings: &Settings) -> CountResult { + let (total, maybe_err) = match input { + Input::Stdin(_) => word_count_from_reader(io::stdin().lock(), settings), Input::Path(path) => match File::open(path) { - Err(error) => CountResult::Failure(error), - Ok(file) => match word_count_from_reader(file, settings) { - (total, Some(error)) => CountResult::Interrupted(total, error), - (total, None) => CountResult::Success(total), - }, + Ok(f) => word_count_from_reader(f, settings), + Err(err) => return CountResult::Failure(err), }, + }; + match maybe_err { + None => CountResult::Success(total), + Some(err) => CountResult::Interrupted(total, err), } } /// Compute the number of digits needed to represent all counts in all inputs. /// -/// `inputs` may include zero or more [`Input::Stdin`] entries, each of -/// which represents reading from `stdin`. The presence of any such -/// entry causes this function to return a width that is at least -/// [`MINIMUM_WIDTH`]. +/// For [`Inputs::Stdin`], [`MINIMUM_WIDTH`] is returned, unless there is only one counter number +/// to be printed, in which case 1 is returned. /// -/// If `input` is empty, or if only one number needs to be printed (for just -/// one file) then this function is optimized to return 1 without making any -/// calls to get file metadata. +/// For [`Inputs::Files0From`], [`MINIMUM_WIDTH`] is returned. /// -/// If file metadata could not be read from any of the [`Input::Path`] input, -/// that input does not affect number width computation +/// An [`Inputs::Paths`] may include zero or more "-" entries, each of which represents reading +/// from `stdin`. The presence of any such entry causes this function to return a width that is at +/// least [`MINIMUM_WIDTH`]. /// -/// Otherwise, the file sizes in the file metadata are summed and the number of -/// digits in that total size is returned as the number width +/// If an [`Inputs::Paths`] contains only one path and only one number needs to be printed then +/// this function is optimized to return 1 without making any calls to get file metadata. /// -/// To mirror GNU wc's behavior a special case is added. If --files0-from is -/// used and input is read from stdin and there is only one calculation enabled -/// columns will not be aligned. This is not exactly GNU wc's behavior, but it -/// is close enough to pass the GNU test suite. -fn compute_number_width(inputs: &[Input], settings: &Settings) -> usize { - if inputs.is_empty() - || settings.number_enabled() == 1 - && (inputs.len() == 1 - || settings.files0_from_path.as_ref().map(|p| p.as_os_str()) - == Some(OsStr::new(STDIN_REPR))) - { - return 1; - } - - let mut minimum_width = 1; - let mut total = 0; - - for input in inputs { - match input { - Input::Stdin(_) => { - minimum_width = MINIMUM_WIDTH; +/// If file metadata could not be read from any of the [`Input::Path`] input, that input does not +/// affect number width computation. Otherwise, the file sizes from the files' metadata are summed +/// and the number of digits in that total size is returned. +fn compute_number_width(inputs: &Inputs, settings: &Settings) -> usize { + match inputs { + Inputs::Stdin if settings.number_enabled() == 1 => 1, + Inputs::Stdin => MINIMUM_WIDTH, + Inputs::Files0From(_) => 1, + Inputs::Paths(inputs) => { + if settings.number_enabled() == 1 && inputs.len() == 1 { + return 1; } - Input::Path(path) => { - if let Ok(meta) = fs::metadata(path) { - if meta.is_file() { - total += meta.len(); - } else { - minimum_width = MINIMUM_WIDTH; + + let mut minimum_width = 1; + let mut total: u64 = 0; + for input in inputs.iter() { + match input { + Input::Stdin(_) => minimum_width = MINIMUM_WIDTH, + Input::Path(path) => { + if let Ok(meta) = fs::metadata(path) { + if meta.is_file() { + total += meta.len(); + } else { + minimum_width = MINIMUM_WIDTH; + } + } } } } + + if total == 0 { + minimum_width + } else { + let total_width = (1 + ilog10_u64(total)) + .try_into() + .expect("ilog of a u64 should fit into a usize"); + max(total_width, minimum_width) + } } } +} - if total == 0 { - minimum_width - } else { - let total_width = (1 + ilog10_u64(total)) - .try_into() - .expect("ilog of a u64 should fit into a usize"); - max(total_width, minimum_width) +type InputIterItem<'a> = Result, Box>; + +/// To be used with `--files0-from=-`, this applies a filter on the results of files0_iter to +/// translate '-' into the appropriate error. +fn files0_iter_stdin<'a>() -> impl Iterator> { + files0_iter(io::stdin().lock(), STDIN_REPR.into()).map(|i| match i { + Ok(Input::Stdin(_)) => Err(WcError::StdinReprNotAllowed.into()), + _ => i, + }) +} + +fn files0_iter_file<'a>(path: &Path) -> UResult>> { + match File::open(path) { + Ok(f) => Ok(files0_iter(f, path.into())), + Err(e) => Err(e.map_err_context(|| { + format!( + "cannot open {} for reading", + escape_name(path.as_os_str(), QS_QUOTE_ESCAPE) + ) + })), } } -fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { +fn files0_iter<'a>( + r: impl io::Read + 'static, + err_path: OsString, +) -> impl Iterator> { + use std::io::BufRead; + let mut i = Some( + io::BufReader::new(r) + .split(b'\0') + .map(move |res| match res { + Ok(p) if p == STDIN_REPR.as_bytes() => Ok(Input::Stdin(StdinKind::Explicit)), + Ok(p) => { + // On Unix systems, OsStrings are just strings of bytes, not necessarily UTF-8. + #[cfg(unix)] + { + use std::os::unix::ffi::OsStringExt; + Ok(Input::Path(PathBuf::from(OsString::from_vec(p)).into())) + } + + // ...Windows does not, we must go through Strings. + #[cfg(not(unix))] + { + let s = String::from_utf8(p) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + Ok(Input::Path(PathBuf::from(s).into())) + } + } + Err(e) => Err(e.map_err_context(|| { + format!("{}: read error", escape_name(&err_path, QS_ESCAPE)) + }) as Box), + }), + ); + // Loop until there is an error; yield that error and then nothing else. + std::iter::from_fn(move || { + let next = i.as_mut().and_then(Iterator::next); + if matches!(next, Some(Err(_)) | None) { + i = None; + } + next + }) +} + +fn wc(inputs: &Inputs, settings: &Settings) -> UResult<()> { let mut total_word_count = WordCount::default(); + let mut num_inputs: usize = 0; let (number_width, are_stats_visible) = match settings.total_when { TotalWhen::Only => (1, false), _ => (compute_number_width(inputs, settings), true), }; - let is_total_row_visible = settings.total_when.is_total_row_visible(inputs.len()); - for (idx, input) in inputs.iter().enumerate() { - if matches!(input, Input::Path(p) if p.as_os_str().is_empty()) { - let err = WcError::zero_len(settings.files0_from_path.as_deref().map(|s| (s, idx))); - show!(err); - continue; - } - let word_count = match word_count_from_input(input, settings) { + for maybe_input in inputs.try_iter(settings)? { + num_inputs += 1; + + let input = match maybe_input { + Ok(input) => input, + Err(err) => { + show!(err); + continue; + } + }; + + let word_count = match word_count_from_input(&input, settings) { CountResult::Success(word_count) => word_count, CountResult::Interrupted(word_count, err) => { show!(err.map_err_context(|| input.path_display())); @@ -679,7 +833,7 @@ fn wc(inputs: &[Input], settings: &Settings) -> UResult<()> { } } - if is_total_row_visible { + if settings.total_when.is_total_row_visible(num_inputs) { let title = are_stats_visible.then_some("total"); if let Err(err) = print_stats(settings, &total_word_count, title, number_width) { show!(err.map_err_context(|| "failed to print total".into())); diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 1ce27f18d8b..01c36b7c0d9 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -424,10 +424,12 @@ fn test_files0_from() { new_ucmd!() .args(&["--files0-from=files0_list.txt"]) .run() - .stdout_is( - " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ - alice_in_wonderland.txt\n 36 370 2189 total\n", - ); + .stdout_is(concat!( + " 13 109 772 lorem_ipsum.txt\n", + " 18 204 1115 moby_dick.txt\n", + " 5 57 302 alice_in_wonderland.txt\n", + " 36 370 2189 total\n", + )); } #[test] @@ -436,7 +438,7 @@ fn test_files0_from_with_stdin() { .args(&["--files0-from=-"]) .pipe_in("lorem_ipsum.txt") .run() - .stdout_is(" 13 109 772 lorem_ipsum.txt\n"); + .stdout_is("13 109 772 lorem_ipsum.txt\n"); } #[test] @@ -445,10 +447,12 @@ fn test_files0_from_with_stdin_in_file() { .args(&["--files0-from=files0_list_with_stdin.txt"]) .pipe_in_fixture("alice_in_wonderland.txt") .run() - .stdout_is( - " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ - -\n 36 370 2189 total\n", - ); + .stdout_is(concat!( + " 13 109 772 lorem_ipsum.txt\n", + " 18 204 1115 moby_dick.txt\n", + " 5 57 302 -\n", + " 36 370 2189 total\n", + )); } #[test] From e5b46ea3eb2a5bd5c17b8dd0ef2af476bd8f5842 Mon Sep 17 00:00:00 2001 From: Jed Denlea Date: Sat, 29 Apr 2023 19:47:49 -0700 Subject: [PATCH 6/6] wc: more tests and fixes My previous commits meant to bring our wc's output and behavior in line with GNU's. There should be tests that check for these changes! I found a stupid bug in my own changes, I was not adding 1 to the indexes produced by .enumerate() when printing errors. --- src/uu/wc/src/wc.rs | 26 +-- tests/by-util/test_wc.rs | 203 ++++++++++++++++-- tests/fixtures/wc/alice in wonderland.txt | 5 + tests/fixtures/wc/dir with spaces/.keep | 0 tests/fixtures/wc/files0 with nonexistent.txt | Bin 0 -> 94 bytes 5 files changed, 199 insertions(+), 35 deletions(-) create mode 100644 tests/fixtures/wc/alice in wonderland.txt create mode 100644 tests/fixtures/wc/dir with spaces/.keep create mode 100644 tests/fixtures/wc/files0 with nonexistent.txt diff --git a/src/uu/wc/src/wc.rs b/src/uu/wc/src/wc.rs index 08d00db188f..9fb8ca7a637 100644 --- a/src/uu/wc/src/wc.rs +++ b/src/uu/wc/src/wc.rs @@ -188,9 +188,9 @@ impl<'a> Inputs<'a> { }, }; - // The index of each yielded item must be tracked for error reporting. - let mut with_idx = base.enumerate(); - let files0_from_path = settings.files0_from.as_ref().map(|i| i.as_borrowed()); + // The 1-based index of each yielded item must be tracked for error reporting. + let mut with_idx = base.enumerate().map(|(i, v)| (i + 1, v)); + let files0_from_path = settings.files0_from.as_ref().map(|p| p.as_borrowed()); let iter = iter::from_fn(move || { let (idx, next) = with_idx.next()?; @@ -303,23 +303,9 @@ fn is_stdin_small_file() -> bool { matches!(f.metadata(), Ok(meta) if meta.is_file() && meta.len() <= (10 << 20)) } -#[cfg(windows)] -fn is_stdin_small_file() -> bool { - use std::os::windows::io::{AsRawHandle, FromRawHandle}; - // Safety: we'll rely on Rust to give us a valid RawHandle for stdin with which we can attempt - // to open a File, but only for the sake of fetching .metadata(). ManuallyDrop will ensure we - // don't do anything else to the FD if anything unexpected happens. - let f = std::mem::ManuallyDrop::new(unsafe { - let h = io::stdin().as_raw_handle(); - if h.is_null() { - return false; - } - File::from_raw_handle(h) - }); - matches!(f.metadata(), Ok(meta) if meta.is_file() && meta.len() <= (10 << 20)) -} - -#[cfg(not(any(unix, windows)))] +#[cfg(not(unix))] +// Windows presents a piped stdin as a "normal file" with a length equal to however many bytes +// have been buffered at the time it's checked. To be safe, we must never assume it's a file. fn is_stdin_small_file() -> bool { false } diff --git a/tests/by-util/test_wc.rs b/tests/by-util/test_wc.rs index 01c36b7c0d9..aba5ed350a6 100644 --- a/tests/by-util/test_wc.rs +++ b/tests/by-util/test_wc.rs @@ -268,12 +268,16 @@ fn test_multiple_default() { "lorem_ipsum.txt", "moby_dick.txt", "alice_in_wonderland.txt", + "alice in wonderland.txt", ]) .run() - .stdout_is( - " 13 109 772 lorem_ipsum.txt\n 18 204 1115 moby_dick.txt\n 5 57 302 \ - alice_in_wonderland.txt\n 36 370 2189 total\n", - ); + .stdout_is(concat!( + " 13 109 772 lorem_ipsum.txt\n", + " 18 204 1115 moby_dick.txt\n", + " 5 57 302 alice_in_wonderland.txt\n", + " 5 57 302 alice in wonderland.txt\n", + " 41 427 2491 total\n", + )); } /// Test for an empty file. @@ -352,17 +356,24 @@ fn test_file_bytes_dictate_width() { new_ucmd!() .args(&["-lwc", "alice_in_wonderland.txt", "lorem_ipsum.txt"]) .run() - .stdout_is( - " 5 57 302 alice_in_wonderland.txt\n 13 109 772 \ - lorem_ipsum.txt\n 18 166 1074 total\n", - ); + .stdout_is(concat!( + " 5 57 302 alice_in_wonderland.txt\n", + " 13 109 772 lorem_ipsum.txt\n", + " 18 166 1074 total\n", + )); // . is a directory, so minimum_width should get set to 7 #[cfg(not(windows))] - const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 \ - .\n 0 0 0 total\n"; + const STDOUT: &str = concat!( + " 0 0 0 emptyfile.txt\n", + " 0 0 0 .\n", + " 0 0 0 total\n", + ); #[cfg(windows)] - const STDOUT: &str = " 0 0 0 emptyfile.txt\n 0 0 0 total\n"; + const STDOUT: &str = concat!( + " 0 0 0 emptyfile.txt\n", + " 0 0 0 total\n", + ); new_ucmd!() .args(&["-lwc", "emptyfile.txt", "."]) .run() @@ -392,12 +403,10 @@ fn test_read_from_directory_error() { /// Test that getting counts from nonexistent file is an error. #[test] fn test_read_from_nonexistent_file() { - const MSG: &str = "bogusfile: No such file or directory"; new_ucmd!() .args(&["bogusfile"]) .fails() - .stderr_contains(MSG) - .stdout_is(""); + .stderr_only("wc: bogusfile: No such file or directory\n"); } #[test] @@ -421,15 +430,30 @@ fn test_files0_disabled_files_argument() { #[test] fn test_files0_from() { + // file new_ucmd!() .args(&["--files0-from=files0_list.txt"]) .run() + .success() .stdout_is(concat!( " 13 109 772 lorem_ipsum.txt\n", " 18 204 1115 moby_dick.txt\n", " 5 57 302 alice_in_wonderland.txt\n", " 36 370 2189 total\n", )); + + // stream + new_ucmd!() + .args(&["--files0-from=-"]) + .pipe_in_fixture("files0_list.txt") + .run() + .success() + .stdout_is(concat!( + "13 109 772 lorem_ipsum.txt\n", + "18 204 1115 moby_dick.txt\n", + "5 57 302 alice_in_wonderland.txt\n", + "36 370 2189 total\n", + )); } #[test] @@ -450,7 +474,7 @@ fn test_files0_from_with_stdin_in_file() { .stdout_is(concat!( " 13 109 772 lorem_ipsum.txt\n", " 18 204 1115 moby_dick.txt\n", - " 5 57 302 -\n", + " 5 57 302 -\n", // alice_in_wonderland.txt " 36 370 2189 total\n", )); } @@ -531,3 +555,152 @@ fn test_total_only() { .run() .stdout_is("31 313 1887\n"); } + +#[test] +fn test_zero_length_files() { + // A trailing zero is ignored, but otherwise empty file names are an error... + const LIST: &str = "\0moby_dick.txt\0\0alice_in_wonderland.txt\0\0lorem_ipsum.txt\0"; + + // Try with and without the last \0 + for l in [LIST.len(), LIST.len() - 1] { + new_ucmd!() + .args(&["--files0-from=-"]) + .pipe_in(&LIST[..l]) + .run() + .failure() + .stdout_is(concat!( + "18 204 1115 moby_dick.txt\n", + "5 57 302 alice_in_wonderland.txt\n", + "13 109 772 lorem_ipsum.txt\n", + "36 370 2189 total\n", + )) + .stderr_is(concat!( + "wc: -:1: invalid zero-length file name\n", + "wc: -:3: invalid zero-length file name\n", + "wc: -:5: invalid zero-length file name\n", + )); + } + + // But, just as important, a zero-length file name may still be at the end... + new_ucmd!() + .args(&["--files0-from=-"]) + .pipe_in( + LIST.as_bytes() + .iter() + .chain(b"\0") + .copied() + .collect::>(), + ) + .run() + .failure() + .stdout_is(concat!( + "18 204 1115 moby_dick.txt\n", + "5 57 302 alice_in_wonderland.txt\n", + "13 109 772 lorem_ipsum.txt\n", + "36 370 2189 total\n", + )) + .stderr_is(concat!( + "wc: -:1: invalid zero-length file name\n", + "wc: -:3: invalid zero-length file name\n", + "wc: -:5: invalid zero-length file name\n", + "wc: -:7: invalid zero-length file name\n", + )); +} + +#[test] +fn test_files0_errors_quoting() { + new_ucmd!() + .args(&["--files0-from=files0 with nonexistent.txt"]) + .run() + .failure() + .stderr_is(concat!( + "wc: this_file_does_not_exist.txt: No such file or directory\n", + "wc: 'files0 with nonexistent.txt':2: invalid zero-length file name\n", + "wc: 'this file does not exist.txt': No such file or directory\n", + "wc: \"this files doesn't exist either.txt\": No such file or directory\n", + )) + .stdout_is("0 0 0 total\n"); +} + +#[test] +fn test_files0_progressive_stream() { + use std::process::Stdio; + // You should be able to run wc and have a back-and-forth exchange with wc... + let mut child = new_ucmd!() + .args(&["--files0-from=-"]) + .set_stdin(Stdio::piped()) + .set_stdout(Stdio::piped()) + .set_stderr(Stdio::piped()) + .run_no_wait(); + + macro_rules! chk { + ($fn:ident, $exp:literal) => { + assert_eq!(child.$fn($exp.len()), $exp.as_bytes()); + }; + } + + // File in, count out... + child.write_in("moby_dick.txt\0"); + chk!(stdout_exact_bytes, "18 204 1115 moby_dick.txt\n"); + child.write_in("lorem_ipsum.txt\0"); + chk!(stdout_exact_bytes, "13 109 772 lorem_ipsum.txt\n"); + + // Introduce an error! + child.write_in("\0"); + chk!( + stderr_exact_bytes, + "wc: -:3: invalid zero-length file name\n" + ); + + // wc is quick to forgive, let's move on... + child.write_in("alice_in_wonderland.txt\0"); + chk!(stdout_exact_bytes, "5 57 302 alice_in_wonderland.txt\n"); + + // Fin. + child + .wait() + .expect("wc should finish") + .failure() + .stdout_only("36 370 2189 total\n"); +} + +#[test] +fn files0_from_dir() { + // On Unix, `read(open("."))` fails. On Windows, `open(".")` fails. Thus, the errors happen in + // different contexts. + #[cfg(not(windows))] + macro_rules! dir_err { + ($p:literal) => { + concat!("wc: ", $p, ": read error: Is a directory\n") + }; + } + #[cfg(windows)] + macro_rules! dir_err { + ($p:literal) => { + concat!("wc: cannot open ", $p, " for reading: Permission denied\n") + }; + } + + new_ucmd!() + .args(&["--files0-from=dir with spaces"]) + .fails() + .stderr_only(dir_err!("'dir with spaces'")); + + // Those contexts have different rules about quoting in errors... + #[cfg(windows)] + const DOT_ERR: &str = dir_err!("'.'"); + #[cfg(not(windows))] + const DOT_ERR: &str = dir_err!("."); + new_ucmd!() + .args(&["--files0-from=."]) + .fails() + .stderr_only(DOT_ERR); + + // That also means you cannot `< . wc --files0-from=-` on Windows. + #[cfg(not(windows))] + new_ucmd!() + .args(&["--files0-from=-"]) + .set_stdin(std::fs::File::open(".").unwrap()) + .fails() + .stderr_only(dir_err!("-")); +} diff --git a/tests/fixtures/wc/alice in wonderland.txt b/tests/fixtures/wc/alice in wonderland.txt new file mode 100644 index 00000000000..a95562a1ce6 --- /dev/null +++ b/tests/fixtures/wc/alice in wonderland.txt @@ -0,0 +1,5 @@ +Alice was beginning to get very tired of sitting by +her sister on the bank, and of having nothing to do: once or twice +she had peeped into the book her sister was reading, but it had no +pictures or conversations in it, "and what is the use of a book," +thought Alice "without pictures or conversation?" diff --git a/tests/fixtures/wc/dir with spaces/.keep b/tests/fixtures/wc/dir with spaces/.keep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/fixtures/wc/files0 with nonexistent.txt b/tests/fixtures/wc/files0 with nonexistent.txt new file mode 100644 index 0000000000000000000000000000000000000000..00c00b705b1675852a0f3f72509aed19b4eb851c GIT binary patch literal 94 zcmXTP$SjUe%gjlQPsvX$j?c?4iBGM_EH2S2sVHG!C;`eUfMgXwvI;<11%xbIvKTCz Rrw)}=NX;zCNG$@H005vrBUS(a literal 0 HcmV?d00001