From b0b9658f2ccc8b05df68d5c1b326253be1a648be Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Fri, 8 Apr 2022 21:39:10 +0200 Subject: [PATCH 1/6] du: add support for --exclude and --exclude-from And add an option --verbose (doesn't exist in GNU) --- Cargo.lock | 14 +++ src/uu/du/Cargo.toml | 2 + src/uu/du/src/du.rs | 219 +++++++++++++++++++++++++++++++-------- tests/by-util/test_du.rs | 164 ++++++++++++++++++++++++++++- 4 files changed, 352 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a88d9c28eb4..11cd97a8678 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -898,6 +898,19 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "globset" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd" +dependencies = [ + "aho-corasick", + "bstr", + "fnv", + "log", + "regex", +] + [[package]] name = "half" version = "1.8.2" @@ -2402,6 +2415,7 @@ version = "0.0.13" dependencies = [ "chrono", "clap 3.1.6", + "globset", "uucore", "winapi 0.3.9", ] diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index c0c64f2e774..4d8df521643 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -16,6 +16,8 @@ path = "src/du.rs" [dependencies] chrono = "^0.4.11" +# For the --exclude & --exclude-from options +globset = "0.4" clap = { version = "3.1", features = ["wrap_help", "cargo"] } uucore = { version=">=0.0.11", package="uucore", path="../../uucore" } diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 0690c6299c8..371812b1775 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -11,11 +11,15 @@ extern crate uucore; use chrono::prelude::DateTime; use chrono::Local; use clap::{crate_version, Arg, ArgMatches, Command}; +use globset::{Glob, GlobSet, GlobSetBuilder}; use std::collections::HashSet; use std::env; use std::fs; +use std::fs::File; #[cfg(not(windows))] use std::fs::Metadata; +use std::io::BufRead; +use std::io::BufReader; use std::io::{ErrorKind, Result}; use std::iter; #[cfg(not(windows))] @@ -24,7 +28,6 @@ use std::os::unix::fs::MetadataExt; use std::os::windows::fs::MetadataExt; #[cfg(windows)] use std::os::windows::io::AsRawHandle; -#[cfg(windows)] use std::path::Path; use std::path::PathBuf; use std::str::FromStr; @@ -68,6 +71,9 @@ mod options { pub const ONE_FILE_SYSTEM: &str = "one-file-system"; pub const DEREFERENCE: &str = "dereference"; pub const INODES: &str = "inodes"; + pub const EXCLUDE: &str = "exclude"; + pub const EXCLUDE_FROM: &str = "exclude-from"; + pub const VERBOSE: &str = "verbose"; pub const FILE: &str = "FILE"; } @@ -80,6 +86,12 @@ Otherwise, units default to 1024 bytes (or 512 if POSIXLY_CORRECT is set). SIZE is an integer and optional unit (example: 10M is 10*1024*1024). Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers of 1000). + +PATTERN is based on the globset crate: +https://docs.rs/globset/latest/globset/#syntax +? will match only one character +* will match zero or more characters +{a,b} will match a or b "; const USAGE: &str = "\ {} [OPTION]... [FILE]... @@ -97,6 +109,7 @@ struct Options { one_file_system: bool, dereference: bool, inodes: bool, + verbose: bool, } #[derive(PartialEq, Eq, Hash, Clone, Copy)] @@ -287,6 +300,7 @@ fn du( options: &Options, depth: usize, inodes: &mut HashSet, + glob_exclude: &GlobSet, ) -> Box> { let mut stats = vec![]; let mut futures = vec![]; @@ -307,43 +321,64 @@ fn du( for f in read { match f { - Ok(entry) => match Stat::new(entry.path(), options) { - Ok(this_stat) => { - if let Some(inode) = this_stat.inode { - if inodes.contains(&inode) { + Ok(entry) => { + match Stat::new(entry.path(), options) { + Ok(this_stat) => { + let full_path = this_stat.path.clone().into_os_string(); + if !&glob_exclude.is_empty() + && (glob_exclude.is_match(full_path) + || glob_exclude + .is_match(&entry.file_name().into_string().unwrap())) + { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", &entry.file_name().quote()); + } continue; } - inodes.insert(inode); - } - if this_stat.is_dir { - if options.one_file_system { - if let (Some(this_inode), Some(my_inode)) = - (this_stat.inode, my_stat.inode) - { - if this_inode.dev_id != my_inode.dev_id { - continue; - } + + if let Some(inode) = this_stat.inode { + if inodes.contains(&inode) { + continue; } + inodes.insert(inode); } - futures.push(du(this_stat, options, depth + 1, inodes)); - } else { - my_stat.size += this_stat.size; - my_stat.blocks += this_stat.blocks; - my_stat.inodes += 1; - if options.all { - stats.push(this_stat); + if this_stat.is_dir { + if options.one_file_system { + if let (Some(this_inode), Some(my_inode)) = + (this_stat.inode, my_stat.inode) + { + if this_inode.dev_id != my_inode.dev_id { + continue; + } + } + } + futures.push(du( + this_stat, + options, + depth + 1, + inodes, + glob_exclude, + )); + } else { + my_stat.size += this_stat.size; + my_stat.blocks += this_stat.blocks; + my_stat.inodes += 1; + if options.all { + stats.push(this_stat); + } } } + Err(error) => match error.kind() { + ErrorKind::PermissionDenied => { + let description = format!("cannot access {}", entry.path().quote()); + let error_message = "Permission denied"; + show_error_custom_description!(description, "{}", error_message); + } + _ => show_error!("cannot access {}: {}", entry.path().quote(), error), + }, } - Err(error) => match error.kind() { - ErrorKind::PermissionDenied => { - let description = format!("cannot access {}", entry.path().quote()); - let error_message = "Permission denied"; - show_error_custom_description!(description, "{}", error_message); - } - _ => show_error!("cannot access {}: {}", entry.path().quote(), error), - }, - }, + } Err(error) => show_error!("{}", error), } } @@ -401,6 +436,7 @@ enum DuError { SummarizeDepthConflict(String), InvalidTimeStyleArg(String), InvalidTimeArg(String), + InvalidGlob(String), } impl Display for DuError { @@ -431,6 +467,7 @@ Try '{} --help' for more information.", 'birth' and 'creation' arguments are not supported on this platform.", s.quote() ), + DuError::InvalidGlob(s) => write!(f, "Invalid exclude syntax: {}", s), } } } @@ -443,8 +480,77 @@ impl UError for DuError { Self::InvalidMaxDepthArg(_) | Self::SummarizeDepthConflict(_) | Self::InvalidTimeStyleArg(_) - | Self::InvalidTimeArg(_) => 1, + | Self::InvalidTimeArg(_) + | Self::InvalidGlob(_) => 1, + } + } +} + +// Read a file and return each line in a vector of String +fn file_as_vec(filename: impl AsRef) -> Vec { + let file = File::open(filename).expect("no such file"); + let buf = BufReader::new(file); + + buf.lines() + .map(|l| l.expect("Could not parse line")) + .collect() +} + +// Given the --exclude-from and/or --exclude arguments, returns the globset lists +// to ignore the files +fn get_globset_ignore(matches: &ArgMatches) -> UResult { + let mut excludes_from = if matches.is_present(options::EXCLUDE_FROM) { + match matches.values_of(options::EXCLUDE_FROM) { + Some(all_files) => { + let mut exclusion = Vec::::new(); + // Read the exclude lists from all the files + // and add them into a vector of string + let files: Vec = all_files.clone().map(|v| v.to_owned()).collect(); + for f in files { + exclusion.extend(file_as_vec(&f)); + } + exclusion + } + None => Vec::::new(), + } + } else { + Vec::::new() + }; + + let mut excludes = if matches.is_present(options::EXCLUDE) { + match matches.values_of(options::EXCLUDE) { + Some(v) => { + // Read the various arguments + v.clone().map(|v| v.to_owned()).collect() + } + None => Vec::::new(), + } + } else { + Vec::::new() + }; + + // Merge the two lines + excludes.append(&mut excludes_from); + if !&excludes.is_empty() { + let mut builder = GlobSetBuilder::new(); + // Create the globset of excludes + for f in excludes { + if matches.is_present(options::VERBOSE) { + println!("adding {:?} to the exclude list ", &f); + } + let g = match Glob::new(&f) { + Ok(glob) => glob, + Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()), + }; + builder.add(g); + } + match builder.build() { + // Handle the error. Not sure when this happens + Ok(glob_set) => Ok(glob_set), + Err(err) => Err(DuError::InvalidGlob(err.to_string()).into()), } + } else { + Ok(GlobSet::empty()) } } @@ -470,6 +576,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { one_file_system: matches.is_present(options::ONE_FILE_SYSTEM), dereference: matches.is_present(options::DEREFERENCE), inodes: matches.is_present(options::INODES), + verbose: matches.is_present(options::VERBOSE), }; let files = match matches.value_of(options::FILE) { @@ -524,8 +631,19 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { "\n" }; + let exclude_glob = get_globset_ignore(&matches)?; + let mut grand_total = 0; for path_string in files { + // Skip if we don't want to ignore anything + if !&exclude_glob.is_empty() && exclude_glob.is_match(path_string) { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", path_string.quote()); + } + continue; + } + let path = PathBuf::from(&path_string); match Stat::new(path, &options) { Ok(stat) => { @@ -533,7 +651,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if let Some(inode) = stat.inode { inodes.insert(inode); } - let iter = du(stat, &options, 0, &mut inodes); + let iter = du(stat, &options, 0, &mut inodes, &exclude_glob); let (_, len) = iter.size_hint(); let len = len.unwrap(); for (index, stat) in iter.enumerate() { @@ -758,19 +876,28 @@ pub fn uu_app<'a>() -> Command<'a> { .help("exclude entries smaller than SIZE if positive, \ or entries greater than SIZE if negative") ) - // .arg( - // Arg::new("") - // .short('x') - // .long("exclude-from") - // .value_name("FILE") - // .help("exclude files that match any pattern in FILE") - // ) - // .arg( - // Arg::new("exclude") - // .long("exclude") - // .value_name("PATTERN") - // .help("exclude files that match PATTERN") - // ) + .arg( + Arg::new(options::VERBOSE) + .short('v') + .long("verbose") + .help("verbose mode (option not present in GNU/Coreutils)") + ) + .arg( + Arg::new(options::EXCLUDE) + .long(options::EXCLUDE) + .value_name("PATTERN") + .help("exclude files that match PATTERN") + .multiple_occurrences(true) + ) + .arg( + Arg::new(options::EXCLUDE_FROM) + .short('X') + .long("exclude-from") + .value_name("FILE") + .help("exclude files that match any pattern in FILE") + .multiple_occurrences(true) + + ) .arg( Arg::new(options::TIME) .long(options::TIME) diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index b0506d07124..474ebbc453a 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -3,7 +3,9 @@ // * For the full copyright and license information, please view the LICENSE // * file that was distributed with this source code. -// spell-checker:ignore (paths) sublink subwords +// spell-checker:ignore (paths) sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty + +use std::io::Write; use crate::common::util::*; @@ -587,3 +589,163 @@ fn test_du_bytes() { ))] result.stdout_contains("21529\t./subdir\n"); } + +#[test] +fn test_du_exclude() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.symlink_dir(SUB_DEEPER_DIR, SUB_DIR_LINKS_DEEPER_SYM_DIR); + at.mkdir_all(SUB_DIR_LINKS); + + ts.ucmd() + .arg("--exclude=subdir") + .arg(SUB_DEEPER_DIR) + .succeeds() + .stdout_contains("subdir/deeper/deeper_dir"); + ts.ucmd() + .arg("--exclude=subdir") + .arg("subdir") + .succeeds() + .stdout_is(""); + ts.ucmd() + .arg("--exclude=subdir") + .arg("--verbose") + .arg("subdir") + .succeeds() + .stdout_contains("'subdir' ignored"); +} + +#[test] +fn test_du_exclude_2() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("azerty/xcwww/azeaze"); + + ts.ucmd() + .arg("azerty") + .succeeds() + .stdout_is("4\tazerty/xcwww/azeaze\n8\tazerty/xcwww\n12\tazerty\n"); + // Exact match + ts.ucmd() + .arg("--exclude=azeaze") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); + // Partial match and NOT a glob + ts.ucmd() + .arg("--exclude=azeaz") + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + // Partial match and a various glob + ts.ucmd() + .arg("--exclude=azea?") + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azea{z,b}") + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azea*") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azeaz?") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); +} + +#[test] +fn test_du_exclude_mix() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + let mut file1 = at.make_file("file-ignore1"); + file1.write_all(b"azeaze").unwrap(); + let mut file2 = at.make_file("file-ignore2"); + file2.write_all(b"amaz?ng").unwrap(); + + at.mkdir_all("azerty/xcwww/azeaze"); + at.mkdir_all("azerty/xcwww/qzerty"); + at.mkdir_all("azerty/xcwww/amazing"); + + ts.ucmd() + .arg("azerty") + .succeeds() + .stdout_contains("azerty/xcwww/azeaze"); + ts.ucmd() + .arg("--exclude=azeaze") + .arg("azerty") + .succeeds() + .stdout_does_not_contain("azerty/xcwww/azeaze"); + + // Just exclude one file name + let result = ts.ucmd().arg("--exclude=qzerty").arg("azerty").succeeds(); + assert!(!result.stdout_str().contains("qzerty")); + assert!(result.stdout_str().contains("azerty")); + assert!(result.stdout_str().contains("xcwww")); + + // Exclude from file + let result = ts + .ucmd() + .arg("--exclude-from=file-ignore1") + .arg("azerty") + .succeeds(); + assert!(!result.stdout_str().contains("azeaze")); + assert!(result.stdout_str().contains("qzerty")); + assert!(result.stdout_str().contains("xcwww")); + + // Mix two files and string + let result = ts + .ucmd() + .arg("--exclude=qzerty") + .arg("--exclude-from=file-ignore1") + .arg("--exclude-from=file-ignore2") + .arg("azerty") + .succeeds(); + assert!(!result.stdout_str().contains("amazing")); + assert!(!result.stdout_str().contains("qzerty")); + assert!(!result.stdout_str().contains("azeaze")); + assert!(result.stdout_str().contains("xcwww")); +} + +#[test] +fn test_du_exclude_several_components() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("a/b/c"); + at.mkdir_all("a/x/y"); + at.mkdir_all("a/u/y"); + + // Exact match + let result = ts + .ucmd() + .arg("--exclude=a/u") + .arg("--exclude=a/b") + .arg("a") + .succeeds(); + assert!(!result.stdout_str().contains("a/u")); + assert!(!result.stdout_str().contains("a/b")); +} + +#[test] +fn test_du_exclude_invalid_syntax() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("azerty/xcwww/azeaze"); + + ts.ucmd() + .arg("--exclude=a[ze") + .arg("azerty") + .fails() + .stderr_contains("du: Invalid exclude syntax"); +} From 27697b62b7957982a381c77950acc14f69268259 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sat, 9 Apr 2022 23:32:15 +0200 Subject: [PATCH 2/6] du: make the tests/du/no-x.sh pass --- src/uu/du/src/du.rs | 4 +++- tests/by-util/test_du.rs | 17 ++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 371812b1775..d1f91dc365d 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -34,7 +34,7 @@ use std::str::FromStr; use std::time::{Duration, UNIX_EPOCH}; use std::{error::Error, fmt::Display}; use uucore::display::{print_verbatim, Quotable}; -use uucore::error::{UError, UResult}; +use uucore::error::{set_exit_code, UError, UResult}; use uucore::format_usage; use uucore::parse_size::{parse_size, ParseSizeError}; use uucore::InvalidEncodingHandling; @@ -315,6 +315,7 @@ fn du( my_stat.path.quote(), e ); + set_exit_code(1); return Box::new(iter::once(my_stat)); } }; @@ -374,6 +375,7 @@ fn du( let description = format!("cannot access {}", entry.path().quote()); let error_message = "Permission denied"; show_error_custom_description!(description, "{}", error_message); + set_exit_code(1); } _ => show_error!("cannot access {}: {}", entry.path().quote(), error), }, diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 474ebbc453a..837fc0fb82f 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -431,7 +431,7 @@ fn test_du_no_permission() { ts.ccmd("chmod").arg("-r").arg(SUB_DIR_LINKS).succeeds(); - let result = ts.ucmd().arg(SUB_DIR_LINKS).run(); // TODO: replace with ".fails()" once `du` is fixed + let result = ts.ucmd().arg(SUB_DIR_LINKS).fails(); result.stderr_contains( "du: cannot read directory 'subdir/links': Permission denied (os error 13)", ); @@ -451,6 +451,21 @@ fn test_du_no_permission() { _du_no_permission(result.stdout_str()); } +#[cfg(not(target_os = "windows"))] +#[cfg(feature = "chmod")] +#[test] +fn test_du_no_exec_permission() { + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + + at.mkdir_all("d/no-x/y"); + + ts.ccmd("chmod").arg("u=rw").arg("d/no-x").succeeds(); + + let result = ts.ucmd().arg("d/no-x").fails(); + result.stderr_contains("du: cannot access 'd/no-x/y': Permission denied"); +} + #[cfg(target_vendor = "apple")] fn _du_no_permission(s: &str) { assert_eq!(s, "0\tsubdir/links\n"); From cbb8db6d59481a45cb3915468a8e9f24e6d58d7b Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Sun, 10 Apr 2022 23:40:58 +0200 Subject: [PATCH 3/6] rustfmt the recent change --- src/uu/du/src/du.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 13d06fa7c2c..731fe30be68 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -377,12 +377,13 @@ fn du( show_error_custom_description!(description, "{}", error_message); set_exit_code(1); } - _ => { - set_exit_code(1); - show_error!("cannot access {}: {}", entry.path().quote(), error); - } }, + _ => { + set_exit_code(1); + show_error!("cannot access {}: {}", entry.path().quote(), error); + } + }, } - }, + } Err(error) => show_error!("{}", error), } } From f72c86d6594da13745568033ce7c5247dd4d38f3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 11 Apr 2022 10:55:41 +0200 Subject: [PATCH 4/6] Move to glob instead of globset --- Cargo.lock | 15 +------- src/uu/du/Cargo.toml | 2 +- src/uu/du/src/du.rs | 82 ++++++++++++++++++++-------------------- tests/by-util/test_du.rs | 20 +++++++--- 4 files changed, 58 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b595e38305..5b78da6512c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -898,19 +898,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" -[[package]] -name = "globset" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10463d9ff00a2a068db14231982f5132edebad0d7660cd956a1c30292dbcbfbd" -dependencies = [ - "aho-corasick", - "bstr", - "fnv", - "log", - "regex", -] - [[package]] name = "half" version = "1.8.2" @@ -2415,7 +2402,7 @@ version = "0.0.13" dependencies = [ "chrono", "clap 3.1.6", - "globset", + "glob", "uucore", "winapi 0.3.9", ] diff --git a/src/uu/du/Cargo.toml b/src/uu/du/Cargo.toml index 9d5ec1be44d..1760731e3d4 100644 --- a/src/uu/du/Cargo.toml +++ b/src/uu/du/Cargo.toml @@ -17,7 +17,7 @@ path = "src/du.rs" [dependencies] chrono = "^0.4.11" # For the --exclude & --exclude-from options -globset = "0.4" +glob = "0.3.0" clap = { version = "3.1", features = ["wrap_help", "cargo"] } uucore = { version=">=0.0.11", package="uucore", path="../../uucore" } diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 731fe30be68..8971f61bd58 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -11,7 +11,7 @@ extern crate uucore; use chrono::prelude::DateTime; use chrono::Local; use clap::{crate_version, Arg, ArgMatches, Command}; -use globset::{Glob, GlobSet, GlobSetBuilder}; +use glob::Pattern; use std::collections::HashSet; use std::env; use std::fs; @@ -87,8 +87,8 @@ SIZE is an integer and optional unit (example: 10M is 10*1024*1024). Units are K, M, G, T, P, E, Z, Y (powers of 1024) or KB, MB,... (powers of 1000). -PATTERN is based on the globset crate: -https://docs.rs/globset/latest/globset/#syntax +PATTERN allows some advanced exclusions. For example, the following syntaxes +are supported: ? will match only one character * will match zero or more characters {a,b} will match a or b @@ -300,7 +300,7 @@ fn du( options: &Options, depth: usize, inodes: &mut HashSet, - glob_exclude: &GlobSet, + exclude: &[Pattern], ) -> Box> { let mut stats = vec![]; let mut futures = vec![]; @@ -320,22 +320,27 @@ fn du( } }; - for f in read { + 'file_loop: for f in read { match f { Ok(entry) => { match Stat::new(entry.path(), options) { Ok(this_stat) => { - let full_path = this_stat.path.clone().into_os_string(); - if !&glob_exclude.is_empty() - && (glob_exclude.is_match(full_path) - || glob_exclude - .is_match(&entry.file_name().into_string().unwrap())) - { - // if the directory is ignored, leave early - if options.verbose { - println!("{} ignored", &entry.file_name().quote()); + if !&exclude.is_empty() { + // We have an exclude list + for pattern in exclude { + // Look at all patterns + if pattern.matches(&this_stat.path.to_string_lossy()) + || pattern + .matches(&entry.file_name().into_string().unwrap()) + { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", &entry.file_name().quote()); + } + // Go to the next file + continue 'file_loop; + } } - continue; } if let Some(inode) = this_stat.inode { @@ -354,13 +359,7 @@ fn du( } } } - futures.push(du( - this_stat, - options, - depth + 1, - inodes, - glob_exclude, - )); + futures.push(du(this_stat, options, depth + 1, inodes, exclude)); } else { my_stat.size += this_stat.size; my_stat.blocks += this_stat.blocks; @@ -503,7 +502,7 @@ fn file_as_vec(filename: impl AsRef) -> Vec { // Given the --exclude-from and/or --exclude arguments, returns the globset lists // to ignore the files -fn get_globset_ignore(matches: &ArgMatches) -> UResult { +fn get_glob_ignore(matches: &ArgMatches) -> UResult> { let mut excludes_from = if matches.is_present(options::EXCLUDE_FROM) { match matches.values_of(options::EXCLUDE_FROM) { Some(all_files) => { @@ -537,25 +536,20 @@ fn get_globset_ignore(matches: &ArgMatches) -> UResult { // Merge the two lines excludes.append(&mut excludes_from); if !&excludes.is_empty() { - let mut builder = GlobSetBuilder::new(); + let mut builder = Vec::new(); // Create the globset of excludes for f in excludes { if matches.is_present(options::VERBOSE) { println!("adding {:?} to the exclude list ", &f); } - let g = match Glob::new(&f) { - Ok(glob) => glob, + match Pattern::new(&f) { + Ok(glob) => builder.push(glob), Err(err) => return Err(DuError::InvalidGlob(err.to_string()).into()), }; - builder.add(g); - } - match builder.build() { - // Handle the error. Not sure when this happens - Ok(glob_set) => Ok(glob_set), - Err(err) => Err(DuError::InvalidGlob(err.to_string()).into()), } + Ok(builder) } else { - Ok(GlobSet::empty()) + Ok(Vec::new()) } } @@ -636,17 +630,23 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { "\n" }; - let exclude_glob = get_globset_ignore(&matches)?; + let excludes = get_glob_ignore(&matches)?; let mut grand_total = 0; - for path_string in files { + 'loop_file: for path_string in files { // Skip if we don't want to ignore anything - if !&exclude_glob.is_empty() && exclude_glob.is_match(path_string) { - // if the directory is ignored, leave early - if options.verbose { - println!("{} ignored", path_string.quote()); + if !&excludes.is_empty() { + for pattern in &excludes { + { + if pattern.matches(path_string) { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", path_string.quote()); + } + continue 'loop_file; + } + } } - continue; } let path = PathBuf::from(&path_string); @@ -656,7 +656,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { if let Some(inode) = stat.inode { inodes.insert(inode); } - let iter = du(stat, &options, 0, &mut inodes, &exclude_glob); + let iter = du(stat, &options, 0, &mut inodes, &excludes); let (_, len) = iter.size_hint(); let len = len.unwrap(); for (index, stat) in iter.enumerate() { diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 837fc0fb82f..1deddb77fd1 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -4,7 +4,9 @@ // * file that was distributed with this source code. // spell-checker:ignore (paths) sublink subwords azerty azeaze xcwww azeaz amaz azea qzerty tazerty - +#[cfg(not(windows))] +use regex::Regex; +#[cfg(not(windows))] use std::io::Write; use crate::common::util::*; @@ -632,16 +634,21 @@ fn test_du_exclude() { } #[test] +// Disable on Windows because we are looking for / +// And the tests would be more complex if we have to support \ too +#[cfg(not(target_os = "windows"))] fn test_du_exclude_2() { let ts = TestScenario::new(util_name!()); let at = &ts.fixtures; at.mkdir_all("azerty/xcwww/azeaze"); - ts.ucmd() - .arg("azerty") - .succeeds() - .stdout_is("4\tazerty/xcwww/azeaze\n8\tazerty/xcwww\n12\tazerty\n"); + let result = ts.ucmd().arg("azerty").succeeds(); + + let path_regexp = r"(.*)azerty/xcwww/azeaze(.*)azerty/xcwww(.*)azerty"; + let re = Regex::new(path_regexp).unwrap(); + assert!(re.is_match(result.stdout_str().replace('\n', "").trim())); + // Exact match ts.ucmd() .arg("--exclude=azeaze") @@ -678,6 +685,9 @@ fn test_du_exclude_2() { } #[test] +// Disable on Windows because we are looking for / +// And the tests would be more complex if we have to support \ too +#[cfg(not(target_os = "windows"))] fn test_du_exclude_mix() { let ts = TestScenario::new(util_name!()); let at = &ts.fixtures; From 702a01b76acdd488cbc61c115aee8d07448d40dd Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 11 Apr 2022 20:01:08 +0200 Subject: [PATCH 5/6] update of the comment Co-authored-by: Terts Diepraam --- src/uu/du/src/du.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 8971f61bd58..75e16a59a1a 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -537,7 +537,7 @@ fn get_glob_ignore(matches: &ArgMatches) -> UResult> { excludes.append(&mut excludes_from); if !&excludes.is_empty() { let mut builder = Vec::new(); - // Create the globset of excludes + // Create the `Vec` of excludes for f in excludes { if matches.is_present(options::VERBOSE) { println!("adding {:?} to the exclude list ", &f); From e11f675b15ac72dc47d953368ed7ece1c3c61bf3 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Mon, 11 Apr 2022 21:06:48 +0200 Subject: [PATCH 6/6] More comments + if removed --- src/uu/du/src/du.rs | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index 75e16a59a1a..ff7a5a5b77e 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -325,21 +325,20 @@ fn du( Ok(entry) => { match Stat::new(entry.path(), options) { Ok(this_stat) => { - if !&exclude.is_empty() { - // We have an exclude list - for pattern in exclude { - // Look at all patterns - if pattern.matches(&this_stat.path.to_string_lossy()) - || pattern - .matches(&entry.file_name().into_string().unwrap()) - { - // if the directory is ignored, leave early - if options.verbose { - println!("{} ignored", &entry.file_name().quote()); - } - // Go to the next file - continue 'file_loop; + // We have an exclude list + for pattern in exclude { + // Look at all patterns with both short and long paths + // if we have 'du foo' but search to exclude 'foo/bar' + // we need the full path + if pattern.matches(&this_stat.path.to_string_lossy()) + || pattern.matches(&entry.file_name().into_string().unwrap()) + { + // if the directory is ignored, leave early + if options.verbose { + println!("{} ignored", &this_stat.path.quote()); } + // Go to the next file + continue 'file_loop; } }