8000 head: Fix bug printing large non-seekable files by karlmcdowall · Pull Request #7294 · uutils/coreutils · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

head: Fix bug printing large non-seekable files #7294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 19 additions & 45 deletions src/uu/head/src/head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

use clap::{crate_version, Arg, ArgAction, ArgMatches, Command};
use std::ffi::OsString;
use std::io::{self, ErrorKind, Read, Seek, SeekFrom, Write};
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
use std::num::TryFromIntError;
use thiserror::Error;
use uucore::display::Quotable;
Expand Down Expand Up @@ -239,10 +239,7 @@ impl HeadOptions {
}
}

fn read_n_bytes<R>(input: R, n: u64) -> std::io::Result<()>
where
R: Read,
{
fn read_n_bytes(input: impl Read, n: u64) -> std::io::Result<()> {
// Read the first `n` bytes from the `input` reader.
let mut reader = input.take(n);

Expand Down Expand Up @@ -287,48 +284,22 @@ fn catch_too_large_numbers_in_backwards_bytes_or_lines(n: u64) -> Option<usize>
}
}

/// Print to stdout all but the last `n` bytes from the given reader.
fn read_but_last_n_bytes(input: &mut impl std::io::BufRead, n: u64) -> std::io::Result<()> {
if n == 0 {
//prints everything
return read_n_bytes(input, u64::MAX);
}

fn read_but_last_n_bytes(input: impl std::io::BufRead, n: u64) -> std::io::Result<()> {
if let Some(n) = catch_too_large_numbers_in_backwards_bytes_or_lines(n) {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();

let mut ring_buffer = Vec::new();

let mut buffer = [0u8; BUF_SIZE];
let mut total_read = 0;

loop {
let read = match input.read(&mut buffer) {
Ok(0) => break,
Ok(read) => read,
Err(e) => match e.kind() {
ErrorKind::Interrupted => continue,
_ => return Err(e),
},
};

total_read += read;

if total_read <= n {
// Fill the ring buffer without exceeding n bytes
let overflow = n - total_read;
ring_buffer.extend_from_slice(&buffer[..read - overflow]);
} else {
// Write the ring buffer and the part of the buffer that exceeds n
stdout.write_all(&ring_buffer)?;
stdout.write_all(&buffer[..read - n + ring_buffer.len()])?;
ring_buffer.clear();
ring_buffer.extend_from_slice(&buffer[read - n + ring_buffer.len()..read]);
}
let stdout = stdout.lock();
// Even though stdout is buffered, it will flush on each newline in the
// input stream. This can be costly, so add an extra layer of buffering
// over the top. This gives a significant speedup (approx 4x).
let mut writer = BufWriter::with_capacity(BUF_SIZE, stdout);
for byte in take_all_but(input.bytes(), n) {
writer.write_all(&[byte?])?;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
8000 writer.flush()?;
}

Ok(())
}

Expand All @@ -343,6 +314,10 @@ fn read_but_last_n_lines(
for bytes in take_all_but(lines(input, separator), n) {
stdout.write_all(&bytes?)?;
}
// Make sure we finish writing everything to the target before
// exiting. Otherwise, when Rust is implicitly flushing, any
// error will be silently ignored.
stdout.flush()?;
}
Ok(())
}
Expand Down Expand Up @@ -440,8 +415,7 @@ fn head_backwards_without_seek_file(
input: &mut std::fs::File,
options: &HeadOptions,
) -> std::io::Result<()> {
let reader = &mut std::io::BufReader::with_capacity(BUF_SIZE, &*input);

let reader = std::io::BufReader::with_capacity(BUF_SIZE, &*input);
match options.mode {
Mode::AllButLastBytes(n) => read_but_last_n_bytes(reader, n)?,
Mode::AllButLastLines(n) => read_but_last_n_lines(reader, n, options.line_ending.into())?,
Expand Down
40 changes: 40 additions & 0 deletions tests/by-util/test_head.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
// file that was distributed with this source code.

// spell-checker:ignore (words) bogusfile emptyfile abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstu
// spell-checker:ignore (words) seekable

use crate::common::util::TestScenario;

Expand Down Expand Up @@ -392,6 +393,45 @@ fn test_presume_input_pipe_5_chars() {
.stdout_is_fixture("lorem_ipsum_5_chars.expected");
}

#[test]
fn test_all_but_last_bytes_large_file_piped() {
// Validate print-all-but-last-n-bytes with a large piped-in (i.e. non-seekable) file.
let scene = TestScenario::new(util_name!());
let fixtures = &scene.fixtures;

// First, create all our fixtures.
let seq_30000_file_name = "seq_30000";
let seq_29000_file_name = "seq_29000";
let seq_29001_30000_file_name = "seq_29001_30000";
scene
.cmd("seq")
.arg("30000")
.set_stdout(fixtures.make_file(seq_30000_file_name))
.succeeds();
scene
.cmd("seq")
.arg("29000")
.set_stdout(fixtures.make_file(seq_29000_file_name))
.succeeds();
scene
.cmd("seq")
.args(&["29001", "30000"])
.set_stdout(fixtures.make_file(seq_29001_30000_file_name))
.succeeds();

let seq_29001_30000_file_length = fixtures
.open(seq_29001_30000_file_name)
.metadata()
.unwrap()
.len();
scene
.ucmd()
.args(&["-c", &format!("-{}", seq_29001_30000_file_length)])
.pipe_in_fixture(seq_30000_file_name)
.succeeds()
.stdout_only_fixture(seq_29000_file_name);
}

#[test]
fn test_read_backwards_lines_large_file() {
// Create our fixtures on the fly. We need the input file to be at least double
Expand Down
Loading
0