From 9cbe6055fa3f5c8a84c1eb69839ce8994c2b73f8 Mon Sep 17 00:00:00 2001
From: Ben Wiederhake
Date: Sun, 25 Feb 2024 12:52:21 +0100
Subject: [PATCH] tr: stream output instead of buffering

This should lower memory consumption, and fixes OOM in some scenarios.
---
 src/uu/tr/src/operation.rs | 26 ++++++++++++++++++++++++++
 src/uu/tr/src/tr.rs        | 38 +++++++++++++-------------------------
 2 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/src/uu/tr/src/operation.rs b/src/uu/tr/src/operation.rs
index 5565de6a16d..cfc9b11cb91 100644
--- a/src/uu/tr/src/operation.rs
+++ b/src/uu/tr/src/operation.rs
@@ -339,6 +339,32 @@ impl Sequence {
 
 pub trait SymbolTranslator {
     fn translate(&mut self, current: u8) -> Option<u8>;
+
+    /// Takes two SymbolTranslators and creates a new SymbolTranslator over both in sequence.
+    ///
+    /// This behaves pretty much identical to [`Iterator::chain`].
+    fn chain<T>(self, other: T) -> ChainedSymbolTranslator<Self, T>
+    where
+        Self: Sized,
+    {
+        ChainedSymbolTranslator::<Self, T> {
+            stage_a: self,
+            stage_b: other,
+        }
+    }
+}
+
+pub struct ChainedSymbolTranslator<A, B> {
+    stage_a: A,
+    stage_b: B,
+}
+
+impl<A: SymbolTranslator, B: SymbolTranslator> SymbolTranslator for ChainedSymbolTranslator<A, B> {
+    fn translate(&mut self, current: u8) -> Option<u8> {
+        self.stage_a
+            .translate(current)
+            .and_then(|c| self.stage_b.translate(c))
+    }
 }
 
 #[derive(Debug)]
diff --git a/src/uu/tr/src/tr.rs b/src/uu/tr/src/tr.rs
index 968682a264b..6f78f13db94 100644
--- a/src/uu/tr/src/tr.rs
+++ b/src/uu/tr/src/tr.rs
@@ -9,9 +9,10 @@ mod operation;
 mod unicode_table;
 
 use clap::{crate_version, Arg, ArgAction, Command};
-use nom::AsBytes;
-use operation::{translate_input, Sequence, SqueezeOperation, TranslateOperation};
-use std::io::{stdin, stdout, BufReader, BufWriter};
+use operation::{
+    translate_input, Sequence, SqueezeOperation, SymbolTranslator, TranslateOperation,
+};
+use std::io::{stdin, stdout, BufWriter};
 use uucore::{format_usage, help_about, help_section, help_usage, show};
 
 use crate::operation::DeleteOperation;
@@ -117,19 +118,13 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
         truncate_set1_flag,
     )?;
 
+    // '*_op' are the operations that need to be applied, in order.
     if delete_flag {
         if squeeze_flag {
-            let mut delete_buffer = vec![];
-            {
-                let mut delete_writer = BufWriter::new(&mut delete_buffer);
-                let delete_op = DeleteOperation::new(set1, complement_flag);
-                translate_input(&mut locked_stdin, &mut delete_writer, delete_op);
-            }
-            {
-                let mut squeeze_reader = BufReader::new(delete_buffer.as_bytes());
-                let op = SqueezeOperation::new(set2, false);
-                translate_input(&mut squeeze_reader, &mut buffered_stdout, op);
-            }
+            let delete_op = DeleteOperation::new(set1, complement_flag);
+            let squeeze_op = SqueezeOperation::new(set2, false);
+            let op = delete_op.chain(squeeze_op);
+            translate_input(&mut locked_stdin, &mut buffered_stdout, op);
         } else {
             let op = DeleteOperation::new(set1, complement_flag);
             translate_input(&mut locked_stdin, &mut buffered_stdout, op);
@@ -139,17 +134,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
             let op = SqueezeOperation::new(set1, complement_flag);
             translate_input(&mut locked_stdin, &mut buffered_stdout, op);
         } else {
-            let mut translate_buffer = vec![];
-            {
-                let mut writer = BufWriter::new(&mut translate_buffer);
-                let op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
-                translate_input(&mut locked_stdin, &mut writer, op);
-            }
-            {
-                let mut reader = BufReader::new(translate_buffer.as_bytes());
-                let squeeze_op = SqueezeOperation::new(set2, false);
-                translate_input(&mut reader, &mut buffered_stdout, squeeze_op);
-            }
+            let translate_op = TranslateOperation::new(set1, set2.clone(), complement_flag)?;
+            let squeeze_op = SqueezeOperation::new(set2, false);
+            let op = translate_op.chain(squeeze_op);
+            translate_input(&mut locked_stdin, &mut buffered_stdout, op);
         }
     } else {
         let op = TranslateOperation::new(set1, set2, complement_flag)?;