From f6223df4f5f05f59f73e7f7f4729df03372079b4 Mon Sep 17 00:00:00 2001 From: Schneems Date: Wed, 28 May 2025 11:31:19 -0500 Subject: [PATCH 1/4] Error if body not fully downloaded For downloads that aren't using a checksum, we need to manually assert that the number of bytes written is the same as the content-length being reported in the HTTP request. --- CHANGELOG.md | 3 ++- libherokubuildpack/Cargo.toml | 3 ++- libherokubuildpack/src/download.rs | 30 ++++++++++++++++++++++++++++-- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d469bfe..1e0de5ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - libcnb: - Order of automatically applied environment variables by libcnb, such as `PATH=/bin`, now matches the upstream CNB lifecycle. ([#938](https://github.com/heroku/libcnb.rs/pull/938)) - +- libherokubuildpack: + - The `download_file` function now errors if bytes received does not match the content length header. ([#947](https://github.com/heroku/libcnb.rs/pull/947)) ## [0.29.0] - 2025-05-02 diff --git a/libherokubuildpack/Cargo.toml b/libherokubuildpack/Cargo.toml index a63903ea..6473f454 100644 --- a/libherokubuildpack/Cargo.toml +++ b/libherokubuildpack/Cargo.toml @@ -19,7 +19,7 @@ workspace = true [features] default = ["command", "download", "digest", "error", "inventory", "log", "inventory-semver", "inventory-sha2", "tar", "toml", "fs", "write"] -download = ["dep:ureq", "dep:thiserror"] +download = ["dep:ureq", "dep:thiserror", "dep:http"] digest = ["dep:sha2"] error = ["log", "dep:libcnb"] inventory = ["dep:hex", "dep:serde", "dep:thiserror", "dep:toml"] @@ -41,6 +41,7 @@ crossbeam-utils = { version = "0.8.21", optional = true } # As such we have to use the next best alternate backend, which is `zlib`. flate2 = { version = "1.1.1", default-features = false, features = ["zlib"], optional = true } hex = { version = "0.4.3", optional = true } +http = { version = "1", optional = true } libcnb = { workspace = true, optional = true } pathdiff = { version = "0.2.3", optional = true } semver = { version = "1.0.26", features = ["serde"], optional = true } diff --git a/libherokubuildpack/src/download.rs b/libherokubuildpack/src/download.rs index e590d328..01c51cad 100644 --- a/libherokubuildpack/src/download.rs +++ b/libherokubuildpack/src/download.rs @@ -1,3 +1,5 @@ +use http::{HeaderName, header::CONTENT_LENGTH}; +use std::num::ParseIntError; use std::{fs, io}; #[derive(thiserror::Error, Debug)] @@ -8,6 +10,18 @@ pub enum DownloadError { #[error("I/O error while downloading file: {0}")] IoError(#[from] std::io::Error), + + #[error("Missing required header: `{0}`")] + MissingRequiredHeader(HeaderName), + + #[error("Failed to convert header value for `{0}` into a string")] + HeaderEncodingError(HeaderName), + + #[error("Cannot parse into an integer: {0}")] + CannotParseInteger(ParseIntError), + + #[error("Expected `{expected}` bytes received `{received}`")] + UnexpectedBytes { expected: u64, received: u64 }, } /// Downloads a file via HTTP(S) to a local path @@ -32,9 +46,21 @@ pub fn download_file( destination: impl AsRef, ) -> Result<(), DownloadError> { let response = ureq::get(uri.as_ref()).call().map_err(Box::new)?; + let expected: u64 = response + .headers() + .get(CONTENT_LENGTH) + .ok_or_else(|| DownloadError::MissingRequiredHeader(CONTENT_LENGTH))? + .to_str() + .map_err(|_| DownloadError::HeaderEncodingError(CONTENT_LENGTH))? + .parse() + .map_err(DownloadError::CannotParseInteger)?; let mut reader = response.into_body().into_reader(); let mut file = fs::File::create(destination.as_ref())?; - io::copy(&mut reader, &mut file)?; - Ok(()) + let received = io::copy(&mut reader, &mut file)?; + if received == expected { + Ok(()) + } else { + Err(DownloadError::UnexpectedBytes { expected, received }) + } } From a11555ce25068bcb40b21711ae5b2207c42d60ba Mon Sep 17 00:00:00 2001 From: Schneems Date: Wed, 28 May 2025 12:37:49 -0500 Subject: [PATCH 2/4] Fewer temp variables --- libherokubuildpack/src/download.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libherokubuildpack/src/download.rs b/libherokubuildpack/src/download.rs index 01c51cad..41c87d99 100644 --- a/libherokubuildpack/src/download.rs +++ b/libherokubuildpack/src/download.rs @@ -54,10 +54,9 @@ pub fn download_file( .map_err(|_| DownloadError::HeaderEncodingError(CONTENT_LENGTH))? .parse() .map_err(DownloadError::CannotParseInteger)?; - let mut reader = response.into_body().into_reader(); let mut file = fs::File::create(destination.as_ref())?; - let received = io::copy(&mut reader, &mut file)?; + let received = io::copy(&mut response.into_body().into_reader(), &mut file)?; if received == expected { Ok(()) } else { From f6e55680b7e174f7e6b668584306ce09ee77b952 Mon Sep 17 00:00:00 2001 From: Schneems Date: Wed, 28 May 2025 12:39:22 -0500 Subject: [PATCH 3/4] Delete file on partial download error If a download was not successful, it's content length and bytes received will not match. In that case, expect that the developer may wish to retry the download, so delete the file that was created in this function. --- libherokubuildpack/src/download.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/libherokubuildpack/src/download.rs b/libherokubuildpack/src/download.rs index 41c87d99..0b4bbf4c 100644 --- a/libherokubuildpack/src/download.rs +++ b/libherokubuildpack/src/download.rs @@ -21,7 +21,12 @@ pub enum DownloadError { CannotParseInteger(ParseIntError), #[error("Expected `{expected}` bytes received `{received}`")] - UnexpectedBytes { expected: u64, received: u64 }, + UnexpectedBytes { + expected: u64, + received: u64, + path_deleted: Result<(), std::io::Error>, + }, +} } /// Downloads a file via HTTP(S) to a local path @@ -54,12 +59,19 @@ pub fn download_file( .map_err(|_| DownloadError::HeaderEncodingError(CONTENT_LENGTH))? .parse() .map_err(DownloadError::CannotParseInteger)?; - let mut file = fs::File::create(destination.as_ref())?; + let mut file = fs::File::create(destination.as_ref())?; let received = io::copy(&mut response.into_body().into_reader(), &mut file)?; + // Ensure file is closed + drop(file); if received == expected { Ok(()) } else { - Err(DownloadError::UnexpectedBytes { expected, received }) + let path_deleted = fs::remove_file(&destination); + Err(DownloadError::UnexpectedBytes { + expected, + received, + path_deleted, + }) } } From f568b061d6cc5c4e42caa2f7c0e01fd7ec04d856 Mon Sep 17 00:00:00 2001 From: Schneems Date: Wed, 28 May 2025 12:53:22 -0500 Subject: [PATCH 4/4] Add a convenience function for retrying a download When a download fails due to disk IO or if the file cannot be cleaned, do not suggest an (immediate) retry. This interface would allow code like this: ```retry let mut download_attempts = 0; while download_attempts <= MAX_RETRIES { match download_file(&artifact.url, &tarball_path) { Err(error) if error.retry_suggested() && download_attempts < MAX_RETRIES => { let sub_bullet = log_background_bullet.cancel(format!("{error}")); download_attempts += 1; thread::sleep(Duration::from_secs(1)); log_background_bullet = sub_bullet.start_timer("Retrying"); } result => { result?; let _ = log_background_bullet.done(); break; } } } ``` This allows developers who care less about the specific error states to defer the decision to whether or not an error can be retried to a canonical location. --- libherokubuildpack/src/download.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/libherokubuildpack/src/download.rs b/libherokubuildpack/src/download.rs index 0b4bbf4c..914cb3af 100644 --- a/libherokubuildpack/src/download.rs +++ b/libherokubuildpack/src/download.rs @@ -27,6 +27,18 @@ pub enum DownloadError { path_deleted: Result<(), std::io::Error>, }, } + +impl DownloadError { + /// Do not suggest a retry if the failure is due to disk error or if the path could not be cleaned + pub fn retry_suggested(&self) -> bool { + !matches!( + self, + DownloadError::UnexpectedBytes { + path_deleted: Err(_), + .. + } | DownloadError::IoError(_) + ) + } } /// Downloads a file via HTTP(S) to a local path