From 3434435672cc88e5c9b2a152e432ac77cde619de Mon Sep 17 00:00:00 2001 From: Chris Burr Date: Thu, 13 Mar 2025 15:37:47 +0100 Subject: [PATCH 1/2] Proof of concept for 'dematerialized' environments --- crates/rattler/src/install/installer/mod.rs | 17 +++++++ crates/rattler/src/install/link.rs | 9 ++++ crates/rattler/src/install/mod.rs | 52 ++++++++++++++++++++- py-rattler/rattler/install/installer.py | 3 ++ py-rattler/src/installer.rs | 5 +- 5 files changed, 84 insertions(+), 2 deletions(-) diff --git a/crates/rattler/src/install/installer/mod.rs b/crates/rattler/src/install/installer/mod.rs index a55f1c578..e6644bfa9 100644 --- a/crates/rattler/src/install/installer/mod.rs +++ b/crates/rattler/src/install/installer/mod.rs @@ -45,6 +45,7 @@ pub struct Installer { package_cache: Option, downloader: Option, execute_link_scripts: bool, + is_overlay: bool, io_semaphore: Option>, reporter: Option>, target_platform: Option, @@ -145,6 +146,21 @@ impl Installer { self } + /// Sets whether to build an overlay prefix or not. + #[must_use] + pub fn with_overlay(self, overlay: bool) -> Self { + Self { + is_overlay: overlay, + ..self + } + } + + /// Sets whether to build an overlay prefix or not. + pub fn set_overlay(&mut self, overlay: bool) -> &mut Self { + self.is_overlay = overlay; + self + } + /// Sets the package cache to use. #[must_use] pub fn with_package_cache(self, package_cache: PackageCache) -> Self { @@ -352,6 +368,7 @@ impl Installer { platform: Some(target_platform), python_info: transaction.python_info.clone(), apple_codesign_behavior: self.apple_code_sign_behavior, + is_overlay: self.is_overlay, ..InstallOptions::default() }; diff --git a/crates/rattler/src/install/link.rs b/crates/rattler/src/install/link.rs index c08559483..cd4ed9e85 100644 --- a/crates/rattler/src/install/link.rs +++ b/crates/rattler/src/install/link.rs @@ -39,6 +39,9 @@ pub enum LinkMethod { /// directory. 
Copy, + /// The file is not linked but instead the file in the cache directory is used directly. + Overlay, + /// A copy of a file is created and it is also patched. Patched(FileMode), } @@ -50,6 +53,7 @@ impl fmt::Display for LinkMethod { LinkMethod::Softlink => write!(f, "softlink"), LinkMethod::Reflink => write!(f, "reflink"), LinkMethod::Copy => write!(f, "copy"), + LinkMethod::Overlay => write!(f, "overlay"), LinkMethod::Patched(FileMode::Binary) => write!(f, "binary patched"), LinkMethod::Patched(FileMode::Text) => write!(f, "text patched"), } @@ -144,6 +148,7 @@ pub fn link_file( allow_symbolic_links: bool, allow_hard_links: bool, allow_ref_links: bool, + is_overlay: bool, target_platform: Platform, apple_codesign_behavior: AppleCodeSignBehavior, ) -> Result { @@ -258,6 +263,10 @@ pub fn link_file( } } LinkMethod::Patched(*file_mode) + } else if path_json_entry.path_type == PathType::SoftLink && is_overlay { + LinkMethod::Overlay + } else if path_json_entry.path_type == PathType::HardLink && is_overlay { + LinkMethod::Overlay } else if path_json_entry.path_type == PathType::HardLink && allow_ref_links { reflink_to_destination(&source_path, &destination_path, allow_hard_links)? } else if path_json_entry.path_type == PathType::HardLink && allow_hard_links { diff --git a/crates/rattler/src/install/mod.rs b/crates/rattler/src/install/mod.rs index aef2cef90..9e5a0a32a 100644 --- a/crates/rattler/src/install/mod.rs +++ b/crates/rattler/src/install/mod.rs @@ -72,6 +72,8 @@ use crate::install::{ entry_point::{create_unix_python_entry_point, create_windows_python_entry_point}, }; +use rattler_digest::Sha256; + /// An error that might occur when installing a package. #[derive(Debug, thiserror::Error)] pub enum InstallError { @@ -181,6 +183,12 @@ pub struct InstallOptions { /// package. pub link_json: Option>, + /// Whether or not to use overlay directories. Defaults to false if not set. 
+ /// If set to true, the installation will only write modified files to the + /// target directory. To use the environment the prefix and package cache + /// must be mounted with something like `overlayfs` or `aufs`. + pub is_overlay: bool, + /// Whether or not to use symbolic links where possible. If this is set to /// `Some(false)` symlinks are disabled, if set to `Some(true)` symbolic /// links are always used when specified in the [`info/paths.json`] file @@ -280,6 +288,28 @@ pub async fn link_package( .await .map_err(InstallError::FailedToCreateTargetDirectory)?; + if options.is_overlay { + let underlays = target_dir.join(".dematerialized/underlays"); + tokio_fs::create_dir_all(&underlays) + .await + .map_err(InstallError::FailedToCreateTargetDirectory)?; + + let hash = rattler_digest::compute_bytes_digest::( + package_dir.as_os_str().to_string_lossy().as_bytes(), + ); + let hash_hex = format!("{:x}", hash); + let symlink_path = underlays.join(&hash_hex); + + #[cfg(unix)] + tokio::fs::symlink(&package_dir, &symlink_path) + .await + .map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?; + #[cfg(windows)] + tokio::fs::symlink_dir(&package_dir, &symlink_path) + .await + .map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?; + } + // Reuse or read the `paths.json` and `index.json` files from the package // directory let paths_json = read_paths_json(package_dir, driver, options.paths_json); @@ -405,6 +435,7 @@ pub async fn link_package( allow_symbolic_links && !cloned_entry.no_link, allow_hard_links && !cloned_entry.no_link, allow_ref_links && !cloned_entry.no_link, + options.is_overlay, platform, options.apple_codesign_behavior, ) @@ -587,6 +618,24 @@ pub fn link_package_sync( // Ensure target directory exists fs_err::create_dir_all(target_dir).map_err(InstallError::FailedToCreateTargetDirectory)?; + if options.is_overlay { + let underlays = target_dir.join(".dematerialized/underlays"); + 
fs_err::create_dir_all(&underlays).map_err(InstallError::FailedToCreateTargetDirectory)?; + + let hash = rattler_digest::compute_bytes_digest::( + package_dir.as_os_str().to_string_lossy().as_bytes(), + ); + let hash_hex = format!("{:x}", hash); + let symlink_path = underlays.join(&hash_hex); + + #[cfg(unix)] + std::os::unix::fs::symlink(&package_dir, &symlink_path) + .map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?; + #[cfg(windows)] + std::os::windows::fs::symlink_dir(&package_dir, &symlink_path) + .map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?; + } + // Reuse or read the `paths.json` and `index.json` files from the package // directory let paths_json = options.paths_json.map_or_else( @@ -748,7 +797,7 @@ pub fn link_package_sync( // Take care of all the reflinked files (macos only) // - Add them to the paths.json - // - Fix any occurences of the prefix in the files + // - Fix any occurrences of the prefix in the files // - Rename files that need clobber-renames let mut reflinked_paths_entries = Vec::new(); for (parent_dir, files) in reflinked_files { @@ -809,6 +858,7 @@ pub fn link_package_sync( allow_symbolic_links && !entry.no_link, allow_hard_links && !entry.no_link, allow_ref_links && !entry.no_link, + options.is_overlay, platform, options.apple_codesign_behavior, ); diff --git a/py-rattler/rattler/install/installer.py b/py-rattler/rattler/install/installer.py index 75ba0eca0..ff8a1d03c 100644 --- a/py-rattler/rattler/install/installer.py +++ b/py-rattler/rattler/install/installer.py @@ -20,6 +20,7 @@ async def install( execute_link_scripts: bool = False, show_progress: bool = True, client: Optional[Client] = None, + is_overlay: bool = False, ) -> None: """ Create an environment by downloading and linking the `dependencies` in @@ -71,6 +72,7 @@ async def install( show_progress: If set to `True` a progress bar will be shown on the CLI. client: An authenticated client to use for downloading packages. 
If not specified a default client will be used. + is_overlay: If set to `True` the environment will be created as an overlay environment. """ await py_install( @@ -83,4 +85,5 @@ async def install( client=client._client if client is not None else None, execute_link_scripts=execute_link_scripts, show_progress=show_progress, + is_overlay=is_overlay, ) diff --git a/py-rattler/src/installer.rs b/py-rattler/src/installer.rs index 2c9099acc..35716a668 100644 --- a/py-rattler/src/installer.rs +++ b/py-rattler/src/installer.rs @@ -17,7 +17,7 @@ use crate::{ // TODO: Accept functions to report progress #[pyfunction] #[allow(clippy::too_many_arguments)] -#[pyo3(signature = (records, target_prefix, execute_link_scripts=false, show_progress=false, platform=None, client=None, cache_dir=None, installed_packages=None, reinstall_packages=None))] +#[pyo3(signature = (records, target_prefix, execute_link_scripts=false, show_progress=false, platform=None, client=None, cache_dir=None, installed_packages=None, reinstall_packages=None, is_overlay=false))] pub fn py_install<'a>( py: Python<'a>, records: Vec>, @@ -29,6 +29,7 @@ pub fn py_install<'a>( cache_dir: Option, installed_packages: Option>>, reinstall_packages: Option>, + is_overlay: bool, ) -> PyResult> { let dependencies = records .into_iter() @@ -70,6 +71,8 @@ pub fn py_install<'a>( installer.set_download_client(client); } + installer.set_overlay(is_overlay); + if let Some(cache_dir) = cache_dir { installer.set_package_cache(PackageCache::new(cache_dir)); } From 68a7cdce7b4187f6de091133b5f947f675b9c22d Mon Sep 17 00:00:00 2001 From: Chris Burr Date: Fri, 14 Mar 2025 13:06:47 +0100 Subject: [PATCH 2/2] Add demo scripts --- py-rattler/create_example.py | 47 ++++++++ py-rattler/start_shell_in_dematerialized.py | 120 ++++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100755 py-rattler/create_example.py create mode 100755 py-rattler/start_shell_in_dematerialized.py diff --git a/py-rattler/create_example.py 
b/py-rattler/create_example.py
new file mode 100755
index 000000000..b40daafae
--- /dev/null
+++ b/py-rattler/create_example.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+"""Demo: install one solve as a conventional and as a dematerialized (overlay) prefix."""
+import asyncio
+import shutil
+from pathlib import Path
+from tempfile import gettempdir, TemporaryDirectory
+
+from rattler import solve, install
+from rattler.virtual_package import VirtualPackage
+
+
+async def main(base_dir: Path, specs: list[str], cache_dir: Path):
+    """Solve `specs` against conda-forge and install the result under `base_dir`."""
+    original_prefix = base_dir / "env-materializedxx"
+    dematerialized_prefix = base_dir / "env-dematerialized"
+    for stale_prefix in (original_prefix, dematerialized_prefix):  # always start clean
+        if stale_prefix.exists():
+            shutil.rmtree(stale_prefix)
+
+    print("Solving environment:", specs)
+    records = await solve(channels=["conda-forge"], specs=specs, virtual_packages=VirtualPackage.detect())
+    print("Will install", len(records), "packages")
+
+    print("Populating caches")  # throwaway install into a temp dir; only `cache_dir` outlives it
+    with TemporaryDirectory() as scratch:
+        await install(records, target_prefix=Path(scratch) / "env", is_overlay=False, show_progress=False, cache_dir=cache_dir)
+
+    shared = dict(show_progress=True, cache_dir=cache_dir)
+    print("Creating conventional environment")
+    await install(records, target_prefix=original_prefix, is_overlay=False, **shared)
+    print("Creating dematerialized environment")
+    await install(records, target_prefix=dematerialized_prefix, is_overlay=True, **shared)
+
+
+def parse_args():
+    """Read the command line and run `main` with it."""
+    import argparse
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--cache-dir", type=Path, default=Path(gettempdir()) / "rattler-cache")
+    cli.add_argument("--base-dir", type=Path, required=True)
+    cli.add_argument("specs", nargs="+")
+    ns = cli.parse_args()
+    asyncio.run(main(ns.base_dir, ns.specs, ns.cache_dir))
+
+
+if __name__ == "__main__":
+    parse_args()
diff --git a/py-rattler/start_shell_in_dematerialized.py
new file mode 100755
index 000000000..aebeaeb3c
--- /dev/null
+++
b/py-rattler/start_shell_in_dematerialized.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+"""Start a bash shell inside a dematerialized environment.
+
+The directories symlinked from `<prefix>/.dematerialized/underlays` are
+overlay-mounted on top of the prefix inside new mount and user namespaces,
+after which the environment is activated and the process execs into bash.
+"""
+import argparse
+import ctypes
+import ctypes.util
+import os
+import subprocess
+import sys
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from rattler.shell import Shell, activate, ActivationVariables
+
+libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
+libc.mount.argtypes = (ctypes.c_char_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_ulong, ctypes.c_char_p)
+
+
+def mount(source, target, fs, options='', flags=0):
+    """Call mount(2) through libc and raise `OSError` if it fails.
+
+    `fs` and `options` may be `None`, in which case NULL is passed for them.
+    """
+    ret = libc.mount(
+        source.encode(),
+        str(target).encode(),
+        None if fs is None else fs.encode(),
+        flags,
+        None if options is None else options.encode(),
+    )
+    if ret < 0:
+        errno = ctypes.get_errno()
+        raise OSError(errno, f"Error mounting {source} ({fs}) on {target} with options '{options}': {os.strerror(errno)}")
+
+
+def build_overlay(temp_dir: Path, underlays: list[str], i: int = 0) -> tuple[str, dict[Path, str]]:
+    """Pack `underlays` into `lowerdir=` option strings shorter than one page.
+
+    Returns `(lowerdirs, mount_options)`. `lowerdirs` is the option string for
+    the final mount. `mount_options` maps intermediate mount points below
+    `temp_dir` (named by a counter starting at `i`) to their option strings, in
+    insertion order; it is empty when everything fits into a single string.
+    Otherwise each group gets an intermediate mount point and those mount
+    points are packed again recursively.
+
+    Raises `ValueError` if `underlays` is empty and `RuntimeError` if a single
+    underlay is too long to fit into an option string on its own.
+    """
+    if not underlays:
+        # Without this guard the function would recurse without end.
+        raise ValueError("At least one underlay is required")
+    page_size = os.sysconf("SC_PAGE_SIZE")
+    mount_options: dict[Path, str] = {}
+    underlays = underlays.copy()
+    while underlays:
+        options = "lowerdir="
+        if len(options) + len(str(underlays[0])) >= page_size:
+            # The packing loop below would never consume this entry.
+            raise RuntimeError(f"Underlay path is too long to mount: {underlays[0]}")
+        while underlays and len(options) + len(str(underlays[0])) < page_size:
+            options += f"{underlays.pop(0)}:"
+        options = options[:-1]
+        mount_options[temp_dir / str(i)] = options
+        i += 1
+    if len(mount_options) == 1:
+        lowerdirs = mount_options.pop(temp_dir / str(i - 1))
+    else:
+        lowerdirs, extra_mounts = build_overlay(temp_dir, list(mount_options), i)
+        mount_options |= extra_mounts
+    return lowerdirs, mount_options
+
+
+def mount_overlay(target: Path, options: str, overlay_method: str):
+    """Mount an overlay described by `options` on `target`.
+
+    `overlay_method` is "native" (mount syscall) or "overlayfs-fuse" (the
+    `fuse-overlayfs` executable); anything else raises `NotImplementedError`.
+    A failed mount raises `OSError` or `subprocess.CalledProcessError`.
+    """
+    match overlay_method:
+        case "native":
+            mount("overlay", target, "overlay", options)
+        case "overlayfs-fuse":
+            # check=True so that a failing fuse-overlayfs is not silently ignored.
+            subprocess.run(["fuse-overlayfs", "-o", options, target], check=True)
+        case _:
+            raise NotImplementedError(f"Unsupported overlay method: {overlay_method}")
+
+
+def main(prefix: Path, overlay_path: Path | None, overlay_method: str):
+    """Mount the underlays of `prefix` over it and exec an activated bash.
+
+    If `overlay_path` is given it is used as `upperdir`, with
+    `overlay_path.with_suffix(".work")` as `workdir`; both are created if
+    missing. Without it no `upperdir` is passed to the mount.
+
+    Raises `RuntimeError` if `prefix` has no `.dematerialized/underlays`
+    directory or if the final mount options exceed one page.
+    """
+    underlays_path = prefix / ".dematerialized" / "underlays"
+    if not underlays_path.exists():
+        raise RuntimeError("This is not a dematerialized environment!")
+    cache_underlays = [path.resolve(strict=True) for path in underlays_path.iterdir() if path.is_symlink()]
+
+    if overlay_path:
+        overlay_path.mkdir(parents=True, exist_ok=True)
+        work_path = overlay_path.with_suffix(".work")
+        work_path.mkdir(parents=True, exist_ok=True)
+
+    with TemporaryDirectory() as tempdir:
+        lowerdirs, mount_options = build_overlay(Path(tempdir), cache_underlays)
+
+        # Unshare mount and user namespaces, and map current user to root
+        uid = os.getuid()
+        gid = os.getgid()
+        os.unshare(os.CLONE_NEWNS | os.CLONE_NEWUSER)
+        Path("/proc/self/uid_map").write_text(f"0 {uid} 1")
+        Path("/proc/self/setgroups").write_text("deny")
+        Path("/proc/self/gid_map").write_text(f"0 {gid} 1")
+
+        # Make the root filesystem private. Going through `mount()` means a
+        # failure raises instead of being ignored.
+        MS_REC = 0x4000
+        MS_PRIVATE = 0x40000
+        mount("none", "/", None, None, MS_REC | MS_PRIVATE)
+
+        # It's only possible to pass up to PAGE_SIZE bytes of options to the
+        # mount syscall. If the options are too long, we need to mount the
+        # intermediate underlays first.
+        for path, options in mount_options.items():
+            print("Mounting intermediate underlay at path:", path)
+            path.mkdir(parents=True, exist_ok=False)
+            mount_overlay(path, options, overlay_method)
+
+        # Mount the final overlayfs instance
+        options = lowerdirs.replace('=', f'={prefix}:', 1)
+        if overlay_path:
+            options += f",upperdir={overlay_path},workdir={work_path}"
+        if len(options) > os.sysconf("SC_PAGE_SIZE"):
+            raise RuntimeError("Overlay options are too long!")
+        mount_overlay(prefix, options, overlay_method)
+
+        # Activate the environment
+        # NOTE(review): sys.path is Python's module search path, not $PATH;
+        # confirm that this is the intended `current_path` argument.
+        activation_script = Path(tempdir) / "activate"
+        actvars = ActivationVariables(None, sys.path)
+        a = activate(prefix, actvars, Shell.bash)
+        activation_script.write_text(
+            f"unset BASH_ENV\n{a.script}"
+        )
+
+        # TODO: We can't clean up the temporary directory if we exec into bash
+        # because the process will be replaced. Maybe we can set an atexit hook
+        # to clean up the temporary directory during the activation process?
+        cmd = ["bash", "--norc", "--noprofile", "-c", "exec bash --norc --noprofile"]
+        os.execvpe("bash", cmd, os.environ | {"BASH_ENV": str(activation_script)})
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("--overlay-dir", type=Path, help="Path to overlay directory")
+    group.add_argument("--without-writable-overlay", action="store_true", help="Disable writable overlay")
+    parser.add_argument("--overlay-method", choices=["native", "overlayfs-fuse"], help="Overlay method to use", required=True)
+    parser.add_argument("prefix", type=Path)
+    args = parser.parse_args()
+
+    main(prefix=args.prefix, overlay_path=args.overlay_dir, overlay_method=args.overlay_method)