Proof of concept for 'dematerialized' environments by chrisburr · Pull Request #1162 · conda/rattler · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Proof of concept for 'dematerialized' environments #1162

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions crates/rattler/src/install/installer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ pub struct Installer {
package_cache: Option<PackageCache>,
downloader: Option<reqwest_middleware::ClientWithMiddleware>,
execute_link_scripts: bool,
is_overlay: bool,
io_semaphore: Option<Arc<Semaphore>>,
reporter: Option<Arc<dyn Reporter>>,
target_platform: Option<Platform>,
Expand Down Expand Up @@ -145,6 +146,21 @@ impl Installer {
self
}

/// Builder-style switch for overlay installs.
///
/// Consumes the installer and returns it with the overlay flag set to
/// `overlay`; every other setting is carried over untouched.
#[must_use]
pub fn with_overlay(mut self, overlay: bool) -> Self {
    self.is_overlay = overlay;
    self
}

/// Sets whether to build an overlay prefix or not.
///
/// In-place counterpart of [`Installer::with_overlay`]: the flag is updated
/// on the existing installer and `self` is returned so calls can be chained.
pub fn set_overlay(&mut self, overlay: bool) -> &mut Self {
    self.is_overlay = overlay;
    self
}

/// Sets the package cache to use.
#[must_use]
pub fn with_package_cache(self, package_cache: PackageCache) -> Self {
Expand Down Expand Up @@ -352,6 +368,7 @@ impl Installer {
platform: Some(target_platform),
python_info: transaction.python_info.clone(),
apple_codesign_behavior: self.apple_code_sign_behavior,
is_overlay: self.is_overlay,
..InstallOptions::default()
};

Expand Down
9 changes: 9 additions & 0 deletions crates/rattler/src/install/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
/// directory.
Copy,

/// The file is not linked but instead the file in the cache directory is used directly.
Overlay,

/// A copy of a file is created and it is also patched.
Patched(FileMode),
}
Expand All @@ -50,6 +53,7 @@
LinkMethod::Softlink => write!(f, "softlink"),
LinkMethod::Reflink => write!(f, "reflink"),
LinkMethod::Copy => write!(f, "copy"),
LinkMethod::Overlay => write!(f, "overlay"),
LinkMethod::Patched(FileMode::Binary) => write!(f, "binary patched"),
LinkMethod::Patched(FileMode::Text) => write!(f, "text patched"),
}
Expand Down Expand Up @@ -144,6 +148,7 @@
allow_symbolic_links: bool,
allow_hard_links: bool,
allow_ref_links: bool,
is_overlay: bool,
target_platform: Platform,
apple_codesign_behavior: AppleCodeSignBehavior,
) -> Result<LinkedFile, LinkFileError> {
Expand Down Expand Up @@ -258,6 +263,10 @@
}
}
LinkMethod::Patched(*file_mode)
} else if path_json_entry.path_type == PathType::SoftLink && is_overlay {

Check failure on line 266 in crates/rattler/src/install/link.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

this `if` has identical blocks
LinkMethod::Overlay
} else if path_json_entry.path_type == PathType::HardLink && is_overlay {
LinkMethod::Overlay
} else if path_json_entry.path_type == PathType::HardLink && allow_ref_links {
reflink_to_destination(&source_path, &destination_path, allow_hard_links)?
} else if path_json_entry.path_type == PathType::HardLink && allow_hard_links {
Expand Down
52 changes: 51 additions & 1 deletion crates/rattler/src/install/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
entry_point::{create_unix_python_entry_point, create_windows_python_entry_point},
};

use rattler_digest::Sha256;

/// An error that might occur when installing a package.
#[derive(Debug, thiserror::Error)]
pub enum InstallError {
Expand Down Expand Up @@ -181,6 +183,12 @@
/// package.
pub link_json: Option<Option<LinkJson>>,

/// Whether or not to use overlay directories. Defaults to false if not set.
/// If set to true, the installation will only write modified files to the
/// target directory. To use the environment the prefix and package cache
/// must be mounted with something like `overlayfs` or `aufs`.
pub is_overlay: bool,

/// Whether or not to use symbolic links where possible. If this is set to
/// `Some(false)` symlinks are disabled, if set to `Some(true)` symbolic
/// links are always used when specified in the [`info/paths.json`] file
Expand Down Expand Up @@ -280,6 +288,28 @@
.await
.map_err(InstallError::FailedToCreateTargetDirectory)?;

if options.is_overlay {
let underlays = target_dir.join(".dematerialized/underlays");
tokio_fs::create_dir_all(&underlays)
.await
.map_err(InstallError::FailedToCreateTargetDirectory)?;

let hash = rattler_digest::compute_bytes_digest::<Sha256>(
package_dir.as_os_str().to_string_lossy().as_bytes(),
);
let hash_hex = format!("{:x}", hash);
let symlink_path = underlays.join(&hash_hex);

#[cfg(unix)]
tokio::fs::symlink(&package_dir, &symlink_path)
.await
.map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?;
#[cfg(windows)]
tokio::fs::symlink_dir(&package_dir, &symlink_path)
.await
.map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?;
}

// Reuse or read the `paths.json` and `index.json` files from the package
// directory
let paths_json = read_paths_json(package_dir, driver, options.paths_json);
Expand Down Expand Up @@ -405,6 +435,7 @@
allow_symbolic_links && !cloned_entry.no_link,
allow_hard_links && !cloned_entry.no_link,
allow_ref_links && !cloned_entry.no_link,
options.is_overlay,
platform,
options.apple_codesign_behavior,
)
Expand Down Expand Up @@ -587,6 +618,24 @@
// Ensure target directory exists
fs_err::create_dir_all(target_dir).map_err(InstallError::FailedToCreateTargetDirectory)?;

if options.is_overlay {
let underlays = target_dir.join(".dematerialized/underlays");
fs_err::create_dir_all(&underlays).map_err(InstallError::FailedToCreateTargetDirectory)?;

let hash = rattler_digest::compute_bytes_digest::<Sha256>(
package_dir.as_os_str().to_string_lossy().as_bytes(),
);
let hash_hex = format!("{:x}", hash);
let symlink_path = underlays.join(&hash_hex);

#[cfg(unix)]
std::os::unix::fs::symlink(&package_dir, &symlink_path)

Check failure on line 632 in crates/rattler/src/install/mod.rs

View workflow job for this annotation

GitHub Actions / Format and Lint

the borrowed expression implements the required traits
.map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?;
#[cfg(windows)]
std::os::windows::fs::symlink_dir(&package_dir, &symlink_path)
.map_err(|e| InstallError::FailedToCreateDirectory(symlink_path.clone(), e))?;
}

// Reuse or read the `paths.json` and `index.json` files from the package
// directory
let paths_json = options.paths_json.map_or_else(
Expand Down Expand Up @@ -748,7 +797,7 @@

// Take care of all the reflinked files (macos only)
// - Add them to the paths.json
// - Fix any occurences of the prefix in the files
// - Fix any occurrences of the prefix in the files
// - Rename files that need clobber-renames
let mut reflinked_paths_entries = Vec::new();
for (parent_dir, files) in reflinked_files {
Expand Down Expand Up @@ -809,6 +858,7 @@
allow_symbolic_links && !entry.no_link,
allow_hard_links && !entry.no_link,
allow_ref_links && !entry.no_link,
options.is_overlay,
platform,
options.apple_codesign_behavior,
);
Expand Down
47 changes: 47 additions & 0 deletions py-rattler/create_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python
import asyncio
import shutil
from pathlib import Path
from tempfile import gettempdir, TemporaryDirectory

from rattler import solve, install
from rattler.virtual_package import VirtualPackage


async def main(base_dir: Path, specs: list[str], cache_dir: Path):
    """Solve ``specs`` once and install the result both conventionally and dematerialized.

    Args:
        base_dir: Directory in which the two example prefixes are (re)created.
        specs: Match specs to solve against the ``conda-forge`` channel.
        cache_dir: Package cache directory shared by all installs.
    """
    # NOTE(review): the "xx" suffix presumably pads this name to the same
    # length as "env-dematerialized" - confirm before renaming.
    original_prefix = base_dir / "env-materializedxx"
    dematerialized_prefix = base_dir / "env-dematerialized"

    # Start from a clean slate: drop whatever a previous run left behind.
    for stale_prefix in (original_prefix, dematerialized_prefix):
        if stale_prefix.exists():
            shutil.rmtree(stale_prefix)

    print("Solving environment:", specs)
    records = await solve(
        channels=["conda-forge"],
        specs=specs,
        virtual_packages=VirtualPackage.detect(),
    )
    print("Will install", len(records), "packages")

    # Install into a throw-away prefix first so the two installs below both
    # run against an already populated package cache.
    print("Populating caches")
    with TemporaryDirectory() as scratch_dir:
        await install(
            records,
            target_prefix=Path(scratch_dir) / "env",
            is_overlay=False,
            show_progress=False,
            cache_dir=cache_dir,
        )

    print("Creating conventional environment")
    await install(
        records,
        target_prefix=original_prefix,
        is_overlay=False,
        show_progress=True,
        cache_dir=cache_dir,
    )

    print("Creating dematerialized environment")
    await install(
        records,
        target_prefix=dematerialized_prefix,
        is_overlay=True,
        show_progress=True,
        cache_dir=cache_dir,
    )


def parse_args():
    """Parse the command line and run the example with the parsed values.

    Despite its name this function does not return the arguments: it hands
    them straight to ``main`` and drives the coroutine to completion.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--cache-dir", type=Path, default=Path(gettempdir()) / "rattler-cache")
    cli.add_argument("--base-dir", type=Path, required=True)
    cli.add_argument("specs", nargs="+")
    parsed = cli.parse_args()

    example_run = main(parsed.base_dir, parsed.specs, parsed.cache_dir)
    asyncio.run(example_run)


# Only run the example when executed as a script, not when imported.
if __name__ == "__main__":
    parse_args()
3 changes: 3 additions & 0 deletions py-rattler/rattler/install/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ async def install(
execute_link_scripts: bool = False,
show_progress: bool = True,
client: Optional[Client] = None,
is_overlay: bool = False,
) -> None:
"""
Create an environment by downloading and linking the `dependencies` in
Expand Down Expand Up @@ -71,6 +72,7 @@ async def install(
show_progress: If set to `True` a progress bar will be shown on the CLI.
client: An authenticated client to use for downloading packages. If not specified a default
client will be used.
is_overlay: If set to `True` the environment will be created as an overlay environment.
"""

await py_install(
Expand All @@ -83,4 +85,5 @@ async def install(
client=client._client if client is not None else None,
execute_link_scripts=execute_link_scripts,
show_progress=show_progress,
is_overlay=is_overlay,
)
5 changes: 4 additions & 1 deletion py-rattler/src/installer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::{
// TODO: Accept functions to report progress
#[pyfunction]
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (records, target_prefix, execute_link_scripts=false, show_progress=false, platform=None, client=None, cache_dir=None, installed_packages=None, reinstall_packages=None))]
#[pyo3(signature = (records, target_prefix, execute_link_scripts=false, show_progress=false, platform=None, client=None, cache_dir=None, installed_packages=None, reinstall_packages=None, is_overlay=false))]
pub fn py_install<'a>(
py: Python<'a>,
records: Vec<Bound<'a, PyAny>>,
Expand All @@ -29,6 +29,7 @@ pub fn py_install<'a>(
cache_dir: Option<PathBuf>,
installed_packages: Option<Vec<Bound<'a, PyAny>>>,
reinstall_packages: Option<HashSet<String>>,
is_overlay: bool,
) -> PyResult<Bound<'a, PyAny>> {
let dependencies = records
.into_iter()
Expand Down Expand Up @@ -70,6 +71,8 @@ pub fn py_install<'a>(
installer.set_download_client(client);
}

installer.set_overlay(is_overlay);

if let Some(cache_dir) = cache_dir {
installer.set_package_cache(PackageCache::new(cache_dir));
}
Expand Down
120 changes: 120 additions & 0 deletions py-rattler/start_shell_in_dematerialized.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
import argparse
import ctypes
import ctypes.util
import os
import subprocess
import sys
from pathlib import Path
from tempfile import TemporaryDirectory

from rattler.shell import Shell, activate, ActivationVariables

# Load the C library with use_errno=True so that ctypes.get_errno() can be
# used to read the errno left behind by a failed call.
libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
# Declare the argument types of mount(2):
# (source, target, filesystemtype, mountflags, data).
libc.mount.argtypes = (ctypes.c_char_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_ulong, ctypes.c_char_p)


def mount(source, target, fs, options=''):
    """Mount ``source`` on ``target`` through the libc ``mount`` function.

    Args:
        source: Mount source, passed as a string.
        target: Mount point; converted with ``str()`` before encoding.
        fs: Filesystem type name.
        options: Filesystem specific option string (the ``data`` argument).

    Raises:
        OSError: If the call returns a negative status; carries the errno
            reported by ``ctypes.get_errno()``.
    """
    status = libc.mount(
        source.encode(),
        str(target).encode(),
        fs.encode(),
        0,  # no mount flags
        options.encode(),
    )
    if status >= 0:
        return

    code = ctypes.get_errno()
    reason = os.strerror(code)
    raise OSError(code, f"Error mounting {source} ({fs}) on {target} with options '{options}': {reason}")


def build_overlay(temp_dir: Path, underlays: list[str], i: int = 0) -> tuple[str, dict[Path, str]]:
    """Pack ``underlays`` into ``lowerdir=`` option strings shorter than one page.

    The underlays are grouped, in order, into chunks whose option string stays
    below ``SC_PAGE_SIZE`` characters. If everything fits in one chunk that
    string is returned directly and no intermediate mounts are needed.
    Otherwise every chunk becomes an intermediate mount point
    ``temp_dir / "<n>"`` and the procedure is repeated on those mount points
    until a single option string remains.

    Args:
        temp_dir: Directory under which intermediate mount points are named.
        underlays: Paths to stack; must not be empty.
        i: Number to use for the first intermediate mount point (used by the
            recursion so that names never clash).

    Returns:
        A tuple ``(lowerdirs, mount_options)``. ``lowerdirs`` is the option
        string for the final mount. ``mount_options`` maps every intermediate
        mount point to its option string, in the order in which the mounts
        have to be performed.

    Raises:
        ValueError: If ``underlays`` is empty, or if a single path is too long
            to fit into one option string on its own.
    """
    # Without this guard an empty list would recurse forever, because zero
    # chunks is treated like "more than one chunk" below.
    if not underlays:
        raise ValueError("At least one underlay is required to build an overlay")

    page_size = os.sysconf("SC_PAGE_SIZE")
    empty_options = "lowerdir="
    mount_options: dict[Path, str] = {}

    options = empty_options
    for underlay in map(str, underlays):
        # A path that does not even fit into a fresh option string can never
        # be placed; fail loudly instead of looping forever.
        if len(empty_options) + len(underlay) >= page_size:
            raise ValueError(f"Underlay path is too long for a single mount call: {underlay}")
        if len(options) + len(underlay) >= page_size:
            # Current chunk is full: store it (minus the trailing ':') and
            # start a new one.
            mount_options[temp_dir / str(i)] = options[:-1]
            i += 1
            options = empty_options
        options += f"{underlay}:"
    mount_options[temp_dir / str(i)] = options[:-1]
    i += 1

    if len(mount_options) == 1:
        # Everything fitted into one option string: no intermediate mount.
        lowerdirs = mount_options.pop(temp_dir / str(i - 1))
    else:
        # Stack the intermediate mount points themselves; the extra mounts are
        # appended so that lower levels are mounted before the ones using them.
        lowerdirs, extra_mounts = build_overlay(temp_dir, list(mount_options), i)
        mount_options |= extra_mounts
    return lowerdirs, mount_options


def mount_overlay(target: Path, options: str, overlay_method: str):
match overlay_method:
case "native":
mount("overlay", target, "overlay", options)
case "overlayfs-fuse":
subprocess.run(["fuse-overlayfs", "-o", options, target])
case _:
raise NotImplementedError(f"Unsupported overlay method: {overlay_method}")


def main(prefix: Path, overlay_path: Path | None, overlay_method: str):
    """Mount a dematerialized environment in a private namespace and exec an activated bash.

    Args:
        prefix: Environment prefix; must contain ``.dematerialized/underlays``.
        overlay_path: Directory used as the writable upper layer, or ``None``
            to mount without a writable layer. A sibling directory with the
            suffix ``.work`` is used as the overlay work directory.
        overlay_method: Passed on to ``mount_overlay`` (``"native"`` or
            ``"overlayfs-fuse"``).

    Raises:
        RuntimeError: If ``prefix`` has no underlays directory, or if the
            final overlay option string is longer than one page.
        OSError: If the root filesystem cannot be made private.

    On success this function does not return: the process is replaced by bash.
    """
    underlays_path = prefix / ".dematerialized" / "underlays"
    if not underlays_path.exists():
        raise RuntimeError("This is not a dematerialized environment!")
    # Each underlay is a symlink; resolve it to the directory it points to.
    cache_underlays = [path.resolve(strict=True) for path in underlays_path.iterdir() if path.is_symlink()]

    if overlay_path:
        overlay_path.mkdir(parents=True, exist_ok=True)
        work_path = overlay_path.with_suffix(".work")
        work_path.mkdir(parents=True, exist_ok=True)

    with TemporaryDirectory() as tempdir:
        lowerdirs, mount_options = build_overlay(Path(tempdir), cache_underlays)

        # Unshare mount and user namespaces, and map current user to root
        uid = os.getuid()
        gid = os.getgid()
        os.unshare(os.CLONE_NEWNS | os.CLONE_NEWUSER)
        Path("/proc/self/uid_map").write_text(f"0 {uid} 1")
        Path("/proc/self/setgroups").write_text("deny")
        Path("/proc/self/gid_map").write_text(f"0 {gid} 1")

        # Make the root filesystem private. The result is checked so that a
        # failure here is reported instead of silently continuing.
        MS_REC = 0x4000
        MS_PRIVATE = 0x40000
        ret = libc.mount("none".encode(), "/".encode(), None, MS_REC | MS_PRIVATE, None)
        if ret < 0:
            errno = ctypes.get_errno()
            raise OSError(errno, f"Error making the root filesystem private: {os.strerror(errno)}")

        # It's only possible to pass up to PAGE_SIZE bytes of options to the
        # mount syscall. If the options are too long, we need to mount the
        # intermediate underlays first.
        for path, options in mount_options.items():
            print("Mounting intermediate underlay at path:", path)
            path.mkdir(parents=True, exist_ok=False)
            mount_overlay(path, options, overlay_method)

        # Mount the final overlayfs instance, with the prefix itself as the
        # first lower directory.
        options = lowerdirs.replace('=', f'={prefix}:', 1)
        if overlay_path:
            options += f",upperdir={overlay_path},workdir={work_path}"
        if len(options) > os.sysconf("SC_PAGE_SIZE"):
            raise RuntimeError("Overlay options are too long!")
        mount_overlay(prefix, options, overlay_method)

        # Activate the environment: the script is picked up through BASH_ENV
        # and unsets that variable again as its first action.
        activation_script = Path(tempdir) / "activate"
        actvars = ActivationVariables(None, sys.path)
        a = activate(prefix, actvars, Shell.bash)
        activation_script.write_text(
            f"unset BASH_ENV\n{a.script}"
        )

        # TODO: We can't clean up the temporary directory if we exec into bash
        # because the process will be replaced. Maybe we can set an atexit hook
        # to clean up the temporary directory during the activation process?
        cmd = ["bash", "--norc", "--noprofile", "-c", "exec bash --norc --noprofile"]
        os.execvpe("bash", cmd, os.environ | {"BASH_ENV": str(activation_script)})


if __name__ == '__main__':
    cli = argparse.ArgumentParser()

    # Exactly one of the two overlay modes has to be selected.
    overlay_mode = cli.add_mutually_exclusive_group(required=True)
    overlay_mode.add_argument("--overlay-dir", type=Path, help="Path to overlay directory")
    overlay_mode.add_argument(
        "--without-writable-overlay",
        action="store_true",
        help="Disable writable overlay",
    )

    cli.add_argument(
        "--overlay-method",
        choices=["native", "overlayfs-fuse"],
        help="Overlay method to use",
        required=True,
    )
    cli.add_argument("prefix", type=Path)
    parsed = cli.parse_args()

    # overlay_dir stays None when --without-writable-overlay was chosen.
    main(
        prefix=parsed.prefix,
        overlay_path=parsed.overlay_dir,
        overlay_method=parsed.overlay_method,
    )
Loading
0