use tracing::{debug, trace, warn};
use crate::patches::apply_multi_file_patch;
use crate::sweep::mkdirp;
use crate::{Error, LinkStats, LinkStrategy, Linker, sys};
use aube_lockfile::LockedPackage;
use aube_store::{PackageIndex, StoredFile};
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
impl Linker {
/// Detect the best linking strategy for the filesystem at the given path.
///
/// One-arg form. Probes within one dir. Fine when store and
/// project node_modules share the same mount. Use the two-arg
/// form for installs where the store lives on a different
/// filesystem than the project (USB drives, bind mounts, Docker
/// volumes, cross-drive Windows installs). Otherwise the probe
/// reports hardlink based on project-FS self-test, then every
/// real link call crosses an FS boundary and hits EXDEV. Runtime
/// falls back to `fs::copy` per file silently, thousands of
/// wasted syscalls, user thinks they got hardlinks.
///
/// Returns `Hardlink` when the probe succeeds, `Copy` otherwise.
/// Reflink is reachable only through explicit
/// `packageImportMethod = clone` / `clone-or-copy`; `auto` resolves
/// to `Hardlink` because hardlink benchmarks faster across every
/// target reflink supports (APFS clonefile, btrfs/xfs FICLONE).
pub fn detect_strategy(path: &Path) -> LinkStrategy {
Self::detect_strategy_cross(path, path)
}
/// Two-arg probe. src is the store shard (or any dir on the
/// store FS), dst is the project modules dir (or any dir on the
/// destination FS). Probe creates a real cross-mount src file
/// and tries to hardlink into dst, which catches EXDEV up front.
/// Returns `Hardlink` when the probe succeeds, `Copy` otherwise.
pub fn detect_strategy_cross(src_dir: &Path, dst_dir: &Path) -> LinkStrategy {
// Memoize per (src_dir, dst_dir) for the process lifetime.
// The probe writes a real test file and tries hardlink,
// ~2 syscalls + 2 unlinks. Multiple Linker instances within
// one install (prewarm + final + per-workspace) all repeat
// the probe; cache the answer.
type ProbeKey = (std::path::PathBuf, std::path::PathBuf);
static CACHE: std::sync::OnceLock<
std::sync::RwLock<std::collections::HashMap<ProbeKey, LinkStrategy>>,
> = std::sync::OnceLock::new();
let key = (src_dir.to_path_buf(), dst_dir.to_path_buf());
let cache = CACHE.get_or_init(Default::default);
if let Some(hit) = cache.read().expect("probe cache poisoned").get(&key) {
return *hit;
}
let test_src = src_dir.join(".aube-link-test-src");
let test_dst = dst_dir.join(".aube-link-test-dst");
let strategy = if std::fs::write(&test_src, b"test").is_ok() {
let result = if std::fs::hard_link(&test_src, &test_dst).is_ok() {
LinkStrategy::Hardlink
} else {
LinkStrategy::Copy
};
let _ = std::fs::remove_file(&test_src);
let _ = std::fs::remove_file(&test_dst);
result
} else {
LinkStrategy::Copy
};
// First-write-wins via `entry().or_insert`. Two concurrent
// linker probes (prewarm + final) sharing the same
// (src_dir, dst_dir) can race on the test files: one observes
// hardlink-ok, the other sees the first writer's leftover and
// falls back to Copy. `.insert()` would let the wrong Copy
// result clobber the correct Hardlink for the rest of the
// process; `or_insert` keeps whichever value landed first.
*cache
.write()
.expect("probe cache poisoned")
.entry(key)
.or_insert(strategy)
}
/// Materialize a package in the global virtual store if not already present.
///
/// Materialize `dep_path` into the shared global virtual store.
///
/// Uses atomic rename to avoid TOCTOU races: materializes into a
/// PID-stamped temp directory, then renames into place. If another
/// process wins the race, its result is kept and the temp dir is
/// cleaned up.
///
/// Exposed so the install driver can pipeline GVS population into
/// the fetch phase: as each tarball finishes importing into the
/// CAS, the driver calls this to reflink the package into its
/// `~/.cache/aube/virtual-store/<subdir>` entry. Link step 1 then
/// hits the `pkg_nm_dir.exists()` fast path and only creates the
/// per-project `.aube/<dep_path>` symlink.
pub fn ensure_in_virtual_store(
&self,
dep_path: &str,
pkg: &LockedPackage,
index: &PackageIndex,
stats: &mut LinkStats,
// `link:` transitives the resolver pinned (e.g. via root
// `pnpm.overrides`) need their on-disk target so the parent's
// sibling symlink doesn't dangle into a non-existent
// `.aube/<name>@link+...`. `None` means "no nested links in
// this graph" and the materialize hot path stays unchanged.
nested_link_targets: Option<&BTreeMap<String, PathBuf>>,
) -> Result<(), Error> {
let _diag =
aube_util::diag::Span::new(aube_util::diag::Category::Linker, "ensure_in_vstore")
.with_meta_fn(|| {
format!(
r#"{{"name":{},"files":{}}}"#,
aube_util::diag::jstr(&pkg.name),
index.len()
)
});
// Global-store paths always run through the vstore_key map —
// when hashes are installed this folds dep-graph + engine
// state into the leaf name, so concurrent builds of the same
// package against different toolchains don't collide.
let subdir = self.virtual_store_subdir(dep_path);
let pkg_nm_dir = self
.virtual_store
.join(&subdir)
.join("node_modules")
.join(&pkg.name);
if pkg_nm_dir.exists() {
trace!("virtual store hit: {dep_path}");
stats.packages_cached += 1;
return Ok(());
}
// Materialize into a temp directory, then atomically rename into place
// to avoid TOCTOU races between concurrent `aube install` processes.
// `subdir` already comes from `dep_path_to_filename`, which
// flattens `/` to `+` as part of its escape pass, so it's
// already safe to splice into a single path component.
let tmp_name = format!(".tmp-{}-{subdir}", std::process::id());
let tmp_base = self.virtual_store.join(&tmp_name);
let result = self.materialize_into(
&tmp_base,
dep_path,
pkg,
index,
stats,
true,
nested_link_targets,
);
if result.is_err() {
let _ = std::fs::remove_dir_all(&tmp_base);
return result;
}
// Atomically move the dep_path entry from the temp dir to the final location.
let tmp_entry = tmp_base.join(&subdir);
let final_entry = self.virtual_store.join(&subdir);
// Ensure the parent of the final entry exists (e.g. for scoped packages).
if let Some(parent) = final_entry.parent() {
mkdirp(parent)?;
}
match aube_util::fs_atomic::rename_with_retry(&tmp_entry, &final_entry) {
Ok(()) => {
trace!("atomically placed {subdir} in virtual store");
}
Err(e) if final_entry.exists() => {
// Another process won the race — that's fine, use theirs.
trace!("lost rename race for {dep_path}, using existing: {e}");
// Undo the stats from our materialization since we're discarding it
stats.packages_linked = stats.packages_linked.saturating_sub(1);
stats.files_linked = stats.files_linked.saturating_sub(index.len());
stats.packages_cached += 1;
// Lost-race path: our `subdir` is still inside
// `tmp_base`, so a full recursive delete is needed.
let _ = std::fs::remove_dir_all(&tmp_base);
return Ok(());
}
Err(e) => {
let _ = std::fs::remove_dir_all(&tmp_base);
return Err(Error::Io(final_entry, e));
}
}
// Successful rename: `tmp_base` is now an empty wrapper directory
// (its single child was the subdir we just renamed out). Use
// `remove_dir` instead of `remove_dir_all` — the latter still
// does the full `opendir`/`fdopendir`(fcntl)/`readdir`/`close`
// walk even on an empty dir, which dtrace shows as ~6 extra
// syscalls per package. At 227 packages that's ~1.4k wasted
// syscalls on every cold install.
//
// `remove_dir` fails with `ENOTEMPTY` if a future change to
// `materialize_into` starts dropping extra files into
// `tmp_base`. Log at debug so the leak is observable without
// being fatal; the worst-case outcome is a stray tmp dir, and
// concurrent-writer races already use the full
// `remove_dir_all` branch above.
if let Err(e) = std::fs::remove_dir(&tmp_base) {
debug!(
"remove_dir({}) failed, leaving tmp in place: {e}",
tmp_base.display()
);
}
Ok(())
}
/// Materialize a single package directly into the per-project
/// virtual store at `aube_dir/<dep_path>/node_modules/<name>/`.
///
/// Idempotent: if the entry already exists, counts as cached and
/// returns. Used by the install-time materializer to pipeline the
/// link work into the fetch phase under non-GVS mode, so the
/// dedicated link phase only has to create top-level
/// `node_modules/<name>` symlinks.
pub fn ensure_in_aube_dir(
&self,
aube_dir: &Path,
dep_path: &str,
pkg: &LockedPackage,
index: &PackageIndex,
stats: &mut LinkStats,
nested_link_targets: Option<&BTreeMap<String, PathBuf>>,
) -> Result<(), Error> {
// `materialize_into` batches `create_dir_all` for every parent
// it needs, so callers don't have to mkdirp the entry's parent
// (which is just `aube_dir` itself, already created by the
// materializer driver).
let entry = aube_dir.join(self.aube_dir_entry_name(dep_path));
if entry.exists() {
stats.packages_cached += 1;
return Ok(());
}
self.materialize_into(
aube_dir,
dep_path,
pkg,
index,
stats,
false,
nested_link_targets,
)
}
/// Materialize a package's files and transitive dep symlinks into a base directory.
///
/// `apply_hashes` controls whether per-dep subdir names are run
/// through `vstore_key` (the content-addressed name) or used as
/// raw `dep_path` strings. Global-store callers pass `true` so
/// the shared `~/.cache/aube/virtual-store/` can hold isolated
/// copies for each `(deps_hash, engine)` combination;
/// per-project `.aube/` callers pass `false` because node's
/// runtime module walk resolves by dep_path only.
#[allow(clippy::too_many_arguments)]
pub(crate) fn materialize_into(
&self,
base_dir: &Path,
dep_path: &str,
pkg: &LockedPackage,
index: &PackageIndex,
stats: &mut LinkStats,
apply_hashes: bool,
// dep_path → absolute on-disk target for any `link:` packages
// referenced as transitive deps. When the parent itself is a
// `file:` Directory or `link:` Link (workspace-style locals),
// its `package.json` may declare `link:./libs/foo` deps that
// point inside the parent's source tree. We sidestep the
// virtual store for those — there is no `.aube/<dep>@link+...`
// entry — and symlink straight to the on-disk path the
// resolver pinned. `None` means "no nested link transitives in
// this graph", which is the common case.
nested_link_targets: Option<&BTreeMap<String, PathBuf>>,
) -> Result<(), Error> {
let subdir = if apply_hashes {
self.virtual_store_subdir(dep_path)
} else {
self.aube_dir_entry_name(dep_path)
};
let pkg_nm_dir = base_dir.join(&subdir).join("node_modules").join(&pkg.name);
// Pre-compute the set of unique parent directories across
// every file in the index AND every scoped transitive-dep
// symlink we're about to create, then mkdir them in a single
// pass. Previously each file looped through `mkdirp(parent)`
// which always did an `exists()` check (= statx syscall) even
// though the same parents were shared by dozens of siblings —
// `materialize_into` for a typical 32-file npm package
// resulted in ~25 redundant statx calls. Collecting the unique
// parents first, sorting by length (so ancestors precede
// descendants), and calling `create_dir_all` once each cuts
// out the redundant stats entirely. `BTreeSet` sorts
// lexicographically, which is good enough because every
// ancestor of a directory is a prefix of it.
let pkg_nm_parent = base_dir.join(&subdir).join("node_modules");
// Collect into Vec + sort + dedup instead of BTreeSet. For a
// package with thousands of files (typescript, next), the
// BTreeSet's per-insert log-N PathBuf comparison (~50-byte
// memcmps) was a measurable cost on top of the redundant
// create_dir_all that the set was deduplicating in the first
// place.
let mut parents: Vec<PathBuf> = Vec::with_capacity(index.len() / 4 + 4);
parents.push(pkg_nm_dir.clone());
// Validate every key once here. The file-linking loop below
// walks the same immutable index, so skipping the check
// there is safe.
for rel_path in index.keys() {
validate_index_key(rel_path)?;
let target = pkg_nm_dir.join(rel_path);
if let Some(parent) = target.parent() {
parents.push(parent.to_path_buf());
}
}
// Scoped transitive deps need `pkg_nm_parent/@scope/` to exist
// before the symlink call; include those parents in the batch.
for dep_name in pkg.dependencies.keys() {
if let Some(slash) = dep_name.find('/')
&& dep_name.starts_with('@')
{
parents.push(pkg_nm_parent.join(&dep_name[..slash]));
}
}
parents.sort_unstable();
parents.dedup();
for parent in &parents {
std::fs::create_dir_all(parent).map_err(|e| Error::Io(parent.clone(), e))?;
}
// `materialize_into` always writes into a fresh location
// (either a `.tmp-<pid>-...` staging dir for the global virtual
// store or a per-project `.aube/<dep_path>` just created by
// the caller), so we can skip the `remove_file(dst)` that
// `link_file` does defensively. Pass `fresh = true` to suppress
// the unlink syscall on every file. For a 1.4k-package install
// that's ~45k wasted `unlink` calls on the hot path.
for (rel_path, stored) in index {
// Key already validated in the parent-collection loop
// above. The index is immutable between the two loops.
let target = pkg_nm_dir.join(rel_path);
if let Err(e) = self.link_file_fresh(stored, rel_path, &target) {
if let Error::MissingStoreFile { .. } = &e {
invalidate_stale_index_for_package(&self.store, pkg);
}
return Err(e);
}
stats.files_linked += 1;
if stored.executable {
// `create_cas_file` writes every CAS entry as 0o644
// unconditionally; the only place a CAS entry's
// shared inode gets the +x bit is the very first
// `make_executable` call against a hardlinked or
// reflinked target — that `chmod` upgrades the
// shared inode for every later linker that points
// at it. Skipping the call (an earlier optimization)
// produced 0o644 binaries on cold installs and
// broke every CLI shipped via npm.
#[cfg(unix)]
xx::file::make_executable(&target).map_err(|e| Error::Xx(e.to_string()))?;
}
}
// Apply any user-supplied patch for this `(name, version)`.
// Patches are applied *after* the files have been linked into
// the virtual store but *before* transitive symlinks, so the
// patched bytes live alongside the unpatched ones at a
// distinct subdir (the graph hash callback is responsible for
// making sure that's true).
let patch_key = pkg.spec_key();
if let Some(patch_text) = self.patches.get(&patch_key) {
apply_multi_file_patch(&pkg_nm_dir, patch_text)
.map_err(|msg| Error::Patch(patch_key.clone(), msg))?;
}
// Create symlinks for transitive dependencies. Parents for
// scoped packages were added to the `parents` batch above, so
// we no longer need a per-symlink mkdirp. We also skip the
// `symlink_metadata().is_ok()` existence check: callers
// guarantee the target directory is freshly created (either a
// `.tmp-<pid>-...` staging dir for the global virtual store or
// a per-project `.aube/<dep_path>` that the caller just
// ensured is empty), so nothing can be in the way.
for (dep_name, dep_version) in &pkg.dependencies {
let dep_dep_path = format!("{dep_name}@{dep_version}");
// Skip any dep whose name matches the package being
// materialized, regardless of version. The symlink would
// land at `pkg_nm_parent.join(dep_name)` which is exactly
// `pkg_nm_dir` — the directory we just populated with the
// package's own files — and `create_dir_link` would fail
// EEXIST. The skip used to require version-equality too,
// but published packages occasionally declare a *different*
// version of themselves as a dep (e.g. `react_ujs@3.3.0`
// pins `react_ujs@^2.7.1`, an artifact of how its build
// script generates its package.json). Treat that as a
// self-reference: `require('<self>')` from inside the
// package resolves to its own files, matching what npm /
// pnpm / yarn end up with after their hoisting passes.
if dep_name == &pkg.name {
continue;
}
let symlink_path = pkg_nm_parent.join(dep_name);
// `link:` transitive: the resolver pinned an absolute
// on-disk target. Skip the virtual-store sibling lookup
// (there is no `.aube/<dep>@link+...` entry for these) and
// symlink straight at the source directory.
//
// Store the absolute target verbatim. A relative path
// would have to thread two pitfalls at once: the GVS
// tmp→final rename (link's own depth changes by one) AND
// macOS `/tmp`→`/private/tmp` symlink expansion (the dir
// the OS resolves the link from is one level deeper than
// `self.virtual_store` lexically suggests). Either alone
// is fixable; together every `pathdiff` variant lands one
// component off and the link dangles. Sibling symlinks
// get away with relative paths because both endpoints
// live inside `base_dir` and move together; nested-link
// targets are *external* (under `project_dir`) so the
// tricks that work for siblings don't apply. Windows
// already uses absolute targets for the same reason (see
// the `#[cfg(windows)]` block below).
if let Some(map) = nested_link_targets
&& let Some(abs_target) = map.get(&dep_dep_path)
{
sys::create_dir_link(abs_target, &symlink_path)
.map_err(|e| Error::Io(symlink_path.clone(), e))?;
continue;
}
// Match the parent's convention: global-store materialization
// walks sibling subdirs under their hashed names, while the
// per-project `.aube/` layout uses raw dep_paths.
let sibling_subdir = if apply_hashes {
self.virtual_store_subdir(&dep_dep_path)
} else {
self.aube_dir_entry_name(&dep_dep_path)
};
// Compute the relative path from the symlink's parent to
// the sibling dep directory. The symlink's parent is
// `pkg_nm_parent/` for a bare name but
// `pkg_nm_parent/@scope/` for a scoped one, so we can't
// hard-code `../..` — doing so would undercount by one
// level for every scoped transitive dep and produce a
// dangling link. `pathdiff::diff_paths` walks the
// difference for us, yielding `../..` for `foo` and
// `../../..` for `@vue/shared`, both relative to whatever
// parent `symlink_path` ends up with.
// `pkg_nm_parent` is `<base_dir>/<subdir>/node_modules/`, so
// two parents deep brings us to `<base_dir>/` where all
// sibling subdirs live side-by-side.
let virtual_root = pkg_nm_parent
.parent()
.and_then(Path::parent)
.unwrap_or(&pkg_nm_parent);
let sibling_abs = virtual_root
.join(&sibling_subdir)
.join("node_modules")
.join(dep_name);
let link_parent = symlink_path.parent().unwrap_or(&pkg_nm_parent);
let target = pathdiff::diff_paths(&sibling_abs, link_parent)
.unwrap_or_else(|| sibling_abs.clone());
// GVS materialize writes into `.tmp-<pid>-<subdir>/`, then
// atomic-renames into `self.virtual_store/<subdir>/`. POSIX
// symlinks store the relative offset verbatim. Offset stays
// invariant under the wrapper rename, so the link resolves
// correctly after the move. Windows junctions resolve the
// target against `link.parent()` at create time and persist
// an absolute path, which binds the junction to the tmp
// wrapper. After rename every sibling link dangles into a
// gone `.tmp-<pid>-...` path. Fix: on Windows GVS path
// (`apply_hashes = true`) rewrite the target to point at
// the final virtual store root so the stored absolute path
// survives the rename.
#[cfg(windows)]
let target = if apply_hashes {
self.virtual_store
.join(&sibling_subdir)
.join("node_modules")
.join(dep_name)
} else {
target
};
sys::create_dir_link(&target, &symlink_path)
.map_err(|e| Error::Io(symlink_path.clone(), e))?;
}
stats.packages_linked += 1;
trace!("materialized {dep_path} ({} files)", index.len());
Ok(())
}
/// Hardlink-or-copy a file into a freshly-created destination.
/// Assumes `dst` does not exist — callers (`materialize_into`)
/// always write into a `.tmp-<pid>-...` staging dir or a
/// just-wiped per-project `.aube/<dep_path>`, so the defensive
/// `remove_file(dst)` an idempotent variant would need is skipped.
/// Eliminates one syscall per linked file (~45k on the medium
/// benchmark fixture).
pub(crate) fn link_file_fresh(
&self,
stored: &StoredFile,
rel_path: &str,
dst: &Path,
) -> Result<(), Error> {
#[cfg(target_os = "macos")]
const SMALL_FILE_COPY_MAX: u64 = 16 * 1024;
let map_io = |e: std::io::Error| classify_link_error(stored, rel_path, dst, e);
let missing_source = || Error::MissingStoreFile {
store_path: stored.store_path.clone(),
rel_path: rel_path.to_string(),
};
// Track the realized strategy (may differ from `self.strategy` when
// a reflink or hardlink falls back to copy) for diagnostic
// attribution. Diag emits a `linker.link_<strategy>` event with
// the per-file duration so the analyzer can break down link cost
// by realized path: reflink (zero-copy CoW), hardlink (zero-cost
// metadata link), copy (full byte transfer), or the
// small-file-copy short circuit on macOS.
let diag_t0 = aube_util::diag::enabled().then(std::time::Instant::now);
let realized: &'static str;
match self.strategy {
LinkStrategy::Reflink => {
#[cfg(target_os = "macos")]
if matches!(stored.size, Some(size) if size <= SMALL_FILE_COPY_MAX) {
std::fs::copy(&stored.store_path, dst).map_err(map_io)?;
if let Some(t0) = diag_t0 {
aube_util::diag::event(
aube_util::diag::Category::Linker,
"link_macos_small_copy",
t0.elapsed(),
None,
);
}
return Ok(());
}
if let Err(e) = reflink_copy::reflink(&stored.store_path, dst) {
// Source-missing short-circuit avoids the misleading
// "fell back to copy" trace and the redundant copy
// attempt that would just ENOENT for the same reason.
if !stored.store_path.exists() {
return Err(missing_source());
}
// Fall back to copy on cross-filesystem errors
trace!("reflink failed, falling back to copy: {e}");
std::fs::copy(&stored.store_path, dst).map_err(map_io)?;
realized = "reflink_fallback_copy";
} else {
realized = "reflink";
}
}
LinkStrategy::Hardlink => {
if let Err(e) = std::fs::hard_link(&stored.store_path, dst) {
if !stored.store_path.exists() {
return Err(missing_source());
}
// Fall back to copy on cross-filesystem errors (EXDEV)
trace!("hardlink failed, falling back to copy: {e}");
std::fs::copy(&stored.store_path, dst).map_err(map_io)?;
realized = "hardlink_fallback_copy";
} else {
realized = "hardlink";
}
}
LinkStrategy::Copy => {
std::fs::copy(&stored.store_path, dst).map_err(map_io)?;
realized = "copy";
}
}
if let Some(t0) = diag_t0 {
// `realized` is one of seven static strings; matching is
// O(1) and the static `&str` keeps the JSONL category compact.
let name = match realized {
"reflink" => "link_reflink",
"reflink_fallback_copy" => "link_reflink_fallback",
"hardlink" => "link_hardlink",
"hardlink_fallback_copy" => "link_hardlink_fallback",
"copy" => "link_copy",
"macos_small_copy" => "link_macos_small_copy",
_ => "link_unknown",
};
aube_util::diag::event(aube_util::diag::Category::Linker, name, t0.elapsed(), None);
}
Ok(())
}
}
/// Translate a copy failure into the most informative linker error.
/// ENOENT can mean either side of the operation is missing — stat the
/// source CAS shard to attribute it. A missing shard means the cached
/// package index is out of sync with the on-disk store, which the
/// caller can recover from by invalidating the cached index and
/// re-importing the tarball.
fn classify_link_error(
stored: &StoredFile,
rel_path: &str,
dst: &Path,
err: std::io::Error,
) -> Error {
if err.kind() == std::io::ErrorKind::NotFound && !stored.store_path.exists() {
return Error::MissingStoreFile {
store_path: stored.store_path.clone(),
rel_path: rel_path.to_string(),
};
}
Error::Io(dst.to_path_buf(), err)
}
/// Best-effort drop the cached package index when materialize discovers
/// its referenced CAS shard is gone. Callers always surface the original
/// `MissingStoreFile` error first; this side effect just makes sure the
/// next install miss `load_index` instead of looping on the same dead
/// reference. If the cache write fails (e.g. permission error), warn
/// loudly so the user knows the auto-recovery didn't take and they need
/// to wipe the index dir by hand (run `aube store path` to find it).
pub(crate) fn invalidate_stale_index_for_package(store: &aube_store::Store, pkg: &LockedPackage) {
match store.invalidate_cached_index(pkg.registry_name(), &pkg.version, pkg.integrity.as_deref())
{
Ok(true) => debug!("invalidated stale index for {}", pkg.spec_key()),
Ok(false) => {}
Err(e) => warn!(
"failed to invalidate stale index for {}: {e}; manual recovery: rm -rf \"$(aube store path)/index\"",
pkg.spec_key()
),
}
}
/// Defence in depth for the tarball path-traversal class. The
/// primary guard lives in `aube_store::import_tarball`, which
/// refuses malformed entries before they enter the `PackageIndex`.
/// This helper is the last check before `base.join(key)` is
/// written through the linker, so an index loaded from a cache
/// file that predates the store-side validation (or a bug that
/// lets a traversing key slip past it) still cannot produce a
/// file outside the package root.
pub(crate) fn validate_index_key(key: &str) -> Result<(), Error> {
if key.is_empty()
|| key.starts_with('/')
|| key.starts_with('\\')
|| key.contains('\0')
|| key.contains('\\')
{
return Err(Error::UnsafeIndexKey(key.to_string()));
}
// Reject any `..` component or Windows drive prefix like `C:`
// that would make `Path::join` escape the base.
for component in std::path::Path::new(key).components() {
match component {
std::path::Component::ParentDir
| std::path::Component::RootDir
| std::path::Component::Prefix(_) => {
return Err(Error::UnsafeIndexKey(key.to_string()));
}
std::path::Component::Normal(os) => {
#[cfg(windows)]
{
if let Some(s) = os.to_str()
&& s.contains(':')
{
return Err(Error::UnsafeIndexKey(key.to_string()));
}
}
#[cfg(not(windows))]
{
let _ = os;
}
}
std::path::Component::CurDir => {}
}
}
Ok(())
}