// crates/aube-lockfile/src/pnpm/read.rs

use super::dep_path::{
    parse_dep_path, peerless_alias_target, rewrite_snapshot_alias_deps, version_to_dep_path,
};
use super::raw::{RawDepSpec, local_source_from_resolution, parse_raw_lockfile};
use crate::{
    CatalogEntry, DepType, DirectDep, Error, LocalSource, LockedPackage, LockfileGraph, PeerDepMeta,
};
use std::collections::BTreeMap;
use std::path::Path;

/// Parse a pnpm-lock.yaml file into a LockfileGraph.
///
/// Reads `path` through `crate::read_lockfile`, deserializes the text
/// with `parse_raw_lockfile`, and then assembles the graph in stages:
///
/// 1. **Importers** — every entry in an importer's `dependencies`,
///    `dev_dependencies` and `optional_dependencies` becomes a
///    `DirectDep`. Entries whose `version:` is recognized by
///    `LocalSource::parse` additionally get a synthesized
///    `LockedPackage`; entries whose `version:` parses as a dep path
///    with a different package name are recorded as npm aliases. An
///    empty importer key is normalized to `"."`.
/// 2. **Local packages** — each synthesized local package picks up
///    `dependencies` / `optional_dependencies` from its matching
///    snapshot and, when the matching `packages:` entry has a usable
///    `resolution`, swaps in the `LocalSource` derived from it. The
///    map is then re-keyed from the final `local_source`.
/// 3. **Snapshots** — one `LockedPackage` per snapshot key (or per
///    `packages:` key when `snapshots` is empty), skipping keys already
///    absorbed by a local package.
/// 4. **Aliases** — for every recorded alias, the real package's entry
///    is cloned under the alias dep path with `alias_of` set.
/// 5. **Top-level metadata** — settings, times, catalogs, overrides,
///    patched / ignored / skipped optional dependencies are copied over.
///
/// # Errors
///
/// - Propagates any error returned by `crate::read_lockfile`.
/// - Returns the error built by `Error::parse_yaml_err` when
///   `parse_raw_lockfile` fails.
/// - Returns `Error::parse` when a snapshot key is rejected by
///   `parse_dep_path`, or when an npm alias points at a package that is
///   not present in the parsed graph.
///
/// # Panics
///
/// The re-keying step calls `.unwrap()` on each local package's
/// `local_source`; every entry is created with `Some(..)` and only ever
/// reassigned to `Some(..)` in this function. In debug builds a
/// `debug_assert_eq!` additionally fires if re-keying changes a local
/// package's dep path.
pub fn parse(path: &Path) -> Result<LockfileGraph, Error> {
    let content = crate::read_lockfile(path)?;
    // `content` is cloned into the error only on the failure path (the
    // closure runs solely when `parse_raw_lockfile` returns `Err`).
    let raw = parse_raw_lockfile(&content)
        .map_err(|e| Error::parse_yaml_err(path, content.clone(), &e))?;

    // Parse importers (direct deps of each workspace package).
    // We track synthesized LockedPackages for local (`file:` / `link:`)
    // deps here so the main packages loop below doesn't try to process
    // them off the canonical lockfile key.
    let mut importers = BTreeMap::new();
    let mut local_packages: BTreeMap<String, LockedPackage> = BTreeMap::new();
    // importer path → (dependency name → specifier) for entries listed
    // under the importer's `skipped_optional_dependencies`.
    let mut skipped_optional_dependencies: BTreeMap<String, BTreeMap<String, String>> =
        BTreeMap::new();
    // pnpm v9 encodes npm-aliases implicitly: the importer key is
    // the alias (`express-fork`), `specifier:` carries `npm:<real>@<range>`,
    // and `version:` is `<real>@<resolved>`. There is no `aliasOf:`
    // field — that's an aube-specific writer extension. We record
    // each alias here and synthesize an alias-keyed LockedPackage
    // after the canonical packages loop, mirroring the shape the
    // resolver-fresh path emits so the linker stays single-shape.
    // Tuple: (alias_dep_path, real_dep_path, alias_name, real_name).
    let mut alias_remaps: Vec<(String, String, String, String)> = Vec::new();

    // Handles a single importer dependency entry: appends a `DirectDep`
    // to `deps` and, depending on the shape of `info.version`, either
    // synthesizes a local package or records an alias remap.
    // `local_packages` is captured by mutable reference, while
    // `alias_remaps` is handed in as an explicit parameter on each call.
    let mut push_direct = |deps: &mut Vec<DirectDep>,
                           alias_remaps: &mut Vec<(String, String, String, String)>,
                           name: &str,
                           info: &RawDepSpec,
                           dep_type: DepType| {
        // pnpm appends a `(peer@ver)` suffix to the importer
        // `version:` of URL- and git-based direct deps when the
        // resolved snapshot carries peer context, the same way it
        // does for semver versions. `LocalSource::parse` treats the
        // whole string as the URL, so a RemoteTarballSource built
        // from the raw value fetches `…/tar.gz/SHA(peer@ver)` and
        // 404s. Strip it here so the URL that reaches the fetcher
        // and the dep_path hash are both peer-context-free —
        // consistent with what `parse_dep_path` does for snapshot
        // keys downstream.
        let classify_version = info.version.split('(').next().unwrap_or(&info.version);
        if let Some(local) = LocalSource::parse(classify_version, Path::new("")) {
            // `Path::new("")` means tarball-vs-dir classification is
            // skipped; we default to Directory and rely on the
            // resolver's on-disk re-read for the authoritative source
            // type during a subsequent `aube install` (lockfile-only
            // path never materializes local deps anyway before the
            // fetch step re-classifies).
            //
            // Re-classify Directory → Tarball if the path looks
            // like a tarball filename, so `.tgz`/`.tar.gz`
            // targets round-trip correctly even when the file
            // isn't present at parse time. The filename
            // heuristic lives on `LocalSource` so this stays in
            // lockstep with `LocalSource::parse`.
            let local = match local {
                LocalSource::Directory(p) if LocalSource::path_looks_like_tarball(&p) => {
                    LocalSource::Tarball(p)
                }
                // Importer `version:` for git deps is the canonical
                // `<url>#<commit>` form pnpm writes. The parser
                // puts the `<commit>` into `committish`; since
                // this is a lockfile round-trip (not a raw user
                // spec), treat it as the pinned commit.
                LocalSource::Git(mut g) if g.resolved.is_empty() => {
                    // `take()` moves the value out, leaving
                    // `committish` as `None` once it has been promoted
                    // to `resolved`.
                    if let Some(c) = g.committish.take() {
                        g.resolved = c;
                    }
                    LocalSource::Git(g)
                }
                other => other,
            };
            let dep_path = local.dep_path(name);
            deps.push(DirectDep {
                name: name.to_string(),
                dep_path: dep_path.clone(),
                dep_type,
                specifier: Some(info.specifier.clone()),
            });
            // `entry().or_insert_with` keeps an already-present entry
            // for this dep_path untouched, so the first synthesized
            // package for a given key wins.
            local_packages
                .entry(dep_path.clone())
                .or_insert_with(|| LockedPackage {
                    name: name.to_string(),
                    // Placeholder: in this branch `info.version` holds
                    // the local source spec rather than a version
                    // number, so there is nothing better to record.
                    version: "0.0.0".to_string(),
                    integrity: None,
                    dependencies: BTreeMap::new(),
                    peer_dependencies: BTreeMap::new(),
                    peer_dependencies_meta: BTreeMap::new(),
                    dep_path,
                    local_source: Some(local),
                    ..Default::default()
                });
        } else {
            // Detect npm-aliased deps purely from the shape of
            // `version:`. pnpm encodes aliases as
            // `<real_name>@<resolved>(peers…)` regardless of how the
            // alias was declared:
            //   - direct:  `specifier: npm:beamcoder-prebuild@0.7.1`
            //   - catalog: `specifier: 'catalog:'` (the alias lives
            //              in `pnpm-workspace.yaml#catalog`)
            // The earlier `specifier.starts_with("npm:")` gate missed
            // the catalog flavor and silently dropped those deps.
            // Strip any peer suffix before parsing so `version:
            // 18.2.0(react@18.2.0)` (a regular dep with peers) does
            // not parse as `name="18.2.0(react"`.
            let bare_version = info
                .version
                .split('(')
                .next()
                .unwrap_or(info.version.as_str());
            let dep_path = if let Some((real_name, resolved)) = parse_dep_path(bare_version)
                && real_name != name
            {
                // Re-attach the original peer suffix (everything from
                // the first `(` onward) to the alias-keyed dep path.
                let peer_suffix = info
                    .version
                    .find('(')
                    .map(|i| &info.version[i..])
                    .unwrap_or("");
                let alias_dep_path = format!("{name}@{resolved}{peer_suffix}");
                // The importer `version:` string is used verbatim as
                // the dep path of the real (aliased-to) package.
                let real_dep_path = info.version.clone();
                alias_remaps.push((
                    alias_dep_path.clone(),
                    real_dep_path,
                    name.to_string(),
                    real_name,
                ));
                alias_dep_path
            } else {
                // Not an alias: derive the dep path from the importer
                // key and the raw `version:` string.
                version_to_dep_path(name, &info.version)
            };
            deps.push(DirectDep {
                name: name.to_string(),
                dep_path,
                dep_type,
                specifier: Some(info.specifier.clone()),
            });
        }
    };

    for (importer_path, importer) in &raw.importers {
        // pnpm writes the workspace root as either `'.'` (most
        // common / current) or `''` (seen on v9 lockfiles in the
        // wild, e.g. npmx.dev). Both mean "the repo root" — we key
        // the graph on `.` everywhere downstream (linker, filters,
        // stats), so normalize at parse time and keep the rest of
        // the pipeline single-shape.
        let importer_path = if importer_path.is_empty() {
            "."
        } else {
            importer_path.as_str()
        };

        // Guard against a malformed lockfile that writes both `''`
        // and `'.'` for root — `BTreeMap` iteration visits `''`
        // first, so the real `'.'` entry would otherwise silently
        // overwrite the normalized empty-key entry. pnpm never
        // emits this, but skipping the second visit is cheap and
        // makes the intent explicit.
        if importers.contains_key(importer_path) {
            continue;
        }

        // Direct deps of this importer, appended in the order
        // production → dev → optional.
        let mut deps = Vec::new();

        if let Some(ref d) = importer.dependencies {
            for (name, info) in d {
                push_direct(
                    &mut deps,
                    &mut alias_remaps,
                    name,
                    info,
                    DepType::Production,
                );
            }
        }
        if let Some(ref d) = importer.dev_dependencies {
            for (name, info) in d {
                push_direct(&mut deps, &mut alias_remaps, name, info, DepType::Dev);
            }
        }
        if let Some(ref d) = importer.optional_dependencies {
            for (name, info) in d {
                push_direct(&mut deps, &mut alias_remaps, name, info, DepType::Optional);
            }
        }

        // Skipped optional deps are not turned into `DirectDep`s; only
        // their name → specifier mapping is kept, and importers with an
        // empty (or absent) block get no entry at all.
        if let Some(ref d) = importer.skipped_optional_dependencies
            && !d.is_empty()
        {
            let mut map = BTreeMap::new();
            for (name, info) in d {
                map.insert(name.clone(), info.specifier.clone());
            }
            skipped_optional_dependencies.insert(importer_path.to_string(), map);
        }

        importers.insert(importer_path.to_string(), deps);
    }

    // pnpm v9 splits packages (canonical, keyed by `name@version`) from
    // snapshots (contextualized, keyed by the full dep_path with any
    // `(peer@ver)` suffix). The LockfileGraph needs one entry per snapshot
    // — the same canonical package can produce multiple snapshots when
    // different parts of the tree resolve its peers differently.
    //
    // If `snapshots:` is missing (older aube lockfiles where we wrote
    // everything into packages), fall back to iterating packages directly.
    let mut packages = BTreeMap::new();

    // Harvest snapshot dependencies for any local (`file:`) package
    // that showed up in the importers loop. The canonical snapshot
    // key for a local dep is `<name>@<specifier>` — e.g.
    // `foo@file:./vendor/foo` — so we construct it from each
    // synthesized entry and pull its `dependencies` block out of the
    // raw snapshots map.
    for local_pkg in local_packages.values_mut() {
        if let Some(ref local) = local_pkg.local_source {
            let canonical = format!("{}@{}", local_pkg.name, local.specifier());
            // URL-based direct deps have their peer-context suffix
            // stripped (see `push_direct`), but the matching snapshot
            // entry pnpm wrote still carries the suffix. Fall back to
            // any snapshot whose peer-stripped canonical matches so
            // transitive dependency metadata still flows through.
            let snap = raw.snapshots.get(&canonical).or_else(|| {
                raw.snapshots.iter().find_map(|(k, v)| {
                    parse_dep_path(k)
                        .filter(|(n, ver)| format!("{n}@{ver}") == canonical)
                        .map(|_| v)
                })
            });
            if let Some(snap) = snap
                && let Some(mut deps) = snap.dependencies.clone()
            {
                rewrite_snapshot_alias_deps(&mut deps, &mut alias_remaps);
                local_pkg.dependencies = deps;
            }
            // Optional deps are merged into `dependencies` and also
            // stored on their own, matching what the main snapshot loop
            // below does for non-local packages.
            if let Some(snap) = snap
                && let Some(mut opt_deps) = snap.optional_dependencies.clone()
            {
                rewrite_snapshot_alias_deps(&mut opt_deps, &mut alias_remaps);
                local_pkg.dependencies.extend(opt_deps.clone());
                local_pkg.optional_dependencies = opt_deps;
            }
            // Prefer the authoritative LocalSource classification
            // from the `resolution:` block over the guess the
            // importers loop made from the bare specifier. For git
            // deps, preserve any `path:` selector already captured
            // from the importer's `version:` URL — pnpm v9 encodes
            // the subpath in the snapshot key and doesn't always
            // echo it on the resolution block.
            if let Some(pkg_info) = raw.packages.get(&canonical)
                && let Some(ref res) = pkg_info.resolution
                && let Some(mut ls) = local_source_from_resolution(res)
            {
                if let LocalSource::Git(ref mut g) = ls
                    && g.subpath.is_none()
                    && let Some(LocalSource::Git(prior)) = &local_pkg.local_source
                {
                    g.subpath = prior.subpath.clone();
                }
                local_pkg.local_source = Some(ls);
            }
        }
    }
    // Rebuild keys in case the local_source rewrite above changed
    // the classification — kind alone doesn't affect the encoded
    // dep_path (the hash is over the path string only), but the
    // `resolution:` block can also hand us a *different path* than
    // the importer's specifier, which does. Recompute both the map
    // key and the struct field from the final `local_source` so
    // `graph.packages.get(&dep.dep_path)` stays consistent with how
    // DirectDeps were keyed up in the importer loop above. Note
    // that any reclassification with a *new path* would leave the
    // DirectDep still pointing at the old key; pnpm's lockfiles
    // don't do that today, so we treat the re-keying as
    // defensive-only and assert equality in debug builds.
    let mut rekeyed: BTreeMap<String, LockedPackage> = BTreeMap::new();
    for (old_key, mut pkg) in local_packages {
        // `local_source` is `Some` for every entry: it is set at
        // creation in `push_direct` and only replaced with `Some(..)`
        // in the loop above.
        let new_key = pkg.local_source.as_ref().unwrap().dep_path(&pkg.name);
        pkg.dep_path = new_key.clone();
        debug_assert_eq!(
            old_key, new_key,
            "local dep_path shifted during reclassification — DirectDeps still reference {old_key}"
        );
        rekeyed.insert(new_key, pkg);
    }
    let local_packages = rekeyed;
    // Canonical keys the main loop should ignore — those are the
    // snapshot keys we already absorbed above.
    let local_canonical_keys: std::collections::HashSet<String> = local_packages
        .values()
        .filter_map(|p| {
            p.local_source
                .as_ref()
                .map(|l| format!("{}@{}", p.name, l.specifier()))
        })
        .collect();

    // Keys to walk in the main loop: snapshot keys when the lockfile
    // has any, otherwise the `packages:` keys (see the fallback note
    // above). Cloned up front so the loop owns each key.
    let snapshot_keys: Vec<String> = if raw.snapshots.is_empty() {
        raw.packages.keys().cloned().collect()
    } else {
        raw.snapshots.keys().cloned().collect()
    };

    for dep_path in snapshot_keys {
        if local_canonical_keys.contains(&dep_path) {
            continue;
        }
        let (name, version) = parse_dep_path(&dep_path)
            .ok_or_else(|| Error::parse(path, format!("invalid dep path: {dep_path}")))?;
        // URL-based direct deps are absorbed into `local_packages`
        // under the peer-stripped URL form (see `push_direct`), but the
        // snapshot key still carries any `(peer@ver)` suffix pnpm
        // appended. Check the peer-stripped canonical too so we don't
        // create a duplicate entry that round-trips as a stray
        // `packages:` block.
        if local_canonical_keys.contains(&format!("{name}@{version}")) {
            continue;
        }

        // Look up the canonical package entry by stripping any peer suffix.
        // Falls back to the full snapshot key if the canonical key is
        // not present in `packages:`.
        let canonical_key = version_to_dep_path(&name, &version);
        let pkg_info = raw
            .packages
            .get(&canonical_key)
            .or_else(|| raw.packages.get(&dep_path));

        let integrity = pkg_info
            .and_then(|p| p.resolution.as_ref())
            .and_then(|r| r.integrity.clone());

        // Registry packages record a `tarball:` URL only when
        // `lockfileIncludeTarballUrl=true` was active at write time.
        // Preserve it on read so the round-trip writes the same URL
        // back without having to reconsult the registry client.
        //
        // pnpm also writes a `tarball:` entry for non-registry transitive
        // deps whose key is a URL (remote tarball from a github override,
        // pkg.pr.new, etc.) — capture those on the same field so the
        // install path can fetch them verbatim instead of deriving a
        // registry URL that would 404.
        //
        // Only `http://` / `https://` values are kept; any other
        // `tarball:` value is dropped from this field.
        let tarball_url = pkg_info
            .and_then(|p| p.resolution.as_ref())
            .and_then(|r| r.tarball.as_ref())
            .filter(|t| t.starts_with("http://") || t.starts_with("https://"))
            .cloned();

        // pnpm writes `version: <semver>` alongside non-registry entries
        // whose dep-path key is a URL. Prefer that over the URL itself
        // when the dep-path version isn't a real semver — the install
        // path uses `pkg.version` for the store-content cross-check,
        // and comparing a URL to the tarball's declared `2.4.1` would
        // fail every github override'd package.
        //
        // Gated on `tarball_url.is_some()` so the swap only applies to
        // the remote-tarball case (where the URL is recoverable from
        // `resolution.tarball` at write time). `git+`/`git://` /
        // `.git#sha` transitive entries resolve through
        // `resolution: {type: git, commit, repo}` and need a separate
        // round-trip path — they stay on the pre-existing URL-as-
        // version behavior until that path lands.
        //
        // `version_is_http_url` is computed from the dep-path version
        // *before* the shadowing below, and is reused further down to
        // decide whether a `RemoteTarball` local source is kept.
        let version_is_http_url = version.starts_with("http://") || version.starts_with("https://");
        let version = if version_is_http_url && tarball_url.is_some() {
            pkg_info.and_then(|p| p.version.clone()).unwrap_or(version)
        } else {
            version
        };

        // `snapshot` is `None` when iterating the `packages:` fallback
        // keys; every snapshot-derived field then takes its default.
        let snapshot = raw.snapshots.get(&dep_path);
        let mut optional_dependencies = snapshot
            .and_then(|s| s.optional_dependencies.clone())
            .unwrap_or_default();
        let mut dependencies = snapshot
            .and_then(|s| s.dependencies.clone())
            .unwrap_or_default();
        rewrite_snapshot_alias_deps(&mut dependencies, &mut alias_remaps);
        rewrite_snapshot_alias_deps(&mut optional_dependencies, &mut alias_remaps);
        // Optional deps are folded into `dependencies` as well as being
        // kept in their own map; on a name clash the optional entry
        // replaces the regular one (`extend` overwrites existing keys).
        dependencies.extend(optional_dependencies.clone());
        let bundled_dependencies = snapshot
            .and_then(|s| s.bundled_dependencies.clone())
            .unwrap_or_default();
        let optional = snapshot.and_then(|s| s.optional).unwrap_or(false);
        let transitive_peer_dependencies = snapshot
            .and_then(|s| s.transitive_peer_dependencies.clone())
            .unwrap_or_default();

        // Peer metadata and platform constraints come from the
        // `packages:` entry rather than the snapshot.
        let peer_dependencies = pkg_info
            .and_then(|p| p.peer_dependencies.clone())
            .unwrap_or_default();
        let peer_dependencies_meta = pkg_info
            .and_then(|p| p.peer_dependencies_meta.clone())
            .unwrap_or_default()
            .into_iter()
            .map(|(k, v)| {
                (
                    k,
                    PeerDepMeta {
                        optional: v.optional,
                    },
                )
            })
            .collect();
        let os = pkg_info.map(|p| p.os.clone()).unwrap_or_default();
        let cpu = pkg_info.map(|p| p.cpu.clone()).unwrap_or_default();
        let libc = pkg_info.map(|p| p.libc.clone()).unwrap_or_default();
        let engines = pkg_info.map(|p| p.engines.clone()).unwrap_or_default();
        // pnpm's lockfile only stores `hasBin: true/false` (no paths);
        // reconstruct an opaque single-entry map on parse so
        // `!bin.is_empty()` stays equivalent to `hasBin`, then let
        // downstream writers fill in real paths when they have them.
        // The map key + value are placeholders — writers that care
        // about bin names (bun) read from richer sources.
        let bin = if pkg_info.map(|p| p.has_bin).unwrap_or(false) {
            let mut m = BTreeMap::new();
            m.insert(String::new(), String::new());
            m
        } else {
            BTreeMap::new()
        };
        // Aube-specific extension (see `WritablePackageInfo::alias_of`)
        // — ordinary pnpm lockfiles never carry it, so this stays
        // `None` on pnpm-authored input and round-trips the resolver-
        // emitted value for aliased packages.
        let alias_of = pkg_info.and_then(|p| p.alias_of.clone());

        // Reclassify transitive URL-keyed entries — github forks,
        // pkg.pr.new, `file:` targets — so they round-trip with the
        // right `local_source`. Without this, the install path sees
        // `local_source: None` + a URL-form version and tries to
        // fetch the dep from the npm registry (404).
        let local_source = pkg_info
            .and_then(|p| p.resolution.as_ref())
            .and_then(local_source_from_resolution);
        // `lockfileIncludeTarballUrl` puts registry tarball URLs on
        // ordinary `name@version` entries; only URL-keyed entries are
        // true remote-tarball deps.
        let local_source = match local_source {
            Some(LocalSource::RemoteTarball(_)) if !version_is_http_url => None,
            other => other,
        };

        packages.insert(
            dep_path.clone(),
            LockedPackage {
                name,
                version,
                integrity,
                dependencies,
                optional_dependencies,
                peer_dependencies,
                peer_dependencies_meta,
                dep_path,
                local_source,
                os: os.into(),
                cpu: cpu.into(),
                libc: libc.into(),
                bundled_dependencies,
                optional,
                transitive_peer_dependencies,
                tarball_url,
                alias_of,
                yarn_checksum: None,
                engines,
                bin,
                // pnpm's `snapshots:` only records resolved pins, so
                // the parser has no declared ranges to restore. Left
                // empty; npm / yarn / bun writers fall back to pins
                // when re-emitting a pnpm-sourced graph into one of
                // their formats.
                declared_dependencies: BTreeMap::new(),
                // pnpm's format doesn't carry per-package license or
                // funding metadata, so a pnpm → npm conversion
                // degrades to empty rather than re-fetching each
                // packument. npm writers skip these fields when
                // `None`.
                license: None,
                funding_url: None,
                extra_meta: BTreeMap::new(),
            },
        );
    }

    // Synthesize alias-keyed LockedPackages for npm-aliased importer
    // deps. pnpm v9 only writes the canonical (real-name-keyed) entry
    // in `packages:`; we clone it under the alias dep_path with
    // `name=alias` and `alias_of=Some(real)` so the linker — which
    // already supports this shape via the resolver-fresh path — can
    // create `node_modules/<alias>` symlinks correctly.
    for (alias_dep_path, real_dep_path, alias_name, real_name) in alias_remaps {
        // Skip if the alias entry already exists (aube-written
        // lockfile that emitted both `aliasOf:` and an alias-keyed
        // packages entry).
        //
        // This check also deduplicates repeated remaps for the same
        // alias dep path: after the first insertion below, later
        // occurrences hit this branch.
        if packages.contains_key(&alias_dep_path) {
            continue;
        }
        // Exact key first, then whatever `peerless_alias_target` finds
        // for this dep path; a miss on both is a hard parse error.
        let Some(real_pkg) = packages
            .get(&real_dep_path)
            .or_else(|| peerless_alias_target(&packages, &real_dep_path))
        else {
            return Err(Error::parse(
                path,
                format!(
                    "npm-alias references missing package {real_dep_path} (alias dep_path: {alias_dep_path})"
                ),
            ));
        };
        let mut aliased = real_pkg.clone();
        aliased.name = alias_name;
        aliased.dep_path = alias_dep_path.clone();
        aliased.alias_of = Some(real_name);
        packages.insert(alias_dep_path, aliased);
    }

    // Local packages go in last, so on a key collision the synthesized
    // local entry replaces whatever the loops above inserted.
    for (k, v) in local_packages {
        packages.insert(k, v);
    }

    // Per-field fallbacks apply when a `settings:` block exists but
    // omits a field; when the block itself is absent the struct's
    // `Default` impl is used instead.
    let settings = raw
        .settings
        .map(|s| crate::LockfileSettings {
            auto_install_peers: s.auto_install_peers.unwrap_or(true),
            exclude_links_from_lockfile: s.exclude_links_from_lockfile.unwrap_or(false),
            lockfile_include_tarball_url: s.lockfile_include_tarball_url.unwrap_or(false),
        })
        .unwrap_or_default();

    let times = raw.time.unwrap_or_default();

    // Convert the raw two-level catalog map (catalog name → package →
    // entry) into `CatalogEntry` values, field for field.
    let catalogs = raw
        .catalogs
        .unwrap_or_default()
        .into_iter()
        .map(|(name, entries)| {
            let inner = entries
                .into_iter()
                .map(|(pkg, e)| {
                    (
                        pkg,
                        CatalogEntry {
                            specifier: e.specifier,
                            version: e.version,
                        },
                    )
                })
                .collect();
            (name, inner)
        })
        .collect();

    // Each raw patched-dependency value is reduced to a `String` via
    // its `into_path()` method.
    let patched_dependencies: BTreeMap<String, String> = raw
        .patched_dependencies
        .unwrap_or_default()
        .into_iter()
        .map(|(k, v)| (k, v.into_path()))
        .collect();

    // Fields set to `None` / empty below are not populated from the
    // pnpm lockfile by this function.
    Ok(LockfileGraph {
        importers,
        packages,
        settings,
        overrides: raw.overrides.unwrap_or_default(),
        ignored_optional_dependencies: raw
            .ignored_optional_dependencies
            .unwrap_or_default()
            .into_iter()
            .collect(),
        times,
        skipped_optional_dependencies,
        catalogs,
        bun_config_version: None,
        patched_dependencies,
        trusted_dependencies: Vec::new(),
        extra_fields: BTreeMap::new(),
        workspace_extra_fields: BTreeMap::new(),
    })
}