use aube_lockfile::LockfileGraph;
use aube_lockfile::dep_path_filename::dep_path_to_filename;
use std::collections::BTreeMap;
use std::path::Path;
/// A map of `name@version` -> raw multi-file unified diff text.
///
/// Keys must match the `spec_key()` value the resolver writes into
/// every `LockedPackage`. The value is the raw multi-file unified diff
/// text written by `aube patch-commit` (or any compatible tool).
pub type Patches = BTreeMap<String, String>;
pub(crate) fn current_patch_hashes(patches: &Patches) -> BTreeMap<String, String> {
use sha2::{Digest, Sha256};
patches
.iter()
.map(|(k, v)| {
let mut h = Sha256::new();
h.update(v.as_bytes());
(k.clone(), hex::encode(h.finalize()))
})
.collect()
}
/// Read the previously-applied patch sidecar at
/// `node_modules/.aube-applied-patches.json`. Missing or malformed
/// files return an empty map — the caller treats them as "no patches
/// were ever applied here," which conservatively triggers a re-link
/// on the first run after the linker started writing the sidecar.
pub(crate) fn read_applied_patches(nm_dir: &Path) -> BTreeMap<String, String> {
let path = nm_dir.join(".aube-applied-patches.json");
let Ok(raw) = std::fs::read_to_string(&path) else {
return Default::default();
};
serde_json::from_str(&raw).unwrap_or_default()
}
/// Write the applied-patch sidecar.
///
/// Next install reads this to compute which `.aube/<dep_path>`
/// entries need re-materializing because their patch set changed.
/// Old code was `let _ = fs::write(...)`, dropped any IO error. If
/// write silently failed (disk full, read-only mount, perms), the
/// sidecar was missing on next install, and
/// wipe_changed_patched_entries did not know which entries to
/// re-link. Install reported success while node_modules had stale
/// patched content on disk. Return Result, caller logs loudly.
pub(crate) fn write_applied_patches(
nm_dir: &Path,
map: &BTreeMap<String, String>,
) -> std::io::Result<()> {
let path = nm_dir.join(".aube-applied-patches.json");
let out = serde_json::to_string(map)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
aube_util::fs_atomic::atomic_write(&path, out.as_bytes())
}
/// Wipe `.aube/<dep_path>` for any package whose patch fingerprint
/// changed between the previous and current install. Used by the
/// per-project (no-global-store) link path, where the directory name
/// doesn't otherwise change when a patch is added or removed.
pub(crate) fn wipe_changed_patched_entries(
aube_dir: &Path,
graph: &LockfileGraph,
prev: &BTreeMap<String, String>,
curr: &BTreeMap<String, String>,
max_length: usize,
) {
let mut affected: std::collections::HashSet<String> = std::collections::HashSet::new();
for k in prev.keys().chain(curr.keys()) {
if prev.get(k) != curr.get(k) {
affected.insert(k.clone());
}
}
if affected.is_empty() {
return;
}
for (dep_path, pkg) in &graph.packages {
let key = pkg.spec_key();
if affected.contains(&key) {
let entry = aube_dir.join(dep_path_to_filename(dep_path, max_length));
let _ = std::fs::remove_dir_all(entry);
}
}
}
/// Apply a git-style multi-file unified diff to a package directory.
///
/// The patch text is split on `diff --git ` boundaries; each section
/// is parsed as a single-file unified diff and applied to the matching
/// file under `pkg_dir`. We deliberately unlink the destination
/// before writing, because the linker materializes files via reflink
/// or hardlink — modifying the file in place would corrupt the global
/// content-addressed store the linked file points to.
fn is_safe_rel_component(rel: &str) -> bool {
if rel.is_empty() || rel.contains('\0') || rel.contains('\\') {
return false;
}
let p = Path::new(rel);
if p.is_absolute()
|| p.has_root()
|| rel.starts_with('/')
|| rel.len() >= 2 && rel.as_bytes()[1] == b':'
{
return false;
}
p.components().all(|c| {
matches!(
c,
std::path::Component::Normal(_) | std::path::Component::CurDir
)
})
}
fn ensure_no_symlink_in_chain(pkg_dir: &Path, rel: &str) -> Result<(), String> {
let mut cursor = pkg_dir.to_path_buf();
for comp in Path::new(rel).components() {
cursor.push(comp);
match std::fs::symlink_metadata(&cursor) {
Ok(meta) => {
if meta.file_type().is_symlink() {
return Err(format!("{}", cursor.display()));
}
// Junctions on Windows are `IO_REPARSE_TAG_MOUNT_POINT`
// reparse points, not `IO_REPARSE_TAG_SYMLINK`, and
// `FileType::is_symlink()` returns false for them.
// Catch every reparse point via the file-attribute
// bit so a junction can't sneak the patch out of the
// package directory.
#[cfg(windows)]
{
use std::os::windows::fs::MetadataExt;
const FILE_ATTRIBUTE_REPARSE_POINT: u32 = 0x0400;
if meta.file_attributes() & FILE_ATTRIBUTE_REPARSE_POINT != 0 {
return Err(format!("{}", cursor.display()));
}
}
}
Err(ref e) if e.kind() == std::io::ErrorKind::NotFound => break,
Err(e) => return Err(format!("stat {}: {e}", cursor.display())),
}
}
Ok(())
}
pub(crate) fn apply_multi_file_patch(pkg_dir: &Path, patch_text: &str) -> Result<(), String> {
let sections = split_patch_sections(patch_text);
if sections.is_empty() {
return Err("patch contained no `diff --git` sections".to_string());
}
for section in sections {
let rel = section
.rel_path
.as_ref()
.ok_or_else(|| "patch section missing file path".to_string())?;
// Refuse patch headers that escape the package directory.
// A hostile diff with `b/../../etc/shadow` as the target
// would otherwise let the patch step overwrite or delete
// files outside the installed package. Same rules we apply
// to tar entries over in aube-store (no absolute, no drive
// prefix, no `..`, no backslash, no NUL).
if !is_safe_rel_component(rel) {
return Err(format!("patch file path escapes package: {rel:?}"));
}
// Walk every parent component of the target on disk and refuse
// to follow any symlink or junction. Without this guard, a
// package that planted a directory link inside its own tree
// (or a workspace where the user has a symlinked dep dir)
// would let `pkg_dir.join(rel)` resolve through the link, and
// `atomic_write` would overwrite a file outside `pkg_dir`.
// CVE-2018-1000156 (GNU patch) class.
if let Err(e) = ensure_no_symlink_in_chain(pkg_dir, rel) {
return Err(format!("patch target contains symlink: {e}"));
}
let target = pkg_dir.join(rel);
let original = if target.exists() {
std::fs::read_to_string(&target)
.map_err(|e| format!("failed to read {}: {e}", target.display()))?
} else {
String::new()
};
// `+++ /dev/null` means the patch deletes the file. Skip diffy
// entirely — `diffy::apply` would otherwise produce an empty
// string and we'd write a zero-byte file in place of the
// original, leaving `require('./removed')` resolving to an
// empty module instead of the expected `MODULE_NOT_FOUND`.
if section.is_deletion {
if target.exists() {
std::fs::remove_file(&target)
.map_err(|e| format!("failed to remove {}: {e}", target.display()))?;
}
continue;
}
// git-style patches always use LF line endings, but published
// tarballs frequently ship files with CRLF (Windows editors,
// `core.autocrlf=true` checkouts). Diffy is byte-exact and
// refuses to match CRLF context against LF hunk lines, so we
// normalize the original to LF before applying and restore the
// CRLF on write. pnpm's patch applier does the same thing.
let was_crlf = original.contains("\r\n");
let normalized = if was_crlf {
original.replace("\r\n", "\n")
} else {
original
};
let parsed = diffy::Patch::from_str(§ion.body)
.map_err(|e| format!("failed to parse patch for {rel}: {e}"))?;
let patched_lf = diffy::apply(&normalized, &parsed)
.map_err(|e| format!("failed to apply patch for {rel}: {e}"))?;
let patched = if was_crlf {
// Promote bare `\n` to `\r\n`, then collapse any `\r\r\n`
// back so a patch line containing a literal `\r` byte (rare
// but legal for binary-ish text) doesn't gain a second CR.
patched_lf.replace('\n', "\r\n").replace("\r\r\n", "\r\n")
} else {
patched_lf
};
// Break any reflink/hardlink to the global store before
// writing the patched bytes — otherwise we'd silently mutate
// every other project sharing this CAS file. Stage the write
// through a sibling tempfile and `rename` into place so a
// crash or Ctrl-C mid-patch cannot leave the package with
// the original file unlinked and no replacement written.
// POSIX `rename(2)` atomically replaces the destination, so
// no pre-removal is needed and removing first would create
// the exact TOCTOU window the rename is supposed to close.
// Windows `MoveFileExW` fails when the destination exists,
// so the unlink is gated behind `cfg(windows)`.
#[cfg(windows)]
{
if target.exists() {
std::fs::remove_file(&target)
.map_err(|e| format!("failed to unlink {}: {e}", target.display()))?;
}
}
aube_util::fs_atomic::atomic_write(&target, patched.as_bytes()).map_err(|e| {
format!(
"failed to write patched file into place {}: {e}",
target.display()
)
})?;
}
Ok(())
}
struct PatchSection {
rel_path: Option<String>,
/// Single-file unified diff body — `diffy::Patch::from_str` parses
/// this directly. Always begins with `--- ` so the diffy parser
/// finds its anchor.
body: String,
/// `+++ /dev/null` was seen in the header — the patch deletes this
/// file, so the linker should `remove_file` instead of writing
/// patched bytes (which `diffy::apply` would emit as an empty
/// string).
is_deletion: bool,
}
/// Split a git-style multi-file patch into one section per file.
/// We look for `diff --git a/<path> b/<path>` markers, pull the path
/// out of the `b/...` half (post-edit name), and capture everything
/// from the next `--- ` line until the following `diff --git ` (or
/// EOF) as the diffy-compatible body.
fn parse_diff_git_b_path(rest: &str) -> Option<String> {
if let Some(after) = rest.strip_prefix("\"a/") {
let end_a = after.find("\" \"b/")?;
let after_b = &after[end_a + 5..];
let close = after_b.rfind('"')?;
return unescape_git_quoted(&after_b[..close]);
}
let body = rest.strip_prefix("a/")?;
let mut search_from = 0;
while let Some(rel) = body[search_from..].find(" b/") {
let abs = search_from + rel;
let path_a = &body[..abs];
let path_b = &body[abs + 3..];
if path_a == path_b {
return Some(path_b.to_string());
}
search_from = abs + 1;
}
body.find(" b/").map(|i| body[i + 3..].to_string())
}
fn unescape_git_quoted(s: &str) -> Option<String> {
let bytes = s.as_bytes();
let mut out: Vec<u8> = Vec::with_capacity(bytes.len());
let mut i = 0;
while i < bytes.len() {
if bytes[i] != b'\\' {
out.push(bytes[i]);
i += 1;
continue;
}
if i + 1 >= bytes.len() {
return None;
}
match bytes[i + 1] {
b'\\' => {
out.push(b'\\');
i += 2;
}
b'"' => {
out.push(b'"');
i += 2;
}
b'n' => {
out.push(b'\n');
i += 2;
}
b't' => {
out.push(b'\t');
i += 2;
}
b'r' => {
out.push(b'\r');
i += 2;
}
b'a' => {
out.push(0x07);
i += 2;
}
b'b' => {
out.push(0x08);
i += 2;
}
b'f' => {
out.push(0x0C);
i += 2;
}
b'v' => {
out.push(0x0B);
i += 2;
}
d0 @ b'0'..=b'3'
if i + 3 < bytes.len()
&& (b'0'..=b'7').contains(&bytes[i + 2])
&& (b'0'..=b'7').contains(&bytes[i + 3]) =>
{
let n = ((d0 - b'0') << 6) | ((bytes[i + 2] - b'0') << 3) | (bytes[i + 3] - b'0');
out.push(n);
i += 4;
}
_ => return None,
}
}
String::from_utf8(out).ok()
}
fn split_patch_sections(text: &str) -> Vec<PatchSection> {
let mut out: Vec<PatchSection> = Vec::new();
let mut current_path: Option<String> = None;
let mut body = String::new();
let mut in_body = false;
let mut is_deletion = false;
let flush = |out: &mut Vec<PatchSection>,
path: &mut Option<String>,
body: &mut String,
is_deletion: &mut bool| {
if !body.is_empty() || *is_deletion {
out.push(PatchSection {
rel_path: path.take(),
body: std::mem::take(body),
is_deletion: std::mem::replace(is_deletion, false),
});
} else {
*path = None;
}
};
for line in text.split_inclusive('\n') {
let stripped = line.trim_end_matches(['\n', '\r']);
if let Some(rest) = stripped.strip_prefix("diff --git ") {
// New file boundary — flush whatever we were collecting.
flush(&mut out, &mut current_path, &mut body, &mut is_deletion);
in_body = false;
// Parse `a/<path> b/<path>` and prefer the post-edit
// (`b/`) path so renames land on the new name.
current_path = parse_diff_git_b_path(rest);
continue;
}
if !in_body {
if stripped.starts_with("--- ") {
in_body = true;
// Rewrite `--- /dev/null` (file addition) to `--- a/<path>`
// so diffy's parser still gets a valid header. The
// original file content we feed `diffy::apply` is empty
// for additions, which is what diffy expects.
if stripped == "--- /dev/null"
&& let Some(rel) = current_path.as_deref()
{
body.push_str(&format!("--- a/{rel}\n"));
} else {
body.push_str(stripped);
body.push('\n');
}
}
// Skip git's `index ...` / `new file mode ...` /
// `similarity index ...` decorations — diffy doesn't
// understand them and they aren't needed once we know
// the target path.
continue;
}
if stripped == "+++ /dev/null" {
// File deletion — note it and drop this header line. The
// linker will `remove_file` and skip the diffy apply path
// entirely, so the rest of the body (the hunk that empties
// the file) is intentionally discarded.
is_deletion = true;
continue;
}
body.push_str(stripped);
body.push('\n');
}
flush(&mut out, &mut current_path, &mut body, &mut is_deletion);
out
}
#[cfg(test)]
mod tests {
use super::*;
#[cfg(windows)]
#[test]
fn apply_multi_file_patch_refuses_to_follow_junction_outside_pkg() {
let outside = tempfile::tempdir().unwrap();
let pkg_root = tempfile::tempdir().unwrap();
let pkg = pkg_root.path().join("pkg");
std::fs::create_dir_all(&pkg).unwrap();
let escape = pkg.join("escape");
junction::create(outside.path(), &escape).unwrap();
let target = outside.path().join("victim.txt");
std::fs::write(&target, "untouched\n").unwrap();
let patch = "diff --git a/escape/victim.txt b/escape/victim.txt\n\
--- a/escape/victim.txt\n\
+++ b/escape/victim.txt\n\
@@ -1 +1 @@\n\
-untouched\n\
+PWNED\n";
let result = apply_multi_file_patch(&pkg, patch);
assert!(result.is_err(), "patch must refuse junction-bearing rel");
let after = std::fs::read_to_string(&target).unwrap();
assert_eq!(after, "untouched\n");
}
#[cfg(unix)]
#[test]
fn apply_multi_file_patch_refuses_to_follow_symlink_outside_pkg() {
let outside = tempfile::tempdir().unwrap();
let pkg_root = tempfile::tempdir().unwrap();
let pkg = pkg_root.path().join("pkg");
std::fs::create_dir_all(&pkg).unwrap();
let escape = pkg.join("escape");
std::os::unix::fs::symlink(outside.path(), &escape).unwrap();
let target = outside.path().join("victim.txt");
std::fs::write(&target, "untouched\n").unwrap();
let patch = "diff --git a/escape/victim.txt b/escape/victim.txt\n\
--- a/escape/victim.txt\n\
+++ b/escape/victim.txt\n\
@@ -1 +1 @@\n\
-untouched\n\
+PWNED\n";
let result = apply_multi_file_patch(&pkg, patch);
assert!(result.is_err(), "patch must refuse symlink-bearing rel");
let after = std::fs::read_to_string(&target).unwrap();
assert_eq!(after, "untouched\n");
}
#[test]
fn round_trips_simple_patch() {
let dir = tempfile::tempdir().unwrap();
let pkg = dir.path().join("pkg");
std::fs::create_dir_all(&pkg).unwrap();
std::fs::write(pkg.join("index.js"), "module.exports = 'old';\n").unwrap();
let patch = "diff --git a/index.js b/index.js\n\
--- a/index.js\n\
+++ b/index.js\n\
@@ -1 +1 @@\n\
-module.exports = 'old';\n\
+module.exports = 'new';\n";
apply_multi_file_patch(&pkg, patch).unwrap();
assert_eq!(
std::fs::read_to_string(pkg.join("index.js")).unwrap(),
"module.exports = 'new';\n"
);
}
#[test]
fn crlf_patch_path_does_not_carry_carriage_return() {
let patch = "diff --git a/index.js b/index.js\r\n\
--- a/index.js\r\n\
+++ b/index.js\r\n\
@@ -1 +1 @@\r\n\
-module.exports = 'old';\r\n\
+module.exports = 'new';\r\n";
let sections = split_patch_sections(patch);
assert_eq!(sections.len(), 1);
assert_eq!(sections[0].rel_path.as_deref(), Some("index.js"));
}
#[test]
fn crlf_deletion_patch_recognized() {
let patch = "diff --git a/removed.js b/removed.js\r\n\
deleted file mode 100644\r\n\
--- a/removed.js\r\n\
+++ /dev/null\r\n\
@@ -1 +0,0 @@\r\n\
-gone\r\n";
let sections = split_patch_sections(patch);
assert_eq!(sections.len(), 1);
assert!(sections[0].is_deletion);
}
#[test]
fn diff_git_path_with_space_b_substring() {
let patch = "diff --git a/a b/c.js b/a b/c.js\n\
--- a/a b/c.js\n\
+++ b/a b/c.js\n\
@@ -1 +1 @@\n\
-x\n\
+y\n";
let sections = split_patch_sections(patch);
assert_eq!(sections.len(), 1);
assert_eq!(sections[0].rel_path.as_deref(), Some("a b/c.js"));
}
#[test]
fn diff_git_quoted_path_form() {
let patch = "diff --git \"a/path with spaces.js\" \"b/path with spaces.js\"\n\
--- a/path with spaces.js\n\
+++ b/path with spaces.js\n\
@@ -1 +1 @@\n\
-x\n\
+y\n";
let sections = split_patch_sections(patch);
assert_eq!(sections.len(), 1);
assert_eq!(sections[0].rel_path.as_deref(), Some("path with spaces.js"));
}
#[test]
fn applies_lf_patch_against_crlf_file() {
// Tarballs published from Windows editors ship CRLF text. pnpm
// / git emit LF-only patches even against those files. Diffy is
// byte-exact, so the apply path normalizes CRLF -> LF before
// matching and restores CRLF on write.
let dir = tempfile::tempdir().unwrap();
let pkg = dir.path().join("pkg");
std::fs::create_dir_all(&pkg).unwrap();
std::fs::write(pkg.join("a.txt"), b"one\r\ntwo\r\nthree\r\n").unwrap();
let patch = "diff --git a/a.txt b/a.txt\n\
--- a/a.txt\n\
+++ b/a.txt\n\
@@ -1,3 +1,3 @@\n\
\x20one\n\
-two\n\
+TWO\n\
\x20three\n";
apply_multi_file_patch(&pkg, patch).unwrap();
let bytes = std::fs::read(pkg.join("a.txt")).unwrap();
assert_eq!(bytes, b"one\r\nTWO\r\nthree\r\n");
}
#[test]
fn crlf_restore_preserves_embedded_cr_byte() {
// A patch line that adds a literal `\r` byte mid-line must not
// gain a second `\r` when we re-CRLF the output. Naive
// `replace('\n', "\r\n")` would turn `\r\n` into `\r\r\n`; the
// `\r\r\n` collapse undoes that.
let dir = tempfile::tempdir().unwrap();
let pkg = dir.path().join("pkg");
std::fs::create_dir_all(&pkg).unwrap();
std::fs::write(pkg.join("a.txt"), b"one\r\ntwo\r\n").unwrap();
let patch = "diff --git a/a.txt b/a.txt\n\
--- a/a.txt\n\
+++ b/a.txt\n\
@@ -1,2 +1,2 @@\n\
-one\n\
+has\rcr\n\
\x20two\n";
apply_multi_file_patch(&pkg, patch).unwrap();
let bytes = std::fs::read(pkg.join("a.txt")).unwrap();
assert_eq!(bytes, b"has\rcr\r\ntwo\r\n");
}
#[test]
fn diff_git_quoted_path_unescapes_git_escapes() {
let path = parse_diff_git_b_path(r#""a/foo\".js" "b/foo\".js""#).expect("quoted parse");
assert_eq!(path, "foo\".js");
let path = parse_diff_git_b_path(r#""a/back\\slash.js" "b/back\\slash.js""#)
.expect("backslash parse");
assert_eq!(path, "back\\slash.js");
let path = parse_diff_git_b_path("\"a/caf\\303\\251.js\" \"b/caf\\303\\251.js\"")
.expect("octal parse");
assert_eq!(path, "café.js");
}
}