Skip to main content

lib/npm/dependency/dedupe.ex

defmodule NPM.Dependency.Dedupe do
  @moduledoc """
  Analyzes and deduplicates npm dependency trees.

  Finds packages that appear multiple times in the lockfile (or could
  be hoisted) and suggests which duplicates can be removed. This is the
  logic behind `mix npm.dedupe`.
  """

  @doc """
  Finds packages installed at multiple versions.

  Returns maps with the base package name and sorted distinct versions.
  """
  @spec find(map()) :: [%{name: String.t(), versions: [String.t()]}]
  def find(lockfile) do
    lockfile
    |> Enum.group_by(
      fn {name, _entry} -> base_name(name) end,
      fn {_name, entry} -> entry_version(entry) end
    )
    |> Enum.filter(fn {_name, versions} -> length(Enum.uniq(versions)) > 1 end)
    |> Enum.map(fn {name, versions} ->
      %{name: name, versions: versions |> Enum.uniq() |> Enum.sort()}
    end)
    |> Enum.sort_by(& &1.name)
  end

  @doc """
  Finds packages in the lockfile that could potentially be deduped.

  Returns a list of `{name, versions}` where `versions` is a list of
  version strings for packages that appear in multiple forms.
  """
  @spec find_duplicates(map()) :: [{String.t(), [String.t()]}]
  def find_duplicates(lockfile) do
    lockfile
    |> find()
    |> Enum.map(fn duplicate -> {duplicate.name, duplicate.versions} end)
  end

  @doc """
  Counts package groups installed at multiple versions.
  """
  @spec count(map()) :: non_neg_integer()
  def count(lockfile), do: lockfile |> find() |> length()

  @doc """
  Formats duplicate report for display.
  """
  @spec format_report([%{name: String.t(), versions: [String.t()]}]) :: String.t()
  def format_report([]), do: "No duplicate packages found."

  def format_report(dupes) do
    header = "Duplicate packages (#{length(dupes)}):\n"

    body =
      Enum.map_join(dupes, "\n", fn duplicate ->
        "  #{duplicate.name}: #{Enum.join(duplicate.versions, ", ")}"
      end)

    header <> body
  end

  @doc """
  Returns the number of extra package versions that dedupe could remove.
  """
  @spec potential_savings([%{name: String.t(), versions: [String.t()]}]) :: non_neg_integer()
  def potential_savings(dupes) do
    Enum.sum(Enum.map(dupes, fn duplicate -> length(duplicate.versions) - 1 end))
  end

  @doc """
  Calculates how many bytes could be saved by deduplication.

  This is an estimate based on the number of duplicate package entries.
  """
  @spec savings_estimate(map()) :: %{packages: non_neg_integer(), duplicates: non_neg_integer()}
  def savings_estimate(lockfile) do
    %{packages: map_size(lockfile), duplicates: lockfile |> find() |> potential_savings()}
  end

  @doc """
  Finds the best version of a package that satisfies all dependents.

  Given a package name and a lockfile, looks at all packages that depend
  on it and finds a version (if any) that satisfies all their ranges.
  """
  @spec best_shared_version(String.t(), map()) :: {:ok, String.t()} | :no_common_version
  def best_shared_version(name, lockfile) do
    ranges =
      lockfile
      |> Enum.flat_map(fn {_pkg, entry} ->
        case Map.get(entry.dependencies, name) do
          nil -> []
          range -> [range]
        end
      end)
      |> Enum.uniq()

    case Map.get(lockfile, name) do
      nil ->
        :no_common_version

      entry ->
        if Enum.all?(ranges, &version_satisfies?(entry.version, &1)) do
          {:ok, entry.version}
        else
          :no_common_version
        end
    end
  end

  @doc """
  Returns a summary of the deduplication analysis.
  """
  @spec summary(map()) :: %{
          total_packages: non_neg_integer(),
          unique_packages: non_neg_integer(),
          duplicate_groups: non_neg_integer(),
          saveable: non_neg_integer()
        }
  def summary(lockfile) do
    dupes = find_duplicates(lockfile)
    savings = savings_estimate(lockfile)

    %{
      total_packages: map_size(lockfile),
      unique_packages:
        lockfile |> Enum.map(fn {n, _} -> base_name(n) end) |> Enum.uniq() |> length(),
      duplicate_groups: length(dupes),
      saveable: savings.duplicates
    }
  end

  defp base_name(name) do
    name |> String.split("/node_modules/") |> List.last()
  end

  defp entry_version(%{version: version}), do: version
  defp entry_version(%{"version" => version}), do: version
  defp entry_version(_), do: "unknown"

  defp version_satisfies?(version, range) do
    NPMSemver.matches?(version, range)
  rescue
    _ -> false
  end
end