Skip to main content

lib/mix/tasks/php.golden.ex

defmodule Mix.Tasks.Php.Golden do
  @shortdoc "Generate golden filter_var verdicts from your local `php` binary"

  @moduledoc """
  Runs every input in `test/fixtures/corpus.exs` through the real
  `filter_var(..., FILTER_VALIDATE_EMAIL)` of your local `php` binary and
  writes a golden verdict file to `test/fixtures/golden/php-<version>.tsv`.

  These golden files are committed and are the oracle the differential tests
  assert against, so anyone can verify parity with `mix test` even without PHP
  installed. Regenerate them (e.g. after adding corpus cases, or to record a
  new PHP version) with:

      mix php.golden

  The functions in this module are also used by the live test suite, so the
  test-time and CLI generation paths are identical.
  """
  use Mix.Task

  @corpus_path "test/fixtures/corpus.exs"
  @catalog_path "test/fixtures/corpus_catalog.b64"
  @gen_script "scripts/gen_golden.php"
  @golden_dir "test/fixtures/golden"

  @impl Mix.Task
  def run(_args) do
    # Resolve the interpreter first: `find_php!/0` raises when `php` is missing,
    # so nothing else runs without it.
    php_bin = find_php!()
    php_release = php_version(php_bin)
    cases = corpus_inputs()

    # The destination depends only on the version; the oracle is invoked while
    # building the file contents, before anything is written to disk.
    target = golden_path(php_release)
    contents = render_golden(php_release, php_raw_tsv(php_bin, cases))

    target |> Path.dirname() |> File.mkdir_p!()
    File.write!(target, contents)

    Mix.shell().info("Wrote #{target} (#{length(cases)} cases, php #{php_release})")
  end

  @doc """
  Returns the path of the `php` executable found on `PATH`.

  Raises via `Mix.raise/1`, with installation guidance, when
  `System.find_executable/1` cannot locate `php`.
  """
  def find_php! do
    case System.find_executable("php") do
      nil ->
        Mix.raise("`php` was not found on PATH. Install PHP (>= 8.1) to (re)generate golden files.")

      located ->
        located
    end
  end

  @doc """
  Returns the PHP version string of the given binary (e.g. "8.5.5").

  Runs `php -r "echo PHP_VERSION;"` with the given executable and trims
  surrounding whitespace from its output.

  Raises via `Mix.raise/1` (the same failure style as `php_raw_tsv/2`) when the
  interpreter exits with a non-zero status; the message carries the exit code
  and whatever the process printed.
  """
  def php_version(php) do
    case System.cmd(php, ["-r", "echo PHP_VERSION;"]) do
      {out, 0} ->
        String.trim(out)

      {out, code} ->
        Mix.raise("`#{php} -r \"echo PHP_VERSION;\"` exited #{code}:\n#{out}")
    end
  end

  @doc """
  Returns every corpus input (as binaries) with duplicates removed; the first
  occurrence of each input keeps its position.

  Inputs are gathered from two places, curated entries first:

    * `test/fixtures/corpus.exs` — curated, human-readable cases plus the
      programmatic length/boundary constructions.
    * `test/fixtures/corpus_catalog.b64` — the authoritative byte-exact quirks
      catalog (base64 per line; fields after a tab are documentation).
  """
  def corpus_inputs do
    Enum.uniq(curated_inputs() ++ catalog_inputs())
  end

  # Evaluates the curated corpus script and keeps only the input of each
  # `{input, category, note}` entry. The binding returned by the evaluation is
  # ignored. An entry that is not a 3-tuple raises rather than being skipped.
  defp curated_inputs do
    @corpus_path
    |> rooted()
    |> Code.eval_file()
    |> elem(0)
    |> Enum.map(fn {input, _category, _note} -> input end)
  end

  # Decodes the optional base64 catalog. A missing file yields no inputs.
  # Otherwise every non-empty line that does not start with `#` contributes
  # one input: the text before the first tab, trimmed and base64-decoded
  # (raising on invalid base64).
  defp catalog_inputs do
    catalog = rooted(@catalog_path)

    case File.exists?(catalog) do
      false ->
        []

      true ->
        rows = catalog |> File.read!() |> String.split("\n", trim: true)

        for row <- rows, not String.starts_with?(row, "#") do
          # Anything after the first tab is documentation and is discarded.
          [encoded | _docs] = String.split(row, "\t")
          encoded |> String.trim() |> Base.decode64!()
        end
    end
  end

  @doc """
  Invokes the PHP oracle (`scripts/gen_golden.php`) over the given inputs and
  returns its raw TSV output: lines of `base64_input\\tdefault\\tunicode`.

  The inputs are written, one base64 line each, to a temporary file whose path
  is passed to the script as its only argument. The temporary file is removed
  afterwards whether or not the run succeeded.

  Raises via `Mix.raise/1` when the script exits with a non-zero status, and
  raises `File.Error` when the temporary file cannot be written.
  """
  def php_raw_tsv(php, inputs) do
    payload = Enum.map_join(inputs, "\n", &Base.encode64/1)

    # `System.unique_integer/1` is only unique within this VM, so the OS pid is
    # included to keep concurrent runs from separate VMs from sharing (and
    # deleting) each other's temporary file.
    tmp =
      Path.join(
        System.tmp_dir!(),
        "pfve_#{System.pid()}_#{System.unique_integer([:positive])}.b64"
      )

    try do
      # Written inside the `try` so a failed or partial write is cleaned up too.
      File.write!(tmp, payload)

      case System.cmd(php, [rooted(@gen_script), tmp]) do
        {out, 0} -> out
        {out, code} -> Mix.raise("php oracle exited #{code}:\n#{out}")
      end
    after
      # Best-effort cleanup: the result of `File.rm/1` is deliberately ignored.
      File.rm(tmp)
    end
  end

  @doc """
  Parses golden TSV text (with or without `#` header/comment lines) into a list
  of `{input_binary, default_valid?, unicode_valid?}` tuples.

  Lines may end in either `\\n` or `\\r\\n`; empty lines and lines starting
  with `#` are skipped. Every remaining line must hold exactly three
  tab-separated fields: the base64-encoded input followed by the two verdict
  columns, where `"1"` means valid and any other value means invalid.

  Raises `MatchError` when a line does not have three fields and
  `ArgumentError` when the first field is not valid base64.
  """
  def parse_golden_tsv(text) do
    text
    # Accept CRLF as well as LF: a stray "\r" left on the last column would make
    # `u == "1"` false for every row of a CRLF-converted golden file.
    |> String.split(["\r\n", "\n"], trim: true)
    |> Enum.reject(&String.starts_with?(&1, "#"))
    |> Enum.map(fn line ->
      [b64, d, u] = String.split(line, "\t")
      {Base.decode64!(b64), d == "1", u == "1"}
    end)
  end

  @doc """
  Returns the expanded path of the golden file for the given PHP version
  string, i.e. `php-<version>.tsv` inside the golden fixtures directory.
  """
  def golden_path(version) do
    @golden_dir
    |> Path.join("php-#{version}.tsv")
    |> rooted()
  end

  @doc """
  Lists the paths of all golden files (`php-*.tsv`) currently present in the
  golden fixtures directory.
  """
  def golden_files do
    pattern = rooted(Path.join(@golden_dir, "php-*.tsv"))
    Path.wildcard(pattern)
  end

  # Builds the golden file contents: four `#` header lines (title, PHP version,
  # provenance, column legend), each newline-terminated, followed by the raw
  # TSV exactly as given.
  defp render_golden(version, tsv) do
    header_lines = [
      "# PHP FILTER_VALIDATE_EMAIL golden verdicts",
      "# php #{version}",
      "# generated by `mix php.golden` from #{@corpus_path}",
      "# columns: base64(input) <TAB> default(0|1) <TAB> unicode(0|1)"
    ]

    Enum.map_join(header_lines, &(&1 <> "\n")) <> tsv
  end

  # Expands `relative` against the current working directory.
  defp rooted(relative) do
    base = File.cwd!()
    Path.expand(relative, base)
  end
end