defmodule Mix.Tasks.Php.Golden do
  @shortdoc "Generate golden filter_var verdicts from your local `php` binary"

  @moduledoc """
  Runs every corpus input (`test/fixtures/corpus.exs`, plus the optional
  `test/fixtures/corpus_catalog.b64` catalog) through the real
  `filter_var(..., FILTER_VALIDATE_EMAIL)` of your local `php` binary and
  writes a golden verdict file to `test/fixtures/golden/php-<version>.tsv`.

  These golden files are committed and are the oracle the differential tests
  assert against, so anyone can verify parity with `mix test` even without PHP
  installed. Regenerate them (e.g. after adding corpus cases, or to record a
  new PHP version) with:

      mix php.golden

  The functions in this module are also used by the live test suite, so the
  test-time and CLI generation paths are identical.
  """

  use Mix.Task

  @corpus_path "test/fixtures/corpus.exs"
  @catalog_path "test/fixtures/corpus_catalog.b64"
  @gen_script "scripts/gen_golden.php"
  @golden_dir "test/fixtures/golden"

  # Fixture files may be checked out with CRLF line endings. Splitting on "\n"
  # alone would leave a trailing "\r" on every line, which silently corrupts
  # the last TSV field and turns blank lines into bogus entries.
  @line_breaks ["\r\n", "\n"]

  # Task entry point: locate php, collect the corpus, run the oracle script and
  # write the golden file for the detected PHP version. Arguments are ignored.
  @impl Mix.Task
  def run(_args) do
    php = find_php!()
    version = php_version(php)
    inputs = corpus_inputs()
    tsv = php_raw_tsv(php, inputs)
    path = golden_path(version)

    File.mkdir_p!(Path.dirname(path))
    File.write!(path, render_golden(version, tsv))

    Mix.shell().info("Wrote #{path} (#{length(inputs)} cases, php #{version})")
  end

  @doc "Absolute path to the `php` executable, or raises with guidance."
  @spec find_php!() :: String.t()
  def find_php! do
    System.find_executable("php") ||
      Mix.raise("`php` was not found on PATH. Install PHP (>= 8.1) to (re)generate golden files.")
  end

  @doc """
  Returns the PHP version string of the given binary (e.g. \"8.5.5\").

  Raises a `Mix.Error` carrying the exit code and captured output when the
  binary exits with a non-zero status.
  """
  @spec php_version(String.t()) :: String.t()
  def php_version(php) do
    case System.cmd(php, ["-r", "echo PHP_VERSION;"]) do
      {out, 0} ->
        String.trim(out)

      {out, code} ->
        Mix.raise("`#{php} -r 'echo PHP_VERSION;'` exited #{code}:\n#{out}")
    end
  end

  @doc """
  Loads all corpus inputs (binaries), de-duplicated, order preserved.

  Two sources are merged:

    * `test/fixtures/corpus.exs` — curated, human-readable cases plus the
      programmatic length/boundary constructions.
    * `test/fixtures/corpus_catalog.b64` — the authoritative byte-exact quirks
      catalog (base64 per line; fields after a tab are documentation).
  """
  @spec corpus_inputs() :: [binary()]
  def corpus_inputs do
    (curated_inputs() ++ catalog_inputs())
    |> Enum.uniq()
  end

  # Evaluates the curated corpus file and keeps only the input of each entry.
  # The file must evaluate to a list of `{input, category, note}` tuples; any
  # other shape fails the pattern match. It is evaluated as Elixir code, so it
  # must remain a trusted, repository-controlled file.
  defp curated_inputs do
    {entries, _binding} = Code.eval_file(rooted(@corpus_path))
    Enum.map(entries, fn {input, _category, _note} -> input end)
  end

  # Reads the optional base64 catalog. A missing file yields no inputs. Lines
  # starting with `#` are comments; for every other line only the first
  # tab-separated field is decoded.
  defp catalog_inputs do
    path = rooted(@catalog_path)

    if File.exists?(path) do
      path
      |> File.read!()
      |> String.split(@line_breaks, trim: true)
      |> Enum.reject(&String.starts_with?(&1, "#"))
      |> Enum.map(&decode_catalog_line/1)
    else
      []
    end
  end

  # The whole line is deliberately NOT trimmed before splitting: a line that
  # starts with a tab has an empty first field, which encodes the empty input.
  defp decode_catalog_line(line) do
    line
    |> String.split("\t")
    |> hd()
    |> String.trim()
    |> Base.decode64!()
  end

  @doc """
  Invokes the PHP oracle (`scripts/gen_golden.php`) over the given inputs and
  returns its raw TSV output: lines of `base64_input\\tdefault\\tunicode`.

  The inputs are base64-encoded, one per line, into a temporary file whose
  path is passed to the script as its only argument. The temporary file is
  removed afterwards, whether or not the script succeeded.

  Raises a `Mix.Error` with the exit code and captured output when the script
  exits with a non-zero status.
  """
  @spec php_raw_tsv(String.t(), [binary()]) :: String.t()
  def php_raw_tsv(php, inputs) do
    b64 = Enum.map_join(inputs, "\n", &Base.encode64/1)
    tmp = Path.join(System.tmp_dir!(), "pfve_#{System.unique_integer([:positive])}.b64")

    try do
      # Written inside the `try` so that a failed or partial write is still
      # cleaned up by the `after` clause.
      File.write!(tmp, b64)

      case System.cmd(php, [rooted(@gen_script), tmp]) do
        {out, 0} -> out
        {out, code} -> Mix.raise("php oracle exited #{code}:\n#{out}")
      end
    after
      # Best-effort cleanup: a failure to delete must not mask the real result.
      File.rm(tmp)
    end
  end

  @doc """
  Parses golden TSV text (with or without `#` header/comment lines) into a list
  of `{input_binary, default_valid?, unicode_valid?}` tuples.

  Both LF and CRLF line endings are accepted. A verdict column is `true` only
  when it is exactly `"1"`.

  Raises `MatchError` for a data row that does not have exactly three
  tab-separated fields, and `ArgumentError` when the first field is not valid
  base64.
  """
  @spec parse_golden_tsv(String.t()) :: [{binary(), boolean(), boolean()}]
  def parse_golden_tsv(text) do
    text
    |> String.split(@line_breaks, trim: true)
    |> Enum.reject(&String.starts_with?(&1, "#"))
    |> Enum.map(fn line ->
      [b64, d, u] = String.split(line, "\t")
      {Base.decode64!(b64), d == "1", u == "1"}
    end)
  end

  @doc "Path to the golden file for a given PHP version string."
  @spec golden_path(String.t()) :: String.t()
  def golden_path(version), do: rooted(Path.join(@golden_dir, "php-#{version}.tsv"))

  @doc "Glob of all committed golden files."
  @spec golden_files() :: [String.t()]
  def golden_files, do: Path.wildcard(rooted(Path.join(@golden_dir, "php-*.tsv")))

  # Prepends the `#`-prefixed header (title, PHP version, provenance, column
  # legend) to the raw TSV produced by the oracle script.
  defp render_golden(version, tsv) do
    """
    # PHP FILTER_VALIDATE_EMAIL golden verdicts
    # php #{version}
    # generated by `mix php.golden` from #{@corpus_path}
    # columns: base64(input) <TAB> default(0|1) <TAB> unicode(0|1)
    """ <> tsv
  end

  # Resolves a project-relative path against the current working directory.
  defp rooted(path), do: Path.expand(path, File.cwd!())
end