%% src/rebar3_erli18n_prv_check.erl

-module(rebar3_erli18n_prv_check).

-moduledoc """
`rebar3 erli18n check` — fail the build when the `.pot` templates are stale.

This is the `mix gettext --check-up-to-date` experience for Erlang: it
re-extracts the project and compares the result against the committed
`.pot` files. By DEFAULT it detects FULL drift — both the msgid set AND the
`#:` references — so a moved or renamed call site is caught, matching the
Elixir behaviour. The laxer `--names-only` mode compares only the msgid set
(stable against pure line-churn) for teams that find reference drift noisy.

Because extraction is literal-only, a legitimately dynamic (non-literal)
key is never extracted into the `.pot` in the first place, so it can never
produce a false drift failure in either mode — the dynamic-key guarantee.
""".

%% This module implements the rebar3 `provider` contract (`init/1`, `do/1`,
%% `format_error/1`); it is registered via `providers:create([{module, ?MODULE}, ...])`
%% in `init/1`. The `-behaviour(provider)` attribute is intentionally omitted:
%% the `provider` behaviour ships inside the rebar3 escript (stripped of
%% debug_info and not on Hex), so neither dialyzer nor eqwalizer can load its
%% callback info standalone — the attribute would only yield false
%% "behaviour/callback not available" diagnostics for a contract the exports
%% already satisfy.

-export([init/1, do/1, format_error/1]).

-define(PROVIDER, check).
-define(NAMESPACE, erli18n).
-define(DEPS, [{default, compile}]).

-doc "Create the `check` provider (namespace `erli18n`) and add it to the given state.".
-spec init(rebar3_erli18n_host:state()) -> {ok, rebar3_erli18n_host:state()}.
init(State0) ->
    ShortDesc = "Fail the build when committed .pot templates are out of date (CI gate).",
    %% Adjacent string literals are joined by the compiler into one string.
    Desc =
        "Re-extract the project and compare against the committed .pot files. Defaults to "
        "full drift detection (msgids AND #: references), the mix gettext --check-up-to-date "
        "experience. Pass --names-only for the laxer msgid-set-only comparison. Legitimately "
        "dynamic keys are never extracted, so they never cause a false failure.",
    Spec = [
        {name, ?PROVIDER},
        {namespace, ?NAMESPACE},
        {module, ?MODULE},
        {bare, true},
        {deps, ?DEPS},
        {example, "rebar3 erli18n check"},
        {opts, rebar3_erli18n_common:common_opts()},
        {short_desc, ShortDesc},
        {desc, Desc}
    ],
    Provider = rebar3_erli18n_host:create_provider(Spec),
    State1 = rebar3_erli18n_host:add_provider(State0, Provider),
    {ok, State1}.

-doc """
Re-extract the project and compare it against the committed templates.

Returns `{ok, State}` when every catalog is up to date, and `{error, Text}`
when extraction fails or any drift is found.
""".
-spec do(rebar3_erli18n_host:state()) -> {ok, rebar3_erli18n_host:state()} | {error, string()}.
do(State) ->
    case rebar3_erli18n_common:extract_project(State) of
        {error, Reason} ->
            {error, format_error(Reason)};
        {ok, ByDomain} ->
            NamesOnly = names_only(State),
            check_result(State, check_all(State, ByDomain, NamesOnly))
    end.

%% Turn the outcome of `check_all/3` into the provider's return value: log and
%% succeed when clean, otherwise render the drift summary as the error text.
-spec check_result(State, ok | {drift, binary()}) -> {ok, State} | {error, string()} when
    State :: rebar3_erli18n_host:state().
check_result(State, ok) ->
    rebar3_erli18n_host:info("erli18n: catalogs up to date", []),
    {ok, State};
check_result(_State, {drift, _Summary} = Drift) ->
    {error, format_error(Drift)}.

-doc "Turn a provider error term into printable text via `rebar3_erli18n_common:format_error/1`.".
-spec format_error(term()) -> string().
format_error(Error) ->
    rebar3_erli18n_common:format_error(Error).

%% =========================
%% Comparison
%% =========================

%% `true` only when the parsed command-line arguments carry `names_only` with
%% the value `true` (or as a bare atom); anything else, including absence,
%% is `false`.
-spec names_only(rebar3_erli18n_host:state()) -> boolean().
names_only(State) ->
    proplists:get_bool(names_only, rebar3_erli18n_host:parsed_args(State)).

%% Compare every domain known to EITHER side: the domains produced by the
%% fresh extraction plus the domains that have a committed `.pot` on disk.
%% Taking the union is what catches a domain whose call sites have ALL gone:
%% the fresh extraction no longer has its key, but the stale `.pot` remains,
%% so it is compared against an empty entry list and reported as drift.
%% Walking only `ByDomain` would skip it silently.
%%
%% Domains are keyed by their `.pot` NAME (a string: the basename without the
%% extension), never by atom. Fresh atom keys are converted with
%% `atom_to_list/1`, which creates no atoms, and the disk side supplies names
%% taken from filenames. Because no filesystem-derived basename is ever passed
%% to `list_to_atom/1`, stray `*.pot` files cannot exhaust the atom table.
%%
%% Returns `ok`, or `{drift, Summary}` where `Summary` is the per-domain
%% messages joined with newlines, in domain-name order.
-spec check_all(rebar3_erli18n_host:state(), #{atom() => [Entry]}, boolean()) ->
    ok | {drift, binary()}
when
    Entry :: rebar3_erli18n_common:dedup_entry().
check_all(State, ByDomain, NamesOnly) ->
    PotDir = rebar3_erli18n_common:pot_dir(State),
    ByName = maps:from_list(
        [{atom_to_list(Domain), Entries} || {Domain, Entries} <- maps:to_list(ByDomain)]
    ),
    %% One domain -> zero or one drift message. A name that exists only on
    %% disk falls back to `[]`, i.e. it is compared with an empty catalog.
    DriftOf = fun(Name) ->
        case check_one(PotDir, Name, maps:get(Name, ByName, []), NamesOnly) of
            ok -> [];
            {drift, Msg} -> [Msg]
        end
    end,
    case lists:flatmap(DriftOf, all_domain_names(ByName, PotDir)) of
        [] -> ok;
        Drifts -> {drift, iolist_to_binary(lists:join(~"\n", Drifts))}
    end.

%% Sorted, duplicate-free union of two sets of domain names: the keys of the
%% fresh extraction and the names that have a committed `<Name>.pot` under
%% `PotDir`. The fixed ordering makes the drift report deterministic no
%% matter how the map or the directory happens to be iterated.
-spec all_domain_names(#{string() => [Entry]}, file:filename()) -> [string()] when
    Entry :: rebar3_erli18n_common:dedup_entry().
all_domain_names(ByName, PotDir) ->
    Extracted = lists:usort(maps:keys(ByName)),
    Committed = lists:usort(committed_pot_names(PotDir)),
    lists:umerge(Extracted, Committed).

%% Domain names of the templates committed directly under `PotDir`: each
%% `*.pot` basename with its extension removed, as a string. When `PotDir`
%% does not exist the result is `[]`.
-spec committed_pot_names(file:filename()) -> [string()].
committed_pot_names(PotDir) ->
    %% `filelib:wildcard/2` returns the matches relative to PotDir as
    %% `[string()]`. The extension is cut off with a plain list slice rather
    %% than `filename:basename/2`, because that function's `file:name_all()`
    %% return type would lose string-ness under eqwalizer.
    ExtLen = length(".pot"),
    lists:map(
        fun(File) -> lists:sublist(File, length(File) - ExtLen) end,
        filelib:wildcard("*.pot", PotDir)
    ).

%% Check a single domain: read the committed `<Name>.pot` under `PotDir` and
%% compare it with the template rendered from the freshly extracted `Entries`.
%%
%% Returns `ok` when they agree, and `{drift, Message}` when they differ, when
%% the committed file is missing, or when it cannot be read for any other
%% reason (permissions, a directory named `<Name>.pot`, ...). An unreadable
%% template cannot be shown to be fresh, so it fails the check with a message
%% instead of crashing the provider with a `case_clause`.
-spec check_one(file:filename(), string(), [Entry], boolean()) -> ok | {drift, binary()} when
    Entry :: rebar3_erli18n_common:dedup_entry().
check_one(PotDir, Name, Entries, NamesOnly) ->
    Path = filename:join(PotDir, Name ++ ".pot"),
    case file:read_file(Path) of
        {ok, Committed} ->
            %% The fresh dump is only needed once there is something to
            %% compare it with, so it is rendered here, not up front.
            Fresh = rebar3_erli18n_po_meta:dump(rebar3_erli18n_common:entries_to_pot(Entries)),
            compare(Name, Path, Committed, Fresh, NamesOnly);
        {error, enoent} ->
            {drift, drift_msg(Name, Path, "missing .pot (run `rebar3 erli18n extract`)")};
        {error, Reason} ->
            Why = "unreadable .pot (" ++ file:format_error(Reason) ++ ")",
            {drift, drift_msg(Name, Path, Why)}
    end.

%% Decide whether the committed template matches the fresh one.
%%
%% With `NamesOnly = false` (full mode) the two dumps are compared as binaries
%% after `normalize/1`, so both msgids and references count. With
%% `NamesOnly = true` only the sorted msgid keys are compared and references
%% are ignored; a committed file that fails to parse is reported as drift too.
-spec compare(string(), file:filename(), binary(), binary(), boolean()) -> ok | {drift, binary()}.
compare(Name, Path, Committed, Fresh, false) ->
    Same = normalize(Committed) =:= normalize(Fresh),
    drift_unless(Same, Name, Path, "out of date (msgid or reference changed)");
compare(Name, Path, Committed, Fresh, true) ->
    CommittedKeys = msgid_set(Committed),
    FreshKeys = msgid_set(Fresh),
    %% `parse_error` on either side can never count as a match.
    Same = CommittedKeys =/= parse_error andalso CommittedKeys =:= FreshKeys,
    drift_unless(Same, Name, Path, "out of date (msgid set changed)").

%% `ok` when the comparison succeeded, otherwise a drift report carrying `Why`.
-spec drift_unless(boolean(), string(), file:filename(), string()) -> ok | {drift, binary()}.
drift_unless(true, _Name, _Path, _Why) ->
    ok;
drift_unless(false, Name, Path, Why) ->
    {drift, drift_msg(Name, Path, Why)}.

%% Strip every trailing newline, so that a committed file with or without a
%% final newline still compares equal in the byte comparison. Only newlines
%% at the very end are removed; all other content is left untouched.
-spec normalize(binary()) -> binary().
normalize(Contents) ->
    string:trim(Contents, trailing, [$\n]).

%% Reduce a `.po`/`.pot` binary to its sorted list of `{Context, Msgid}` keys,
%% discarding everything else about each entry. This is the comparison key for
%% `--names-only`. Input that `erli18n_po:parse/1` rejects becomes
%% `parse_error`; since that never equals an `{ok, _}` result, the caller
%% ends up treating it as drift.
-spec msgid_set(binary()) -> {ok, [{undefined | binary(), binary()}]} | parse_error.
msgid_set(Bin) ->
    case erli18n_po:parse(Bin) of
        {error, _} ->
            parse_error;
        {ok, #{entries := Entries}} ->
            Keys = lists:map(fun key_of/1, Entries),
            {ok, lists:sort(Keys)}
    end.

%% The `{Context, Msgid}` identity of a parsed entry: the second and third
%% tuple elements of both the `singular` and the `plural` shape.
-spec key_of(erli18n_po:entry()) -> {undefined | binary(), binary()}.
key_of({singular, Context, Id, _}) ->
    {Context, Id};
key_of({plural, Context, Id, _, _}) ->
    {Context, Id}.

%% Render one line of the drift report: two spaces of indent, the template
%% file name (`Name` plus ".pot"), the path in parentheses, then the reason.
%%
%% The result is UTF-8 encoded. `~ts` can emit code points above 255 (for
%% example from a non-Latin directory or domain name); `iolist_to_binary/1`
%% raises `badarg` on those and writes Latin-1 bytes for 128..255, so the
%% conversion goes through `unicode:characters_to_binary/1` instead.
-spec drift_msg(string(), file:filename(), string()) -> binary().
drift_msg(Name, Path, Why) ->
    Chars = io_lib:format("  ~ts (~ts): ~ts", [Name ++ ".pot", Path, Why]),
    %% The guard narrows the result to `binary()`. The `{error, _, _}` and
    %% `{incomplete, _, _}` returns are not expected for text that `~ts` has
    %% already formatted, and would surface as a crash here.
    case unicode:characters_to_binary(Chars) of
        Msg when is_binary(Msg) -> Msg
    end.