-module(rebar3_erli18n_common).
-moduledoc """
Shared plumbing for the `extract`/`merge`/`check`/`report` providers.
Centralizes the parts that would otherwise be duplicated across the four
providers: the common getopt option set, project source discovery + the
abstract-form walk, deduplication of extracted call sites into catalog
entries (merging `#:` references), `.pot`/`.po` directory resolution, and a
uniform `format_error/1`. Keeping this in one module makes the providers
thin wrappers and lets a single suite cover the walk/dedup logic to 100%.
## Catalog layout
The `.pot` templates live in `priv/gettext/<Domain>.pot`; the translated
catalogs in `priv/gettext/<Locale>/LC_MESSAGES/<Domain>.po`. This mirrors
the runtime loader's default path (`erli18n:default_po_path/3`) so a
project's extracted templates and loaded catalogs share one tree.
""".
-export([
common_opts/0,
pot_dir/1,
po_path/3,
extract_project/1,
entries_to_pot/1,
dedup_entries/1,
runtime_lib_path/0,
format_lib_path/1,
maybe_log_runtime_lib_path/0,
format_error/1
]).
-export_type([dedup_entry/0]).
-doc """
A deduplicated catalog entry: one logical `{Domain, Context, Msgid}` with
all the `#:` references that pointed at it (in first-seen source order).
`kind`/`plural` come from the first occurrence.

Fields:

- `domain`: the domain the entry was grouped under.
- `kind`: a `rebar3_erli18n_keywords:kind()`; `.pot` construction in this
  module handles `singular` and `plural`.
- `context`: `undefined` or a binary; `undefined` sorts as the empty binary.
- `msgid`: the message id; together with `context` it is the dedup key.
- `plural`: the plural msgid; only read for `kind := plural` entries.
- `references`: the merged `#:` references, duplicates removed.
""".
-type dedup_entry() :: #{
domain := atom(),
kind := rebar3_erli18n_keywords:kind(),
context := undefined | binary(),
msgid := binary(),
plural := undefined | binary(),
references := [reference_ref()]
}.
-doc """
A `#:` source reference: a relative source path and a 1-based line.

During project extraction the path is rewritten from the path the scanner
reported to a relative one that starts at a `src` segment, so references do
not embed a machine-specific prefix.
""".
-type reference_ref() :: {file:filename(), pos_integer()}.
-doc """
The getopt option specification every provider shares.

Each element is `{Name, ShortChar | undefined, LongName, Type, Help}`:

- `--domain` / `-d`: restrict the operation to one gettext domain.
- `--locale` / `-l`: the target locale (used by merge/report).
- `--names-only`: make `check` compare only the msgid set.
- `--pot-dir`: override the default `priv/gettext` catalog root.
""".
-spec common_opts() ->
    [{atom(), char() | undefined, string(), atom() | tuple(), string()}].
common_opts() ->
    DomainOpt =
        {domain, $d, "domain", string, "Restrict to a single gettext domain (default: all)."},
    LocaleOpt =
        {locale, $l, "locale", string, "Target locale (merge/report)."},
    NamesOnlyOpt =
        {names_only, undefined, "names-only", boolean,
            "check: compare only the msgid set, ignoring #: reference drift."},
    PotDirOpt =
        {pot_dir, undefined, "pot-dir", string,
            "Catalog root directory (default: <app>/priv/gettext)."},
    %% Order is part of the result: it is the order getopt lists the options.
    [DomainOpt, LocaleOpt, NamesOnlyOpt, PotDirOpt].
%% =========================
%% Path resolution
%% =========================
-doc """
Resolve the catalog root directory for this run.

Returns the `pot_dir` value from the parsed command-line arguments when one
is present (the `--pot-dir` override); otherwise falls back to
`<RootApp>/priv/gettext`, where the root app is the first project app.
""".
-spec pot_dir(rebar3_erli18n_host:state()) -> file:filename().
pot_dir(State) ->
    ParsedArgs = rebar3_erli18n_host:parsed_args(State),
    pot_dir_or_default(proplists:get_value(pot_dir, ParsedArgs), State).

%% `undefined` is what `proplists:get_value/2` yields when the option is
%% absent; any other value is the override and is returned untouched.
-spec pot_dir_or_default(undefined | file:filename(), rebar3_erli18n_host:state()) ->
    file:filename().
pot_dir_or_default(undefined, State) -> default_pot_dir(State);
pot_dir_or_default(Override, _State) -> Override.
%% The default catalog root: `priv/gettext` under the root app directory.
-spec default_pot_dir(rebar3_erli18n_host:state()) -> file:filename().
default_pot_dir(State) ->
    PrivDir = filename:join(root_app_dir(State), "priv"),
    filename:join(PrivDir, "gettext").
-doc """
The translated-catalog path for a `{Domain, Locale}` pair:
`<pot_dir>/<Locale>/LC_MESSAGES/<Domain>.po`.

`<pot_dir>` is whatever `pot_dir/1` resolves for `State`, so a `--pot-dir`
override moves the `.po` tree along with the templates.
""".
-spec po_path(rebar3_erli18n_host:state(), atom(), string()) -> file:filename().
po_path(State, Domain, Locale) ->
    MessagesDir = filename:join([pot_dir(State), Locale, "LC_MESSAGES"]),
    CatalogFile = atom_to_list(Domain) ++ ".po",
    filename:join(MessagesDir, CatalogFile).
%% The directory treated as the project root for catalog paths: the first
%% project app's directory, or the state's own directory when the project
%% has no apps at all.
-spec root_app_dir(rebar3_erli18n_host:state()) -> file:filename().
root_app_dir(State) ->
    first_app_dir(rebar3_erli18n_host:project_apps(State), State).

%% Dispatch on the project app list: non-empty -> first app, empty -> state dir.
-spec first_app_dir([rebar3_erli18n_host:app_info()], rebar3_erli18n_host:state()) ->
    file:filename().
first_app_dir([RootApp | _Others], _State) ->
    rebar3_erli18n_host:app_dir(RootApp);
first_app_dir([], State) ->
    rebar3_erli18n_host:state_dir(State).
%% =========================
%% Project extraction
%% =========================
-doc """
Scan the `src/` tree of every project app and collect all recognized call
sites, grouped by domain and deduplicated within each domain.

Returns `{ok, #{Domain => [dedup_entry()]}}` on success. Scanning stops at
the first file that fails, and that `{error, Reason}` is returned unchanged.
Each domain's list is sorted by `{Context, Msgid}` so the output is
deterministic and diff-stable.

As a side effect, the optional load-path diagnostic
(`maybe_log_runtime_lib_path/0`) runs first.
""".
-spec extract_project(rebar3_erli18n_host:state()) ->
    {ok, #{atom() => [dedup_entry()]}} | {error, term()}.
extract_project(State) ->
    maybe_log_runtime_lib_path(),
    ProjectApps = rebar3_erli18n_host:project_apps(State),
    SearchDirs = include_dirs(ProjectApps),
    Sources = [Source || App <- ProjectApps, Source <- app_src_files(App)],
    finish_extraction(extract_files(Sources, SearchDirs, [])).

%% Post-process the scan result: dedup on success, pass an error through.
-spec finish_extraction({ok, [rebar3_erli18n_extract_forms:extracted()]} | {error, term()}) ->
    {ok, #{atom() => [dedup_entry()]}} | {error, term()}.
finish_extraction({ok, RawEntries}) ->
    {ok, group_and_dedup(RawEntries)};
finish_extraction({error, _Reason} = Failure) ->
    Failure.
%% =========================
%% Cross-package load-path diagnostic
%% =========================
-doc """
Where the `erli18n_po` runtime module would be loaded from, exactly as
`code:which/1` reports it at the moment of the call: the `.beam` path, or
one of the atoms `non_existing` (module not on the code path), `preloaded`,
or `cover_compiled`.

The providers reach `erli18n_po` across the `{deps, [erli18n]}` boundary, so
this value shows which copy of the runtime library backs those calls (for
example a `_checkouts/erli18n` checkout rather than a Hex fetch). See
`apps/rebar3_erli18n/README.md` ("Proven cross-package load path").
""".
-spec runtime_lib_path() -> non_existing | cover_compiled | preloaded | file:filename().
runtime_lib_path() ->
    RuntimeModule = erli18n_po,
    code:which(RuntimeModule).
-doc """
Opt-in load-path diagnostic.

If the OS environment variable `ERLI18N_DIAG_LOADPATH` is set, the location
of the loaded `erli18n_po` module is logged through
`rebar3_erli18n_host:info/2`, so it can be captured from a real
`rebar3 erli18n {extract,merge,check,report}` run. If the variable is unset
nothing is logged and `ok` is returned, so ordinary runs stay quiet.
""".
-spec maybe_log_runtime_lib_path() -> ok.
maybe_log_runtime_lib_path() ->
    log_lib_path_when_set(os:getenv("ERLI18N_DIAG_LOADPATH")).

%% `os:getenv/1` returns `false` for an unset variable; any other value
%% enables the diagnostic line.
%% NOTE(review): the enabled branch returns whatever
%% `rebar3_erli18n_host:info/2` returns - presumably `ok`; confirm.
-spec log_lib_path_when_set(false | string()) -> ok.
log_lib_path_when_set(false) ->
    ok;
log_lib_path_when_set(_Value) ->
    LibPath = format_lib_path(runtime_lib_path()),
    rebar3_erli18n_host:info(
        "erli18n: runtime lib erli18n_po loaded from ~ts",
        [LibPath]
    ).
-doc """
Turn a `code:which/1` result into a printable string.

A special atom (`non_existing`, `preloaded`, `cover_compiled`) is spelled out
by name, so the diagnostic line says why there is no concrete path; a `.beam`
path (a string) is returned verbatim.
""".
-spec format_lib_path(non_existing | cover_compiled | preloaded | file:filename()) -> string().
format_lib_path(Special) when is_atom(Special) -> atom_to_list(Special);
format_lib_path(BeamPath) when is_list(BeamPath) -> BeamPath.
%% Scan each source file in turn, accumulating its entries newest-first.
%%
%% Every entry's reference is rewritten to the file's relative path before it
%% is accumulated. The first file the scanner rejects aborts the walk with
%% `{error, {parse_failed, File, Reason}}`; otherwise the accumulated entries
%% are returned in source order.
-spec extract_files([file:filename()], [file:filename()], [Acc]) ->
    {ok, [rebar3_erli18n_extract_forms:extracted()]} | {error, term()}
when
    Acc :: rebar3_erli18n_extract_forms:extracted().
extract_files([], _SearchDirs, NewestFirst) ->
    {ok, lists:reverse(NewestFirst)};
extract_files([Source | Remaining], SearchDirs, NewestFirst) ->
    %% Computed before scanning, so a path without a `src` segment crashes
    %% here without the file ever being read.
    Relative = rel_source(Source),
    %% `scan_file/2` yields the domain and the entries from a single pass, so
    %% this is the only place a per-file error can surface.
    case rebar3_erli18n_extract_forms:scan_file(Source, SearchDirs) of
        {error, Why} ->
            {error, {parse_failed, Source, Why}};
        {ok, _Domain, Found} ->
            %% Push in file order so the final reverse restores it.
            Pushed = lists:foldl(
                fun(Entry, Stack) -> [reref(Entry, Relative) | Stack] end,
                NewestFirst,
                Found
            ),
            extract_files(Remaining, SearchDirs, Pushed)
    end.
%% Swap the file part of an extracted entry's `reference` for the relative
%% source path, keeping the line number. Relative paths keep `#:` lines
%% stable across machines and build directories.
-spec reref(rebar3_erli18n_extract_forms:extracted(), file:filename()) ->
    rebar3_erli18n_extract_forms:extracted().
reref(#{reference := {_ScannedPath, LineNo}} = Entry, RelFile) ->
    maps:update(reference, {RelFile, LineNo}, Entry).
%% Reduce a source path to a project-relative one for `#:` lines: keep
%% everything from the app's `src/` segment onward. Every file the extractor
%% sees comes from `app_src_files/1`'s `<app>/src/**/*.erl` listing, so the
%% path always contains a `src` segment. Those paths are flat strings, so the
%% rejoined relative path is a string without any conversion.
%%
%% The cut is made at the LAST `src` segment, not the first. A checkout that
%% lives under an ancestor directory named `src` (`~/src/proj/...`,
%% `/usr/src/app/...`) would otherwise leak machine-specific segments into
%% the reference and make catalogs differ between machines.
%% Trade-off: a directory literally named `src` nested inside an app's own
%% `src/` tree is cut at the nested segment.
-spec rel_source(file:filename()) -> file:filename().
rel_source(File) ->
    filename:join(drop_until_src(filename:split(File))).

%% Drop leading path segments up to (and keeping) the last `src`. A path
%% without any `src` segment is a contract violation and crashes with
%% `function_clause` rather than silently mis-keying a reference.
-spec drop_until_src([file:name_all()]) -> [file:name_all()].
drop_until_src(Segments) ->
    drop_until_src(Segments, none).

%% Walk the whole segment list, remembering the suffix that starts at the
%% most recent `src` seen. There is deliberately no `([], none)` clause.
-spec drop_until_src([file:name_all()], none | {found, [file:name_all()]}) ->
    [file:name_all()].
drop_until_src(["src" | Rest] = Suffix, _Earlier) ->
    drop_until_src(Rest, {found, Suffix});
drop_until_src([_Other | Rest], Latest) ->
    drop_until_src(Rest, Latest);
drop_until_src([], {found, Suffix}) ->
    Suffix.
%% The search directories handed to the scanner: for each app, in app order,
%% its `include/` directory, its `src/` directory, and the app directory
%% itself.
-spec include_dirs([rebar3_erli18n_host:app_info()]) -> [file:filename()].
include_dirs(Apps) ->
    [
        SearchDir
     || App <- Apps,
        AppDir <- [rebar3_erli18n_host:app_dir(App)],
        SearchDir <- [
            filename:join(AppDir, "include"),
            filename:join(AppDir, "src"),
            AppDir
        ]
    ].
%% Every `.erl` file under `<AppDir>/src`, at any depth, as full paths in the
%% sorted order `filelib:wildcard/2` returns.
%%
%% The directory is passed as the base directory of `filelib:wildcard/2`
%% instead of being spliced into the pattern. Only `**/*.erl` is interpreted
%% as a glob, so a checkout path containing wildcard metacharacters (`[`,
%% `{`, `*`, `?`, `\`) is taken literally rather than silently matching the
%% wrong files or nothing. Matches come back relative to the base directory
%% and are rejoined onto it, so callers still receive `<AppDir>/src/...`.
-spec app_src_files(rebar3_erli18n_host:app_info()) -> [file:filename()].
app_src_files(App) ->
    SrcDir = filename:join(rebar3_erli18n_host:app_dir(App), "src"),
    [filename:join(SrcDir, Relative) || Relative <- filelib:wildcard("**/*.erl", SrcDir)].
%% =========================
%% Grouping and deduplication
%% =========================
%% Bucket the raw entries by their `domain` (keeping extraction order inside
%% each bucket) and deduplicate every bucket independently.
-spec group_and_dedup([rebar3_erli18n_extract_forms:extracted()]) ->
    #{atom() => [dedup_entry()]}.
group_and_dedup(Raw) ->
    DomainOf = fun(#{domain := Domain}) -> Domain end,
    %% `maps:groups_from_list/2` preserves list order within each group, so
    %% "first seen" means first in extraction order.
    Buckets = maps:groups_from_list(DomainOf, Raw),
    maps:map(fun(_Domain, InOrder) -> dedup_entries(InOrder) end, Buckets).
-doc """
Collapse one domain's raw extracted entries into catalog entries.

Entries sharing the same `{Context, Msgid}` become a single entry carrying
every reference that pointed at it. References keep first-seen order with
repeats dropped. The result is sorted by `{Context, Msgid}`, with an
`undefined` context ordered as the empty binary, so output is deterministic.
""".
-spec dedup_entries([rebar3_erli18n_extract_forms:extracted()]) -> [dedup_entry()].
dedup_entries(Entries) ->
    Merged = lists:foldl(fun dedup_one/2, #{}, Entries),
    InCatalogOrder = fun(
        #{context := CtxA, msgid := MsgidA}, #{context := CtxB, msgid := MsgidB}
    ) ->
        {norm_ctx(CtxA), MsgidA} =< {norm_ctx(CtxB), MsgidB}
    end,
    Ordered = lists:sort(InCatalogOrder, maps:values(Merged)),
    %% References were accumulated newest-first; fix them up last.
    lists:map(fun finalize_refs/1, Ordered).
%% Fold step for deduplication: merge one raw entry into the accumulator
%% keyed by `{Context, Msgid}`.
%%
%% A key already present only gains the new reference (prepended, so the
%% list is newest-first until finalized). A new key gets a fresh catalog
%% entry built from this occurrence.
%% NOTE(review): because `kind`/`plural` are taken from the first occurrence
%% only, a msgid first seen as singular stays singular even if a later call
%% site supplies a plural form - confirm this is the intended policy.
-spec dedup_one(rebar3_erli18n_extract_forms:extracted(), Acc) -> Acc when
    Acc :: #{{undefined | binary(), binary()} => dedup_entry()}.
dedup_one(#{context := Ctx, msgid := Msgid, reference := Ref} = Raw, Seen) ->
    Key = {Ctx, Msgid},
    case Seen of
        #{Key := #{references := Earlier} = Known} ->
            Seen#{Key := Known#{references := [Ref | Earlier]}};
        #{} ->
            Seen#{Key => first_occurrence(Raw, Ref)}
    end.

%% Build the catalog entry for a key seen for the first time.
-spec first_occurrence(rebar3_erli18n_extract_forms:extracted(), reference_ref()) ->
    dedup_entry().
first_occurrence(#{context := Ctx, msgid := Msgid} = Raw, Ref) ->
    #{
        domain => maps:get(domain, Raw),
        kind => maps:get(kind, Raw),
        context => Ctx,
        msgid => Msgid,
        plural => maps:get(plural, Raw),
        references => [Ref]
    }.
%% Put an entry's references into their final form. They were accumulated
%% newest-first, so reverse them back to source order, then drop repeats
%% while keeping each reference's first position.
-spec finalize_refs(dedup_entry()) -> dedup_entry().
finalize_refs(#{references := NewestFirst} = Entry) ->
    SourceOrder = lists:reverse(NewestFirst),
    maps:update(references, dedup_keep_order(SourceOrder, [], #{}), Entry).
%% Order-preserving dedup: emit each reference the first time it appears and
%% skip later repeats. `Kept` is built in reverse; `Seen` is a map used as a
%% set of the references already emitted.
-spec dedup_keep_order([Ref], [Ref], #{Ref => true}) -> [Ref] when Ref :: reference_ref().
dedup_keep_order([], Kept, _Seen) ->
    lists:reverse(Kept);
dedup_keep_order([Ref | Rest], Kept, Seen) when is_map_key(Ref, Seen) ->
    dedup_keep_order(Rest, Kept, Seen);
dedup_keep_order([Ref | Rest], Kept, Seen) ->
    dedup_keep_order(Rest, [Ref | Kept], Seen#{Ref => true}).
%% Sort-key form of a context: `undefined` is ordered as the empty binary;
%% any other context is used as-is.
-spec norm_ctx(undefined | binary()) -> binary().
norm_ctx(Ctx) ->
    case Ctx of
        undefined -> <<>>;
        _ -> Ctx
    end.
%% =========================
%% .pot construction
%% =========================
-doc """
Turn one domain's deduplicated entries into a
`rebar3_erli18n_po_meta:catalog()` suitable for writing as a `.pot` template.

The catalog carries the fixed template header and one meta entry per input
entry, in input order: every `msgstr` is empty and the entry's references
become its `#:` lines.
""".
-spec entries_to_pot([dedup_entry()]) -> rebar3_erli18n_po_meta:catalog().
entries_to_pot(Entries) ->
    Header = pot_header(),
    MetaEntries = lists:map(fun to_meta_entry/1, Entries),
    #{header => Header, entries => MetaEntries}.
%% The fixed `.pot` header block: one `Field: value` line per header field,
%% each terminated by a newline. `Project-Id-Version` is left blank (note
%% the space after the colon is part of the output).
-spec pot_header() -> binary().
pot_header() ->
    Fields = [
        <<"Project-Id-Version: ">>,
        <<"MIME-Version: 1.0">>,
        <<"Content-Type: text/plain; charset=UTF-8">>,
        <<"Content-Transfer-Encoding: 8bit">>
    ],
    iolist_to_binary([[Field, $\n] || Field <- Fields]).
%% Convert a deduplicated entry into a template meta entry: an untranslated
%% body plus the entry's references.
-spec to_meta_entry(dedup_entry()) -> rebar3_erli18n_po_meta:meta_entry().
to_meta_entry(#{kind := Kind, context := Ctx, msgid := Msgid, references := Refs} = Entry) ->
    #{
        body => meta_body(Kind, Ctx, Msgid, Entry),
        references => Refs
    }.

%% The untranslated body for each kind. A singular body has one empty
%% `msgstr`; a plural body carries the plural msgid and two empty indexed
%% translations (indices 0 and 1).
-spec meta_body(singular | plural, undefined | binary(), binary(), dedup_entry()) -> tuple().
meta_body(singular, Ctx, Msgid, _Entry) ->
    {singular, Ctx, Msgid, <<>>};
meta_body(plural, Ctx, Msgid, #{plural := Plural}) ->
    {plural, Ctx, Msgid, Plural, [{0, <<>>}, {1, <<>>}]}.
%% =========================
%% Errors
%% =========================
-doc """
Render a shared provider error as a flat, human-readable string.

Recognized reasons are `{parse_failed, File, Reason}`, `{drift, Summary}`,
`{po_parse_failed, Path, Reason}` and `{write_failed, Path, Reason}`; any
other term is printed as-is after the `erli18n: ` prefix.
""".
-spec format_error(term()) -> string().
format_error({parse_failed, File, Reason}) ->
    fmt_msg("erli18n: failed to parse ~ts: ~p", [File, Reason]);
format_error({drift, Summary}) ->
    fmt_msg("erli18n: catalog drift detected~n~ts", [Summary]);
format_error({po_parse_failed, Path, Reason}) ->
    fmt_msg("erli18n: failed to parse ~ts: ~p", [Path, Reason]);
format_error({write_failed, Path, Reason}) ->
    fmt_msg("erli18n: cannot write ~ts: ~p", [Path, Reason]);
format_error(Reason) ->
    fmt_msg("erli18n: ~p", [Reason]).

%% Format and flatten, so the result is a plain string as the spec promises.
-spec fmt_msg(io:format(), [term()]) -> string().
fmt_msg(Format, Args) ->
    lists:flatten(io_lib:format(Format, Args)).