Skip to main content

src/erli18n_pt_store.erl

-module(erli18n_pt_store).

-moduledoc """
`persistent_term` storage layer for erli18n translation catalogs.

Each loaded `{Domain, Locale}` catalog is stored as ONE persistent term:
key `{erli18n_catalog, Domain, Locale}`, value a single map holding

- `{singular, Context, Msgid} => Translation`
- `{plural, Context, Msgid, Index} => Translation`
- `'$header' => header_state()`

Reads are copy-free (`persistent_term:get/2` does not copy the term onto the
caller's heap) and lock-free from the caller process. The read semantics mirror
the previous ETS reads byte-for-byte: a missing catalog or a missing key both
yield `undefined`, and the plural read evaluates the compiled rule (or the
C/Germanic fallback) exactly as before.

## Read-path rule (load-bearing)

Call `persistent_term:get/2` FRESH on every lookup and let the returned map be
transient. NEVER cache the catalog map in a long-lived process's state or
process dictionary: a process holding the old term is forced into a major
(fullsweep) garbage collection when the catalog is reloaded, and would serve a
stale catalog. (erlang.org/doc/apps/erts/persistent_term.html, Best Practices.)

## Reload cost

`put_map/3` (install/reload) and `unload/2` (`persistent_term:erase/1`) of the
catalog map defer a node-wide literal-area cleanup: every process still
referencing the old map runs a major GC, and all processes are made runnable to
scan their heaps. It is paid once per (re)load — acceptable for erli18n's
load-once-at-boot workload, but it is a real cost that ETS did not have.

The loaded-catalog index term (`?INDEX_KEY`) is written on the same node-wide
basis only when its `ordsets` set actually changes: the first load of a
`{Domain, Locale}` pair and the unload of the last reference to it each pay one
index `persistent_term:put` (and its literal-area cleanup), but a reload of an
already-indexed catalog skips the index write entirely (compare-before-put), so
a steady-state reload pays the cost of the catalog-map term only.

### Why the default-available index is NOT cached here

The per-request locale-negotiation default-available path rebuilds a canonical
`available_index` from `loaded_locales/0` on each request. Caching that prebuilt
index as a SECOND `persistent_term` keyed off `?INDEX_KEY` was considered and
DELIBERATELY REJECTED. The per-request work it removes is marginal (one
copy-free `index_get/0` plus a `usort`/`canonicalize` over the tiny index) and
is ALREADY fully avoidable by an application passing an explicit `available`
option (the per-request thunk then returns it directly, with no rebuild). A
cached term, by contrast, would add a recurring cost: a second
`persistent_term:put` that must fire in lock-step with `?INDEX_KEY` on EVERY
index change (first load of a pair, last unload, and the header-only-reload
`index_del` path), each scheduling the node-wide literal-area GC that the
compare-before-put discipline exists to minimize. It would also widen the
lock-step invariant surface (across `put_map/3`, `unload/2`, `index_update/2`,
`erase_all/0`) and need `?INDEX_KEY`-style namespace exclusion. A marginal,
easily-avoided per-request saving traded for recurring reload-time node-wide GC
plus added invariant/bug surface is premature optimization, so the index is
recomputed per request rather than cached.
""".

-export([
    build_map/2,
    put_map/3,
    load/4,
    reload/4,
    merge_entries/3,
    get_singular/4,
    get_plural_form/5,
    lookup_header/2,
    get_map/2,
    unload/2
]).

%% Storage-namespace introspection / lifecycle (observability + app stop).
-export([
    all/0,
    loaded_locales/0,
    data_keys/1,
    key_count/1,
    data_count/1,
    storage_bytes/1,
    erase_all/0
]).

%% `persistent_term:get/0' is specced with `term()' keys, so eqwalizer cannot
%% see that our namespace filter `{erli18n_catalog, D, L}' guarantees an
%% `{atom(), binary()}' shape. The comprehension narrows the key by pattern, so
%% the `{domain(), locale(), catalog_map()}' result is sound by construction;
%% re-announce it with a static boundary annotation (no runtime cast — the
%% `eqwalizer:dynamic_cast/1' helper ships only in the test-only
%% `eqwalizer_support' dep, which Hex cannot package).
-eqwalizer({nowarn_function, all/0}).

-define(KEY(Domain, Locale), {erli18n_catalog, Domain, Locale}).
%% The loaded-catalog index lives under its OWN fixed (zero-argument) key, a
%% 1-tuple so it can never be confused with a 3-tuple catalog key
%% `{erli18n_catalog, Domain, Locale}` by any namespace comprehension (the
%% `all/0`/`erase_all/0` filters match the 3-tuple, so the index term is
%% excluded from them by construction).
-define(INDEX_KEY, {erli18n_catalog_index}).
-define(HEADER, '$header').

-type domain() :: atom().
-type locale() :: binary().
-type context() :: undefined | binary().
-type msgid() :: binary().
-type translation() :: binary().
%% The authoritative header-state shape lives in `erli18n_server' (the only
%% producer of header states — built in full by its load/reload staging and
%% handed to `build_map/2'). Alias it here so a stored/looked-up header is
%% byte-identical to what `erli18n_server:lookup_header/2' promises: the
%% compiled plural rule (or the `fallback' atom when the .po has no
%% Plural-Forms header) plus its metadata keys.
-type header_state() :: erli18n_server:header_state().
%% A data (non-header) key inside a catalog map. `Domain`/`Locale` are
%% factored up into the persistent_term key, so the in-map keys carry only
%% the context/msgid (and, for plurals, the form index).
-type data_key() ::
    {singular, context(), msgid()}
    | {plural, context(), msgid(), non_neg_integer()}.
%% Every key a catalog map may hold: its data keys plus the header marker.
-type stored_key() :: data_key() | '$header'.
-type catalog_map() :: #{stored_key() => translation() | header_state()}.

-export_type([catalog_map/0, data_key/0]).

-doc """
Build the per-catalog value map from parsed `.po` entries and a header state,
WITHOUT touching `persistent_term` (pure). Used to construct the map off the
measured/serialized write path before a single `put_map/3`.
""".
-spec build_map([erli18n_po:entry()], header_state()) -> catalog_map().
build_map(Entries, HeaderState) when is_list(Entries), is_map(HeaderState) ->
    lists:foldl(fun put_entry/2, #{?HEADER => HeaderState}, Entries).

-doc "Install a pre-built catalog map as the single persistent term for `{Domain, Locale}`.".
-spec put_map(domain(), locale(), catalog_map()) -> ok.
put_map(Domain, Locale, Map) when is_atom(Domain), is_binary(Locale), is_map(Map) ->
    persistent_term:put(?KEY(Domain, Locale), Map),
    %% Keep the loaded-catalog index in lock-step with the catalog term, mirroring
    %% the `loaded_catalogs/0` semantics: a catalog counts as "loaded" only once it
    %% holds >=1 data (non-header) entry. `put_map/3` is the single add chokepoint
    %% (load/4, reload/4, merge_entries/3-that-creates, and the server's staged
    %% install all route through here), so updating the index here — add when the
    %% installed map carries data, drop it otherwise — keeps the index equal to the
    %% `data_count > 0` set.
    %%
    %% The `data_count =:= 0` -> `index_del` branch is reached by a header-only
    %% LOAD/RELOAD, NOT by a merge: `build_map/2` seeds `#{?HEADER => HeaderState}`,
    %% so a load/reload with an empty `Entries` list installs a header-only map
    %% (`data_count = 0`) and must drop any prior index entry for the pair. A merge
    %% can NEVER reach this branch: `merge_entries/3` folds add-only `put_entry/2`
    %% over an existing base, so the resulting map can only GROW-or-equal the base;
    %% an equal merge short-circuits to `ok` WITHOUT calling `put_map/3`, and a
    %% growing merge keeps every prior data key, so `data_count` stays > 0.
    case data_count(Map) > 0 of
        true -> index_add(Domain, Locale);
        false -> index_del(Domain, Locale)
    end,
    ok.

-doc "Build the catalog map and install it in one step (the install/load commit).".
-spec load(domain(), locale(), [erli18n_po:entry()], header_state()) -> ok.
load(Domain, Locale, Entries, HeaderState) ->
    put_map(Domain, Locale, build_map(Entries, HeaderState)).

-doc """
Reload a catalog: whole-term replacement. Identical to `load/4` because
`persistent_term:put/2` overwrites the term at the key, so there are no stale
entries to prune (a concurrent reader sees either the entire old or the entire
new catalog, never a half-applied one).
""".
-spec reload(domain(), locale(), [erli18n_po:entry()], header_state()) -> ok.
reload(Domain, Locale, Entries, HeaderState) ->
    load(Domain, Locale, Entries, HeaderState).

-doc "Singular lookup. Mirrors `erli18n_server:lookup_singular/4` (miss => `undefined`).".
-spec get_singular(domain(), locale(), context(), msgid()) -> {ok, translation()} | undefined.
get_singular(Domain, Locale, Context, Msgid) when
    is_atom(Domain),
    is_binary(Locale),
    (Context =:= undefined orelse is_binary(Context)),
    is_binary(Msgid)
->
    case lookup_map(Domain, Locale) of
        undefined ->
            undefined;
        Map ->
            case maps:get({singular, Context, Msgid}, Map, undefined) of
                undefined -> undefined;
                Translation -> {ok, Translation}
            end
    end.

-doc """
Plural-aware lookup. Mirrors `erli18n_server:lookup_plural_form/5`: read the
header, select the form index via the compiled rule (or the C/Germanic
fallback), then read the per-index plural key. Miss => `undefined`.
""".
-spec get_plural_form(domain(), locale(), context(), msgid(), integer()) ->
    {ok, translation()} | undefined.
get_plural_form(Domain, Locale, Context, Msgid, N) when
    is_atom(Domain),
    is_binary(Locale),
    (Context =:= undefined orelse is_binary(Context)),
    is_binary(Msgid),
    is_integer(N)
->
    case lookup_map(Domain, Locale) of
        undefined ->
            undefined;
        Map ->
            %% Header absent (catalog not loaded, or populated only by the
            %% low-level insert_* API, which writes data rows but no header)
            %% -> `undefined` directly, WITHOUT reading any entry. This mirrors
            %% `erli18n_server:lookup_plural_form/5`'s historical contract: the
            %% header is what carries the Plural-Forms rule, so with no header
            %% there is no form to select and the lookup is a miss.
            case maps:get(?HEADER, Map, undefined) of
                #{plural := fallback} ->
                    plural_entry(Map, Context, Msgid, fallback_form_index(N));
                #{plural := Compiled} ->
                    plural_entry(Map, Context, Msgid, erli18n_plural:evaluate(Compiled, N));
                undefined ->
                    undefined
            end
    end.

%% Read the per-index plural entry from an already-resolved catalog map.
%% Reached only after a present header has selected the form index; a missing
%% index key is a `undefined` miss.
plural_entry(Map, Context, Msgid, Index) ->
    case maps:get({plural, Context, Msgid, Index}, Map, undefined) of
        undefined -> undefined;
        Translation -> {ok, Translation}
    end.

-doc "Header lookup. Mirrors `erli18n_server:lookup_header/2` (miss => `undefined`).".
-spec lookup_header(domain(), locale()) -> {ok, header_state()} | undefined.
lookup_header(Domain, Locale) when is_atom(Domain), is_binary(Locale) ->
    case lookup_map(Domain, Locale) of
        undefined ->
            undefined;
        Map ->
            case maps:get(?HEADER, Map, undefined) of
                undefined -> undefined;
                HeaderState -> {ok, HeaderState}
            end
    end.

-doc "Unload a catalog: erase the persistent term. Idempotent (`ok` whether present or not).".
-spec unload(domain(), locale()) -> ok.
unload(Domain, Locale) when is_atom(Domain), is_binary(Locale) ->
    _ = persistent_term:erase(?KEY(Domain, Locale)),
    %% Drop the pair from the loaded-catalog index in the same body, keeping it
    %% consistent with the erased term. Idempotent: erasing a never-loaded catalog
    %% deletes an absent pair (a total no-op).
    index_del(Domain, Locale),
    ok.

-doc """
Merge entries into the existing catalog map, creating it if absent and
preserving the header if present. Used by the low-level insert API
(`erli18n_server:insert_singular/5` etc.): an empty merge is a no-op (no
catalog is created, no `put`). A negative or non-integer plural form index is a
loud `function_clause` crash, matching the historical insert contract.
""".
-spec merge_entries(domain(), locale(), [erli18n_po:entry()]) -> ok.
merge_entries(Domain, Locale, Entries) when
    is_atom(Domain), is_binary(Locale), is_list(Entries)
->
    Base =
        case lookup_map(Domain, Locale) of
            undefined -> #{};
            Existing -> Existing
        end,
    Map = lists:foldl(fun put_entry/2, Base, Entries),
    case Map =:= Base of
        true -> ok;
        false -> put_map(Domain, Locale, Map)
    end.

-doc "Return the whole catalog map for `{Domain, Locale}`, or `undefined` if not loaded.".
-spec get_map(domain(), locale()) -> catalog_map() | undefined.
get_map(Domain, Locale) when is_atom(Domain), is_binary(Locale) ->
    lookup_map(Domain, Locale).

-doc """
List every loaded catalog as `{Domain, Locale, Map}`. Filters the node's
persistent terms by the `erli18n_catalog` namespace, so it is O(total
persistent terms on the node) — an observability call, never the hot path.
""".
-spec all() -> [{domain(), locale(), catalog_map()}].
all() ->
    [{D, L, V} || {{erli18n_catalog, D, L}, V} <- persistent_term:get()].

-doc """
The loaded-locale set: the sorted, distinct locales across all loaded catalogs.
ONE keyed `persistent_term` read (copy-free) plus a project/`usort` over the
tiny index — O(1) on the node table, NOT the node-wide scan that `all/0` does.
This is the hot-path accessor behind `erli18n:loaded_locales/0`.
""".
-spec loaded_locales() -> [locale()].
loaded_locales() ->
    lists:usort([Locale || {_Domain, Locale} <- index_get()]).

-doc "The data (non-header) keys of a catalog map.".
-spec data_keys(catalog_map()) -> [data_key()].
data_keys(Map) when is_map(Map) ->
    lists:foldl(fun keep_data_key/2, [], maps:keys(Map)).

-doc "Total number of stored keys in a catalog map (data keys plus the header).".
-spec key_count(catalog_map()) -> non_neg_integer().
key_count(Map) when is_map(Map) ->
    map_size(Map).

-doc "Number of data (non-header) keys in a catalog map.".
-spec data_count(catalog_map()) -> non_neg_integer().
data_count(Map) when is_map(Map) ->
    case maps:is_key(?HEADER, Map) of
        true -> map_size(Map) - 1;
        false -> map_size(Map)
    end.

-doc "Approximate storage size of a catalog map in bytes (term word size * word).".
-spec storage_bytes(catalog_map()) -> non_neg_integer().
storage_bytes(Map) when is_map(Map) ->
    erts_debug:size(Map) * erlang:system_info(wordsize).

-doc """
Erase every `{erli18n_catalog, _, _}` persistent term on the node and return
how many were removed. Called on application stop, since persistent terms are
node-global and are NOT cleared when the application stops.
""".
-spec erase_all() -> non_neg_integer().
erase_all() ->
    Keys = [K || {{erli18n_catalog, _D, _L} = K, _V} <- persistent_term:get()],
    lists:foreach(fun(K) -> _ = persistent_term:erase(K) end, Keys),
    %% Erase the loaded-catalog index too, OUTSIDE the catalog `Keys` count: it is
    %% not a catalog, so the returned total (how many catalogs were removed) stays
    %% catalog-only. This clears the index on application stop and the test resets.
    _ = persistent_term:erase(?INDEX_KEY),
    length(Keys).

%% --- internal ---

lookup_map(Domain, Locale) ->
    persistent_term:get(?KEY(Domain, Locale), undefined).

%% The loaded-catalog index: an `ordsets` of `{Domain, Locale}` pairs stored
%% under the single `?INDEX_KEY` term. It mirrors the set of catalogs with
%% `data_count > 0` (the `loaded_catalogs/0` key set), so removing one catalog
%% never drops a locale still held by another domain. `ordsets:add_element/2`
%% is idempotent (reload adds no duplicate) and `ordsets:del_element/2` is total
%% (idempotent unload of an absent pair is a no-op).
%% `index_get/0` is a copy-free keyed read; `index_add/2`/`index_del/2` write
%% the index back ONLY when the ordset actually changes (see `index_update/2`),
%% so a reload of an already-indexed catalog or an unload of an absent pair
%% skips the `persistent_term:put` and its node-wide literal-area GC.
-spec index_get() -> ordsets:ordset({domain(), locale()}).
index_get() ->
    persistent_term:get(?INDEX_KEY, []).

-spec index_add(domain(), locale()) -> ok.
index_add(Domain, Locale) ->
    index_update(fun ordsets:add_element/2, {Domain, Locale}).

-spec index_del(domain(), locale()) -> ok.
index_del(Domain, Locale) ->
    index_update(fun ordsets:del_element/2, {Domain, Locale}).

%% Compare-before-put on the loaded-catalog index. `persistent_term:put/2`
%% schedules a node-wide literal-area GC (every process is made runnable to
%% scan its heap), so it must run ONLY when the index actually changes. A
%% reload of an already-indexed catalog (`add` of a present pair) and an unload
%% of an absent pair (`del` of a missing pair) both leave the ordset identical
%% to what is already stored: in that case skip the `put` and its GC entirely.
%% The lock-step invariant is preserved exactly — when the op is a no-op the
%% stored term already equals `Op(Pair, Current)`, so omitting the write keeps
%% the term equal to the `data_count > 0` catalog set by construction.
-spec index_update(
    fun((Pair, ordsets:ordset(Pair)) -> ordsets:ordset(Pair)),
    Pair
) -> ok when Pair :: {domain(), locale()}.
index_update(Op, Pair) ->
    Current = index_get(),
    case Op(Pair, Current) of
        Current ->
            ok;
        Updated ->
            persistent_term:put(?INDEX_KEY, Updated),
            ok
    end.

-spec put_entry(erli18n_po:entry(), catalog_map()) -> catalog_map().
put_entry({singular, Ctx, Msgid, Translation}, Map) ->
    Map#{{singular, Ctx, Msgid} => Translation};
put_entry({plural, Ctx, Msgid, _MsgidPlural, Forms}, Map) ->
    %% The form-index guard keeps the historical loud contract: a negative or
    %% non-integer index is a `function_clause` crash, not a silently stored
    %% bad key. The parser only ever emits valid (>= 0) indices, so the load
    %% path never trips it; only a hand-crafted insert can.
    lists:foldl(
        fun({Idx, Translation}, Acc) when is_integer(Idx), Idx >= 0 ->
            Acc#{{plural, Ctx, Msgid, Idx} => Translation}
        end,
        Map,
        Forms
    ).

%% Accumulate the data (non-header) keys of a catalog map. Clause-based so the
%% key shape is narrowed exactly: the header marker is dropped, singular and
%% plural keys are kept verbatim. Folds over the catalog's KEYS (not its
%% key/value pairs): the value plays no part in collecting the data keys.
-spec keep_data_key(stored_key(), [data_key()]) -> [data_key()].
keep_data_key(?HEADER, Acc) ->
    Acc;
keep_data_key({singular, _Ctx, _Msgid} = Key, Acc) ->
    [Key | Acc];
keep_data_key({plural, _Ctx, _Msgid, _Idx} = Key, Acc) ->
    [Key | Acc].

%% C/Germanic fallback when the .po has no Plural-Forms header: N == 1 -> 0 else 1.
fallback_form_index(1) -> 0;
fallback_form_index(_N) -> 1.