%% src/barrel_doc.erl

%%%-------------------------------------------------------------------
%%% @doc Document utilities for barrel_docdb
%%%
%%% Provides functions for document manipulation, revision handling,
%%% document hashing, and CBOR document content access.
%%%
%%% The module supports two document representations:
%%% - Indexed binary: CBOR with structural index for O(1) path access
%%% - Erlang map: Standard map for manipulation
%%%
%%% All map-like functions work with both representations transparently.
%%% @end
%%%-------------------------------------------------------------------
-module(barrel_doc).

-include("barrel_docdb.hrl").

%% Document accessors (metadata)
-export([id/1, rev/1, id_rev/1, deleted/1]).

%% Revision operations
-export([
    parse_revision/1,
    make_revision/2,
    revision_hash/3,
    compare_revisions/2
]).

%% Revision history
-export([
    encode_revisions/1,
    parse_revisions/1,
    trim_history/3
]).

%% Document processing
-export([
    make_doc_record/1,
    doc_without_meta/1
]).

%% Unique ID generation
-export([generate_docid/0]).

%% CBOR Document Construction & Import
-export([
    new/0, new/1,
    from_map/1,
    from_json/1,
    from_cbor/1
]).

%% CBOR Document Export
-export([
    to_map/1,
    to_json/1,
    to_cbor/1
]).

%% CBOR Map-like API
-export([
    get/2, get/3,
    set/3,
    remove/2,
    update/3,
    is_key/2,
    keys/1,
    size/1,
    find/2,
    values/1,
    to_list/1,
    take/2,
    merge/2,
    fold/3,
    map/2,
    filter/2
]).

%% Index utilities
-export([
    is_indexed/1,
    normalize/1,
    ensure_indexed/1
]).

%% Type definitions
-type cbor_doc() :: binary().
-type doc_input() :: map() | cbor_doc().
-type path() :: [binary() | integer()].

-export_type([cbor_doc/0, doc_input/0, path/0]).

%%====================================================================
%% Document Accessors
%%====================================================================

%% @doc Return the value stored under the `<<"id">>' key of a document map.
%% Yields `undefined' when the map has no such key; raises `bad_doc' when
%% the argument is not a map.
-spec id(doc()) -> docid() | undefined.
id(Doc) when is_map(Doc) ->
    maps:get(<<"id">>, Doc, undefined);
id(_NotAMap) ->
    erlang:error(bad_doc).

%% @doc Return the value stored under the `<<"_rev">>' key of a document map.
%% A map without that key yields the empty binary; a non-map argument
%% raises `bad_doc'.
-spec rev(doc()) -> revid().
rev(Doc) when is_map(Doc) ->
    maps:get(<<"_rev">>, Doc, <<>>);
rev(_NotAMap) ->
    erlang:error(bad_doc).

%% @doc Return `{Id, Rev}' for a document map.
%% A missing `<<"_rev">>' is reported as `<<>>' and a map with neither key
%% yields `{undefined, <<>>}'. A map that carries `<<"_rev">>' without
%% `<<"id">>', or an argument that is not a map, raises `bad_doc'.
-spec id_rev(doc()) -> {docid() | undefined, revid()}.
id_rev(Doc) when is_map(Doc) ->
    case {maps:find(<<"id">>, Doc), maps:find(<<"_rev">>, Doc)} of
        {{ok, Id}, {ok, Rev}} -> {Id, Rev};
        {{ok, Id}, error} -> {Id, <<>>};
        %% A revision without an id cannot be attributed to a document.
        {error, {ok, _Rev}} -> erlang:error(bad_doc);
        {error, error} -> {undefined, <<>>}
    end;
id_rev(_NotAMap) ->
    erlang:error(bad_doc).

%% @doc Tell whether a document is flagged as deleted.
%% Returns the value of `<<"_deleted">>' when it is a boolean; any other
%% value, a missing key, or a non-map argument counts as not deleted.
-spec deleted(doc()) -> boolean().
deleted(Doc) ->
    case Doc of
        #{<<"_deleted">> := Flag} when is_boolean(Flag) -> Flag;
        _ -> false
    end.

%%====================================================================
%% Revision Operations
%%====================================================================

%% @doc Parse a revision ID of the form `Generation-Hash' into
%% `{Generation, Hash}'.
%%
%% The empty binary parses to `{0, <<>>}'. Only the first `-' separates the
%% two parts, so the hash may itself contain dashes. Lists are converted to
%% binaries before parsing.
%%
%% Exits with `{bad_rev, bad_format}' for every malformed input: a missing
%% separator, a generation that is not an integer, a list that cannot be
%% converted to a binary, or a term that is neither a binary nor a list.
-spec parse_revision(revid()) -> {non_neg_integer(), binary()}.
parse_revision(<<>>) ->
    {0, <<>>};
parse_revision(Rev) when is_binary(Rev) ->
    case binary:split(Rev, <<"-">>) of
        [BinPos, Hash] ->
            %% binary_to_integer/1 raises badarg on non-numeric input; report
            %% it through the same exit reason as other malformed revisions.
            try binary_to_integer(BinPos) of
                Gen -> {Gen, Hash}
            catch
                error:badarg -> exit({bad_rev, bad_format})
            end;
        _ ->
            exit({bad_rev, bad_format})
    end;
parse_revision(Rev) when is_list(Rev) ->
    %% Only the conversion is protected; the recursive call sits in the
    %% unprotected `of' section.
    try list_to_binary(Rev) of
        Bin -> parse_revision(Bin)
    catch
        error:badarg -> exit({bad_rev, bad_format})
    end;
parse_revision(_Rev) ->
    exit({bad_rev, bad_format}).

%% @doc Build a revision ID by joining the decimal generation and the hash
%% with a single `-'.
-spec make_revision(non_neg_integer(), binary()) -> revid().
make_revision(Gen, Hash) when is_integer(Gen), is_binary(Hash) ->
    GenBin = integer_to_binary(Gen),
    <<GenBin/binary, $-, Hash/binary>>.

%% @doc Compute the content hash used in a revision ID.
%% The tuple `{Doc, Rev, Deleted}' is serialized with term_to_binary/1,
%% hashed with SHA-256 and returned as a lowercase hex binary.
-spec revision_hash(doc(), revid(), boolean()) -> binary().
revision_hash(Doc, Rev, Deleted) ->
    Serialized = term_to_binary({Doc, Rev, Deleted}),
    to_hex(crypto:hash(sha256, Serialized)).

%% @doc Order two revision IDs.
%% Both are parsed into `{Generation, Hash}' tuples, which are compared with
%% standard term ordering (generation first, then hash).
%% Returns 1 if RevA is greater, -1 if RevA is less, 0 if equal.
-spec compare_revisions(revid(), revid()) -> -1 | 0 | 1.
compare_revisions(RevA, RevB) ->
    case {parse_revision(RevA), parse_revision(RevB)} of
        {A, B} when A > B -> 1;
        {A, B} when A < B -> -1;
        _Equal -> 0
    end.

%%====================================================================
%% Revision History
%%====================================================================

%% @doc Encode a list of revision IDs into the compact
%% `#{<<"start">> => Generation, <<"ids">> => [Hash]}' form.
%% `start' is the generation of the first revision in the list and `ids'
%% holds the hash part of every revision, in the same order as the input.
%% The empty list encodes to start 0 with no ids.
-spec encode_revisions([revid()]) -> map().
encode_revisions([]) ->
    #{<<"start">> => 0, <<"ids">> => []};
encode_revisions(Revs) ->
    [First | _] = Revs,
    {Start, _} = parse_revision(First),
    Ids = lists:map(
        fun(Rev) ->
            {_Gen, Digest} = parse_revision(Rev),
            Digest
        end,
        Revs
    ),
    #{<<"start">> => Start, <<"ids">> => Ids}.

%% @doc Expand the revision history carried by a document into a list of
%% revision IDs.
%% When `<<"revisions">>' holds a map with `<<"start">>' and `<<"ids">>', the
%% first id gets generation `start' and each following id one generation
%% less. A `<<"revisions">>' value of any other shape yields `[]'. Without
%% `<<"revisions">>', the result is `[Rev]' if `<<"_rev">>' is present and
%% `[]' otherwise.
-spec parse_revisions(doc()) -> [revid()].
parse_revisions(#{<<"revisions">> := #{<<"start">> := Start, <<"ids">> := Ids}}) ->
    {Revs, _NextGen} = lists:mapfoldl(
        fun(Id, Gen) ->
            {<<(integer_to_binary(Gen))/binary, "-", Id/binary>>, Gen - 1}
        end,
        Start,
        Ids
    ),
    Revs;
parse_revisions(#{<<"revisions">> := _Unrecognized}) ->
    [];
parse_revisions(#{<<"_rev">> := Rev}) ->
    [Rev];
parse_revisions(_) ->
    [].

%% @doc Trim an encoded revision history (`<<"start">>' / `<<"ids">>').
%%
%% Each ancestor revision is located in `ids' at index `Start - Generation'.
%% When the digest stored there equals the ancestor's hash, the history is
%% cut right after that entry; among several matching ancestors the one
%% with the smallest index wins. When no ancestor matches, at most `Limit'
%% ids are kept. Ancestors whose generation falls outside the encoded
%% history are ignored.
-spec trim_history(map(), [revid()], non_neg_integer()) -> map().
trim_history(EncodedRevs, Ancestors, Limit) ->
    #{<<"start">> := Start, <<"ids">> := Digests} = EncodedRevs,
    ADigests = array:from_list(Digests),
    {_, Limit2} = lists:foldl(
        fun(Ancestor, {Matched, _Unmatched} = Acc) ->
            {Gen, Digest} = parse_revision(Ancestor),
            Idx = Start - Gen,
            %% Check the bounds before touching the array: array:get/2
            %% raises badarg on a negative index, which happens whenever an
            %% ancestor is newer than `Start'. `Matched' never exceeds the
            %% number of digests, so `Idx < Matched' also bounds it above.
            InRange = Idx >= 0 andalso Idx < Matched,
            case InRange andalso array:get(Idx, ADigests) =:= Digest of
                true -> {Idx, Idx + 1};
                false -> Acc
            end
        end,
        {length(Digests), Limit},
        Ancestors
    ),
    EncodedRevs#{<<"ids">> => lists:sublist(Digests, Limit2)}.

%%====================================================================
%% Document Processing
%%====================================================================

%% @doc Return the document with every metadata entry removed.
%% An entry is metadata when its key is a binary starting with `_'
%% (which includes `<<"_attachments">>'); all other entries are kept.
-spec doc_without_meta(doc()) -> doc().
doc_without_meta(Doc) ->
    maps:filter(fun(Key, _Value) -> not is_meta_key(Key) end, Doc).

%% @private True for binary keys that begin with an underscore.
is_meta_key(<<"_", _/binary>>) -> true;
is_meta_key(_) -> false.

%% @doc Create the internal document record from a user document.
%%
%% Two input shapes are accepted:
%%
%% - Bulk format, `#{<<"id">>, <<"doc">>, <<"history">>}': the supplied
%%   history is used as-is for `revs' and no hash is computed.
%% - Regular document: the id is taken from `<<"id">>' or generated, a
%%   revision hash is computed over the body, and `revs' becomes
%%   `[NewRev]' for a first revision or `[NewRev, ParentRev]' otherwise.
%%
%% In both cases `doc' is the body with `_'-prefixed metadata removed and
%% `ref' is a fresh reference.
-spec make_doc_record(doc()) -> map().
make_doc_record(#{<<"id">> := Id, <<"doc">> := Doc0, <<"history">> := History}) ->
    %% Bulk format with explicit history.
    %% The deletion flag is read from `<<"deleted">>' as before, and now
    %% falls back to `<<"_deleted">>', the key used by deleted/1 and by the
    %% regular clause below, so a tombstone body is not recorded as live.
    %% NOTE(review): confirm which key bulk producers actually emit.
    Deleted = maps:get(
        <<"deleted">>,
        Doc0,
        maps:get(<<"_deleted">>, Doc0, false)
    ),
    #{
        id => Id,
        ref => erlang:make_ref(),
        revs => History,
        deleted => Deleted,
        attachments => maps:get(<<"_attachments">>, Doc0, #{}),
        doc => doc_without_meta(Doc0)
    };
make_doc_record(Doc0) ->
    %% Regular document format
    Deleted = maps:get(<<"_deleted">>, Doc0, false),
    Rev = maps:get(<<"_rev">>, Doc0, <<>>),
    %% maps:find/2 rather than maps:get/3 so an id is only generated when
    %% the document does not carry one.
    Id =
        case maps:find(<<"id">>, Doc0) of
            {ok, DocId} -> DocId;
            error -> generate_docid()
        end,
    Doc1 = doc_without_meta(Doc0),
    Hash = revision_hash(Doc1, Rev, Deleted),
    Revs =
        case Rev of
            <<>> ->
                %% No parent revision: this is generation 1.
                [make_revision(1, Hash)];
            _ ->
                {Gen, _} = parse_revision(Rev),
                [make_revision(Gen + 1, Hash), Rev]
        end,
    #{
        id => Id,
        ref => erlang:make_ref(),
        revs => Revs,
        deleted => Deleted,
        hash => Hash,
        attachments => maps:get(<<"_attachments">>, Doc0, #{}),
        doc => Doc1
    }.

%%====================================================================
%% ID Generation
%%====================================================================

%% @doc Generate a document ID.
%% The ID is the lowercase hex MD5 digest of the current system time in
%% microseconds (64 bits) followed by 8 strong random bytes.
-spec generate_docid() -> docid().
generate_docid() ->
    Seed = <<
        (erlang:system_time(microsecond)):64,
        (crypto:strong_rand_bytes(8))/binary
    >>,
    Digest = crypto:hash(md5, Seed),
    to_hex(Digest).

%%====================================================================
%% Internal Functions
%%====================================================================

%% @doc Render a binary as lowercase hexadecimal, two characters per byte.
-spec to_hex(binary()) -> binary().
to_hex(Bin) ->
    << <<(hex_char(Hi)), (hex_char(Lo))>> || <<Hi:4, Lo:4>> <= Bin >>.

%% @private Map a nibble (0..15) to its lowercase hex character.
hex_char(Nibble) when Nibble < 10 -> Nibble + $0;
hex_char(Nibble) -> Nibble - 10 + $a.

%%====================================================================
%% CBOR Document Construction & Import
%%====================================================================

%% @doc Create an empty document: the encoding of the empty map.
-spec new() -> cbor_doc().
new() ->
    new(#{}).

%% @doc Create a document from an Erlang map by encoding it with
%% barrel_docdb_codec_cbor:encode/1. Only maps are accepted.
-spec new(map()) -> cbor_doc().
new(#{} = Map) ->
    barrel_docdb_codec_cbor:encode(Map).

%% @doc Import a document from an Erlang map.
%% Equivalent to new/1; provided for symmetry with the other `from_*'
%% constructors.
-spec from_map(map()) -> cbor_doc().
from_map(Source) ->
    new(Source).

%% @doc Import a document from a JSON binary.
%% The text is decoded with json:decode/1 and the result is passed to
%% new/1, which accepts maps only.
-spec from_json(binary()) -> cbor_doc().
from_json(JsonBin) when is_binary(JsonBin) ->
    new(json:decode(JsonBin)).

%% @doc Import a document from a CBOR binary.
%% Delegates to normalize/1, which returns already-indexed input unchanged
%% and re-encodes plain CBOR.
-spec from_cbor(binary()) -> cbor_doc().
from_cbor(Encoded) when is_binary(Encoded) ->
    normalize(Encoded).

%%====================================================================
%% CBOR Document Export
%%====================================================================

%% @doc Export a document as an Erlang map.
%% A map is returned unchanged; a binary is decoded with
%% barrel_docdb_codec_cbor:decode_any/1.
-spec to_map(doc_input()) -> map().
to_map(#{} = Map) ->
    Map;
to_map(Encoded) when is_binary(Encoded) ->
    barrel_docdb_codec_cbor:decode_any(Encoded).

%% @doc Export a document as a JSON binary.
%% Maps are encoded directly with json:encode/1. Binaries starting with the
%% `CB' magic are handed to barrel_docdb_codec_cbor:to_json/1; any other
%% binary is decoded with barrel_docdb_codec_cbor:decode_cbor/1 and then
%% encoded as JSON.
-spec to_json(doc_input()) -> binary().
to_json(Doc) when is_map(Doc) ->
    Json = json:encode(Doc),
    iolist_to_binary(Json);
to_json(Doc) when is_binary(Doc) ->
    case Doc of
        <<"CB", _/binary>> ->
            %% Indexed form: the codec produces JSON itself.
            barrel_docdb_codec_cbor:to_json(Doc);
        _ ->
            %% Plain CBOR: decode first, then encode as JSON.
            Decoded = barrel_docdb_codec_cbor:decode_cbor(Doc),
            iolist_to_binary(json:encode(Decoded))
    end.

%% @doc Export a document as plain CBOR (no index), e.g. for external
%% clients.
%% Maps are encoded with barrel_docdb_codec_cbor:encode_cbor/1. For binaries
%% starting with the `CB' magic the payload is extracted with
%% barrel_docdb_codec_cbor:payload/1; any other binary is returned as-is.
-spec to_cbor(doc_input()) -> binary().
to_cbor(Doc) when is_map(Doc) ->
    barrel_docdb_codec_cbor:encode_cbor(Doc);
to_cbor(Doc) when is_binary(Doc) ->
    case Doc of
        <<"CB", _/binary>> -> barrel_docdb_codec_cbor:payload(Doc);
        _AlreadyPlain -> Doc
    end.

%%====================================================================
%% CBOR Map-like API
%%====================================================================

%% @doc Get the value at Path, or `undefined' when it cannot be resolved.
%% Shorthand for get/3 with `undefined' as the default.
-spec get(doc_input(), path()) -> term() | undefined.
get(Document, Keys) ->
    get(Document, Keys, undefined).

%% @doc Get the value at Path, returning Default when it cannot be resolved.
%% Maps are walked in Erlang; binaries are delegated to
%% barrel_docdb_codec_cbor:get/3.
-spec get(doc_input(), path(), term()) -> term().
get(#{} = Map, Path, Default) ->
    get_from_map(Map, Path, Default);
get(Encoded, Path, Default) when is_binary(Encoded) ->
    barrel_docdb_codec_cbor:get(Encoded, Path, Default).

%% @doc Set the value at Path and return the encoded document.
%% For a map the update is applied in Erlang and the result is encoded with
%% barrel_docdb_codec_cbor:encode/1; for a binary the whole operation is
%% delegated to barrel_docdb_codec_cbor:set/3.
-spec set(doc_input(), path(), term()) -> cbor_doc().
set(#{} = Map, Path, Value) ->
    barrel_docdb_codec_cbor:encode(set_in_map(Map, Path, Value));
set(Encoded, Path, Value) when is_binary(Encoded) ->
    barrel_docdb_codec_cbor:set(Encoded, Path, Value).

%% @doc Tell whether Key exists at the top level of the document.
%% Maps are checked directly; binaries are delegated to
%% barrel_docdb_codec_cbor:is_key/2.
-spec is_key(doc_input(), binary()) -> boolean().
is_key(#{} = Map, Key) ->
    case Map of
        #{Key := _} -> true;
        _ -> false
    end;
is_key(Encoded, Key) when is_binary(Encoded) ->
    barrel_docdb_codec_cbor:is_key(Encoded, Key).

%% @doc List the top-level keys of the document.
%% Maps use maps:keys/1; binaries are delegated to
%% barrel_docdb_codec_cbor:keys/1.
-spec keys(doc_input()) -> [binary()].
keys(#{} = Map) ->
    maps:keys(Map);
keys(Encoded) when is_binary(Encoded) ->
    barrel_docdb_codec_cbor:keys(Encoded).

%% @doc Count the top-level entries of the document.
%% Maps use maps:size/1; binaries are delegated to
%% barrel_docdb_codec_cbor:size/1.
-spec size(doc_input()) -> non_neg_integer().
size(#{} = Map) ->
    maps:size(Map);
size(Encoded) when is_binary(Encoded) ->
    barrel_docdb_codec_cbor:size(Encoded).

%% @doc Remove a top-level key and return the re-encoded document.
%% The input is converted to a map (a no-op for maps, a full decode for
%% binaries), the key is dropped, and the result is encoded with
%% barrel_docdb_codec_cbor:encode/1.
-spec remove(doc_input(), binary()) -> cbor_doc().
remove(Doc, Key) when is_map(Doc); is_binary(Doc) ->
    Remaining = maps:remove(Key, to_map(Doc)),
    barrel_docdb_codec_cbor:encode(Remaining).

%% @doc Replace the value at Path with the result of applying Fun to it.
%% Fun receives the current value, or `undefined' when the path does not
%% resolve. The new value is written with set/3, whose result is returned.
-spec update(doc_input(), path(), fun((term()) -> term())) -> cbor_doc().
update(Doc, Path, Fun) when is_function(Fun, 1) ->
    set(Doc, Path, Fun(get(Doc, Path, undefined))).

%% @doc Look up a top-level key, in the manner of maps:find/2.
%% Returns `{ok, Value}' when the key is present and `error' otherwise.
%% For binaries the lookup goes through get/3 with a private sentinel atom
%% as the default, so a miss can be told apart from a stored value.
-spec find(doc_input(), binary()) -> {ok, term()} | error.
find(#{} = Map, Key) ->
    maps:find(Key, Map);
find(Encoded, Key) when is_binary(Encoded) ->
    Missing = '$barrel_not_found$',
    case get(Encoded, [Key], Missing) of
        Missing -> error;
        Found -> {ok, Found}
    end.

%% @doc List the top-level values of the document.
%% Binaries are fully decoded with to_map/1 first.
-spec values(doc_input()) -> [term()].
values(Doc) when is_map(Doc); is_binary(Doc) ->
    maps:values(to_map(Doc)).

%% @doc Return the top-level entries as a list of `{Key, Value}' tuples.
%% Binaries are fully decoded with to_map/1 first.
-spec to_list(doc_input()) -> [{binary(), term()}].
to_list(Doc) when is_map(Doc); is_binary(Doc) ->
    maps:to_list(to_map(Doc)).

%% @doc Remove a top-level key and return its value with the updated
%% document.
%% Returns `{Value, UpdatedDoc}', where UpdatedDoc is the remaining entries
%% encoded with barrel_docdb_codec_cbor:encode/1, or `error' when the key is
%% not present.
-spec take(doc_input(), binary()) -> {term(), cbor_doc()} | error.
take(Doc, Key) ->
    case maps:take(Key, to_map(Doc)) of
        error ->
            error;
        {Value, Remaining} ->
            {Value, barrel_docdb_codec_cbor:encode(Remaining)}
    end.

%% @doc Merge two documents at the top level.
%% Both inputs are converted to maps and combined with maps:merge/2, so
%% entries of Doc2 replace entries of Doc1 with the same key. The merged map
%% is encoded with barrel_docdb_codec_cbor:encode/1.
-spec merge(doc_input(), doc_input()) -> cbor_doc().
merge(Doc1, Doc2) ->
    Merged = maps:merge(to_map(Doc1), to_map(Doc2)),
    barrel_docdb_codec_cbor:encode(Merged).

%% @doc Fold Fun over the top-level key-value pairs of the document, as
%% maps:fold/3 does. Binaries are fully decoded with to_map/1 first.
-spec fold(fun((binary(), term(), Acc) -> Acc), Acc, doc_input()) -> Acc.
fold(Fun, Init, Doc) when is_map(Doc); is_binary(Doc) ->
    maps:fold(Fun, Init, to_map(Doc)).

%% @doc Apply Fun to every top-level key-value pair, replacing each value
%% with Fun's result (see maps:map/2).
%% The transformed map is encoded with barrel_docdb_codec_cbor:encode/1.
-spec map(fun((binary(), term()) -> term()), doc_input()) -> cbor_doc().
map(Fun, Doc) when is_function(Fun, 2) ->
    Transformed = maps:map(Fun, to_map(Doc)),
    barrel_docdb_codec_cbor:encode(Transformed).

%% @doc Keep only the top-level pairs for which Pred returns `true'
%% (see maps:filter/2).
%% The filtered map is encoded with barrel_docdb_codec_cbor:encode/1.
-spec filter(fun((binary(), term()) -> boolean()), doc_input()) -> cbor_doc().
filter(Pred, Doc) when is_function(Pred, 2) ->
    Kept = maps:filter(Pred, to_map(Doc)),
    barrel_docdb_codec_cbor:encode(Kept).

%%====================================================================
%% Index Utilities
%%====================================================================

%% @doc Tell whether the argument is a binary that starts with the two-byte
%% `CB' magic marking the indexed representation. Anything else, including
%% maps, yields `false'.
-spec is_indexed(binary() | map()) -> boolean().
is_indexed(Doc) ->
    case Doc of
        <<"CB", _/binary>> -> true;
        _ -> false
    end.

%% @doc Normalize any supported input to the indexed binary form used for
%% storage.
%% - map: encoded with barrel_docdb_codec_cbor:encode/1
%% - binary starting with the `CB' magic: returned unchanged
%% - any other binary: treated as plain CBOR, decoded with
%%   barrel_docdb_codec_cbor:decode_cbor/1 and re-encoded
-spec normalize(doc_input()) -> cbor_doc().
normalize(#{} = Map) ->
    barrel_docdb_codec_cbor:encode(Map);
normalize(<<"CB", _/binary>> = Indexed) ->
    Indexed;
normalize(Plain) when is_binary(Plain) ->
    Decoded = barrel_docdb_codec_cbor:decode_cbor(Plain),
    barrel_docdb_codec_cbor:encode(Decoded).

%% @doc Make sure a document is in indexed binary form.
%% Alias for normalize/1.
-spec ensure_indexed(doc_input()) -> cbor_doc().
ensure_indexed(Input) ->
    normalize(Input).

%%====================================================================
%% Internal: Map Path Operations
%%====================================================================

%% @private Resolve a path inside nested maps and lists.
%% An empty path returns the current term. Each step is a map key or a
%% zero-based list index; as soon as a step cannot be resolved, Default is
%% returned.
get_from_map(Value, [], _Default) ->
    Value;
get_from_map(Container, [Step | Path], Default) ->
    case lookup_step(Container, Step) of
        {ok, Next} -> get_from_map(Next, Path, Default);
        error -> Default
    end;
get_from_map(_, _, Default) ->
    Default.

%% @private Resolve a single path step: a key in a map, or a zero-based
%% integer index in a list. Returns `{ok, Value}' or `error'.
lookup_step(Map, Key) when is_map(Map) ->
    maps:find(Key, Map);
lookup_step(List, Index) when is_list(List), is_integer(Index) ->
    case Index >= 0 andalso Index < length(List) of
        %% lists:nth/2 is one-based.
        true -> {ok, lists:nth(Index + 1, List)};
        false -> error
    end;
lookup_step(_, _) ->
    error.

%% @private Set a value inside nested maps by key path.
%% Missing intermediate keys are created as empty maps. An empty path
%% raises `empty_path'. Only maps are traversed; an existing non-map value
%% on the way has no matching clause.
set_in_map(_Any, [], _Value) ->
    error(empty_path);
set_in_map(Map, [Key | Rest], Value) when is_map(Map) ->
    Updated =
        case Rest of
            [] ->
                %% Last path element: store the value itself.
                Value;
            _ ->
                set_in_map(maps:get(Key, Map, #{}), Rest, Value)
        end,
    Map#{Key => Updated}.