Skip to main content

src/livery_s3_xml.erl

%% SPDX-License-Identifier: Apache-2.0
%% Copyright 2026 Benoit Chesneau
-module(livery_s3_xml).
-moduledoc """
Minimal XML parsing for S3 responses.

S3 returns XML for bucket/object listings, versioning, multipart, batch delete,
and error bodies. Rather than pull in the `xmerl` record headers, this parses
into a light `{Tag, Attrs, Children}` tree via `xmerl_sax_parser` and offers a
few navigation helpers (`child/2`, `children/2`, `text/2`). Tags are local
names as binaries; namespaces are dropped (S3 uses a single default namespace).
""".

-export([parse/1]).
-export([child/2, children/2, text/2, node_text/1]).

-export_type([tree/0, node_/0]).

-type tree() :: {binary(), [{binary(), binary()}], [node_()]}.
-type node_() :: tree() | {text, binary()}.

%%====================================================================
%% Parsing
%%====================================================================

-doc "Parse an XML binary into the root element tree.".
-spec parse(binary()) -> {ok, tree()} | {error, term()}.
parse(Bin) when is_binary(Bin) ->
    Opts = [{event_fun, fun event/3}, {event_state, {[], undefined}}],
    try xmerl_sax_parser:stream(Bin, Opts) of
        {ok, {[], Root}, _Rest} when Root =/= undefined -> {ok, Root};
        {ok, {_, undefined}, _Rest} -> {error, empty_document};
        Other -> {error, {xml_parse, Other}}
    catch
        Class:Reason -> {error, {xml_parse, Class, Reason}}
    end.

%% Event state is {Stack, Root}: Stack holds open elements (children reversed),
%% Root is the completed top element once the document closes.
-spec event(term(), term(), State) -> State when State :: {[tree()], undefined | tree()}.
event({startElement, _Uri, Local, _QName, Attrs}, _Loc, {Stack, Root}) ->
    {[{to_bin(Local), conv_attrs(Attrs), []} | Stack], Root};
event({characters, Chars}, _Loc, {[{T, A, Ch} | Rest], Root}) ->
    {[{T, A, [{text, to_bin(Chars)} | Ch]} | Rest], Root};
event({endElement, _Uri, _Local, _QName}, _Loc, {[{T, A, Ch} | Rest], Root}) ->
    Node = {T, A, lists:reverse(Ch)},
    case Rest of
        [] -> {[], Node};
        [{Pt, Pa, Pch} | More] -> {[{Pt, Pa, [Node | Pch]} | More], Root}
    end;
event(_Event, _Loc, State) ->
    State.

-spec conv_attrs(list()) -> [{binary(), binary()}].
conv_attrs(Attrs) ->
    [{to_bin(Name), to_bin(Value)} || {_Uri, _Prefix, Name, Value} <- Attrs].

-spec to_bin(binary() | list()) -> binary().
to_bin(B) when is_binary(B) -> B;
to_bin(L) when is_list(L) -> unicode:characters_to_binary(L).

%%====================================================================
%% Navigation
%%====================================================================

-doc "First child element named `Name`, or `undefined`.".
-spec child(tree(), binary()) -> tree() | undefined.
child(Tree, Name) ->
    case [C || {T, _, _} = C <- element_children(Tree), T =:= Name] of
        [First | _] -> First;
        [] -> undefined
    end.

-doc "All child elements named `Name`.".
-spec children(tree(), binary()) -> [tree()].
children(Tree, Name) ->
    [C || {T, _, _} = C <- element_children(Tree), T =:= Name].

-doc "Text content of the first child named `Name`, or `undefined`.".
-spec text(tree(), binary()) -> binary() | undefined.
text(Tree, Name) ->
    case child(Tree, Name) of
        undefined -> undefined;
        Node -> node_text(Node)
    end.

-doc "Concatenated direct text content of an element.".
-spec node_text(tree()) -> binary().
node_text({_, _, Ch}) ->
    iolist_to_binary([B || {text, B} <- Ch]).

-spec element_children(tree()) -> [tree()].
element_children({_, _, Ch}) ->
    [C || C <- Ch, is_tuple(C), tuple_size(C) =:= 3].