%% src/yaccety_sax.erl

-module(yaccety_sax).

-include("yaccety_sax.hrl").

%% ====================================================================
%% API functions
%% ====================================================================

-export([
    event_characters/5,
    event_comment/2,
    event_dtd/2,
    event_endDocument/1,
    event_endElement/2,
    event_processingInstruction/3,
    event_startDocument/6,
    event_startElement/4
]).

-export([file/1, file/2]).
-export([stream/1, stream/2]).

-export([write_event/2]).

-export([next_event/1]).

-type parser_state() :: #ys_state{}.
-type ext_parser_state() :: parser_state().

-export_type([ext_parser_state/0]).

%% Options accepted by stream/2 and file/2. Each entry mirrors one clause of
%% opts/2; anything else raises an `unknown_option` error.
-type options() :: [
    %% report whitespace character events?
    {whitespace, boolean()}
    %% report comment events?
    | {comments, boolean()}
    %% report processing-instruction events?
    | {proc_inst, boolean()}
    %% value stored in the `namespace_aware` field of the parser state
    | {namespace_aware, boolean()}
    %% base-uri of this stream; a character list is converted to a binary
    | {base, binary() | string()}
    %% pre-processed dtd to use for this stream
    | {dtd, processed_dtd()}
    %% Continuation fun (must have arity 1) and State
    | {continuation, {Fun :: fun(), State :: any()}}
    %% External entity reader fun (must have arity 2), if any
    | {external, Fun :: fun()}
].

-export_type([options/0]).

%% Builds a parser state for an in-memory stream using the default options.
stream(Stream) ->
    stream(Stream, []).

%% Builds a parser state for `Stream`: the options in `Opts` are applied to a
%% fresh #ys_state{} and the stream is stored in its `rest_stream` field.
stream(Stream, Opts) ->
    Configured = opts(Opts, #ys_state{}),
    Configured#ys_state{rest_stream = Stream}.

%% Builds a parser state for the file `Filename` using the default options.
file(Filename) ->
    file(Filename, []).

%% Builds a parser state for the file `Filename`.
%%
%% A character-list filename is first converted to a binary. The options in
%% `Opts` are applied to a fresh #ys_state{}; `base` is then always set to the
%% directory of the absolute filename (replacing any `base` given in `Opts`).
%% When no continuation was supplied through `Opts`, the default file
%% continuation from ys_utils is installed.
file(Filename, Opts) when is_list(Filename) ->
    file(unicode:characters_to_binary(Filename), Opts);
file(Filename, Opts) ->
    Configured = opts(Opts, #ys_state{}),
    Dir = filename:dirname(filename:absname(Filename)),
    Based = Configured#ys_state{base = Dir},
    case Based of
        #ys_state{continuation = undefined} ->
            Based#ys_state{
                continuation = ys_utils:default_file_continuation(Filename)
            };
        _HasContinuation ->
            Based
    end.

%% Applies an option list to a parser state, one option at a time from left
%% to right, and returns the resulting state.
opts([], State) ->
    State;
opts([Opt | Rest], State) ->
    opts(Rest, apply_opt(Opt, State)).

%% Stores a single recognised option in the matching state field. An option
%% with an unknown key, or with a value that fails its guard, raises a fatal
%% `unknown_option` error carrying the offending term.
apply_opt({continuation, {F, S}}, State) when is_function(F, 1) ->
    State#ys_state{continuation = {F, S}};
apply_opt({external, F}, State) when is_function(F, 2) ->
    State#ys_state{external = F};
apply_opt({base, Base}, State) when is_list(Base) ->
    State#ys_state{base = unicode:characters_to_binary(Base)};
apply_opt({base, Base}, State) when is_binary(Base) ->
    State#ys_state{base = Base};
apply_opt({dtd, DTD}, State) when is_map(DTD) ->
    State#ys_state{dtd = DTD};
apply_opt({proc_inst, Flag}, State) when is_boolean(Flag) ->
    State#ys_state{proc_inst = Flag};
apply_opt({comments, Flag}, State) when is_boolean(Flag) ->
    State#ys_state{comments = Flag};
apply_opt({whitespace, Flag}, State) when is_boolean(Flag) ->
    State#ys_state{whitespace = Flag};
apply_opt({namespace_aware, Flag}, State) when is_boolean(Flag) ->
    State#ys_state{namespace_aware = Flag};
apply_opt(Unknown, _State) ->
    fatal_error(unknown_option, Unknown).

-spec next_event(State) -> {Event, State} | {error, no_event} when
    State :: ext_parser_state(), Event :: xml_event().
%% Returns `{Event, NewState}` for the next event in the stream, or the bare
%% tuple `{error, no_event}` when the position stack of the state is empty.
%%
%% The head of the `position` stack selects which ys_parse function is run on
%% `rest_stream`. The sub-parsers called through a `case` may return
%% `{no_bytes, State}`, `{Event, State}` or a bare state; a bare state means
%% nothing was reported at this position, so parsing continues from the
%% updated state. Illegal or missing input raises through fatal_error/2.
next_event(#ys_state{position = []}) ->
    {error, no_event};
next_event(#ys_state{position = [Position | Ps], rest_stream = Stream} = State) ->
    case Position of
        ?content ->
            case ys_parse:parse_content(Stream, State) of
                % `no_bytes` must be matched before the generic event tuple
                {no_bytes, State1} ->
                    fatal_error(illegal_data, {Stream, State1});
                {Event, State1} ->
                    {Event, State1};
                State1 ->
                    next_event(State1)
            end;
        ?empty ->
            % pop both the position and the innermost open tag, and report
            % the end of that element
            [T | Ts] = State#ys_state.tags,
            event_endElement(T, State#ys_state{position = Ps, tags = Ts});
        ?document ->
            % sets position to misc_pre_dtd
            ys_parse:parse_XMLDecl(Stream, State);
        ?misc_pre_dtd ->
            case ys_parse:parse_Misc(Stream, State) of
                {no_bytes, State1} ->
                    fatal_error(illegal_data, {Stream, State1});
                {Event, State1} ->
                    {Event, State1};
                State1 ->
                    % no more Misc items: move on to the doctype declaration
                    next_event(State1#ys_state{position = [?dtd | Ps]})
            end;
        ?dtd ->
            % NOTE(review): unlike the Misc branches there is no `no_bytes`
            % clause here, so such a tuple would be returned as an event;
            % confirm parse_doctypedecl never produces it.
            case ys_parse:parse_doctypedecl(Stream, State) of
                {Event, State1} ->
                    {Event, State1#ys_state{position = [?misc_post_dtd | Ps]}};
                State1 ->
                    % no event reported: go straight to the root element
                    next_event(State1#ys_state{position = [?element | Ps]})
            end;
        ?misc_post_dtd ->
            case ys_parse:parse_Misc(Stream, State) of
                {no_bytes, State1} ->
                    % input ended before any root element was seen
                    fatal_error(missing_element, {Stream, State1});
                {Event, State1} ->
                    {Event, State1};
                State1 ->
                    next_event(State1#ys_state{position = [?element | Ps]})
            end;
        ?element ->
            ys_parse:parse_element(Stream, State);
        ?misc_post_element ->
            case ys_parse:parse_Misc(Stream, State) of
                {no_bytes, State1} ->
                    % running out of input after the root element is the
                    % normal end of the document
                    event_endDocument(State1#ys_state{rest_stream = <<>>});
                {Event, State1} ->
                    {Event, State1};
                State1 ->
                    % a bare state here is treated as illegal trailing data
                    fatal_error(illegal_data, {Stream, State1})
            end
    end.

%% ====================================================================
%% Events
%% ====================================================================

-spec event_startDocument(Version, Encoding, EncSet, StandAlone, StandSet, State) ->
    {Event, State}
when
    Version :: binary(),
    Encoding :: binary(),
    EncSet :: boolean(),
    StandAlone :: boolean(),
    StandSet :: boolean(),
    State :: ext_parser_state(),
    Event :: xml_startDocument().
%% Builds the startDocument event from the XML declaration values, stamped
%% with the current line of the state, and records `StandAlone` in the
%% `standalone` field of the returned state.
event_startDocument(
    Version, Encoding, EncSet, StandAlone, StandSet, State = #ys_state{line = LineNo}
) ->
    {
        #{
            type => startDocument,
            line => LineNo,
            version => Version,
            encoding => Encoding,
            enc_set => EncSet,
            standalone => StandAlone,
            sa_set => StandSet
        },
        State#ys_state{standalone = StandAlone}
    }.

-spec event_endDocument(State) -> {Event, State} when
    State :: ext_parser_state(),
    Event :: xml_endDocument().
%% Builds the endDocument event, stamped with the current line of the state.
%% The state is returned unchanged.
event_endDocument(State = #ys_state{line = LineNo}) ->
    {#{type => endDocument, line => LineNo}, State}.

-spec event_dtd(Processed :: processed_dtd(), State :: ext_parser_state()) ->
    {xml_dtd(), ext_parser_state()}.
%% Builds the dtd event carrying the processed DTD under the `proc` key,
%% stamped with the current line of the state. The state is returned unchanged.
event_dtd(Processed, State = #ys_state{line = LineNo}) ->
    {#{type => dtd, line => LineNo, proc => Processed}, State}.

-spec event_startElement(QName, Attributes, Namespaces, State) -> {Event, State} when
    QName :: qname(),
    Attributes :: list(xml_attribute()),
    Namespaces :: list(xml_namespace()),
    State :: ext_parser_state(),
    Event :: xml_startElement().
%% Builds the startElement event for `QName` with its attributes and
%% namespace declarations, stamped with the current line of the state.
%% The state is returned unchanged.
event_startElement(QName, Attributes, Namespaces, State = #ys_state{line = LineNo}) ->
    {
        #{
            type => startElement,
            line => LineNo,
            qname => QName,
            attributes => Attributes,
            namespaces => Namespaces
        },
        State
    }.

-spec event_endElement(QName, State) -> {Event, State} when
    QName :: qname(),
    State :: ext_parser_state(),
    Event :: xml_endElement().
%% Builds the endElement event for `QName`, stamped with the current line of
%% the state. The state is returned unchanged.
event_endElement(QName, State = #ys_state{line = LineNo}) ->
    {#{type => endElement, line => LineNo, qname => QName}, State}.

-spec event_processingInstruction(Target, Data, State) -> {Event, State} when
    Target :: binary(),
    Data :: binary(),
    State :: ext_parser_state(),
    Event :: xml_processingInstruction().
%% Builds the processingInstruction event from its target and data, stamped
%% with the current line of the state. The state is returned unchanged.
event_processingInstruction(Target, Data, State = #ys_state{line = LineNo}) ->
    {
        #{
            type => processingInstruction,
            line => LineNo,
            target => Target,
            data => Data
        },
        State
    }.

-spec event_characters(Data, CData, Ignorable, IsWs, State) -> {Event, State} | State when
    Data :: binary(),
    CData :: boolean(),
    Ignorable :: boolean(),
    IsWs :: boolean(),
    State :: ext_parser_state(),
    Event :: xml_characters().
%% Builds the characters event for `Data`, stamped with the current line of
%% the state; the three flags are stored under the `cdata`, `ignore` and `ws`
%% keys. Empty data produces no event: the bare state is returned instead of
%% an `{Event, State}` tuple.
event_characters(<<>>, _CData, _Ignorable, _IsWs, State) ->
    State;
event_characters(Data, CData, Ignorable, IsWs, State = #ys_state{line = LineNo}) ->
    {
        #{
            type => characters,
            line => LineNo,
            data => Data,
            cdata => CData,
            ignore => Ignorable,
            ws => IsWs
        },
        State
    }.

-spec event_comment(Text, State) -> {Event, State} when
    Text :: binary(),
    State :: ext_parser_state(),
    Event :: xml_comment().
%% Builds the comment event carrying `Text`, stamped with the current line of
%% the state. The state is returned unchanged.
event_comment(Text, State = #ys_state{line = LineNo}) ->
    {#{type => comment, line => LineNo, text => Text}, State}.

%% Appends the serialised form of one XML event to an output accumulator.
%%
%% The second argument and the result are both `{Bytes, State}`: `Bytes` is
%% the binary written so far and `State` is passed through untouched.
%%
%% Only characters, startElement and endElement events produce output.
%% Character data, attribute values and namespace URIs are escaped so that
%% markup characters inside them cannot break the written document. Every
%% other event (startDocument, endDocument, comments, processing
%% instructions, ...) leaves `Bytes` unchanged.
write_event(#{type := startDocument}, {Bytes, State}) ->
    % XXX this should add version, standalone, encoding
    {Bytes, State};
write_event(#{type := endDocument}, {Bytes, State}) ->
    {Bytes, State};
write_event(#{type := characters, data := Data}, {Bytes, State}) ->
    % XXX maybe CDATA wrap
    Text = escape_text(Data),
    {<<Bytes/binary, Text/binary>>, State};
write_event(#{type := endElement, qname := {_, Px, Ln}}, {Bytes, State}) ->
    case Px of
        <<>> -> {<<Bytes/binary, "</", Ln/binary, ">">>, State};
        _ -> {<<Bytes/binary, "</", Px/binary, ":", Ln/binary, ">">>, State}
    end;
write_event(
    #{type := startElement, namespaces := Nss, attributes := Atts, qname := QName}, {Bytes, State}
) ->
    % Lexical name: the local name alone, or "prefix:local" when prefixed.
    NmFun = fun
        ({_, <<>>, Ln}) ->
            Ln;
        ({_, Px, Ln}) ->
            [Px, <<":">>, Ln]
    end,
    Name = NmFun(QName),
    % Namespace declarations: default namespace when the prefix is empty.
    NsFun = fun
        ({NsUri, <<>>}) ->
            [<<" xmlns=\"">>, escape_attr(NsUri), <<"\"">>];
        ({NsUri, NsPx}) ->
            [<<" xmlns:">>, NsPx, <<"=\"">>, escape_attr(NsUri), <<"\"">>]
    end,
    AtFun = fun({AQName, AValue}) ->
        AQName1 = NmFun(AQName),
        [<<" ">>, AQName1, <<"=\"">>, escape_attr(AValue), <<"\"">>]
    end,
    IoList = ["<", Name, [NsFun(N) || N <- Nss], [AtFun(A) || A <- Atts], ">"],
    Out = iolist_to_binary(IoList),
    {<<Bytes/binary, Out/binary>>, State};
% TODO processing-instructions, comments
write_event(_Event, {Bytes, State}) ->
    {Bytes, State}.

%% Escapes character data for element content. Working byte-wise is safe for
%% UTF-8 input because every escaped character is a single ASCII byte.
escape_text(Data) ->
    <<<<(escape_text_char(C))/binary>> || <<C>> <= Data>>.

escape_text_char($&) -> <<"&amp;">>;
escape_text_char($<) -> <<"&lt;">>;
% `>` is escaped so that a literal "]]>" can never appear in content
escape_text_char($>) -> <<"&gt;">>;
% a raw CR would be turned into LF by end-of-line handling when re-read
escape_text_char($\r) -> <<"&#13;">>;
escape_text_char(C) -> <<C>>.

%% Escapes a value for use inside a double-quoted attribute. Accepts iodata,
%% as the unescaped value did. Tab, LF and CR are written as character
%% references so attribute-value normalisation does not fold them to spaces.
escape_attr(Value) ->
    <<<<(escape_attr_char(C))/binary>> || <<C>> <= iolist_to_binary(Value)>>.

escape_attr_char($&) -> <<"&amp;">>;
escape_attr_char($<) -> <<"&lt;">>;
escape_attr_char($") -> <<"&quot;">>;
escape_attr_char($\t) -> <<"&#9;">>;
escape_attr_char($\n) -> <<"&#10;">>;
escape_attr_char($\r) -> <<"&#13;">>;
escape_attr_char(C) -> <<C>>.

%% Raises an `error` exception with reason `Reason`. This function never
%% returns.
%%
%% The second argument of error/2 is the argument list of the current
%% function as it should appear in the stack trace, so both arguments are
%% passed; `State` stays available there for diagnosis.
-spec fatal_error(Reason :: term(), State :: term()) -> no_return().
fatal_error(Reason, State) ->
    error(Reason, [Reason, State]).