src/xmltree.erl

%%% vim:ts=2:sw=2:et
%%%-----------------------------------------------------------------------------
%%% @doc Parse XML into a hierarchical Erlang term
%%% ```
%%% % Example xml:
%%%
%%%  <?xml version="1.0" encoding="UTF-8" ?>
%%%  <root id="1">
%%%    <ele id="2"/>
%%%    <ele id="3">vvv\nxxx\n</ele>
%%%  </root>
%%%
%%%
%%% # Usage example (File is the path of the XML file above, Str its content)
%%% 1> xmltree:file(File).
%%% {root,[{id,<<"1">>}],
%%%      [{ele,[{id,<<"2">>}],[]},
%%%       {ele,[{id,<<"3">>}],[<<"vvv\nxxx\n">>]}]}
%%%
%%% 2> Rules = {root, [{id,integer}], [{ele, [{id,integer}], string}]},
%%% 2> xmltree:string(Str, Rules).
%%% {root,[{id,1}],
%%%    [{ele,[{id,2}],[]},{ele,[{id,3}],["vvv\nxxx\n"]}]}
%%% '''
%%% @author Serge Aleynikov <saleyn@gmail.com>
%%% @end
%%%-----------------------------------------------------------------------------
%%% Date:   2015-12-10
%%%-----------------------------------------------------------------------------
%%% Copyright (c) 2015 Serge Aleynikov
%%%-----------------------------------------------------------------------------
-module(xmltree).
-export([file/1, file/2, string/1, string/2, xml/1, xml/2]).

-include_lib("xmerl/include/xmerl.hrl").

%% @doc Parse the XML file `Filename' into a `{Name, Attributes, Children}'
%% term. No conversion rules are applied: attribute values and text nodes
%% are returned as UTF-8 binaries.
%%
%% When `xmerl_scan:file/1' returns an error tuple (e.g. the file cannot be
%% read) the call fails with a `badmatch' carrying that tuple.
-spec file(string()) -> {atom(), list(), list()}.
file(Filename) when is_list(Filename) ->
  %% Match on the record: a bare `{Xml, _}' pattern would also accept
  %% `{error, Reason}' and hide the reason behind a later function_clause.
  {#xmlElement{} = Xml, _} = xmerl_scan:file(Filename),
  xml(Xml).
%% @doc Parse the XML file `Filename' and convert it using conversion rules.
%%
%% The second argument is either a rule tuple `{Name, AttrRules, ChildRules}'
%% or the name of a file readable by `file:consult/1'. A rules file holding
%% exactly one term is used as the rule of the root element. A rules file
%% holding any other number of terms is treated as a list of rules, from
%% which the rule named after the root element is selected; if there is none,
%% `{no_rule_for_element, Name, Element}' is thrown.
%%
%% When `xmerl_scan:file/1' returns an error tuple (e.g. the file cannot be
%% read) the call fails with a `badmatch' carrying that tuple.
-spec file(string(), string() | tuple()) -> {atom(), list(), list()}.
file(Filename, RulesFile) when is_list(Filename), is_list(RulesFile) ->
  %% Match on the record: a bare `{Xml, _}' pattern would also accept
  %% `{error, Reason}' and hide the reason behind a later function_clause.
  {#xmlElement{} = Xml, _} = xmerl_scan:file(Filename),
  case file:consult(RulesFile) of
    {ok, [Rules]} ->
      xml(Xml, Rules);
    {ok, Rules} ->
      %% Several (or no) top-level terms: let the list clause of xml/2
      %% look up the rule by the root element's name.
      [Tree] = xml([Xml], Rules),
      Tree
  end;
file(Filename, Rules) when is_list(Filename), is_tuple(Rules) ->
  {#xmlElement{} = Xml, _} = xmerl_scan:file(Filename),
  xml(Xml, Rules).

%% @doc Parse the XML document held in the string `XmlS' into a
%% `{Name, Attributes, Children}' term without applying conversion rules.
-spec string(string()) -> {atom(), list(), list()}.
string(XmlS) when is_list(XmlS) ->
  {Root, _Rest} = xmerl_scan:string(XmlS),
  xml(Root).
%% @doc Parse the XML document held in the string `XmlS' and convert it
%% according to the rule tuple `Rules' (`{Name, AttrRules, ChildRules}').
-spec string(string(), tuple()) -> {atom(), list(), list()}.
string(XmlS, Rules) when is_list(XmlS), is_tuple(Rules) ->
  {Root, _Rest} = xmerl_scan:string(XmlS),
  xml(Root, Rules).


%% @doc Convert an xmerl parse tree into plain Erlang terms.
%%
%% An element becomes `{Name, Attributes, Children}'. A content list becomes
%% a list in which child elements are converted recursively, comments and
%% whitespace-only text nodes are dropped, and any other text node is
%% returned as a UTF-8 binary.
-spec xml(#xmlElement{} | list()) -> {atom(), list(), list()} | list().
xml(#xmlElement{name = N, attributes = A, content = C}) ->
  {N, process_attributes(A), xml(C)};
xml([#xmlElement{} = E | T]) ->
  [xml(E) | xml(T)];
xml([#xmlComment{} | T]) ->
  xml(T);
xml([#xmlText{value = V} | T]) ->
  %% XML white space is space, tab, CR and LF. Testing for all four keeps
  %% the indentation of tab-indented documents out of the result.
  case lists:all(fun(C) -> lists:member(C, " \t\r\n") end, V) of
    true  -> xml(T);
    false -> [unicode:characters_to_binary(V, utf8) | xml(T)]
  end;
xml([]) ->
  [].

%% @doc Convert an xmerl parse tree into plain Erlang terms, applying
%% conversion rules.
%%
%% The rule of an element is `{Name, AttrRules, ChildRules}', where `Name'
%% must equal the element's name. `AttrRules' is a list of
%% `{AttrName, Type}' pairs. `ChildRules' is either a list of rules for the
%% child elements or a type applied to the element's text content.
%% Within a list of child rules `{Name, AttrRules}' is accepted as a
%% shorthand for an element without child rules.
%%
%% Comments and whitespace-only text nodes are dropped. A child element for
%% which the rule list has no entry causes
%% `{no_rule_for_element, Name, Element}' to be thrown.
-spec xml(#xmlElement{} | list(), term()) -> {atom(), list(), list()} | list().
xml(#xmlElement{name = N, attributes = A, content = C}, {N, AttrRules, ChildRules}) ->
  {N, process_attributes(A, AttrRules), xml(C, ChildRules)};
xml([#xmlElement{name = N} = E | T], ChildRules) ->
  case lists:keyfind(N, 1, ChildRules) of
    false          -> throw({no_rule_for_element, N, E});
    {F, A}         -> [xml(E, {F,A,[]}) | xml(T, ChildRules)];
    {_,_,_} = Rule -> [xml(E, Rule)     | xml(T, ChildRules)]
  end;
xml([#xmlComment{} | T], ChildRules) ->
  xml(T, ChildRules);
xml([#xmlText{value = V} | T], ChildRules) ->
  %% XML white space is space, tab, CR and LF. Testing for all four keeps
  %% the indentation of tab-indented documents from being converted as if
  %% it were element text.
  case lists:all(fun(C) -> lists:member(C, " \t\r\n") end, V) of
    true  -> xml(T, ChildRules);
    false -> [process_value(V, ChildRules) | xml(T, ChildRules)]
  end;
xml([], _) ->
  [].

%% Convert a list of xmerl attribute records into `{Name, Value}' pairs,
%% in the original order, with each value encoded as a UTF-8 binary.
process_attributes(Attributes) ->
  lists:map(
    fun(#xmlAttribute{name = Name, value = Value}) ->
      {Name, unicode:characters_to_binary(Value, utf8)}
    end,
    Attributes).

%% Convert a list of xmerl attribute records into `{Name, Value}' pairs,
%% in the original order. Each value is converted by process_value/2 using
%% the rule that proplists:get_value/2 finds for the attribute name in
%% `Rules' (`undefined' when there is no such rule).
process_attributes(Attributes, Rules) ->
  Convert =
    fun(#xmlAttribute{name = Name, value = Value}) ->
      Rule = proplists:get_value(Name, Rules),
      {Name, process_value(Value, Rule)}
    end,
  lists:map(Convert, Attributes).


%% Convert the string `Value' (an attribute value or element text) according
%% to a rule:
%%   fun/1    - the result of calling the fun with Value
%%   atom     - the atom named by Value
%%   boolean  - true for "Y" / "true", false for "N" / "false";
%%              anything else throws {value_is_not_boolean, Value}
%%   integer  - list_to_integer/1
%%   float    - list_to_float/1; integer notation such as "3" is accepted too
%%   binary   - list_to_binary/1 (no UTF-8 encoding is applied)
%%   string   - Value unchanged
%%   other    - Value encoded as a UTF-8 binary
process_value(Value, Fun) when is_function(Fun, 1) ->
  Fun(Value);
process_value(Value, atom) ->
  %% NOTE(review): creates atoms from document content; atoms are never
  %% garbage collected, so use this rule only with trusted documents.
  list_to_atom(Value);
process_value("Y", boolean) ->
  true;
process_value("N", boolean) ->
  false;
process_value("true", boolean) ->
  true;
process_value("false", boolean) ->
  false;
process_value(Value, boolean) ->
  %% Matching the literals above (instead of list_to_existing_atom/1) makes
  %% every invalid value fail the same way, whether or not an atom of that
  %% name happens to exist.
  throw({value_is_not_boolean, Value});
process_value(Value, integer) ->
  list_to_integer(Value);
process_value(Value, float) ->
  try
    list_to_float(Value)
  catch
    %% list_to_float/1 rejects "3"; accept it as 3.0. Any other invalid
    %% input still fails with badarg from list_to_integer/1.
    error:badarg -> float(list_to_integer(Value))
  end;
process_value(Value, binary) ->
  list_to_binary(Value);
process_value(Value, string) ->
  Value;
process_value(Value, _) ->
  unicode:characters_to_binary(Value, utf8).