lib/sweet_xml.ex

defmodule SweetXpath do
  @moduledoc false

  # Home of the default `transform_fun`. A named function is used so the struct
  # default below can be written as a remote capture.
  defmodule Priv do
    @moduledoc false

    @doc false
    def self_val(val) do
      val
    end
  end

  # `path` is the XPath expression; the `is_*` flags and `cast_to` mirror the
  # modifiers accepted by `SweetXml.sigil_x/2`; `transform_fun` is applied to
  # the final result of a query and defaults to the identity function;
  # `namespaces` is a list of `{prefix, uri}` pairs.
  defstruct [
    path: ".",
    is_value: true,
    is_list: false,
    is_keyword: false,
    is_optional: false,
    cast_to: false,
    transform_fun: &Priv.self_val/1,
    namespaces: []
  ]
end

defmodule SweetXml.XmerlFatal do
  @moduledoc """
  An error raised when xmerl exits with a fatal error.

  The exception keeps the `reason` reported by xmerl together with the `file`,
  `line` and `col` it was reported at. When built from xmerl's fatal tuple, the
  `message` is the inspected `reason`.
  """
  defexception [:message, :reason, :file, :line, :col]

  # Builds the exception from xmerl's fatal payload
  # `{reason, {:file, file}, {:line, line}, {:col, col}}`.
  @impl Exception
  def exception({reason, {:file, file}, {:line, line}, {:col, col}}) do
    %__MODULE__{reason: reason, file: file, line: line, col: col, message: inspect(reason)}
  end

  # Overriding `exception/1` replaces the clauses generated by `defexception`,
  # so the conventional `raise Mod, "message"` and `raise Mod, key: value`
  # forms are restored here instead of failing with `FunctionClauseError`.
  def exception(message) when is_binary(message) do
    %__MODULE__{message: message}
  end

  def exception(fields) when is_list(fields) do
    struct!(__MODULE__, fields)
  end
end

defmodule SweetXml.DTDError do
  @moduledoc """
  Raised when a DTD that is not allowed is encountered.
  """
  defexception message: nil
end


defmodule SweetXml do
  @moduledoc ~S"""
  `SweetXml` is a thin wrapper around `:xmerl`. It allows you to convert a
  string or xmlElement record as defined in `:xmerl` to an elixir value such
  as `map`, `list`, `char_list`, or any combination of these.

  For normal sized documents, `SweetXml` primarily exposes 3 functions

    * `SweetXml.xpath/2` - return a value based on the xpath expression
    * `SweetXml.xpath/3` - similar to above but allowing nesting of mapping
    * `SweetXml.xmap/2` - return a map with keywords mapped to values returned
      from xpath

  For something larger, `SweetXml` mainly exposes 1 function

    * `SweetXml.stream_tags/3` - stream a given tag or a list of tags, and
      optionally "discard" some dom elements in order to free memory during
      streaming for big files which cannot fit entirely in memory

  ## Examples

  Simple Xpath:

      iex> import SweetXml
      iex> doc = "<h1><a>Some linked title</a></h1>"
      iex> doc |> xpath(~x"//a/text()")
      'Some linked title'

  Nested Mapping:

      iex> import SweetXml
      iex> doc = "<body><header><p>Message</p><ul><li>One</li><li><a>Two</a></li></ul></header></body>"
      iex> doc |> xpath(~x"//header", message: ~x"./p/text()", a_in_li: ~x".//li/a/text()"l)
      %{a_in_li: ['Two'], message: 'Message'}

  Streaming:

      iex> import SweetXml
      iex> doc = ["<ul><li>l1</li><li>l2", "</li><li>l3</li></ul>"]
      iex> SweetXml.stream_tags(doc, :li)
      ...> |> Stream.map(fn {:li, doc} ->
      ...>      doc |> SweetXml.xpath(~x"./text()")
      ...>    end)
      ...> |> Enum.to_list
      ['l1', 'l2', 'l3']

  For more examples, please see the documentation of each individual function.

  ## The ~x Sigil

  Warning ! Because we use `xmerl` internally, only XPath 1.0 paths are handled.

  Notice in the above examples, we used the expression `~x"//a/text()"` to
  define the path. The reason is it allows us to more precisely specify what
  is being returned.

    * `~x"//some/path"`

      without any modifiers, `xpath/2` will return the value of the entity if
      the entity is of type `xmlText`, `xmlAttribute`, `xmlPI`, `xmlComment`
      as defined in `:xmerl`

    * `~x"//some/path"e`

      `e` stands for (e)ntity. This forces `xpath/2` to return the entity with
      which you can further chain your `xpath/2` call

    * `~x"//some/path"l`

      'l' stands for (l)ist. This forces `xpath/2` to return a list. Without
      `l`, `xpath/2` will only return the first element of the match

    * `~x"//some/path"el` - mix of the above

    * `~x"//some/path"k`

      'k' stands for (K)eyword. This forces `xpath/2` to return a Keyword instead of a Map.

    * `~x"//some/path"s`

      's' stands for (s)tring. This forces `xpath/2` to return the value as
      string instead of a char list.

    * `~x"//some/path"o`

      'o' stands for (O)ptional. This allows the path to not exist, and will return nil.

    * `~x"//some/path"sl` - string list.

  Notice also in the examples section, we always import SweetXml first. This
  makes `x_sigil` available in the current scope. Without it, instead of using
  `~x`, you can do the following

      iex> doc = "<h1><a>Some linked title</a></h1>"
      iex> doc |> SweetXml.xpath(%SweetXpath{path: '//a/text()', is_value: true, cast_to: false, is_list: false, is_keyword: false})
      'Some linked title'

  Note the use of char_list in the path definition.
  """

  # `Record` must be required before its macros (`defrecord`, `is_record`)
  # can be used in this module.
  require Record
  # Each `Record.defrecord` below defines accessor macros for one xmerl record,
  # with the field list extracted from xmerl's `xmerl.hrl` header. All of them
  # are hidden from the generated documentation with `@doc false`.
  @doc false
  Record.defrecord :xmlDecl, Record.extract(:xmlDecl, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlAttribute, Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlNamespace, Record.extract(:xmlNamespace, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlNsNode, Record.extract(:xmlNsNode, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlElement, Record.extract(:xmlElement, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlText, Record.extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlComment, Record.extract(:xmlComment, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlPI, Record.extract(:xmlPI, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlDocument, Record.extract(:xmlDocument, from_lib: "xmerl/include/xmerl.hrl")
  @doc false
  Record.defrecord :xmlObj, Record.extract(:xmlObj, from_lib: "xmerl/include/xmerl.hrl")

  # A document in any form accepted by `parse/2`.
  @type doc :: (iodata | String.t | Enum.t)
  # An XPath specification, as returned by `sigil_x/2`.
  @type spec :: %SweetXpath{}
  # The `:xmlElement` record defined above.
  @type xmlElement :: record(:xmlElement)

  @doc ~s"""
  `sigil_x/2` simply returns a `%SweetXpath{}` struct, with modifiers converted to
  boolean fields:

      iex> SweetXml.sigil_x("//some/path", 'e')
      %SweetXpath{path: '//some/path', is_value: false, cast_to: false, is_list: false, is_keyword: false}

  Or you can simply import and use the `~x` expression:

      iex> import SweetXml
      iex> ~x"//some/path"e
      %SweetXpath{path: '//some/path', is_value: false, cast_to: false, is_list: false, is_keyword: false}

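  Valid modifiers are `e`, `l`, `k`, `o`, `s`, `i` and `f`, plus `S`, `I` and `F`
  for the soft variants of the string, integer and float casts. Below is the
  full explanation: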

    * `~x"//some/path"`

      without any modifiers, `xpath/2` will return the value of the entity if
      the entity is of type `xmlText`, `xmlAttribute`, `xmlPI`, `xmlComment`
      as defined in `:xmerl`

    * `~x"//some/path"e`

      `e` stands for (e)ntity. This forces `xpath/2` to return the entity with
      which you can further chain your `xpath/2` call

    * `~x"//some/path"l`

      'l' stands for (l)ist. This forces `xpath/2` to return a list. Without
      `l`, `xpath/2` will only return the first element of the match

    * `~x"//some/path"el` - mix of the above

    * `~x"//some/path"k`

      'k' stands for (K)eyword. This forces `xpath/2` to return a Keyword instead of a Map.

    * `~x"//some/path"s`

      's' stands for (s)tring. This forces `xpath/2` to return the value as
      string instead of a char list.

    * `~x"//some/path"o`

      'o' stands for (O)ptional. This allows the path to not exist, and will return nil.

    * `~x"//some/path"sl` - string list.

    * `~x"//some/path"i`

      'i' stands for (i)nteger. This forces `xpath/2` to return the value as
      integer instead of a char list.

    * `~x"//some/path"f`

      'f' stands for (f)loat. This forces `xpath/2` to return the value as
      float instead of a char list.

    * `~x"//some/path"il` - integer list
  """
  def sigil_x(path, modifiers \\ '') do
    # Cast modifiers in priority order: when several are given, the first one
    # listed here wins; without any of them `cast_to` stays `false`.
    cast_modifiers = [
      {?s, :string},
      {?S, :soft_string},
      {?i, :integer},
      {?I, :soft_integer},
      {?f, :float},
      {?F, :soft_float}
    ]

    %SweetXpath{
      path: String.to_charlist(path),
      # `e` asks for the entity itself, so it turns value extraction off.
      is_value: not Enum.member?(modifiers, ?e),
      is_list: Enum.member?(modifiers, ?l),
      is_keyword: Enum.member?(modifiers, ?k),
      is_optional: Enum.member?(modifiers, ?o),
      cast_to:
        Enum.find_value(cast_modifiers, false, fn {flag, cast} ->
          if Enum.member?(modifiers, flag), do: cast
        end)
    }
  end

  @doc """
  Returns `xpath` with the `{prefix, uri}` namespace pair prepended to its
  `namespaces` list. Both `prefix` and `uri` are converted to charlists.
  """
  def add_namespace(xpath, prefix, uri) do
    namespace = {to_charlist(prefix), to_charlist(uri)}
    %SweetXpath{xpath | namespaces: [namespace | xpath.namespaces]}
  end

  @doc """
  Parse a document into a form ready to be used by `xpath/3` and `xmap/2`.

  `doc` can be

  - a byte list (iodata)
  - a binary
  - any enumerable of binaries (for instance `File.stream!/3` result)

  `options` can be both:
  * `xmerl`'s options as described on the [xmerl_scan](http://www.erlang.org/doc/man/xmerl_scan.html) documentation page,
    see [the erlang tutorial](http://www.erlang.org/doc/apps/xmerl/xmerl_examples.html) for some advanced usage.
      For example: `parse(doc, quiet: true)`
  * `:dtd` to prevent DTD parsing or fetching, with the following possibilities:
    * `:none`, will prevent both internal and external entities; this is the recommended option for untrusted XML.
      This will override the option `{:rules, read_fun, write_fun, state}` if present;
    * `:all`, the default, for backward compatibility, allows all DTDs;
    * `:internal_only`, will block all attempt at external fetching;
    * `[only: entities]` where `entities` is either an atom for a single entity, or a list of atoms.
      If any other entity is defined in the XML, `parse` will raise on them.
      This will override the option `{:rules, read_fun, write_fun, state}` if present.

  When `doc` is an enumerable, the `:cont_fun` option cannot be given.

  Returns an `xmlElement` record.
  """
  @spec parse(doc, opts :: list) :: xmlElement
  def parse(doc, opts \\ []) do
    # `set_up/2` hands back the options to scan with, plus the value that
    # `clean_up/1` expects once parsing is over.
    {scan_opts, cleanup} = SweetXml.Options.set_up(opts, RuntimeError)

    try do
      do_parse(doc, scan_opts)
    after
      # Runs whether `do_parse/2` returned or raised.
      _ = SweetXml.Options.clean_up(cleanup)
    end
  end

  @doc false
  # Binary document: convert it to a list of bytes and dispatch again.
  def do_parse(doc, options) when is_binary(doc) do
    do_parse(:erlang.binary_to_list(doc), options)
  end

  # Non-empty list starting with an integer: scan it directly as a charlist.
  def do_parse([first | _] = doc, options) when is_integer(first) do
    {element, _rest} = :xmerl_scan.string(doc, options)
    element
  end

  # Anything else is treated as an enumerable of chunks: start from an empty
  # string and let the continuation options feed the scanner.
  def do_parse(doc_enum, options) do
    scan_opts = options ++ continuation_opts(doc_enum)
    {element, _rest} = :xmerl_scan.string('', scan_opts)
    element
  end

  @doc """
  > #### Soft Deprecation {: .warning}
  >
  > Will be later deprecated in favor of `stream_tags!/3`.

  Most common usage of streaming: stream a given tag or a list of tags, and
  optionally "discard" some DOM elements in order to free memory during streaming
  for big files which cannot fit entirely in memory.

  Note that each matched tag produces its own tree. If a given tag appears in
  the discarded options, it is ignored.

  - `doc` is an enumerable, data will be pulled during the result stream
    enumeration. e.g. `File.stream!("some_file.xml")`
  - `tags` is an atom or a list of atoms you want to extract. Each stream element
    will be `{:tagname, xmlelem}`. e.g. :li, :header
  - `options[:discard]` is the list of tag which will be discarded:
     not added to its parent DOM.
  - More options details are available with `parse/2`.

  ## Examples

      iex> import SweetXml
      iex> doc = ["<ul><li>l1</li><li>l2", "</li><li>l3</li></ul>"]
      iex> SweetXml.stream_tags(doc, :li, discard: [:li])
      ...> |> Stream.map(fn {:li, doc} -> doc |> SweetXml.xpath(~x"./text()") end)
      ...> |> Enum.to_list
      ['l1', 'l2', 'l3']
      iex> SweetXml.stream_tags(doc, [:ul, :li])
      ...> |> Stream.map(fn {_, doc} -> doc |> SweetXml.xpath(~x"./text()") end)
      ...> |> Enum.to_list
      ['l1', 'l2', 'l3', nil]


  Be careful if you set `options[:discard]`. If any of the discarded tags is nested
  inside a kept tag, you will not be able to access them.

  ## Examples

      iex> import SweetXml
      iex> doc = ["<header>", "<title>XML</title", "><header><title>Nested</title></header></header>"]
      iex> SweetXml.stream_tags(doc, :header)
      ...> |> Stream.map(fn {_, doc} -> SweetXml.xpath(doc, ~x".//title/text()") end)
      ...> |> Enum.to_list
      ['Nested', 'XML']
      iex> SweetXml.stream_tags(doc, :header, discard: [:title])
      ...> |> Stream.map(fn {_, doc} -> SweetXml.xpath(doc, ~x"./title/text()") end)
      ...> |> Enum.to_list
      [nil, nil]

  """
  # TODO : deprecate on version 0.10.0 (0.7.0 + 3)
  def stream_tags(doc, tags, options \\ []) do
    stream(doc, tag_stream_options(tags, options))
  end

  @doc """
  Equivalent to `stream_tags/3`, see `stream_tags/3` for more details.
  The difference is in the handling of the errors. The caller can use `try/1`,
  whereas with `stream_tags/3` trapping exits and handling messages was necessary.
  May raise `SweetXml.XmerlFatal` or `SweetXml.DTDError`.
  """
  def stream_tags!(doc, tags, options \\ []) do
    stream!(doc, tag_stream_options(tags, options))
  end

  # Builds the `options_callback` shared by `stream_tags/3` and `stream_tags!/3`.
  #
  # `tags` is an atom or a list of atoms; an empty list matches every element.
  # `options` may carry `:discard` (a list of tag names); every other entry is
  # appended unchanged after the generated `:hook_fun` / `:acc_fun`.
  defp tag_stream_options(tags, options) do
    tags = if is_atom(tags), do: [tags], else: tags

    {discard_tags, xmerl_options} =
      case :proplists.lookup(:discard, options) do
        {:discard, discarded} -> {discarded, :proplists.delete(:discard, options)}
        :none -> {[], options}
      end

    fn emit ->
      [
        # Emits `{name, element}` for every wanted element; the entity itself is
        # always returned unchanged.
        hook_fun: fn
          entity, xstate when Record.is_record(entity, :xmlElement) ->
            name = xmlElement(entity, :name)

            # `tags == []` rather than `length(tags) == 0`: this runs once per
            # element, so avoid walking the list each time.
            if tags == [] or name in tags do
              emit.({name, entity})
            end

            {entity, xstate}

          entity, xstate ->
            {entity, xstate}
        end,
        # Elements named in `:discard` are left out of the accumulator; every
        # other entity is prepended to it.
        acc_fun: fn
          entity, acc, xstate when Record.is_record(entity, :xmlElement) ->
            if xmlElement(entity, :name) in discard_tags do
              {acc, xstate}
            else
              {[entity | acc], xstate}
            end

          entity, acc, xstate ->
            {[entity | acc], xstate}
        end
      ] ++ xmerl_options
    end
  end

  @doc """
  > #### Soft Deprecation {: .warning}
  >
  > Will be later deprecated in favor of `stream!/2`.

  Create an element stream from a XML `doc`.

  This is a lower level API compared to `SweetXml.stream_tags`. You can use
  the `options_callback` argument to get fine control of what data to be streamed.

  - `doc` is an enumerable, data will be pulled during the result stream
    enumeration. e.g. `File.stream!("some_file.xml")`
  - `options_callback` is an anonymous function `fn emit -> (xmerl_opts | opts)` use it to
    define your :xmerl callbacks and put data into the stream using
    `emit.(elem)` in the callbacks. More details are available with `parse/2`.

  For example, here you define a stream of all `xmlElement` :

      iex> import Record
      iex> doc = ["<h1", "><a>Som", "e linked title</a><a>other</a></h1>"]
      iex> SweetXml.stream(doc, fn emit ->
      ...>   [
      ...>     hook_fun: fn
      ...>       entity, xstate when is_record(entity, :xmlElement)->
      ...>         emit.(entity)
      ...>         {entity, xstate}
      ...>       entity, xstate ->
      ...>         {entity,xstate}
      ...>     end
      ...>   ]
      ...> end) |> Enum.count
      3
  """
  # TODO : deprecate on version 0.10.0 (0.7.0 + 3)
  # A single binary document is streamed as a one-element enumerable.
  def stream(doc, options_callback) when is_binary(doc) do
    stream([doc], options_callback)
  end
  # A non-empty list starting with an integer is treated as iodata and
  # flattened into a single binary chunk.
  def stream([c | _] = doc, options_callback) when is_integer(c) do
    stream([IO.iodata_to_binary(doc)], options_callback)
  end
  # Enumerable document. The stream is a `Stream.resource/3` built from three
  # functions: start (spawn the parser), next (translate parser messages into
  # stream elements) and after (clean up).
  def stream(doc, options_callback) do
    Stream.resource fn ->
      # `parent` is the process that starts enumerating the stream; `ref` tags
      # every message exchanged with the parser process.
      {parent, ref} = waiter = {self(), make_ref()}
      # The `emit` function given to the callback sends `{:event, ref, e}` to `parent`.
      opts = options_callback.(fn e -> send(parent, {:event, ref, e}) end)

      {opts, do_after} = SweetXml.Options.set_up(opts, RuntimeError)

      # The scan runs in a linked process that is also monitored, so its end is
      # reported as a `:DOWN` message.
      pid = spawn_link fn -> :xmerl_scan.string('', opts ++ continuation_opts(doc, waiter)) end
      {ref, pid, Process.monitor(pid), do_after}
    end, fn {ref, pid, monref, do_after} = acc ->
      receive do
        # Parser exited normally: end the stream. Only the `:normal` reason is
        # matched here (see the inline remark below).
        {:DOWN, ^monref, :process, ^pid, :normal} ->
          {:halt, {:parse_ended, do_after}} ## !!! maybe do something when reason !== :normal
        # An element emitted from a callback becomes one stream element.
        {:event, ^ref, event} ->
          {[event], acc}
        # The parser paused before pulling more input: tell it to go on.
        {:wait, ^ref} ->
          send(pid, {:continue, ref})
          {[], acc}
      end
    end, fn
      # The parser already finished: only the option clean-up is left.
      {:parse_ended, do_after} ->
      _ = SweetXml.Options.clean_up(do_after)
        :ok

      # The stream was stopped while the accumulator still held the parser
      # state: stop monitoring, clean up and ask the parser to halt.
      {ref, pid, monref, do_after} ->
        _ = Process.demonitor(monref)
        _ = SweetXml.Options.clean_up(do_after)
        flush_halt(pid, ref)
    end
  end

  @doc """
  Equivalent to `stream/2`, see `stream/2` for more details.
  The difference is in the handling of the errors. The caller can use `try/1`,
  whereas with `stream/2` trapping exits and handling messages was necessary.
  May raise `SweetXml.XmerlFatal` or `SweetXml.DTDError`.
  """
  # A single binary document is streamed as a one-element enumerable.
  def stream!(doc, options_callback) when is_binary(doc) do
    stream!([doc], options_callback)
  end
  # A non-empty list starting with an integer is treated as iodata and
  # flattened into a single binary chunk. It must delegate to `stream!/2`, not
  # `stream/2`, so that charlist input gets the same raising error handling as
  # every other input form.
  def stream!([c | _] = doc, options_callback) when is_integer(c) do
    stream!([IO.iodata_to_binary(doc)], options_callback)
  end
  # Enumerable document. Same three-function `Stream.resource/3` layout as
  # `stream/2`, but the parser is only monitored (not linked) and its abnormal
  # exits are turned into exceptions in the after function.
  def stream!(doc, options_callback) do
    Stream.resource fn ->
      # `parent` is the process that starts enumerating the stream; `ref` tags
      # every message exchanged with the parser process.
      {parent, ref} = waiter = {self(), make_ref()}
      # The `emit` function given to the callback sends `{:event, ref, e}` to `parent`.
      opts = options_callback.(fn e -> send(parent, {:event, ref, e}) end)

      {opts, do_after} = SweetXml.Options.set_up(opts, SweetXml.DTDError)

      {pid, monref} = spawn_monitor(fn -> :xmerl_scan.string('', opts ++ continuation_opts(doc, waiter)) end)
      {ref, pid, monref, do_after}
    end, fn {ref, pid, monref, do_after} = acc ->
      receive do
        # Parser exited normally: end the stream.
        {:DOWN, ^monref, :process, ^pid, :normal} ->
          {:halt, {:parse_ended, do_after}}
        # Parser exited with `{:fatal, error}`: end the stream and keep the
        # payload so the after function can raise `SweetXml.XmerlFatal`.
        {:DOWN, ^monref, :process, ^pid, {:fatal, error}} ->
          {:halt, {:fatal, error, do_after}}
        # Any other exit reason is kept for the after function as well.
        {:DOWN, ^monref, :process, ^pid, error} ->
          {:halt, {:error, error, do_after}}
        # An element emitted from a callback becomes one stream element.
        {:event, ^ref, event} ->
          {[event], acc}
        # The parser paused before pulling more input: tell it to go on.
        {:wait, ^ref} ->
          send(pid, {:continue, ref})
          {[], acc}
      end
    end, fn
      # The parser already finished: only the option clean-up is left.
      {:parse_ended, do_after} ->
        _ = SweetXml.Options.clean_up(do_after)
        :ok

      {:fatal, error, do_after} ->
        _ = SweetXml.Options.clean_up(do_after)
        raise SweetXml.XmerlFatal, error

      # NOTE(review): only exit reasons shaped as `{exception, stacktrace}` are
      # handled; an `{:error, reason, do_after}` with any other reason has no
      # matching clause here — confirm whether that can happen.
      {:error, {exception, stacktrace}, do_after} ->
        _ = SweetXml.Options.clean_up(do_after)
        reraise(exception, stacktrace)

      # The stream was stopped while the accumulator still held the parser
      # state: stop monitoring, clean up and ask the parser to halt.
      {ref, pid, monref, do_after} ->
        _ = Process.demonitor(monref)
        _ = SweetXml.Options.clean_up(do_after)
        flush_halt(pid, ref)
    end
  end

  @doc ~S"""
  `xpath` allows you to query an XML document with XPath.

  The second argument to xpath is a `%SweetXpath{}` struct. The optional third
  argument is a keyword list, such that the value of each keyword is also
  either a `%SweetXpath{}` or a list with head being a `%SweetXpath{}` and tail being
  another keyword list exactly like before. Please see the examples below for better
  understanding.

  ## Examples

  Simple:

      iex> import SweetXml
      iex> doc = "<h1><a>Some linked title</a></h1>"
      iex> doc |> xpath(~x"//a/text()")
      'Some linked title'

  With optional mapping:

      iex> import SweetXml
      iex> doc = "<body><header><p>Message</p><ul><li>One</li><li><a>Two</a></li></ul></header></body>"
      iex> doc |> xpath(~x"//header", message: ~x"./p/text()", a_in_li: ~x".//li/a/text()"l)
      %{a_in_li: ['Two'], message: 'Message'}

  With optional mapping and nesting:

      iex> import SweetXml
      iex> doc = "<body><header><p>Message</p><ul><li>One</li><li><a>Two</a></li></ul></header></body>"
      iex> doc
      ...> |> xpath(
      ...>      ~x"//header",
      ...>      ul: [
      ...>        ~x"./ul",
      ...>        a: ~x"./li/a/text()"
      ...>      ]
      ...>    )
      %{ul: %{a: 'Two'}}

  ## Security

  Whenever you are working with some xml that was not generated by your system,
  it is highly recommended that you restrain some functionalities of XML
  during the parsing. SweetXml allows in particular to prevent DTD parsing and fetching.
  Unless you know exactly what kind of DTD you want to permit in your xml,
  it is recommended that you use the following code example to prevent possible attacks:
  ```
  doc
  |> parse(dtd: :none)
  |> xpath(spec, subspec)
  ```
  For more details, see `parse/2`.
  """
  @spec xpath(parent :: (doc | xmlElement), spec, subspec) :: any
  when subspec: keyword(spec | subspec)
  def xpath(parent, spec, subspec \\ [])

  # Anything that is not a tuple is treated as an unparsed document.
  def xpath(parent, spec, []) when not is_tuple(parent) do
    xpath(parse(parent), spec)
  end

  # List of values: extract and cast the value of every match.
  def xpath(parent, %SweetXpath{is_list: true, is_value: true, cast_to: cast, is_optional: is_opt?} = spec, []) do
    values =
      for entity <- get_current_entities(parent, spec) do
        to_cast(_value(entity), cast, is_opt?)
      end

    spec.transform_fun.(values)
  end

  # List of entities: the matches are passed on as they are.
  def xpath(parent, %SweetXpath{is_list: true, is_value: false} = spec, []) do
    spec.transform_fun.(get_current_entities(parent, spec))
  end

  # Single string value: every match is cast and the pieces are joined into
  # one string, so the path is queried in list mode.
  def xpath(parent, %SweetXpath{is_list: false, is_value: true, cast_to: string_type, is_optional: is_opt?} = spec, []) when string_type in [:string,:soft_string] do
    list_spec = %SweetXpath{spec | is_list: true}

    joined =
      parent
      |> get_current_entities(list_spec)
      |> Enum.map_join(fn entity -> to_cast(_value(entity), string_type, is_opt?) end)

    list_spec.transform_fun.(joined)
  end

  # Single value with any other cast (or none): use the first match only.
  def xpath(parent, %SweetXpath{is_list: false, is_value: true, cast_to: cast, is_optional: is_opt?} = spec, []) do
    entity = get_current_entities(parent, spec)
    spec.transform_fun.(to_cast(_value(entity), cast, is_opt?))
  end

  # Single entity: the first match is passed on as it is.
  def xpath(parent, %SweetXpath{is_list: false, is_value: false} = spec, []) do
    spec.transform_fun.(get_current_entities(parent, spec))
  end

  # Non-empty subspec: resolve the path first, then map the subspec over the
  # result (over each entity when the path is a list).
  def xpath(parent, sweet_xpath, subspec) do
    if sweet_xpath.is_list do
      parent
      |> xpath(sweet_xpath)
      |> Enum.map(&xmap(&1, subspec, sweet_xpath))
    else
      parent
      |> xpath(sweet_xpath)
      |> xmap(subspec, sweet_xpath)
    end
  end

  @doc ~S"""
  `xmap` returns a mapping with each value being the result of `xpath`.

  Just as `xpath`, you can nest the mapping structure. Please see `xpath/3` for
  more detail.

  You can give the option `true` to get the result as a keyword list instead of a map.

  ## Examples

  Simple:

      iex> import SweetXml
      iex> doc = "<h1><a>Some linked title</a></h1>"
      iex> doc |> xmap(a: ~x"//a/text()")
      %{a: 'Some linked title'}

  With optional mapping:

      iex> import SweetXml
      iex> doc = "<body><header><p>Message</p><ul><li>One</li><li><a>Two</a></li></ul></header></body>"
      iex> doc |> xmap(message: ~x"//p/text()", a_in_li: ~x".//li/a/text()"l)
      %{a_in_li: ['Two'], message: 'Message'}

  With optional mapping and nesting:

      iex> import SweetXml
      iex> doc = "<body><header><p>Message</p><ul><li>One</li><li><a>Two</a></li></ul></header></body>"
      iex> doc
      ...> |> xmap(
      ...>      message: ~x"//p/text()",
      ...>      ul: [
      ...>        ~x"//ul",
      ...>        a: ~x"./li/a/text()"
      ...>      ]
      ...>    )
      %{message: 'Message', ul: %{a: 'Two'}}
      iex> doc
      ...> |> xmap(
      ...>      message: ~x"//p/text()",
      ...>      ul: [
      ...>        ~x"//ul"k,
      ...>        a: ~x"./li/a/text()"
      ...>      ]
      ...>    )
      %{message: 'Message', ul: [a: 'Two']}
      iex> doc
      ...> |> xmap([
      ...>      message: ~x"//p/text()",
      ...>      ul: [
      ...>        ~x"//ul",
      ...>        a: ~x"./li/a/text()"
      ...>      ]
      ...>    ], true)
      [message: 'Message', ul: %{a: 'Two'}]

  ## Security

  Whenever you are working with some xml that was not generated by your system,
  it is highly recommended that you restrain some functionalities of XML
  during the parsing. SweetXml allows in particular to prevent DTD parsing and fetching.
  Unless you know exactly what kind of DTD you want to permit in your xml,
  it is recommended that you use the following code example to prevent possible attacks:
  ```
  doc
  |> parse(dtd: :none)
  |> xmap(specs, options)
  ```
  For more details, see `parse/2`.
  """
  @spec xmap(parent :: (doc | xmlElement), mapping :: specs, options :: (boolean | map)) :: (map | keyword)
  when specs: keyword(spec | specs)
  def xmap(parent, mapping, options \\ false)

  # An optional path that matched nothing maps to nil.
  def xmap(nil, _, %{is_optional: true}) do
    nil
  end

  # End of the mapping with an atom option (e.g. `true`/`false`): wrap it as
  # the `is_keyword` flag and dispatch to the clauses below.
  def xmap(parent, [], atom) when is_atom(atom) do
    xmap(parent, [], %{is_keyword: atom})
  end

  # End of the mapping: start from an empty map, or an empty keyword list.
  def xmap(_, [], %{is_keyword: false}) do
    %{}
  end

  def xmap(_, [], %{is_keyword: true}) do
    []
  end

  # Nested entry: the head of the list is the path, the rest is its subspec.
  def xmap(parent, [{label, spec} | tail], is_keyword) when is_list(spec) do
    [sweet_xpath | subspec] = spec
    rest = xmap(parent, tail, is_keyword)
    put_in(rest[label], xpath(parent, sweet_xpath, subspec))
  end

  # Plain entry: the value is the result of a single `xpath/2` query.
  def xmap(parent, [{label, sweet_xpath} | tail], is_keyword) do
    rest = xmap(parent, tail, is_keyword)
    put_in(rest[label], xpath(parent, sweet_xpath))
  end

  @doc """
  Tags `%SweetXpath{}` with `fun` to be applied at the end of `xpath` query.

  ## Examples

      iex> import SweetXml
      iex> string_to_range = fn str ->
      ...>     [first, last] = str |> String.split("-", trim: true) |> Enum.map(&String.to_integer/1)
      ...>     first..last
      ...>   end
      iex> doc = "<weather><zone><name>north</name><wind-speed>5-15</wind-speed></zone></weather>"
      iex> doc
      ...> |> xpath(
      ...>      ~x"//weather/zone"l,
      ...>      name: ~x"//name/text()"s |> transform_by(&String.capitalize/1),
      ...>      wind_speed: ~x"./wind-speed/text()"s |> transform_by(string_to_range)
      ...>    )
      [%{name: "North", wind_speed: 5..15}]
  """
  def transform_by(%SweetXpath{} = sweet_xpath, fun) when is_function(fun) do
    # Only `transform_fun` changes; every other field of the spec is kept.
    %SweetXpath{sweet_xpath | transform_fun: fun}
  end

  # Returns the `:value` field of text, comment, PI, attribute and xmlObj
  # records; anything else is returned unchanged.
  defp _value(entity) when Record.is_record(entity, :xmlText) do
    xmlText(entity, :value)
  end

  defp _value(entity) when Record.is_record(entity, :xmlComment) do
    xmlComment(entity, :value)
  end

  defp _value(entity) when Record.is_record(entity, :xmlPI) do
    xmlPI(entity, :value)
  end

  defp _value(entity) when Record.is_record(entity, :xmlAttribute) do
    xmlAttribute(entity, :value)
  end

  defp _value(entity) when Record.is_record(entity, :xmlObj) do
    xmlObj(entity, :value)
  end

  defp _value(entity) do
    entity
  end

  # True when `data` is a non-empty tuple whose first element is `kind`.
  defp is_record?(data, kind) when is_tuple(data) and tuple_size(data) > 0 do
    elem(data, 0) == kind
  end

  defp is_record?(_data, _kind) do
    false
  end

  # Builds the `:continuation_fun` and `:close_fun` options that let the
  # scanner pull its input chunk by chunk from `enum`.
  #
  # The continuation state is a function that reduces over
  # `split_by_whitespace(enum)` and suspends after every chunk, so each call
  # with `{:cont, []}` yields exactly one more chunk.
  #
  # `waiter` is `nil` for plain parsing, or `{parent, ref}` when called from the
  # streaming functions; in that case the parser checks in with `parent` before
  # every new chunk.
  defp continuation_opts(enum, waiter \\ nil) do
    [{
       :continuation_fun,
       # NOTE(review): `xcont` / `xexc` are presumably xmerl's "continue with
       # more input" and "no more input" callbacks — confirm against xmerl_scan.
       fn xcont, xexc, xstate ->
         case :xmerl_scan.cont_state(xstate).({:cont, []}) do
	   {:halted, _acc} ->
	     xexc.(xstate)
           {:suspended, bin, cont}->
             case waiter do
               nil -> :ok
               {parent, ref} ->
                 send(parent, {:wait, ref}) # continuation behaviour, pause and wait stream decision
                 receive do
                   {:continue, ^ref} -> # stream continuation fun has been called: parse to find more elements
                     :ok
                   {:halt, ^ref} -> # stream halted: halt the underlying stream and exit parsing process
                     cont.({:halt, []})
                     exit(:normal)
                 end
             end
             # Hand the new chunk over and store the suspended reducer as the
             # next continuation state.
             xcont.(bin, :xmerl_scan.cont_state(cont, xstate))
           {:done, _} -> xexc.(xstate)
         end
       end,
       # Initial continuation state: the suspendable reduction over the chunks.
       &Enumerable.reduce(split_by_whitespace(enum), &1, fn bin, _ -> {:suspend, bin} end)
     },
     {
       :close_fun,
       fn xstate -> # make sure the XML end halts the binary stream (if more bytes are available after XML)
         :xmerl_scan.cont_state(xstate).({:halt,[]})
         xstate
       end
     }]
  end

  # Re-chunks `enum` into charlists that each end right after a whitespace
  # byte; whatever follows the last whitespace is carried over to the next
  # chunk. A trailing `:last` marker flushes what is still pending.
  defp split_by_whitespace(enum) do
    enum
    |> Stream.concat([:last])
    |> Stream.transform("", fn
      :last, pending ->
        {[:erlang.binary_to_list(pending)], :done}

      chunk, pending ->
        buffer =
          case pending do
            "" -> chunk
            _ -> IO.iodata_to_binary([pending, chunk])
          end

        case split_last_whitespace(buffer) do
          # No split point found: emit nothing and keep buffering.
          :white_bin -> {[], buffer}
          {ready, leftover} -> {[:erlang.binary_to_list(ready)], leftover}
        end
    end)
  end

  # Splits `bin` right after its last whitespace byte (space, `\n`, `\r` or
  # `\t`), scanning backwards from the end.
  #
  # Returns `{head, tail}` where `head` ends with that whitespace byte, or
  # `:white_bin` when no split point is found. The byte at index 0 is never
  # examined, because the scan stops once `size` reaches 0.
  defp split_last_whitespace(bin), do: split_last_whitespace(byte_size(bin) - 1, bin)

  # `size <= 0` instead of `0`: for an empty binary the scan starts at -1,
  # which would never hit a `0` clause nor match the pattern below, and would
  # therefore recurse forever.
  defp split_last_whitespace(size, _bin) when size <= 0, do: :white_bin
  defp split_last_whitespace(size, bin) do
    case bin do
      # `h` is the byte at index `size`; `tail` is everything after it.
      <<_::binary - size(size), h>> <> tail when h == ?\s or h == ?\n or h == ?\r or h == ?\t ->
        {head, _} = :erlang.split_binary(bin, size + 1)
        {head, tail}
      _ ->
        split_last_whitespace(size - 1, bin)
    end
  end

  # Used by the after functions of `stream/2` and `stream!/2` when the stream
  # is stopped while the parser state is still held: drops the events already
  # sent by the parser `pid` and answers its next `{:wait, ref}` with
  # `{:halt, ref}`.
  #
  # NOTE(review): the receive has no `after` timeout and no `:DOWN` clause, so
  # it looks like this would block if the parser never sends another `:wait`
  # (e.g. it has already exited) — confirm.
  defp flush_halt(pid, ref) do
    receive do
      {:event, ^ref, _} ->
        flush_halt(pid, ref) # flush all emitted elems after :halt
      {:wait, ^ref} ->
        send(pid, {:halt, ref}) # tell the continuation function to halt the underlying stream
    end
  end

  # List mode: always hand back a list, wrapping a non-list result.
  defp get_current_entities(parent, %SweetXpath{path: path, is_list: true, namespaces: namespaces}) do
    path
    |> :xmerl_xpath.string(parent, namespace: namespaces)
    |> List.wrap()
  end

  # Single mode: an `:xmlObj` result is returned as it is; otherwise the first
  # element of the result is used.
  defp get_current_entities(parent, %SweetXpath{path: path, is_list: false, namespaces: namespaces}) do
    result = :xmerl_xpath.string(path, parent, namespace: namespaces)

    cond do
      is_record?(result, :xmlObj) -> result
      true -> List.first(result)
    end
  end

  # No cast requested: the value is handed back untouched.
  defp to_cast(value, false, _is_opt?), do: value

  # Optional path without a match: nil stays nil for every cast.
  defp to_cast(nil, _, true), do: nil

  # Hard casts: these raise when the value cannot be converted.
  defp to_cast(value, :string, _is_opt?), do: to_string(value)

  defp to_cast(value, :integer, _is_opt?) do
    value |> to_string() |> String.to_integer()
  end

  defp to_cast(value, :float, _is_opt?) do
    {float, _rest} = value |> to_string() |> Float.parse()
    float
  end

  # Soft casts: fall back to a default instead of raising when the value has
  # no `String.Chars` implementation or cannot be parsed.
  defp to_cast(value, :soft_string, is_opt?) do
    case String.Chars.impl_for(value) do
      nil -> soft_default(is_opt?, "")
      _impl -> to_string(value)
    end
  end

  defp to_cast(value, :soft_integer, is_opt?) do
    with impl when not is_nil(impl) <- String.Chars.impl_for(value),
         {int, _rest} <- Integer.parse(to_string(value)) do
      int
    else
      _ -> soft_default(is_opt?, 0)
    end
  end

  defp to_cast(value, :soft_float, is_opt?) do
    with impl when not is_nil(impl) <- String.Chars.impl_for(value),
         {float, _rest} <- Float.parse(to_string(value)) do
      float
    else
      _ -> soft_default(is_opt?, 0.0)
    end
  end

  # Fallback of a soft cast: nil for optional paths, `default` otherwise.
  defp soft_default(is_opt?, default) do
    if is_opt?, do: nil, else: default
  end
end