lib/xml_schema.ex

defmodule XmlSchema do
  @moduledoc """
  Adds XML parsing and generation to modules via `use XmlSchema`.

  Define Xml structures with:
  - [xml](#xml/1)
  - [xml_tag](#xml_tag/3)
  - [xml_one](#xml_one/3)
  - [xml_many](#xml/3)

  Options to `use`:
    `tag: <string>` string to use as xml tag name for this element.
     This allows a tag name not derived from the module name.

    `print: <boolean>` will print the macro generated code, e.g:
      `print: Mix.env() in [:test, :dev]`


  # Generated functions

  An XmlSchema will provide the generated functions:
  - parse_xml/1 - accepts an xml string to parse
  - xml_tag/0 - returns the tag used when generating xml from this node
  - xml_tag_list - order list of tags in this schema
  - transform/1 - overridable function that can be used to transform tag names during parsing

  # Example

  ```elixir
  defmodule Simple do
    use XmlSchema, xml_name: "a"
    xml do
      xml_tag :x, :string
      xml_tag :y, :boolean
      xml_one :z, Z do
        xml_tag :a, :string
        xml_tag :b, :string
      end
      xml_many :j, J do
        xml_tag :q, :string
      end
      xml_tag :g, {:array, :string}
    end
  end
  ```

  Will parse

  ```xml
  <?xml encoding="utf-8" ?>
  <a someattr="blue" otherattr="red">
    <x>hill</x>
    <y>false</y>
    <z>
      <a>tree</a>
      <b>bush</b>
    </z>
    <j>
      <q>cat</q>
    </j>
    <j>
      <q>dog</q>
    </j>
    <g>hippo</g>
    <g>elephant</g>
    <g>rhino</g>
  </a>
  ```

  Into:

  ```elixir
  %Simple{
    _attributes: %{"otherattr" => "red", "someattr" => "blue"},
    x: "hill",
    y: false,
    z: %Simple.Z{_attributes: nil, a: "tree", b: "bush"},
    j: [
      %Simple.J{_attributes: nil, q: "cat"},
      %Simple.J{_attributes: nil, q: "dog"}
    ],
    g: ["hippo", "elephant", "rhino"]
  }
  ```

  # Attributes

  Parsed attibutes are stored in a map, this precludes multiple same named
  attribute in a single tag. It also does not preserve attribute ordering.

  Terminal nodes defined with `xml_tag` provide bare values. If you wish to
  capture attributes the type argument takes the form of `{:param, type}` which
  will result in not bare values but a tuple of `{value, %{attribute map}}`.


  An example:
  ```elixir
  #{File.read!("test/support/example/attribute.ex")}
  ```


  For more examples, see test/support.
  """

  @doc """
  Start definition of xml document structure.
  """
  defmacro xml(block) do
    quote do
      @primary_key false

      embedded_schema do
        field :_attributes, :map
        unquote(block)
      end
    end
  end

  @doc """
  Declare an XML tag with a scalar value with type conversion.
  Any type accepted by Ecto.Schema can be used. Custom types
  can be defined in the same manner. An xml_tag is terminal, for
  tags containing substructure use xml_one or xml_many.
  See attributes section above for details on how to capture
  tag attributes.

  ```
  #{File.read!("test/support/example/tag.ex")}
  ```

  """

  defmacro xml_tag(name, type, opts \\ []) do
    Module.put_attribute(__CALLER__.module, :tag_order, name)

    quote do
      field unquote(name), unquote(type), unquote(opts)
    end
  end

  @doc """
  Declare a tag that occurs once with XML tag `name` as either
  a block or a declared module. Modules generated by blocks automatically
  `use XmlSchema`. nil value is used if a tag is not present.

  Block form example:

  ```
  #{File.read!("test/support/example/one_block.ex")}
  ```

  In this example (from test/support/example/one.ex)
  `doc` is an xml sample that will result in the `expect` struct.

  Defined module form:
  ```
  #{File.read!("test/support/example/one.ex")}
  ```

  Depends on the definition of module `Example.Tag`.
  """

  defmacro xml_one(name, schema, opts \\ [])

  defmacro xml_one(name, schema, do: block) do
    Module.put_attribute(__CALLER__.module, :tag_order, name)

    block =
      quote do
        use XmlSchema

        xml do
          unquote(block)
        end
      end

    new_name = Macro.expand(schema, __CALLER__)
    module = Module.concat(__CALLER__.module, new_name)
    Module.create(module, block, __CALLER__)

    quote do
      embeds_one(unquote(name), unquote(module), [])
    end
  end

  defmacro xml_one(name, schema, opts) do
    Module.put_attribute(__CALLER__.module, :tag_order, name)

    quote do
      embeds_one(unquote(name), unquote(schema), unquote(opts))
    end
  end

  @doc """
  Similar to [`xml_one/3`](#xml_one/3) except the tag can occur 0 or
  more times. Values will be presented in a list. Unlike `xml_one` if
  the tag is not present an empty list will result.

  In this example (from test/support/example/many.ex),
  `doc` is an xml sample that will result in the `expect` struct.

  ```
  #{File.read!("test/support/example/many.ex")}
  ```

  """

  defmacro xml_many(name, schema, opts \\ [])

  defmacro xml_many(name, schema, do: block) do
    Module.put_attribute(__CALLER__.module, :tag_order, name)

    block =
      quote do
        use XmlSchema

        xml do
          unquote(block)
        end
      end

    new_name = Macro.expand(schema, __CALLER__)
    module = Module.concat(__CALLER__.module, new_name)
    Module.create(module, block, __CALLER__)

    quote do
      embeds_many(unquote(name), unquote(module), [])
    end
  end

  defmacro xml_many(name, schema, opts) do
    Module.put_attribute(__CALLER__.module, :tag_order, name)

    quote do
      embeds_many(unquote(name), unquote(schema), unquote(opts))
    end
  end

  defmodule ParseError do
    @moduledoc "Raised when type conversion from xml string fails"
    defexception message: "probem parsing xml into schema"
  end

  @doc """
  Parse an xml string based on the structure of `module`, which must be an
  XmlSchema. This function is added to any module that uses XmlSchema and
  is simplest to call the function there instead of this one.
  """
  def parse_xml(xml_string, module) do
    :erlsom.simple_form(xml_string)
    |> case do
      {:ok, sxml, _extra} ->
        unpack_xml(sxml, module)

      other ->
        other
    end
  end

  defp unpack_xml({_tag, attr, children}, module) do
    Enum.reduce(children, module.__struct__(), &unpack_tag(&1, module, &2))
    |> add_attrs(attr)
  end

  defp add_attrs(ms, []), do: ms

  defp add_attrs(ms, attr) do
    %{
      ms
      | :_attributes => attr_translate(attr)
    }
  end

  defp attr_translate(attr) do
    Map.new(attr, fn {k, v} -> {to_string(k), to_string(v)} end)
  end

  @nsre Regex.compile!("^({(?<ns>[^}]+)})?(?<tag>.*)")

  defp ns_tag(tag, attr) do
    with [?{ | _] <- tag,
         str_tag <- to_string(tag),
         %{"ns" => ns, "tag" => bare_tag} <- Regex.named_captures(@nsre, str_tag) do
      {bare_tag, [{~c"_ns", ns} | attr]}
    else
      _unscoped ->
        {to_string(tag), attr}
    end
  end

  defp unpack_tag({tag, attr_in, child}, module, acc) do
    {str_field, attr} = ns_tag(tag, attr_in)

    module.get_tag(str_field)
    |> case do
      nil ->
        try_transform({str_field, attr, child}, module, acc)

      field when is_atom(field) ->
        unpack_tag_aux(field, {tag, attr, child}, module, acc)
    end
  end

  defp unpack_tag_aux(field, {tag, attr, child}, module, acc) do
    type = module.__schema__(:type, field)

    case type do
      nil ->
        acc

      {:array, subtype} ->
        update_array(acc, field, charlist_to_type(child, subtype, field, attr))

      {:parameterized, Ecto.Embedded, params} ->
        embed(acc, field, {tag, attr, child}, params)
      
      {:parameterized, {Ecto.Embedded, params}} ->
        embed(acc, field, {tag, attr, child}, params)

      subtype ->
        Map.put(acc, field, charlist_to_type(child, subtype, field, attr))
    end
  end

  defp subtype_attr({value, nil}, attr) do
    {value, attr_translate(attr)}
  end

  defp subtype_attr({value, %{} = attr_map}, attr) do
    {value, Map.merge(attr_map, attr_translate(attr))}
  end

  defp subtype_attr(st, _attr), do: st

  defp try_transform({tag, attr, child}, module, acc) do
    tag
    |> module.transform()
    |> module.get_tag()
    |> case do
      nil ->
        acc

      ^tag ->
        acc

      new_tag ->
        unpack_tag_aux(new_tag, {new_tag, [{~c"_tag", tag} | attr], child}, module, acc)
    end
  end

  defp embed(base, field, val, %{cardinality: :one} = params) do
    Map.put(base, field, unpack_xml(val, params.related))
  end

  defp embed(base, field, val, %{cardinality: :many} = params) do
    cur = Map.get(base, field, [])
    cur = cur ++ [unpack_xml(val, params.related)]
    Map.put(base, field, cur)
  end

  defp update_array(base, field, val) do
    Map.get(base, field)
    |> case do
      nil ->
        Map.put(base, field, [val])

      list ->
        Map.put(base, field, list ++ [val])
    end
  end

  defp charlist_to_type([], _type, _field, _attr), do: nil

  defp charlist_to_type([cl], type, _field, attr) when is_list(cl) do
    # IO.puts( "charlist_to_type: val=#{inspect(cl)} type=#{type}")

    cond do
      is_atom(type) and
        Code.ensure_compiled(type) == {:module, type} and
          function_exported?(type, :type, 0) ->
        type.cast(cl)

      true ->
        case type do
          :atom ->
            {:ok, List.to_atom(cl)}

          :string ->
            {:ok, List.to_string(cl)}

          :boolean ->
            Ecto.Type.cast(:boolean, List.to_string(cl) |> String.downcase())

          {:param, subtype} ->
            {:ok, {charlist_to_type([cl], subtype, nil, attr), nil}}

          prim ->
            Ecto.Type.cast(prim, List.to_string(cl))
        end
        |> case do
          {:ok, val} -> val
          other -> other
        end
    end
    |> subtype_attr(attr)
  end

  defp charlist_to_type(other, type, field, _attr) do
    raise ParseError,
      message: "tag #{field} to #{inspect(type)} is not a string input: #{inspect(other)}"
  end

  defp generate_custom(maybe_custom, fld_c, list_comp_val, acc) when is_list(list_comp_val) do
    Enum.reduce(list_comp_val, acc, fn sval, acc2 ->
      generate_custom(maybe_custom, fld_c, sval, acc2)
    end)
  end

  defp generate_custom(maybe_custom, fld_c, comp_val, acc) do
    {val, attr} =
      case comp_val do
        {v1, a1} when is_map(a1) -> {v1, Map.to_list(a1)}
        v1 -> {v1, []}
      end

    cond do
      Ecto.Type.base?(maybe_custom) ->
        [{fld_c, attr, val} | acc]

      is_atom(maybe_custom) and
        Code.ensure_compiled(maybe_custom) == {:module, maybe_custom} and
          function_exported?(maybe_custom, :dump, 1) ->
        [{fld_c, attr, maybe_custom.dump(val)} | acc]

      true ->
        acc
    end
  end

  defp generate_value_array({:array, ptype} = type, fld_c, [h | t], acc) do
    generate_value_array(type, fld_c, t, generate_value(ptype, fld_c, h, acc))
  end

  defp generate_value_array(_type, _fld_c, [], acc), do: acc

  defp generate_value(type, fld_c, val, acc) do
    case type do
      {:parameterized, Ecto.Embedded, %{cardinality: :one}} ->
        [generate_child(val, fld_c) | acc]

      {:parameterized, Ecto.Embedded, %{cardinality: :many}} ->
        [Enum.map(val, &generate_child(&1, fld_c)) | acc]

      {:parameterized, {Ecto.Embedded, %{cardinality: :one}}} ->
        [generate_child(val, fld_c) | acc]

      {:parameterized, {Ecto.Embedded, %{cardinality: :many}}} ->
        [Enum.map(val, &generate_child(&1, fld_c)) | acc]

      {:array, _ptype} ->
        generate_value_array(type, fld_c, Enum.reverse(val), acc)

      {:param, ptype} ->
        generate_custom(ptype, fld_c, val, acc)

      _maybe_custom ->
        generate_custom(type, fld_c, val, acc)
    end
  end

  defp generate_child(schema, as_tag) do
    attr = (Map.get(schema, :_attributes) || %{}) |> Map.to_list()

    children =
      schema.__struct__.xml_tag_list()
      |> Enum.reverse()
      |> Enum.reduce([], fn
        :_attributes, acc ->
          acc

        fld, acc ->
          fld_c = to_charlist(fld)

          Map.get(schema, fld)
          |> case do
            nil ->
              acc

            [] ->
              acc

            val ->
              schema.__struct__.__schema__(:type, fld)
              |> generate_value(fld_c, val, acc)
          end
      end)

    {as_tag, attr, children}
  end

  @doc """
  Generate xml document from xml schema
  """

  def generate_xml(schema) do
    generate_xml(schema, schema.__struct__.xml_tag())
  end

  @doc "Generate xml document from xml schema with `name` top tag"
  def generate_xml(schema, name) do
    generate_child(schema, name)
    |> XmlBuilder.document()
    |> XmlBuilder.generate(pretty: true)
  end

  defp module_tail_to_string(module) do
    [name | _] = Module.split(module) |> Enum.reverse()
    name
  end

  defp code_generate(xml_name) do
    quote bind_quoted: [xml_name: xml_name] do
      @moduledoc "See XmlSchema for information"
      use Ecto.Schema
      import XmlSchema
      @before_compile XmlSchema.Precompile

      @doc "Parse xml_string using #{xml_name} as starting point"
      def parse_xml(xml_string) do
        XmlSchema.parse_xml(xml_string, __MODULE__)
      end

      @doc "return tag name to use when generating xml"
      def xml_tag, do: unquote(xml_name)

      @doc "list of ordered tags in this schema"
      def xml_tag_list, do: Enum.reverse(@tag_order)

      @doc "Overridable function for transforming input tags"
      def transform(tag), do: tag
      defoverridable transform: 1
    end
  end

  defmacro __using__(opts) do
    xml_name = Keyword.get(opts, :tag, module_tail_to_string(__CALLER__.module))

    Module.register_attribute(__CALLER__.module, :tag_order, accumulate: true)

    code_generate(xml_name)
    |> tap(fn gen ->
      if Keyword.get(opts, :print, false),
        do: IO.puts("XmlSchema generation for #{__CALLER__.module}:\n" <> Macro.to_string(gen))
    end)
  end
end