defmodule XmlQuery do
# @related [tests](test/xml_query_test.exs)
@moduledoc """
Some simple XML query functions.
"""
import Record
alias XmlQuery.QueryError
alias XmlQuery.Xmerl
require XmlQuery.Xmerl
@type xml() :: xml_binary() | xml_document() | xml_element() | XmlQuery.Element.t()
@type xml_attribute() :: Xmerl.xml_attribute()
@type xml_binary() :: binary()
@type xml_document() :: Xmerl.xml_document()
@type xml_element() :: Xmerl.xml_element()
@type xml_text() :: Xmerl.xml_text()
@type xpath() :: binary() | charlist()
@module_name __MODULE__ |> Module.split() |> Enum.join(".")
defguard is_xml_struct(struct)
when is_struct(struct) and
struct.__struct__ in [XmlQuery.Attribute, XmlQuery.Element, XmlQuery.Text]
@doc """
Finds all elements in an XML document that match `xpath`, returning a list of records.
Depending on the given xpath, the type of the record may be different.
"""
@spec all(xml(), xpath()) :: [XmlQuery.Element.t()]
def all(xml, xpath) when is_binary(xpath),
do: xml |> all(String.to_charlist(xpath))
def all(xml, xpath) when is_binary(xml) or is_tuple(xml),
do: xml |> parse() |> all(xpath)
def all(xml, xpath) when is_struct(xml),
do: :xmerl_xpath.string(xpath, xml.shadows) |> Enum.map(&into/1)
@doc """
Returns the value of `attr` from the outermost element of `xml`.
"""
@spec attr(xml(), String.t()) :: XmlQuery.Attribute.t() | nil
def attr(xml, attr) do
case xml
|> parse()
|> first!("Consider using Enum.map(xml, &#{@module_name}.attr(&1, #{inspect(attr)}))")
|> find("@#{attr}") do
%XmlQuery.Attribute{value: value} -> to_string(value)
nil -> nil
end
end
@doc """
Finds the first element, attribute, or element text in `xml` that matches `xpath`.
```elixir
iex> alias XmlQuery, as: Xq
iex> xml = \"""
...> <?xml version="1.0"?>
...> <root><child property="oldest" /><child property="youngest" /></root>
...> \"""
iex> %Xq.Element{name: :child, attributes: [%Xq.Attribute{value: ~c"oldest"}]} = Xq.find(xml, "//child")
```
"""
@spec find(xml(), xpath()) :: XmlQuery.Element.t() | XmlQuery.Attribute.t() | XmlQuery.Text.t() | nil
def find(xml, xpath),
do: xml |> all(xpath) |> List.first()
@doc """
Like `find/2` but raises unless exactly one node is found.
"""
@spec find!(xml(), xpath()) :: XmlQuery.Element.t() | XmlQuery.Attribute.t() | XmlQuery.Text.t()
def find!(xml, xpath),
do: all(xml, xpath) |> first!("XPath: #{xpath}")
@doc """
Parses an XML document using `:xmerl_scan.string/2`, returning an `XmlQuery.Element` struct.
Given an xml tuple that has already been created by `:xmerl`, wraps the tuple in an
`XmlQuery`-specific struct.
```elixir
iex> xml = \"""
...> <?xml version="1.0"?>
...> <root />
...> \"""
iex> %Xq.Element{name: :root} = XmlQuery.parse(xml)
iex> xml = \"""
...> <?xml version="1.0"?>
...> <root property="root-value" />
...> \"""
iex> %Xq.Attribute{name: :property, value: ~c"root-value"} = XmlQuery.find(xml, "//root/@property") |> XmlQuery.parse()
```
"""
@spec parse(xml()) :: XmlQuery.Element.t() | XmlQuery.Attribute.t() | XmlQuery.Text.t()
def parse(node) when is_xml_struct(node),
do: node
def parse([node | _] = list) when is_xml_struct(node),
do: list
def parse(xml) when is_tuple(xml),
do: xml |> into()
def parse(xml) when is_binary(xml) do
{doc, []} =
xml
|> String.to_charlist()
|> :xmerl_scan.string(acc_fun: &accumulate_xml/3, quiet: true, space: :normalize, xmlbase: ~c"/")
into(doc)
end
@doc """
Returns `xml` as a prettified string.
Elements and text nodes are sorted and indented relative to parent elements.
"""
@spec pretty(xml()) :: binary()
def pretty(node)
when is_struct(node, XmlQuery.Element) or is_struct(node, XmlQuery.Attribute) or is_struct(node, XmlQuery.Text),
do: node.__struct__.pretty(node)
def pretty(xml) when is_binary(xml) or is_tuple(xml),
do: xml |> parse() |> pretty()
@doc """
Returns the text value of `xml`.
"""
@spec text(xml()) :: binary()
def text(xml) do
case xml
|> parse()
|> first!("Consider using Enum.map(xml, &#{@module_name}.text/1)") do
%XmlQuery.Element{shadows: doc} ->
:xmerl_xpath.string(~c"//text()", doc)
|> Enum.reduce("", fn node, acc ->
case XmlQuery.Text.to_string(node) do
"" -> acc
text -> String.trim(acc <> " " <> text)
end
end)
end
end
# # #
@doc false
def into(nil), do: nil
def into(attribute) when is_record(attribute, :xmlAttribute),
do: XmlQuery.Attribute.new(attribute)
def into(element) when is_record(element, :xmlElement),
do: XmlQuery.Element.new(element)
def into(text) when is_record(text, :xmlText),
do: XmlQuery.Text.new(text)
# # #
defp accumulate_xml({:xmlText, _, _, _, ~c" ", _} = text, acc, str) do
{acc, XmlQuery.Xmerl.xmlText(text, :pos), str}
end
defp accumulate_xml(node, acc, str),
do: {[node | acc], str}
defp first!([], hint) do
raise(QueryError, """
Expected a single XML element but found none.
#{hint}
""")
end
defp first!([element], _hint),
do: element
defp first!(node, _hint) when is_xml_struct(node),
do: node
defp first!(_xml, hint) do
raise QueryError, """
Expected a single XML node but found multiple:
#{hint}
"""
end
end