lib/rdf/xml/encoder.ex

defmodule RDF.XML.Encoder do
  @moduledoc """
  An encoder for RDF/XML serializations of RDF.ex data structures.

  As for all encoders of `RDF.Serialization.Format`s, you normally won't use these
  functions directly, but via one of the `write_` functions on the `RDF.XML` format
  module or the generic `RDF.Serialization` module.


  ## Options

  - `:base`: Allows to specify the base URI to be used for a `xml:base` declaration.
    If not specified the one from the given graph is used or if there is also none
    specified for the graph the `RDF.default_base_iri/0`.
  - `:prefixes`: Allows to specify the prefixes to be used as a `RDF.PrefixMap` or
    anything from which a `RDF.PrefixMap` can be created with `RDF.PrefixMap.new/1`.
    If not specified the ones from the given graph are used or if these are also not
    present the `RDF.default_prefixes/0`.
  - `:implicit_base`: Allows to specify that the used base URI should not be encoded
    in the generated serialization (default: `false`).
  - `:use_rdf_id`: Allows to determine if `rdf:ID` should be used when possible.
     You can either provide a boolean value or a function which should return a boolean
     value for a given `RDF.Description`. (default: `false`)
  - `:producer`: This option allows you to provide a producer function, which will get
    the input data (usually a `RDF.Graph`) and should produce a stream of the descriptions
    to be encoded. This allows you to control the order of the descriptions, apply filters
    etc.

          iex> RDF.Graph.new([
          ...>   EX.S1 |> EX.p1(EX.O1),
          ...>   EX.S2 |> EX.p2(EX.O2),
          ...> ])
          ...> |> RDF.XML.write_string!(
          ...>     prefixes: [ex: EX],
          ...>     producer: fn graph ->
          ...>       {first, rest} = RDF.Graph.pop(graph, EX.S2)
          ...>       Stream.concat([first], RDF.Graph.descriptions(rest))
          ...>     end)
          ~S(<?xml version="1.0" encoding="utf-8"?><rdf:RDF xmlns:ex="http://example.com/">\
  <rdf:Description rdf:about="http://example.com/S2"><ex:p2 rdf:resource="http://example.com/O2"/></rdf:Description>\
  <rdf:Description rdf:about="http://example.com/S1"><ex:p1 rdf:resource="http://example.com/O1"/></rdf:Description>\
  </rdf:RDF>)

  """

  use RDF.Serialization.Encoder

  alias RDF.{Description, Graph, Dataset, IRI, BlankNode, Literal, LangString, XSD, PrefixMap}
  import RDF.Utils
  import Saxy.XML

  @doc """
  Serializes the given RDF `data` structure into a RDF/XML string.

  On success the XML document is returned in an `:ok` tuple. If the document
  could not be built, the `:error` tuple describing the problem is returned.

  The supported options are described in the [module documentation](`RDF.XML.Encoder`).
  """
  @impl RDF.Serialization.Encoder
  @spec encode(Graph.t(), keyword) :: {:ok, String.t()} | {:error, any}
  def encode(data, opts \\ []) do
    # `:base` takes precedence over `:base_iri`
    explicit_base = Keyword.get(opts, :base, Keyword.get(opts, :base_iri))
    base = base_iri(explicit_base, data)
    prefixes = prefix_map(Keyword.get(opts, :prefixes), data)
    # a missing or `nil` option is treated like `false`
    use_rdf_id = Keyword.get(opts, :use_rdf_id) || false

    case document(data, base, prefixes, use_rdf_id, opts) do
      {:ok, root} -> {:ok, Saxy.encode!(root, version: "1.0", encoding: :utf8)}
      failure -> failure
    end
  end

  @doc """
  Encodes the given RDF `data` structure to a RDF/XML stream.

  The stream emits the XML declaration, the opening `rdf:RDF` tag, one element
  for each of the descriptions in the given `data` and finally the closing
  `rdf:RDF` tag.

  By default the RDF/XML stream will emit single line strings for each of the
  descriptions in the given `data`. But you can also receive the serialized RDF/XML
  description as IO lists aka iodata by setting the `:mode` option to `:iodata`.

  The descriptions are encoded lazily while the stream is consumed. A description
  which can not be encoded therefore raises its error during enumeration.

  For a description of the other available options see the [module documentation](`RDF.XML.Encoder`).
  """
  @impl RDF.Serialization.Encoder
  @spec stream(Graph.t(), keyword) :: Enumerable.t()
  def stream(data, opts \\ []) do
    base = Keyword.get(opts, :base, Keyword.get(opts, :base_iri)) |> base_iri(data)
    prefixes = Keyword.get(opts, :prefixes) |> prefix_map(data)
    # Same semantics as in `encode/2`: an explicitly given `nil` counts as `false`
    # instead of being passed through to the description encoding.
    use_rdf_id = Keyword.get(opts, :use_rdf_id) || false
    stream_mode = Keyword.get(opts, :mode, :string)
    input = input(data, opts)

    # Encode an `rdf:RDF` element with a newline as its only content and split off
    # the last element of the resulting iodata. This relies on the closing tag being
    # the last element of the iodata produced by Saxy; everything before it is the
    # opening tag followed by the newline.
    {rdf_close, rdf_open} =
      Saxy.encode_to_iodata!(
        {"rdf:RDF", ns_declarations(prefixes, base, implicit_base(opts)), [{:characters, "\n"}]}
      )
      |> List.pop_at(-1)

    {rdf_close, rdf_open} =
      if stream_mode == :string do
        {IO.iodata_to_binary(rdf_close), IO.iodata_to_binary(rdf_open)}
      else
        {rdf_close, rdf_open}
      end

    Stream.concat([
      [~s[<?xml version="1.0" encoding="utf-8"?>\n]],
      [rdf_open],
      description_stream(input, base, prefixes, use_rdf_id, stream_mode),
      [rdf_close]
    ])
  end

  # Determines the data to be encoded: the result of the `:producer` function
  # applied to `data` if one was given in `opts`, otherwise `data` itself.
  defp input(data, opts) do
    producer = Keyword.get(opts, :producer)

    case producer do
      nil -> data
      producer when is_function(producer) -> producer.(data)
    end
  end

  # Reads the `:implicit_base` option, which defaults to `false`.
  defp implicit_base(opts), do: Keyword.get(opts, :implicit_base, false)

  # Resolves the base IRI to use: an explicitly given one wins, then the base IRI
  # of the graph, then `RDF.default_base_iri/0`. The result is always passed
  # through `validate_base_iri/1`.
  defp base_iri(nil, %Graph{base_iri: graph_base}) when graph_base != nil,
    do: validate_base_iri(graph_base)

  defp base_iri(nil, _data), do: validate_base_iri(RDF.default_base_iri())

  defp base_iri(explicit_base, _data),
    do: validate_base_iri(IRI.coerce_base(explicit_base))

  # Returns the string form of the given base IRI with any fragment removed.
  # `nil` is passed through unchanged.
  defp validate_base_iri(nil), do: nil

  defp validate_base_iri(base_iri) do
    parsed = URI.parse(to_string(base_iri))
    to_string(%URI{parsed | fragment: nil})
  end

  # Resolves the prefixes to use. Explicitly given prefixes are converted with
  # `PrefixMap.new/1`; without them the prefixes of the graph or dataset are
  # used, falling back to `RDF.default_prefixes/0`.
  defp prefix_map(nil, %Graph{prefixes: graph_prefixes}) when graph_prefixes != nil,
    do: graph_prefixes

  defp prefix_map(nil, %Dataset{} = dataset) do
    dataset_prefixes = Dataset.prefixes(dataset)

    # a dataset without any prefixes gets the defaults
    case Enum.empty?(dataset_prefixes) do
      true -> RDF.default_prefixes()
      false -> dataset_prefixes
    end
  end

  defp prefix_map(nil, _data), do: RDF.default_prefixes()
  defp prefix_map(given_prefixes, _data), do: PrefixMap.new(given_prefixes)

  # Builds the attribute list of the `rdf:RDF` root element: one `xmlns`
  # declaration per prefix, preceded by an `xml:base` attribute unless there is
  # no base or the base should stay implicit.
  defp ns_declarations(prefixes, base, implicit_base)
       when is_nil(base) or implicit_base == true do
    Enum.map(prefixes, &xmlns_attribute/1)
  end

  defp ns_declarations(prefixes, base, implicit_base) do
    [{"xml:base", to_string(base)} | ns_declarations(prefixes, nil, implicit_base)]
  end

  # A `nil` prefix declares the default namespace.
  defp xmlns_attribute({nil, namespace}), do: {"xmlns", to_string(namespace)}
  defp xmlns_attribute({prefix, namespace}), do: {"xmlns:#{prefix}", to_string(namespace)}

  # Builds the `rdf:RDF` root element containing the encoded descriptions of the
  # input. A failure of the description encoding is returned unchanged.
  defp document(graph, base, prefixes, use_rdf_id, opts) do
    encoded = graph |> input(opts) |> descriptions(base, prefixes, use_rdf_id)

    case encoded do
      {:ok, descriptions} ->
        root_attributes = ns_declarations(prefixes, base, implicit_base(opts))
        {:ok, element("rdf:RDF", root_attributes, descriptions)}

      failure ->
        failure
    end
  end

  # Encodes all descriptions of the input. A graph is first turned into its
  # descriptions; any other input is expected to be an enumerable of descriptions.
  # Yields `{:ok, elements}` on success (presumably `map_while_ok/2` stops at the
  # first error and returns it - see `RDF.Utils`).
  defp descriptions(%Graph{} = graph, base, prefixes, use_rdf_id),
    do: descriptions(Graph.descriptions(graph), base, prefixes, use_rdf_id)

  defp descriptions(input, base, prefixes, use_rdf_id) do
    map_while_ok(input, fn subject_description ->
      description(subject_description, base, prefixes, use_rdf_id)
    end)
  end

  # Lazily encodes the descriptions of the input, one stream element per
  # description, each terminated by a newline. Depending on `stream_mode` the
  # elements are strings (`:string`) or iodata (`:iodata`). An encoding error is
  # raised when the respective element is computed.
  defp description_stream(%Graph{} = graph, base, prefixes, use_rdf_id, stream_mode),
    do: description_stream(Graph.descriptions(graph), base, prefixes, use_rdf_id, stream_mode)

  @dialyzer {:nowarn_function, description_stream: 5}
  defp description_stream(input, base, prefixes, use_rdf_id, stream_mode) do
    Stream.map(input, fn subject_description ->
      case description(subject_description, base, prefixes, use_rdf_id) do
        {:error, error} ->
          raise error

        {:ok, node_element} when stream_mode == :iodata ->
          # an improper list is still valid iodata and avoids wrapping the newline
          [Saxy.encode_to_iodata!(node_element) | "\n"]

        {:ok, node_element} when stream_mode == :string ->
          Saxy.encode!(node_element) <> "\n"
      end
    end)
  end

  # Encodes a single description as a node element. The element is named after
  # one of the types of the description if possible, otherwise `rdf:Description`
  # is used. A failure of the property encoding is returned unchanged.
  defp description(%Description{} = description, base, prefixes, use_rdf_id) do
    {typed_name, untyped_description} = type_node(description, prefixes)

    case predications(untyped_description, base, prefixes) do
      {:ok, property_elements} ->
        id_attribute = description_id(description.subject, base, use_rdf_id, description)
        {:ok, element(typed_name || "rdf:Description", [id_attribute], property_elements)}

      failure ->
        failure
    end
  end

  # Selects an `rdf:type` object of the description which can be written as a
  # qualified name, so that it can serve as the name of a typed node element.
  #
  # Returns `{qname, description}` with the respective `rdf:type` statement removed
  # from the description, or `{nil, description}` with the unchanged description
  # when no suitable type exists.
  defp type_node(description, prefixes) do
    description
    |> Description.get(RDF.type())
    |> List.wrap()
    |> Enum.find_value(fn
      # Only IRIs can be abbreviated to an element name. Other `rdf:type` objects
      # (e.g. blank nodes) are not handed to the prefix lookup at all; they remain
      # in the description and are encoded as ordinary `rdf:type` properties.
      %IRI{} = type ->
        if qname = qname(type, prefixes) do
          {qname, type}
        end

      _non_iri ->
        nil
    end)
    |> case do
      nil -> {nil, description}
      {qname, type} -> {qname, Description.delete(description, {RDF.type(), type})}
    end
  end

  # Builds the attribute identifying the subject of a node element:
  # `rdf:nodeID` for blank nodes, `rdf:about` for IRIs, or - when the use of
  # `rdf:ID` is enabled and the IRI reduces to a fragment of the base - `rdf:ID`.
  # The third argument is a boolean or a function deciding per description.
  defp description_id(%BlankNode{value: label}, _base, _use_rdf_id, _description),
    do: {"rdf:nodeID", label}

  defp description_id(%IRI{} = subject, base, decide, description) when is_function(decide),
    do: description_id(subject, base, decide.(description), description)

  defp description_id(%IRI{} = subject, base, false, _description),
    do: {"rdf:about", attr_val_uri(subject, base)}

  defp description_id(%IRI{} = subject, base, true, _description) do
    case attr_val_uri(subject, base) do
      "#" <> fragment -> {"rdf:ID", fragment}
      reference -> {"rdf:about", reference}
    end
  end

  # Encodes all statements of the description as property elements, collected
  # over all of its predicates.
  defp predications(description, base, prefixes) do
    description.predications
    |> flat_map_while_ok(fn {property, objects} ->
      predications_for_property(property, objects, base, prefixes)
    end)
  end

  # Encodes the statements of a single property, one property element per object.
  # Fails with an `RDF.XML.EncodeError` if the property can not be written as a
  # qualified name with the given prefixes.
  defp predications_for_property(property, objects, base, prefixes) do
    case qname(property, prefixes) do
      nil ->
        {:error,
         %RDF.XML.EncodeError{message: "no namespace declaration for property #{property} found"}}

      property_name ->
        property_elements =
          for object <- Map.keys(objects) do
            statement(property_name, object, base, prefixes)
          end

        {:ok, property_elements}
    end
  end

  # Builds the property element for a statement with the given object:
  # IRIs become `rdf:resource` references, blank nodes `rdf:nodeID` references,
  # XML literals are embedded with `rdf:parseType="Literal"` and all other
  # literals are written as character content.
  defp statement(property_name, %IRI{} = resource, base, _prefixes),
    do: element(property_name, [{"rdf:resource", attr_val_uri(resource, base)}], [])

  defp statement(property_name, %BlankNode{value: label}, _base, _prefixes),
    do: element(property_name, [{"rdf:nodeID", label}], [])

  @xml_literal IRI.to_string(RDF.XMLLiteral)

  # TODO: This dialyzer exception rule can be removed when this got merged: https://github.com/qcam/saxy/pull/82
  @dialyzer {:nowarn_function, statement: 4}
  defp statement(property_name, %Literal{literal: %{datatype: @xml_literal}} = xml_literal, _, _) do
    element(property_name, [{"rdf:parseType", "Literal"}], Literal.lexical(xml_literal))
  end

  defp statement(property_name, %Literal{} = literal, base, _prefixes) do
    attributes = literal_attributes(literal, base)
    element(property_name, attributes, [{:characters, Literal.lexical(literal)}])
  end

  # Determines the attributes of the property element of a literal: `xml:lang`
  # for language-tagged strings, nothing for plain `xsd:string`s and
  # `rdf:datatype` for every other datatype struct.
  defp literal_attributes(%Literal{literal: %LangString{language: language_tag}}, _base) do
    [{"xml:lang", language_tag}]
  end

  defp literal_attributes(%Literal{literal: %XSD.String{}}, _base) do
    []
  end

  defp literal_attributes(%Literal{literal: %datatype{}}, base) do
    datatype_iri = datatype.id()
    [{"rdf:datatype", attr_val_uri(datatype_iri, base)}]
  end

  defp literal_attributes(_other, _base), do: []

  # Returns the string to be used as the value of an IRI-valued attribute.
  #
  # Without a base the IRI is written in full. With a base, the base is stripped
  # from the beginning of the IRI - but only if the remaining relative reference
  # resolves back to the very same IRI. Plain prefix stripping is not sufficient:
  # with the base `http://example.com/foo` the IRI `http://example.com/foo/bar`
  # would become `/bar`, which a parser resolves to `http://example.com/bar`.
  # In such cases the full IRI is written instead.
  #
  # The IRI struct is unwrapped first, so a string is returned for every base.
  defp attr_val_uri(%IRI{value: uri}, base), do: attr_val_uri(uri, base)
  defp attr_val_uri(iri, nil), do: iri

  defp attr_val_uri(iri, base) do
    relative = String.replace_prefix(iri, base, "")

    # `relative == iri` means the IRI does not start with the base
    if relative != iri and resolves_to_iri?(relative, base, iri) do
      relative
    else
      iri
    end
  end

  # Checks that resolving `relative` against `base` reproduces `iri` exactly.
  defp resolves_to_iri?(relative, base, iri) do
    resolved = base |> URI.merge(relative) |> to_string()
    resolved == iri
  rescue
    # `URI.merge/2` refuses bases it does not consider absolute; nothing can be
    # relativized safely against such a base
    ArgumentError -> false
  end

  # Returns the qualified name of the IRI according to the given prefixes, or
  # `nil` if `PrefixMap.prefixed_name/2` finds none. A leading colon is dropped
  # from the name.
  defp qname(iri, prefixes) do
    # every result not starting with a colon (including `nil`) is returned as is
    with ":" <> local_name <- PrefixMap.prefixed_name(prefixes, iri) do
      local_name
    end
  end
end