lib/rdf/model/dataset.ex

Select File:
lib/rdf/model/dataset.ex

defmodule RDF.Dataset do
  @moduledoc """
  A set of `RDF.Graph`s.

  It may have multiple named graphs and at most one unnamed ("default") graph.

  `RDF.Dataset` implements:

  - Elixir's `Access` behaviour
  - Elixir's `Enumerable` protocol
  - Elixir's `Collectable` protocol
  - Elixir's `Inspect` protocol
  - the `RDF.Data` protocol

  """

  defstruct name: nil, graphs: %{}

  @behaviour Access

  alias RDF.{Graph, Description, IRI, Statement, Quad, Triple, PrefixMap, PropertyMap}
  import RDF.Statement, only: [coerce_subject: 1, coerce_graph_name: 1]
  import RDF.Utils

  @type graph_name :: IRI.t() | nil

  @type t :: %__MODULE__{
          name: graph_name,
          graphs: %{graph_name => Graph.t()}
        }

  @type input :: Graph.input() | t

  @type update_graph_fun :: (Graph.t() -> {Graph.t()})

  @type get_and_update_graph_fun :: (Graph.t() -> {Graph.t(), input} | :pop)

  @doc """
  Creates an empty unnamed `RDF.Dataset`.
  """
  @spec new :: t
  def new, do: %__MODULE__{}

  @doc """
  Creates an `RDF.Dataset`.

  If a keyword list is given an empty dataset is created.
  Otherwise, an unnamed dataset initialized with the given data is created.

  See `new/2` for available arguments and the different ways to provide data.

  ## Examples

      RDF.Dataset.new(name: EX.GraphName)

      RDF.Dataset.new(init: {EX.S, EX.p, EX.O})

      RDF.Dataset.new({EX.S, EX.p, EX.O})

  """
  @spec new(input | keyword) :: t
  def new(data_or_opts)

  def new(data_or_opts) when is_list(data_or_opts) and length(data_or_opts) != 0 do
    if Keyword.keyword?(data_or_opts) do
      {data, options} = Keyword.pop(data_or_opts, :init)
      new(data, options)
    else
      new(data_or_opts, [])
    end
  end

  def new(data), do: new(data, [])

  @doc """
  Creates an `RDF.Dataset` initialized with data.

  The initial RDF triples can be provided in any form accepted by `add/3`.

  Available options:

  - `name`: the name of the dataset to be created
  - `init`: some data with which the dataset should be initialized; the data can be
    provided in any form accepted by `add/3` and above that also with a function returning
    the initialization data in any of these forms

  """
  @spec new(input, keyword) :: t
  def new(data, opts)

  def new(%__MODULE__{} = graph, opts) do
    %__MODULE__{graph | name: opts |> Keyword.get(:name) |> coerce_graph_name()}
  end

  def new(data, opts) do
    %__MODULE__{}
    |> new(opts)
    |> init(data, opts)
  end

  defp init(dataset, nil, _), do: dataset
  defp init(dataset, fun, opts) when is_function(fun), do: add(dataset, fun.(), opts)
  defp init(dataset, data, opts), do: add(dataset, data, opts)

  @doc """
  Returns the dataset name IRI of `dataset`.
  """
  @spec name(t) :: Statement.graph_name()
  def name(%__MODULE__{} = dataset), do: dataset.name

  @doc """
  Changes the dataset name of `dataset`.
  """
  @spec change_name(t, Statement.coercible_graph_name()) :: t
  def change_name(%__MODULE__{} = dataset, new_name) do
    %__MODULE__{dataset | name: coerce_graph_name(new_name)}
  end

  defp destination_graph(opts, default \\ nil) do
    opts
    |> Keyword.get(:graph, default)
    |> coerce_graph_name()
  end

  @doc """
  Adds triples and quads to a `RDF.Dataset`.

  The triples can be provided in any form accepted by `add/2`.

  - as a single statement tuple
  - an `RDF.Description`
  - an `RDF.Graph`
  - an `RDF.Dataset`
  - or a list with any combination of the former

  The `graph` option allows to set a different destination graph to which the
  statements should be added, ignoring the graph context of given quads or the
  name of given graphs in `input`.

  Note: When the statements to be added are given as another `RDF.Dataset` and
  a destination graph is set with the `graph` option, the descriptions of the
  subjects in the different graphs are aggregated.
  """
  @spec add(t, input, keyword) :: t
  def add(dataset, input, opts \\ [])

  def add(%__MODULE__{} = dataset, {_, _, _, graph} = quad, opts),
    do: do_add(dataset, destination_graph(opts, graph), quad, opts)

  def add(%__MODULE__{} = dataset, %Description{} = description, opts),
    do: do_add(dataset, destination_graph(opts), description, opts)

  def add(%__MODULE__{} = dataset, %Graph{} = graph, opts),
    do: do_add(dataset, destination_graph(opts, graph.name), graph, opts)

  def add(%__MODULE__{} = dataset, %__MODULE__{} = other_dataset, opts) do
    other_dataset
    |> graphs()
    |> Enum.reduce(dataset, &add(&2, &1, opts))
  end

  def add(dataset, input, opts)
      when is_list(input) or (is_map(input) and not is_struct(input)) do
    Enum.reduce(input, dataset, &add(&2, &1, opts))
  end

  def add(%__MODULE__{} = dataset, input, opts),
    do: do_add(dataset, destination_graph(opts), input, opts)

  defp do_add(dataset, graph_name, input, opts) do
    %__MODULE__{
      dataset
      | graphs:
          lazy_map_update(
            dataset.graphs,
            graph_name,
            # when new:
            fn -> Graph.new(input, Keyword.put(opts, :name, graph_name)) end,
            # when update:
            fn graph -> Graph.add(graph, input, opts) end
          )
    }
  end

  @doc """
  Adds statements to a `RDF.Dataset` overwriting existing statements with the subjects given in the `input` data.

  By overwriting statements with the same subject, this function has a similar
  semantics as `RDF.Graph.put/3`, if you want to replace whole graphs use
  `put_graph/3` instead.

  The `graph` option allows to set a different destination graph to which the
  statements should be added, ignoring the graph context of given quads or the
  name of given graphs in `input`.

  Note: When the statements to be added are given as another `RDF.Dataset` and
  a destination graph is set with the `graph` option, the descriptions of the
  subjects in the different graphs are aggregated.

  ## Examples

      iex> dataset = RDF.Dataset.new({EX.S, EX.P1, EX.O1})
      ...> RDF.Dataset.put(dataset, {EX.S, EX.P2, EX.O2})
      RDF.Dataset.new({EX.S, EX.P2, EX.O2})
      iex> RDF.Dataset.put(dataset, {EX.S2, EX.P2, EX.O2})
      RDF.Dataset.new([{EX.S, EX.P1, EX.O1}, {EX.S2, EX.P2, EX.O2}])
  """
  @spec put(t, input, keyword) :: t
  def put(dataset, input, opts \\ [])

  def put(%__MODULE__{} = dataset, %__MODULE__{} = input, opts) do
    input =
      if destination_graph = Keyword.get(opts, :graph) do
        input
        |> Graph.new(name: destination_graph)
        |> new()
      else
        input
      end

    %__MODULE__{
      dataset
      | graphs:
          Enum.reduce(
            input.graphs,
            dataset.graphs,
            fn {graph_name, graph}, graphs ->
              Map.update(
                graphs,
                graph_name,
                graph,
                fn current -> Graph.put(current, graph, opts) end
              )
            end
          )
    }
  end

  def put(%__MODULE__{} = dataset, input, opts) do
    put(dataset, new() |> add(input, opts), Keyword.delete(opts, :graph))
  end

  @doc """
  Adds statements to a `RDF.Dataset` and overwrites all existing statements with the same subject-predicate combinations given in the `input` data.

  The `graph` option allows to set a different destination graph to which the
  statements should be added, ignoring the graph context of given quads or the
  name of given graphs in `input`.

  Note: When the statements to be added are given as another `RDF.Dataset` and
  a destination graph is set with the `graph` option, the descriptions of the
  subjects in the different graphs are aggregated.

  ## Examples

      iex> dataset = RDF.Dataset.new({EX.S, EX.P1, EX.O1})
      ...> RDF.Dataset.put_properties(dataset, {EX.S, EX.P1, EX.O2})
      RDF.Dataset.new({EX.S, EX.P1, EX.O2})
      iex> RDF.Dataset.put_properties(dataset, {EX.S, EX.P2, EX.O2})
      RDF.Dataset.new([{EX.S, EX.P1, EX.O1}, {EX.S, EX.P2, EX.O2}])
      iex> RDF.Dataset.new([{EX.S1, EX.P1, EX.O1}, {EX.S2, EX.P2, EX.O2}])
      ...> |> RDF.Dataset.put_properties([{EX.S1, EX.P2, EX.O3}, {EX.S2, EX.P2, EX.O3}])
      RDF.Dataset.new([{EX.S1, EX.P1, EX.O1}, {EX.S1, EX.P2, EX.O3}, {EX.S2, EX.P2, EX.O3}])
  """
  @spec put_properties(t, input, keyword) :: t
  def put_properties(dataset, input, opts \\ [])

  def put_properties(%__MODULE__{} = dataset, %__MODULE__{} = input, opts) do
    %__MODULE__{
      dataset
      | graphs:
          Enum.reduce(
            input.graphs,
            dataset.graphs,
            fn {graph_name, graph}, graphs ->
              Map.update(
                graphs,
                graph_name,
                graph,
                fn current -> Graph.put_properties(current, graph, opts) end
              )
            end
          )
    }
  end

  def put_properties(%__MODULE__{} = dataset, input, opts) do
    put_properties(dataset, new() |> add(input, opts), opts)
  end

  @doc """
  Adds new graphs to a `RDF.Dataset` overwriting any existing graphs with the same name.

  The `graph` option allows to set a different destination graph to which the
  statements should be added, ignoring the graph context of given quads or the
  name of given graphs in `input`.

  Note: When the statements to be added are given as another `RDF.Dataset` and
  a destination graph is set with the `graph` option, the descriptions of the
  subjects in the different graphs are aggregated.

  """
  @spec put_graph(t, input, keyword) :: t
  def put_graph(dataset, input, opts \\ [])

  def put_graph(%__MODULE__{} = dataset, %__MODULE__{} = input, opts) do
    input =
      if destination_graph = Keyword.get(opts, :graph) do
        input
        |> Graph.new(name: destination_graph)
        |> new()
      else
        input
      end

    %__MODULE__{
      dataset
      | graphs:
          Enum.reduce(
            input.graphs,
            dataset.graphs,
            fn {graph_name, graph}, graphs ->
              Map.put(graphs, graph_name, graph)
            end
          )
    }
  end

  def put_graph(%__MODULE__{} = dataset, input, opts) do
    put_graph(dataset, new() |> add(input, opts), Keyword.delete(opts, :graph))
  end

  @doc """
  Updates a graph in `dataset` with the given function.

  If `graph_name` is present in `dataset`, `fun` is invoked with argument `graph`
  and its result is used as the new graph with the given `graph_name`.
  If `graph_name` is not present in `dataset`, `initial` is inserted with the
  given `graph_name`. If no `initial` value is given, the `dataset` remains unchanged.
  If `nil` is returned by `fun`, the respective graph will be removed from `dataset`.

  The initial value and the returned values by the update function will be
  coerced to proper RDF graphs before added. If the initial or returned
  graph is a `RDF.Graph` with another graph name, it will still be added
  using the given `graph_name`.

  ## Examples

      iex> RDF.Dataset.new({EX.S, EX.p, EX.O, EX.Graph})
      ...> |> RDF.Dataset.update(EX.Graph,
      ...>      fn graph -> RDF.Graph.add(graph, {EX.S, EX.p, EX.O2})
      ...>    end)
      RDF.Dataset.new([{EX.S, EX.p, EX.O, EX.Graph}, {EX.S, EX.p, EX.O2, EX.Graph}])

      iex> RDF.Dataset.new()
      ...> |> RDF.Dataset.update(EX.Graph, RDF.Graph.new({EX.S, EX.p, EX.O}),
      ...>      fn graph -> RDF.Graph.add(graph, {EX.S, EX.p, EX.O2})
      ...>    end)
      RDF.Dataset.new([{EX.S, EX.p, EX.O, EX.Graph}])

  """
  @spec update(
          t,
          Statement.graph_name(),
          Graph.input() | nil,
          update_graph_fun
        ) :: t
  def update(%__MODULE__{} = dataset, graph_name, initial \\ nil, fun) do
    graph_name = RDF.coerce_graph_name(graph_name)

    case get(dataset, graph_name) do
      nil ->
        if initial do
          add(dataset, Graph.new(initial, name: graph_name))
        else
          dataset
        end

      graph ->
        graph
        |> fun.()
        |> case do
          nil ->
            delete_graph(dataset, graph_name)

          new_graph ->
            dataset
            |> delete_graph(graph_name)
            |> add(Graph.new(new_graph, name: graph_name))
        end
    end
  end

  @doc """
  Deletes statements from a `RDF.Dataset`.

  The `graph` option allows to set a different destination graph from which the
  statements should be deleted, ignoring the graph context of given quads or the
  name of given graphs.

  Note: When the statements to be deleted are given as another `RDF.Dataset`,
  the dataset name must not match dataset name of the dataset from which the statements
  are deleted. If you want to delete only datasets with matching names, you can
  use `RDF.Data.delete/2`.
  """
  @spec delete(t, input, keyword) :: t
  def delete(dataset, input, opts \\ [])

  def delete(%__MODULE__{} = dataset, {_, _, _, graph} = quad, opts),
    do: do_delete(dataset, destination_graph(opts, graph), quad, opts)

  def delete(%__MODULE__{} = dataset, %Description{} = description, opts),
    do: do_delete(dataset, destination_graph(opts), description, opts)

  def delete(%__MODULE__{} = dataset, %Graph{} = graph, opts),
    do: do_delete(dataset, destination_graph(opts, graph.name), graph, opts)

  def delete(%__MODULE__{} = dataset, %__MODULE__{} = other_dataset, opts) do
    other_dataset
    |> graphs()
    |> Enum.reduce(dataset, &delete(&2, &1, opts))
  end

  def delete(dataset, input, opts)
      when is_list(input) or (is_map(input) and not is_struct(input)) do
    Enum.reduce(input, dataset, &delete(&2, &1, opts))
  end

  def delete(%__MODULE__{} = dataset, input, opts) when not is_struct(input),
    do: do_delete(dataset, destination_graph(opts), input, opts)

  defp do_delete(dataset, graph_name, input, opts) do
    if existing_graph = dataset.graphs[graph_name] do
      new_graph = Graph.delete(existing_graph, input, opts)

      %__MODULE__{
        dataset
        | graphs:
            if Graph.empty?(new_graph) do
              Map.delete(dataset.graphs, graph_name)
            else
              Map.put(dataset.graphs, graph_name, new_graph)
            end
      }
    else
      dataset
    end
  end

  @doc """
  Deletes the given graph.
  """
  @spec delete_graph(t, Statement.graph_name() | [Statement.graph_name()] | nil) :: t
  def delete_graph(dataset, graph_names)

  def delete_graph(%__MODULE__{} = dataset, graph_names) when is_list(graph_names) do
    Enum.reduce(graph_names, dataset, &delete_graph(&2, &1))
  end

  def delete_graph(%__MODULE__{} = dataset, graph_name) do
    %__MODULE__{dataset | graphs: Map.delete(dataset.graphs, coerce_graph_name(graph_name))}
  end

  @doc """
  Deletes the default graph.
  """
  @spec delete_default_graph(t) :: t
  def delete_default_graph(%__MODULE__{} = graph),
    do: delete_graph(graph, nil)

  @doc """
  Fetches the `RDF.Graph` with the given name.

  When a graph with the given name can not be found can not be found `:error` is returned.

  ## Examples

      iex> dataset = RDF.Dataset.new([{EX.S1, EX.P1, EX.O1, EX.Graph}, {EX.S2, EX.P2, EX.O2}])
      ...> RDF.Dataset.fetch(dataset, EX.Graph)
      {:ok, RDF.Graph.new({EX.S1, EX.P1, EX.O1}, name: EX.Graph)}
      iex> RDF.Dataset.fetch(dataset, nil)
      {:ok, RDF.Graph.new({EX.S2, EX.P2, EX.O2})}
      iex> RDF.Dataset.fetch(dataset, EX.Foo)
      :error
  """
  @impl Access
  @spec fetch(t, Statement.graph_name() | nil) :: {:ok, Graph.t()} | :error
  def fetch(%__MODULE__{} = dataset, graph_name) do
    Access.fetch(dataset.graphs, coerce_graph_name(graph_name))
  end

  @doc """
  Fetches the `RDF.Graph` with the given name.

  When a graph with the given name can not be found can not be found the optionally
  given default value or `nil` is returned

  ## Examples

      iex> dataset = RDF.Dataset.new([{EX.S1, EX.P1, EX.O1, EX.Graph}, {EX.S2, EX.P2, EX.O2}])
      ...> RDF.Dataset.get(dataset, EX.Graph)
      RDF.Graph.new({EX.S1, EX.P1, EX.O1}, name: EX.Graph)
      iex> RDF.Dataset.get(dataset, nil)
      RDF.Graph.new({EX.S2, EX.P2, EX.O2})
      iex> RDF.Dataset.get(dataset, EX.Foo)
      nil
      iex> RDF.Dataset.get(dataset, EX.Foo, :bar)
      :bar
  """
  @spec get(t, Statement.graph_name() | nil, Graph.t() | nil) :: Graph.t() | nil
  def get(%__MODULE__{} = dataset, graph_name, default \\ nil) do
    case fetch(dataset, graph_name) do
      {:ok, value} -> value
      :error -> default
    end
  end

  @doc """
  The graph with given name.
  """
  @spec graph(t, Statement.graph_name() | nil) :: Graph.t()
  def graph(%__MODULE__{} = dataset, graph_name) do
    Map.get(dataset.graphs, coerce_graph_name(graph_name))
  end

  @doc """
  The default graph of a `RDF.Dataset`.
  """
  @spec default_graph(t) :: Graph.t()
  def default_graph(%__MODULE__{} = dataset) do
    Map.get(dataset.graphs, nil, Graph.new())
  end

  @doc """
  The set of all graphs.
  """
  @spec graphs(t) :: [Graph.t()]
  def graphs(%__MODULE__{} = dataset), do: Map.values(dataset.graphs)

  @doc """
  Gets and updates the graph with the given name, in a single pass.

  Invokes the passed function on the `RDF.Graph` with the given name;
  this function should return either `{graph_to_return, new_graph}` or `:pop`.

  If the passed function returns `{graph_to_return, new_graph}`, the
  return value of `get_and_update` is `{graph_to_return, new_dataset}` where
  `new_dataset` is the input `Dataset` updated with `new_graph` for
  the given name.

  If the passed function returns `:pop` the graph with the given name is
  removed and a `{removed_graph, new_dataset}` tuple gets returned.

  ## Examples

      iex> dataset = RDF.Dataset.new({EX.S, EX.P, EX.O, EX.Graph})
      ...> RDF.Dataset.get_and_update(dataset, EX.Graph, fn current_graph ->
      ...>     {current_graph, {EX.S, EX.P, EX.NEW}}
      ...>   end)
      {RDF.Graph.new({EX.S, EX.P, EX.O}, name: EX.Graph), RDF.Dataset.new({EX.S, EX.P, EX.NEW, EX.Graph})}
  """
  @impl Access
  @spec get_and_update(t, Statement.graph_name() | nil, get_and_update_graph_fun) ::
          {Graph.t(), t}
  def get_and_update(%__MODULE__{} = dataset, graph_name, fun) do
    graph_context = coerce_graph_name(graph_name)

    case fun.(get(dataset, graph_context)) do
      {old_graph, new_graph} ->
        {old_graph, put(dataset, new_graph, graph: graph_context)}

      :pop ->
        pop(dataset, graph_context)

      other ->
        raise "the given function must return a two-element tuple or :pop, got: #{inspect(other)}"
    end
  end

  @doc """
  Pops an arbitrary statement from a `RDF.Dataset`.
  """
  @spec pop(t) :: {Statement.t() | nil, t}
  def pop(dataset)

  def pop(%__MODULE__{graphs: graphs} = dataset)
      when graphs == %{},
      do: {nil, dataset}

  def pop(%__MODULE__{graphs: graphs} = dataset) do
    # TODO: Find a faster way ...
    [{graph_name, graph}] = Enum.take(graphs, 1)
    {{s, p, o}, popped_graph} = Graph.pop(graph)

    popped =
      if Graph.empty?(popped_graph),
        do: graphs |> Map.delete(graph_name),
        else: graphs |> Map.put(graph_name, popped_graph)

    {
      {s, p, o, graph_name},
      %__MODULE__{dataset | graphs: popped}
    }
  end

  @doc """
  Pops the graph with the given name.

  Removes the graph of the given `graph_name` from `dataset`.

  Returns a tuple containing the graph of the given name
  and the updated dataset without this graph.
  `nil` is returned instead of the graph if `dataset` does
  not contain a graph_name of the given `graph_name`.

  ## Examples

      iex> dataset = RDF.Dataset.new([
      ...>   {EX.S1, EX.P1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.P2, EX.O2}])
      ...> RDF.Dataset.pop(dataset, EX.Graph)
      {
        RDF.Graph.new({EX.S1, EX.P1, EX.O1}, name: EX.Graph),
        RDF.Dataset.new({EX.S2, EX.P2, EX.O2})
      }
      iex> RDF.Dataset.pop(dataset, EX.Foo)
      {nil, dataset}
  """
  @impl Access
  @spec pop(t, Statement.coercible_graph_name()) :: {Graph.t() | nil, t}
  def pop(%__MODULE__{} = dataset, graph_name) do
    case Access.pop(dataset.graphs, coerce_graph_name(graph_name)) do
      {nil, _} ->
        {nil, dataset}

      {graph, new_graphs} ->
        {graph, %__MODULE__{dataset | graphs: new_graphs}}
    end
  end

  @doc """
  The number of graphs within a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3, EX.Graph}])
      ...> |> RDF.Dataset.graph_count()
      2

  """
  @spec graph_count(t) :: non_neg_integer
  def graph_count(%__MODULE__{} = dataset) do
    map_size(dataset.graphs)
  end

  @doc """
  The number of statements within a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.statement_count()
      3
  """
  @spec statement_count(t) :: non_neg_integer
  def statement_count(%__MODULE__{} = dataset) do
    Enum.reduce(dataset.graphs, 0, fn {_, graph}, count ->
      count + Graph.triple_count(graph)
    end)
  end

  @doc """
  The set of all subjects used in the statement within all graphs of a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.subjects()
      MapSet.new([RDF.iri(EX.S1), RDF.iri(EX.S2)])
  """
  def subjects(%__MODULE__{} = dataset) do
    Enum.reduce(dataset.graphs, MapSet.new(), fn {_, graph}, subjects ->
      MapSet.union(subjects, Graph.subjects(graph))
    end)
  end

  @doc """
  The set of all properties used in the predicates within all graphs of a `RDF.Dataset`.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2},
      ...>   {EX.S1, EX.p2, EX.O3}]) |>
      ...>   RDF.Dataset.predicates()
      MapSet.new([EX.p1, EX.p2])
  """
  def predicates(%__MODULE__{} = dataset) do
    Enum.reduce(dataset.graphs, MapSet.new(), fn {_, graph}, predicates ->
      MapSet.union(predicates, Graph.predicates(graph))
    end)
  end

  @doc """
  The set of all resources used in the objects within a `RDF.Dataset`.

  Note: This function does collect only IRIs and BlankNodes, not Literals.

  ## Examples

      iex> RDF.Dataset.new([
      ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
      ...>   {EX.S2, EX.p2, EX.O2, EX.Graph},
      ...>   {EX.S3, EX.p1, EX.O2},
      ...>   {EX.S4, EX.p2, RDF.bnode(:bnode)},
      ...>   {EX.S5, EX.p3, "foo"}
      ...> ]) |> RDF.Dataset.objects()
      MapSet.new([RDF.iri(EX.O1), RDF.iri(EX.O2), RDF.bnode(:bnode)])
  """
  def objects(%__MODULE__{} = dataset) do
    Enum.reduce(dataset.graphs, MapSet.new(), fn {_, graph}, objects ->
      MapSet.union(objects, Graph.objects(graph))
    end)
  end

  @doc """
  The set of all resources used within a `RDF.Dataset`.

  ## Examples

    iex> RDF.Dataset.new([
    ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
    ...>   {EX.S2, EX.p1, EX.O2, EX.Graph},
    ...>   {EX.S2, EX.p2, RDF.bnode(:bnode)},
    ...>   {EX.S3, EX.p1, "foo"}
    ...> ]) |> RDF.Dataset.resources()
    MapSet.new([RDF.iri(EX.S1), RDF.iri(EX.S2), RDF.iri(EX.S3),
      RDF.iri(EX.O1), RDF.iri(EX.O2), RDF.bnode(:bnode), EX.p1, EX.p2])
  """
  def resources(%__MODULE__{} = dataset) do
    Enum.reduce(dataset.graphs, MapSet.new(), fn {_, graph}, resources ->
      MapSet.union(resources, Graph.resources(graph))
    end)
  end

  @doc """
  All statements within all graphs of a `RDF.Dataset`.

  While the statements of named graphs are returned as quad tuples, the statements
  of the default graph are returned as triples. If you want to get quads or triples
  uniformly, use the `quads/2` resp. `triples/2` functions instead.

  When the optional `:filter_star` flag is set to `true` RDF-star statements with
  a triple as subject or object will be filtered. The default value is `false`.

  ## Examples

        iex> RDF.Dataset.new([
        ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
        ...>   {EX.S2, EX.p2, EX.O2},
        ...>   {EX.S1, EX.p2, EX.O3}])
        ...> |> RDF.Dataset.statements()
        [{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3)},
         {RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)},
         {RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1), RDF.iri(EX.Graph)}]
  """
  @spec statements(t, keyword) :: [Statement.t()]
  def statements(%__MODULE__{} = dataset, opts \\ []) do
    Enum.flat_map(dataset.graphs, fn
      {nil, graph} -> Graph.triples(graph, opts)
      {_, graph} -> Graph.quads(graph, opts)
    end)
  end

  @doc """
  All statements within all graphs of a `RDF.Dataset` as quads.

  When the optional `:filter_star` flag is set to `true` RDF-star statements with
  a triple as subject or object will be filtered. The default value is `false`.

  ## Examples

        iex> RDF.Dataset.new([
        ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
        ...>   {EX.S2, EX.p2, EX.O2},
        ...>   {EX.S1, EX.p2, EX.O3}])
        ...> |> RDF.Dataset.quads()
        [{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3), nil},
         {RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2), nil},
         {RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1), RDF.iri(EX.Graph)}]
  """
  @spec quads(t, keyword) :: [Quad.t()]
  def quads(%__MODULE__{} = dataset, opts \\ []) do
    Enum.flat_map(dataset.graphs, fn {_, graph} -> Graph.quads(graph, opts) end)
  end

  @doc """
  All statements within all graphs of a `RDF.Dataset` as triples.

  When the optional `:filter_star` flag is set to `true` RDF-star statements with
  a triple as subject or object will be filtered. The default value is `false`.

  Note: When a triple is present in multiple graphs it will be present in the resulting
  list of triples multiple times for performance reasons. If you want to get a list with
  unique triples, you'll have to apply `Enum.uniq/1` on the result.

  ## Examples

        iex> RDF.Dataset.new([
        ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
        ...>   {EX.S2, EX.p2, EX.O2},
        ...>   {EX.S1, EX.p2, EX.O3}])
        ...> |> RDF.Dataset.triples()
        [{RDF.iri(EX.S1), RDF.iri(EX.p2), RDF.iri(EX.O3)},
         {RDF.iri(EX.S2), RDF.iri(EX.p2), RDF.iri(EX.O2)},
         {RDF.iri(EX.S1), RDF.iri(EX.p1), RDF.iri(EX.O1)}]
  """
  @spec triples(t, keyword) :: [Triple.t()]
  def triples(%__MODULE__{} = dataset, opts \\ []) do
    Enum.flat_map(dataset.graphs, fn {_, graph} -> Graph.triples(graph, opts) end)
  end

  @doc """
  Returns if the given `dataset` is empty.

  Note: You should always prefer this over the use of `Enum.empty?/1` as it is significantly faster.
  """
  @spec empty?(t) :: boolean
  def empty?(%__MODULE__{} = dataset) do
    Enum.empty?(dataset.graphs) or dataset |> graphs() |> Enum.all?(&Graph.empty?/1)
  end

  @doc """
  Checks if the given `input` statements exist within `dataset`.

  The `graph` option allows to set a different destination graph in which the
  statements should be checked, ignoring the graph context of given quads or the
  name of given graphs.

  ## Examples

        iex> dataset = RDF.Dataset.new([
        ...>   {EX.S1, EX.p1, EX.O1, EX.Graph},
        ...>   {EX.S2, EX.p2, EX.O2},
        ...>   {EX.S1, EX.p2, EX.O3}])
        ...> RDF.Dataset.include?(dataset, {EX.S1, EX.p1, EX.O1, EX.Graph})
        true
  """
  @spec include?(t, input, keyword) :: boolean
  def include?(dataset, input, opts \\ [])

  def include?(%__MODULE__{} = dataset, {_, _, _, graph} = quad, opts),
    do: do_include?(dataset, destination_graph(opts, graph), quad, opts)

  def include?(%__MODULE__{} = dataset, %Description{} = description, opts),
    do: do_include?(dataset, destination_graph(opts), description, opts)

  def include?(%__MODULE__{} = dataset, %Graph{} = graph, opts),
    do: do_include?(dataset, destination_graph(opts, graph.name), graph, opts)

  def include?(%__MODULE__{} = dataset, %__MODULE__{} = other_dataset, opts) do
    other_dataset
    |> graphs()
    |> Enum.all?(&include?(dataset, &1, opts))
  end

  def include?(dataset, input, opts)
      when is_list(input) or (is_map(input) and not is_struct(input)) do
    Enum.all?(input, &include?(dataset, &1, opts))
  end

  def include?(dataset, input, opts) when not is_struct(input),
    do: do_include?(dataset, destination_graph(opts), input, opts)

  defp do_include?(%__MODULE__{} = dataset, graph_name, input, opts) do
    if graph = dataset.graphs[graph_name] do
      Graph.include?(graph, input, opts)
    else
      false
    end
  end

  @doc """
  Checks if a graph of a `RDF.Dataset` contains statements about the given resource.

  ## Examples

        iex> RDF.Dataset.new([{EX.S1, EX.p1, EX.O1}]) |> RDF.Dataset.describes?(EX.S1)
        true
        iex> RDF.Dataset.new([{EX.S1, EX.p1, EX.O1}]) |> RDF.Dataset.describes?(EX.S2)
        false
  """
  @spec describes?(t, Statement.t(), Statement.coercible_graph_name() | nil) :: boolean
  def describes?(%__MODULE__{} = dataset, subject, graph_context \\ nil) do
    if graph = dataset.graphs[coerce_graph_name(graph_context)] do
      Graph.describes?(graph, subject)
    else
      false
    end
  end

  @doc """
  Returns the names of all graphs of a `RDF.Dataset` containing statements about the given subject.

  ## Examples
        iex> dataset = RDF.Dataset.new([
        ...>   {EX.S1, EX.p, EX.O},
        ...>   {EX.S2, EX.p, EX.O},
        ...>   {EX.S1, EX.p, EX.O, EX.Graph1},
        ...>   {EX.S2, EX.p, EX.O, EX.Graph2}])
        ...> RDF.Dataset.who_describes(dataset, EX.S1)
        [nil, RDF.iri(EX.Graph1)]
  """
  @spec who_describes(t, Statement.coercible_subject()) :: [Graph.t()]
  def who_describes(%__MODULE__{} = dataset, subject) do
    subject = coerce_subject(subject)

    dataset.graphs
    |> Map.values()
    |> Stream.filter(&Graph.describes?(&1, subject))
    |> Enum.map(& &1.name)
  end

  # This function relies on Map.intersect/3 that was added in Elixir v1.15
  if Version.match?(System.version(), ">= 1.15.0") do
    @doc """
    Returns a new dataset that is the intersection of the given `dataset` with the given `data`.

    The `data` can be given in any form an `RDF.Dataset` can be created from.

    ## Examples

        iex> RDF.Dataset.new([
        ...>   {EX.S1, EX.p(), [EX.O1, EX.O2]},
        ...>   {EX.S2, EX.p(), EX.O3, EX.Graph}
        ...> ])
        ...> |> RDF.Dataset.intersection([
        ...>     {EX.S1, EX.p(), EX.O2},
        ...>     {EX.S2, EX.p(), EX.O3}
        ...>   ])
        RDF.Dataset.new({EX.S1, EX.p(), EX.O2})

    """
    @spec intersection(t(), t() | Graph.t() | Description.t() | input()) :: t()
    def intersection(dataset, data)

    def intersection(%__MODULE__{} = dataset1, %__MODULE__{} = dataset2) do
      intersection =
        dataset1.graphs
        |> Map.intersect(dataset2.graphs, fn _, g1, g2 ->
          graph_intersection = Graph.intersection(g1, g2)
          unless Graph.empty?(graph_intersection), do: graph_intersection
        end)
        |> RDF.Utils.reject_empty_map_values()

      %__MODULE__{dataset1 | graphs: intersection}
    end

    def intersection(%__MODULE__{} = dataset, data) do
      intersection(dataset, new(data))
    end
  end

  @doc """
  Returns a nested map of the native Elixir values of a `RDF.Dataset`.

  When a `:context` option is given with a `RDF.PropertyMap`, predicates will
  be mapped to the terms defined in the `RDF.PropertyMap`, if present.

  ## Examples

      iex> [
      ...>   {~I<http://example.com/S>, ~I<http://example.com/p>, ~L"Foo", ~I<http://example.com/Graph>},
      ...>   {~I<http://example.com/S>, ~I<http://example.com/p>, RDF.XSD.integer(42), }
      ...> ]
      ...> |> RDF.Dataset.new()
      ...> |> RDF.Dataset.values()
      %{
        "http://example.com/Graph" => %{
          "http://example.com/S" => %{"http://example.com/p" => ["Foo"]}
        },
        nil => %{
          "http://example.com/S" => %{"http://example.com/p" => [42]}
        }
      }

  """
  @spec values(t, keyword) :: map
  def values(%__MODULE__{} = dataset, opts \\ []) do
    if property_map = PropertyMap.from_opts(opts) do
      map(dataset, Statement.default_property_mapping(property_map))
    else
      map(dataset, &Statement.default_term_mapping/1)
    end
  end

  @doc """
  Returns a nested map of a `RDF.Dataset` where each element from its quads is mapped with the given function.

  The function `fun` will receive a tuple `{statement_position, rdf_term}` where
  `statement_position` is one of the atoms `:subject`, `:predicate`, `:object` or
  `:graph_name` while `rdf_term` is the RDF term to be mapped. When the given function
  returns `nil` this will be interpreted as an error and will become the overhaul
  result of the `map/2` call.

  ## Examples

      iex> [
      ...>   {~I<http://example.com/S>, ~I<http://example.com/p>, ~L"Foo", ~I<http://example.com/Graph>},
      ...>   {~I<http://example.com/S>, ~I<http://example.com/p>, RDF.XSD.integer(42), }
      ...> ]
      ...> |> RDF.Dataset.new()
      ...> |> RDF.Dataset.map(fn
      ...>      {:graph_name, graph_name} ->
      ...>        graph_name
      ...>      {:predicate, predicate} ->
      ...>        predicate
      ...>        |> to_string()
      ...>        |> String.split("/")
      ...>        |> List.last()
      ...>        |> String.to_atom()
      ...>    {_, term} ->
      ...>      RDF.Term.value(term)
      ...>    end)
      %{
        ~I<http://example.com/Graph> => %{
          "http://example.com/S" => %{p: ["Foo"]}
        },
        nil => %{
          "http://example.com/S" => %{p: [42]}
        }
      }

  """
  @spec map(t, Statement.term_mapping()) :: map
  def map(dataset, fun)

  def map(%__MODULE__{} = dataset, fun) do
    Map.new(dataset.graphs, fn {graph_name, graph} ->
      {fun.({:graph_name, graph_name}), Graph.map(graph, fun)}
    end)
  end

  @doc """
  Checks if two `RDF.Dataset`s are equal.

  Two `RDF.Dataset`s are considered to be equal if they contain the same triples
  and have the same name.
  """
  @spec equal?(t | any, t | any) :: boolean
  def equal?(dataset1, dataset2)

  def equal?(%__MODULE__{} = dataset1, %__MODULE__{} = dataset2) do
    clear_metadata(dataset1) == clear_metadata(dataset2)
  end

  def equal?(_, _), do: false

  defdelegate isomorphic?(a, b), to: RDF.Canonicalization

  @doc """
  Canonicalizes the blank nodes of a dataset according to the RDF Dataset Canonicalization spec.

  See the `RDF.Canonicalization` module documentation on available options.

  ## Example

      iex> RDF.Dataset.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])
      ...> |> RDF.Dataset.canonicalize()
      RDF.Dataset.new([{~B<c14n0>, EX.p(), ~B<c14n1>}, {~B<c14n1>, EX.p(), ~B<c14n0>}])

  """
  @spec canonicalize(RDF.Dataset.t() | RDF.Graph.t(), keyword) :: RDF.Dataset.t()
  def canonicalize(%graph_or_dataset{} = dataset, opts \\ [])
      when graph_or_dataset in [__MODULE__, Graph] do
    {canonicalized_dataset, _} = RDF.Canonicalization.canonicalize(dataset, opts)
    canonicalized_dataset
  end

  @doc """
  Returns a hash of the canonical form of the given dataset.

  This hash is computed as follows:

  1. Compute the canonical form of the dataset according to the RDF Dataset Canonicalization spec
     using `canonicalize/1`.
  2. Serialize this canonical dataset to N-Quads sorted by Unicode code point order.
  3. Compute the SHA-256 of this N-Quads serialization.

  Note that the data structure is not relevant for the canonical hash, i.e.
  the same hash is generated for the same data regardless of whether it is
  passed in an `RDF.Graph`, `RDF.Dataset` or `RDF.Description`.

  ## Options

  - `:hash_algorithm` (default: `:sha256`): Allows to set the hash algorithm to be used in step 3.
    Any of the `:crypto.hash_algorithm()` values of Erlang's `:crypto` module are allowed.
    Note that this does NOT affect the hash function used during the


  ## Example

      iex> RDF.Dataset.new([{~B<foo>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<foo>}])
      ...> |> RDF.Dataset.canonical_hash()
      "053688e09a20a49acc3e1a5e6403c827b817eef9e4c90bfd71f2360e2a6446aa"

      iex> RDF.Graph.new([{~B<other>, EX.p(), ~B<bar>}, {~B<bar>, EX.p(), ~B<other>}])
      ...> |> RDF.Graph.canonical_hash()
      "053688e09a20a49acc3e1a5e6403c827b817eef9e4c90bfd71f2360e2a6446aa"
  """
  @spec canonical_hash(RDF.Dataset.t() | RDF.Graph.t(), keyword) :: binary
  def canonical_hash(%graph_or_dataset{} = dataset, opts \\ [])
      when graph_or_dataset in [__MODULE__, Graph] do
    canonical_nquads =
      dataset
      |> canonicalize()
      |> RDF.NQuads.write_string!(sort: true)

    opts
    |> Keyword.get(:hash_algorithm, :sha256)
    |> :crypto.hash(canonical_nquads)
    |> Base.encode16(case: :lower)
  end

  @doc """
  Returns the aggregated prefixes of all graphs of `dataset` as a `RDF.PrefixMap`.
  """
  @spec prefixes(t) :: PrefixMap.t() | nil
  def prefixes(%__MODULE__{} = dataset) do
    dataset
    |> graphs()
    |> Enum.reduce(PrefixMap.new(), fn graph, prefixes ->
      if graph.prefixes do
        PrefixMap.merge!(prefixes, graph.prefixes, :ignore)
      else
        prefixes
      end
    end)
  end

  defp clear_metadata(%__MODULE__{} = dataset) do
    %__MODULE__{
      dataset
      | graphs:
          Map.new(dataset.graphs, fn {name, graph} ->
            {name, Graph.clear_metadata(graph)}
          end)
    }
  end

  defimpl Enumerable do
    alias RDF.Dataset

    def member?(dataset, statement), do: {:ok, Dataset.include?(dataset, statement)}
    def count(dataset), do: {:ok, Dataset.statement_count(dataset)}

    if Version.match?(System.version(), ">= 1.14.0") do
      def slice(dataset) do
        size = Dataset.statement_count(dataset)
        {:ok, size, &Dataset.statements/1}
      end
    else
      def slice(dataset) do
        size = Dataset.statement_count(dataset)
        {:ok, size, &Enumerable.List.slice(Dataset.statements(dataset), &1, &2, size)}
      end
    end

    def reduce(dataset, acc, fun) do
      dataset
      |> Dataset.statements()
      |> Enumerable.List.reduce(acc, fun)
    end
  end

  defimpl Collectable do
    alias RDF.Dataset

    def into(original) do
      collector_fun = fn
        dataset, {:cont, list} when is_list(list) ->
          IO.warn(
            "statements as lists in `Collectable` implementation of `RDF.Dataset` are deprecated and will be removed in RDF.ex v2.0; use statements as tuples instead"
          )

          Dataset.add(dataset, List.to_tuple(list))

        dataset, {:cont, elem} ->
          Dataset.add(dataset, elem)

        dataset, :done ->
          dataset

        _dataset, :halt ->
          :ok
      end

      {original, collector_fun}
    end
  end
end