Skip to main content

lib/terminus_db/woql/path.ex

defmodule TerminusDB.WOQL.Path do
  @moduledoc """
  Path pattern parser and structured builders for WOQL path queries.

  Supports two modes:

  ## String-compiled parser

      TerminusDB.WOQL.Path.parse("friend*{1,3}")
      #=> {:times, {:pred, "friend"}, 1, 3}

  Grammar: `predicate`, `<inverse`, `*` (star), `+` (plus), `{n}` / `{n,m}`
  (bounded), `|` (or), `,` (sequence), `.` (any), `(...)` (grouping).

  ## Structured builders

      WOQL.Path.path_star(WOQL.Path.path_pred("friend"))
      #=> {:star, {:pred, "friend"}}

  ## Serialization

  `to_jsonld/1` serializes a Path AST to the WOQL JSON-LD wire format.
  `from_jsonld/1` deserializes back. `normalize/1` accepts either a string
  or an AST tuple and returns an AST tuple.

  This module is used internally by `WOQL.path/3` and `WOQL.path/4` but
  can also be used directly for building path patterns programmatically.
  """

  # Path pattern parser and structured builders for WOQL path queries.
  #
  # Supports two modes:
  #   1. String-compiled: path("v:S", "<friend*{1,3}", "v:O") — the pattern
  #      string is tokenized and parsed into a Path AST.
  #   2. Structured: path("v:S", path_star(path_pred("friend")), "v:O") —
  #      builds the AST compositionally.
  #
  # Grammar:
  #   pattern      := or
  #   or           := sequence ('|' sequence)*
  #   sequence     := step (',' step)*
  #   step         := atom quantifier?
  #   quantifier   := '*' | '+' | '{' int '}' | '{' int ',' int '}' | '{' int ',' '}'
  #   atom         := predicate | '(' or ')' | inverse
  #   inverse      := '<' predicate
  #   predicate    := name | '.'
  #
  # Quantifier semantics:
  #   {n}   — exactly n   (from=n, to=n)
  #   {n,m} — from n to m (from=n, to=m)
  #   {n,}  — at least n  (from=n, to=nil/unbounded)
  #
  # AST nodes are tagged tuples:
  #   {:pred, name}           — PathPredicate
  #   {:any}                  — PathPredicate (any predicate, ".")
  #   {:or, [nodes]}          — PathOr
  #   {:seq, [nodes]}         — PathSequence
  #   {:star, node}           — PathStar
  #   {:plus, node}           — PathPlus
  #   {:times, node, from, to} — PathTimes
  #   {:inverse, name}        — InversePathPredicate

  # --------------------------------------------------------------------------
  # Structured builders
  # --------------------------------------------------------------------------

  @doc """
  Builds a path predicate node for the given predicate name.
  """
  @spec path_pred(String.t()) :: tuple()
  def path_pred(name) when is_binary(name), do: {:pred, name}

  @doc """
  Builds a path "any predicate" node (matches `.` in the string grammar).
  """
  @spec path_any :: tuple()
  def path_any, do: {:any}

  @doc """
  Builds a path sequence — each step is traversed in order.
  """
  @spec path_seq([tuple()]) :: tuple()
  def path_seq(steps) when is_list(steps), do: {:seq, steps}

  @doc """
  Builds a path alternation — any branch may be taken.
  """
  @spec path_or([tuple()]) :: tuple()
  def path_or(branches) when is_list(branches), do: {:or, branches}

  @doc """
  Builds a path star — zero or more repetitions.
  """
  @spec path_star(tuple()) :: tuple()
  def path_star(node), do: {:star, node}

  @doc """
  Builds a path plus — one or more repetitions.
  """
  @spec path_plus(tuple()) :: tuple()
  def path_plus(node), do: {:plus, node}

  @doc """
  Builds a path times — `from` to `to` repetitions. `to` may be `nil` for
  unbounded.
  """
  @spec path_times(tuple(), non_neg_integer(), non_neg_integer() | nil) :: tuple()
  def path_times(node, from, to \\ nil), do: {:times, node, from, to}

  @doc """
  Builds an inverse path — traverses the predicate in reverse direction.
  """
  @spec path_inverse(String.t()) :: tuple()
  def path_inverse(name) when is_binary(name), do: {:inverse, name}

  # --------------------------------------------------------------------------
  # String parser
  # --------------------------------------------------------------------------

  @doc """
  Parses a path pattern string into a Path AST.

  ## Examples

      iex> TerminusDB.WOQL.Path.parse("friend")
      {:pred, "friend"}

      iex> TerminusDB.WOQL.Path.parse("friend*")
      {:star, {:pred, "friend"}}

      iex> TerminusDB.WOQL.Path.parse("<friend")
      {:inverse, "friend"}

      iex> TerminusDB.WOQL.Path.parse("friend|foe")
      {:or, [{:pred, "friend"}, {:pred, "foe"}]}

  """
  @spec parse(String.t()) :: tuple()
  def parse("") do
    raise ArgumentError, "path pattern cannot be empty"
  end

  def parse(pattern) when is_binary(pattern) do
    tokens = tokenize(pattern)

    case parse_or(tokens) do
      {ast, []} -> ast
      _ -> raise ArgumentError, "invalid path pattern: #{inspect(pattern)}"
    end
  end

  defp tokenize(pattern), do: tokenize(pattern, [], "")

  defp tokenize("", tokens, acc), do: Enum.reverse(reverse_push(acc, tokens))

  defp tokenize(<<c::utf8, rest::binary>>, tokens, acc)
       when c in [?|, ?(, ?), ?*, ?+, ?{, ?}, ?<, ?,, ?.] do
    tokens = reverse_push(acc, tokens)
    char_token = char_to_token(<<c::utf8>>)
    tokenize(rest, [char_token | tokens], "")
  end

  defp tokenize(<<c::utf8, rest::binary>>, tokens, acc) do
    tokenize(rest, tokens, <<acc::binary, c::utf8>>)
  end

  defp char_to_token("|"), do: :pipe
  defp char_to_token("("), do: :lparen
  defp char_to_token(")"), do: :rparen
  defp char_to_token("*"), do: :star
  defp char_to_token("+"), do: :plus
  defp char_to_token("{"), do: :lbrace
  defp char_to_token("}"), do: :rbrace
  defp char_to_token("<"), do: :lt
  defp char_to_token(","), do: :comma
  defp char_to_token("."), do: :dot

  defp reverse_push("", tokens), do: tokens
  defp reverse_push(acc, tokens), do: [{:name, acc} | tokens]

  # parse_or := sequence ('|' sequence)*
  defp parse_or(tokens) do
    case parse_seq(tokens) do
      :no_match ->
        :no_match

      {first, rest} ->
        case rest do
          [:pipe | rest2] ->
            case parse_or(rest2) do
              :no_match -> :no_match
              {second, rest3} -> {{:or, [first, second]}, rest3}
            end

          _ ->
            {first, rest}
        end
    end
  end

  # parse_seq := step+  (returns single step if only one, {:seq, [...]} if multiple)
  defp parse_seq(tokens), do: parse_seq(tokens, [])

  defp parse_seq(tokens, []) do
    case parse_step(tokens) do
      :no_match ->
        :no_match

      {step, rest} ->
        rest = skip_comma(rest)

        if peek_stop(rest) do
          {step, rest}
        else
          parse_seq(rest, [step])
        end
    end
  end

  defp parse_seq(tokens, acc) do
    case parse_step(tokens) do
      :no_match ->
        {{:seq, Enum.reverse(acc)}, tokens}

      {step, rest} ->
        rest = skip_comma(rest)

        if peek_stop(rest) do
          {{:seq, Enum.reverse([step | acc])}, rest}
        else
          parse_seq(rest, [step | acc])
        end
    end
  end

  defp skip_comma([:comma | rest]), do: skip_comma(rest)
  defp skip_comma(rest), do: rest

  defp peek_stop([]), do: true
  defp peek_stop([:pipe | _]), do: true
  defp peek_stop([:rparen | _]), do: true
  defp peek_stop([:comma | _]), do: true
  defp peek_stop(_), do: false

  # parse_step := atom quantifier?
  defp parse_step(tokens) do
    case parse_atom(tokens) do
      :no_match ->
        :no_match

      {atom, rest} ->
        case parse_quantifier(rest) do
          {{:star, nil}, rest2} -> {{:star, atom}, rest2}
          {{:plus, nil}, rest2} -> {{:plus, atom}, rest2}
          {{:times, nil, from, to}, rest2} -> {{:times, atom, from, to}, rest2}
          :no_match -> {atom, rest}
        end
    end
  end

  # parse_atom := predicate | '(' or ')' | inverse
  defp parse_atom([:lt, {:name, name} | rest]) do
    {{:inverse, name}, rest}
  end

  defp parse_atom([:lt, :dot | rest]) do
    {{:inverse, "."}, rest}
  end

  defp parse_atom([:lparen | rest]) do
    case parse_or(rest) do
      {ast, [:rparen | rest2]} -> {ast, rest2}
      _ -> :no_match
    end
  end

  defp parse_atom([:dot | rest]) do
    {{:any}, rest}
  end

  defp parse_atom([{:name, name} | rest]) do
    {{:pred, name}, rest}
  end

  defp parse_atom(_), do: :no_match

  # parse_quantifier := '*' | '+' | '{' int (',' int?)? '}'
  defp parse_quantifier([:star | rest]), do: {{:star, nil}, rest}
  defp parse_quantifier([:plus | rest]), do: {{:plus, nil}, rest}

  defp parse_quantifier([:lbrace, {:name, n} | rest]) do
    with {from, ""} <- Integer.parse(n),
         {result, rest2} <- parse_times_rest(rest, from) do
      {result, rest2}
    else
      _ -> :no_match
    end
  end

  defp parse_quantifier(_), do: :no_match

  defp parse_times_rest([:comma, :rbrace | rest2], from) do
    {{:times, nil, from, nil}, rest2}
  end

  defp parse_times_rest([:comma, {:name, m}, :rbrace | rest2], from) do
    case Integer.parse(m) do
      {to, ""} -> {{:times, nil, from, to}, rest2}
      _ -> :no_match
    end
  end

  defp parse_times_rest([:rbrace | rest2], from) do
    {{:times, nil, from, from}, rest2}
  end

  defp parse_times_rest(_, _), do: :no_match

  # --------------------------------------------------------------------------
  # AST → JSON-LD serialization
  # --------------------------------------------------------------------------

  @doc """
  Serializes a Path AST node to the WOQL JSON-LD wire format.
  """
  @spec to_jsonld(tuple()) :: map()
  def to_jsonld({:pred, name}) do
    %{"@type" => "PathPredicate", "predicate" => name}
  end

  def to_jsonld({:any}) do
    %{"@type" => "PathPredicate", "predicate" => "."}
  end

  def to_jsonld({:or, branches}) do
    %{"@type" => "PathOr", "or" => Enum.map(branches, &to_jsonld/1)}
  end

  def to_jsonld({:seq, steps}) do
    %{"@type" => "PathSequence", "sequence" => Enum.map(steps, &to_jsonld/1)}
  end

  def to_jsonld({:star, node}) do
    %{"@type" => "PathStar", "star" => to_jsonld(node)}
  end

  def to_jsonld({:plus, node}) do
    %{"@type" => "PathPlus", "plus" => to_jsonld(node)}
  end

  def to_jsonld({:times, node, from, to}) do
    base = %{"@type" => "PathTimes", "times" => to_jsonld(node), "from" => from}

    if to do
      Map.put(base, "to", to)
    else
      base
    end
  end

  def to_jsonld({:inverse, name}) do
    %{"@type" => "InversePathPredicate", "predicate" => name}
  end

  # --------------------------------------------------------------------------
  # JSON-LD → AST deserialization
  # --------------------------------------------------------------------------

  @doc """
  Deserializes a WOQL JSON-LD path pattern back into a Path AST.
  """
  @spec from_jsonld(map()) :: tuple()
  def from_jsonld(%{"@type" => "PathPredicate", "predicate" => "."}) do
    {:any}
  end

  def from_jsonld(%{"@type" => "PathPredicate", "predicate" => name}) do
    {:pred, name}
  end

  def from_jsonld(%{"@type" => "PathOr", "or" => branches}) do
    {:or, Enum.map(branches, &from_jsonld/1)}
  end

  def from_jsonld(%{"@type" => "PathSequence", "sequence" => steps}) do
    {:seq, Enum.map(steps, &from_jsonld/1)}
  end

  def from_jsonld(%{"@type" => "PathStar", "star" => node}) do
    {:star, from_jsonld(node)}
  end

  def from_jsonld(%{"@type" => "PathPlus", "plus" => node}) do
    {:plus, from_jsonld(node)}
  end

  def from_jsonld(%{"@type" => "PathTimes"} = m) do
    to = if m["to"], do: m["to"], else: nil
    {:times, from_jsonld(m["times"]), m["from"], to}
  end

  def from_jsonld(%{"@type" => "InversePathPredicate", "predicate" => name}) do
    {:inverse, name}
  end

  # --------------------------------------------------------------------------
  # Pattern normalization — accepts string or AST, returns AST
  # --------------------------------------------------------------------------

  @doc """
  Normalizes a pattern — if it's a string, parses it; if it's already an AST
  tuple, returns it as-is.
  """
  @spec normalize(String.t() | tuple()) :: tuple()
  def normalize(pattern) when is_binary(pattern), do: parse(pattern)
  def normalize(pattern) when is_tuple(pattern), do: pattern
end