lib/archeometer/query/term.ex

defmodule Archeometer.Query.Term do
  @moduledoc """
  Restrict and adapt Elixir terms to a subset that is translatable to SQLite
  expressions. See [the language reference](https://sqlite.org/lang_expr.html).

  The following literals are allowed
  - Integers
  - Floating point numbers
  - Bitstrings
  - Booleans
  - Lists

  Plus some operators and functions
  - Boolean operators: `and`, `or`, `not`
  - Comparison operators: `==`, `!=`, `<`, `>`, `<=`, `>=`
  - Arithmetic operators: `+`, `-`, `*`, `/`
  - Arithmetic functions: `round`
  - Other functions: `length`, `replace`
  - Search functions: `like`
  - Null checking: `is_nil`, `not is_nil`
  - Aggregation functions: `avg`, `count`, `max`, `min`, `sum`
  - Subquery expressions: `exists`
  - `in` operator

  Every builtin takes a fixed number of arguments (for example `max` takes
  one, `round` takes two and `replace` takes three); any other number of
  arguments is rejected by `validate/1`.

  And any kind of dot operator: `m.some.key.or.attribute` to represent table
  lookup. For example

      round(avg(m.functions.cc), 2)

      m.cc > 10

      like(m.name, "Archeometer.%") and m.name != "Archeometer.Query.Term"

  Tuples and maps are allowed but only as a top level construct.

      {m.name, m.num_lines, avg(m.functions.cc)} # this is valid
      [name, num_lines * 10] # this as well

      {m.name, {m.functions.name, m.functions.num_lines}} # this is not
      [m.name, {m.num_lines}] # neither is this
  """

  # Every supported builtin together with the exact number of arguments it
  # must receive.
  @valid_builtins_arity [
    is_nil: 1,
    not: 1,
    avg: 1,
    count: 1,
    max: 1,
    min: 1,
    sum: 1,
    exists: 1,
    length: 1,
    round: 2,
    like: 2,
    and: 2,
    or: 2,
    in: 2,
    *: 2,
    /: 2,
    +: 2,
    -: 2,
    ==: 2,
    !=: 2,
    <: 2,
    >: 2,
    <=: 2,
    >=: 2,
    replace: 3
  ]

  @valid_builtins Keyword.keys(@valid_builtins_arity)

  # The builtins grouped by the syntactic shape they take once rendered as SQL.
  @sqlite_functions [:avg, :count, :max, :min, :sum, :round, :length, :replace]
  @sqlite_prefix_operators [:not]
  @sqlite_subquery_operators [:exists]
  @sqlite_infix_operators [
    :and,
    :or,
    :like,
    :in,
    :+,
    :-,
    :*,
    :/,
    :==,
    :!=,
    :<,
    :>,
    :<=,
    :>=
  ]
  # `:not_is_nil` is never produced by `validate/1`; `to_iodata/1` introduces it
  # internally when it rewrites `not is_nil(x)`.
  @sqlite_postfix_operators [:is_nil, :not_is_nil]

  @doc """
  Takes an AST and returns a validated AST. For the most part it is identical
  to the original, except with annotated `{:symb, meta, lookups}` nodes instead
  of dot operators and `{:list, meta, items}` nodes instead of plain lists.

  Returns `{:ok, validated_ast}` on success. On failure returns
  `{:error, {reason, detail}}` where `reason` is one of `:too_many_args`,
  `:not_enough_args`, `:missing_args`, `:invalid_symb` or `:invalid_exprs`.

  ## Examples

      iex> Archeometer.Query.Term.validate(42)
      {:ok, 42}

      iex> Archeometer.Query.Term.validate("some text")
      {:ok, "some text"}
  """
  @spec validate(Macro.t()) :: {:ok, term()} | {:error, {atom(), term()}}
  def validate(ast)

  # literals
  def validate(i) when is_integer(i), do: {:ok, i}
  def validate(f) when is_float(f), do: {:ok, f}
  def validate(s) when is_bitstring(s), do: {:ok, s}
  def validate(p) when is_boolean(p), do: {:ok, p}

  # escaped fragment
  def validate({:^, _, [idx]} = ast) when is_integer(idx), do: {:ok, ast}

  # allow blocks only if they have a single element
  def validate({:__block__, _, [ast]}), do: validate(ast)

  def validate(ast) when is_list(ast) do
    with {:ok, exprs} <- Enum.reduce(ast, {:ok, []}, &validate_arg/2) do
      {:ok, {:list, [], Enum.reverse(exprs)}}
    end
  end

  # builtin operators
  def validate({op, _meta, args}) when op in @valid_builtins and is_list(args) do
    # the guard guarantees `op` is a key of the arity table
    arity = Keyword.fetch!(@valid_builtins_arity, op)

    # Arguments are validated before the arity is checked, so an invalid
    # argument is reported in preference to a wrong argument count.
    with {:ok, exprs} <- Enum.reduce(args, {:ok, []}, &validate_arg/2) do
      case length(exprs) do
        count when count > arity ->
          {:error, {:too_many_args, {op, arity, args}}}

        count when count < arity ->
          {:error, {:not_enough_args, {op, arity, args}}}

        _exact ->
          # `exprs` was accumulated back to front
          {:ok, {op, [], Enum.reverse(exprs)}}
      end
    end
  end

  # a builtin name used as a bare variable, i.e. without any argument list
  def validate({op, _meta, module}) when op in @valid_builtins and is_atom(module) do
    {:error, {:missing_args, op}}
  end

  # dot operators are left associative in Elixir, but we need right
  # associativity, so we need to do a little bit of juggling...
  def validate({{:., _, _}, _, []} = lookup), do: flatten_dots(lookup, [])

  # a bare variable becomes a lookup with a single symbol
  def validate({atom, meta, module}) when is_atom(module) and is_atom(atom),
    do: {:ok, {:symb, [], [{atom, meta, nil}]}}

  def validate(other), do: {:error, {:invalid_exprs, other}}

  # Reducer validating one expression and prepending it to the accumulated
  # (reversed) list. Once the accumulator is an error it is passed through
  # untouched, so the first error found is the one returned.
  defp validate_arg(expr, acc) do
    with {:ok, exprs} <- acc,
         {:ok, valid_expr} <- validate(expr) do
      {:ok, [valid_expr | exprs]}
    end
  end

  # Walks a chain of dot calls from the outermost call inwards, collecting the
  # accessed names so that they end up ordered from left to right.
  defp flatten_dots({{:., meta, [subexpr, atom]}, _meta_outer, []}, symbs)
       when is_atom(atom),
       do: flatten_dots(subexpr, [{atom, meta, nil} | symbs])

  # the innermost element of the chain must be a bare variable
  defp flatten_dots({atom, meta, module}, symbs) when is_atom(module) and is_atom(atom),
    do: {:ok, {:symb, [], [{atom, meta, nil} | symbs]}}

  defp flatten_dots(other, _), do: {:error, {:invalid_symb, other}}

  @doc """
  Takes a validated AST and tries to transform it into SQL-compatible iodata.
  The goal is to obtain valid SQL just by calling `IO.iodata_to_binary/1`.

  String literals are wrapped in single quotes and any single quote they
  contain is doubled, which is how SQLite escapes quotes inside a string
  literal.

  Returns `{:ok, iodata}` on success, or `{:error, {:invalid_operator, ast}}`
  when a node cannot be rendered.

  ## Examples

      iex> Archeometer.Query.Term.to_iodata(10)
      {:ok, "10"}

      iex> Archeometer.Query.Term.to_iodata("it's")
      {:ok, "'it''s'"}
  """
  @spec to_iodata(term()) :: {:ok, iodata()} | {:error, {:invalid_operator, term()}}
  def to_iodata(ast)

  def to_iodata(i) when is_integer(i), do: {:ok, Integer.to_string(i)}
  def to_iodata(f) when is_float(f), do: {:ok, Float.to_string(f)}

  def to_iodata(s) when is_bitstring(s) do
    # An unescaped quote would terminate the literal early, producing broken
    # SQL or allowing arbitrary SQL to be injected.
    escaped = String.replace(s, "'", "''")
    {:ok, "'" <> escaped <> "'"}
  end

  def to_iodata(p) when is_boolean(p), do: {:ok, Atom.to_string(p)}

  def to_iodata({:symb, _meta, symbs}),
    do: {:ok, Enum.map_join(symbs, ".", &elem(&1, 0))}

  def to_iodata({:list, _meta, args}) do
    args
    |> to_iodata_arglist()
    |> ok_do(fn io_args -> ["(", io_args, ")"] end)
  end

  # NOT IS NULL is illegal SQL, so this extra case is hardcoded
  def to_iodata({:not, _, [{:is_nil, _, [arg]}]}),
    do: to_iodata({:not_is_nil, [], [arg]})

  def to_iodata({op, _meta, args}) when op in @sqlite_functions do
    args
    |> to_iodata_arglist()
    |> ok_do(fn io_args -> [builtin_str(op), "(", io_args, ")"] end)
  end

  def to_iodata({op, _meta, [arg]}) when op in @sqlite_prefix_operators do
    arg
    |> to_iodata()
    |> ok_do(fn io_arg -> [builtin_str(op), " ", io_arg] end)
  end

  # the subquery is expected to be already rendered, so it is inserted verbatim
  def to_iodata({op, _meta, [arg]})
      when op in @sqlite_subquery_operators and is_bitstring(arg) do
    {:ok, [builtin_str(op), " ", "(", arg, ")"]}
  end

  def to_iodata({op, _meta, [arg0, arg1]}) when op in @sqlite_infix_operators do
    with {:ok, io_arg0} <- to_iodata(arg0),
         {:ok, io_arg1} <- to_iodata(arg1) do
      {:ok, ["(", io_arg0, " ", builtin_str(op), " ", io_arg1, ")"]}
    end
  end

  def to_iodata({op, _meta, [arg]}) when op in @sqlite_postfix_operators do
    arg
    |> to_iodata()
    |> ok_do(fn io_arg -> [io_arg, " ", builtin_str(op)] end)
  end

  def to_iodata({_op, _meta, _args} = other_ast),
    do: {:error, {:invalid_operator, other_ast}}

  # Renders every argument and separates the results with commas.
  defp to_iodata_arglist(args) do
    args
    |> Enum.reduce({:ok, []}, &to_iodata_arg/2)
    |> ok_do(fn io_args -> io_args |> Enum.reverse() |> Enum.intersperse(",") end)
  end

  # Reducer rendering one argument and prepending it to the accumulated
  # (reversed) list; an error accumulator is passed through untouched.
  defp to_iodata_arg(arg, {:ok, args}) do
    arg
    |> to_iodata()
    |> ok_do(fn io_arg -> [io_arg | args] end)
  end

  defp to_iodata_arg(_arg, {:error, _} = error), do: error

  # as far as possible, operators have the same representation
  # if not possible the string representation is hardcoded here
  defp builtin_str(:not), do: "NOT"
  defp builtin_str(:and), do: "AND"
  defp builtin_str(:or), do: "OR"
  defp builtin_str(:like), do: "LIKE"
  defp builtin_str(:is_nil), do: "ISNULL"
  defp builtin_str(:not_is_nil), do: "NOTNULL"
  defp builtin_str(:exists), do: "EXISTS"
  defp builtin_str(op) when op in @valid_builtins, do: Atom.to_string(op)

  @doc """
  Takes a validated AST and tries to transform it back into its original state.

  Most of the metadata is long gone by this point, but at least the original
  structure is preserved: `:symb` nodes become nested dot calls again and
  `:list` nodes become plain lists.
  """
  @spec to_ast(term()) :: Macro.t()
  def to_ast(validated_ast)

  def to_ast(i) when is_integer(i), do: i
  def to_ast(f) when is_float(f), do: f
  def to_ast(s) when is_bitstring(s), do: s
  def to_ast(p) when is_boolean(p), do: p
  def to_ast(a) when is_atom(a), do: a

  def to_ast({:symb, _, elems}) do
    # the outermost dot call holds the last symbol, so start from the end
    elems
    |> Enum.reverse()
    |> dot_from_list()
  end

  def to_ast({:list, _, args}), do: Enum.map(args, &to_ast/1)

  def to_ast({op, meta, args}), do: {op, meta, Enum.map(args, &to_ast/1)}
  def to_ast({a0, a1}), do: {to_ast(a0), to_ast(a1)}

  # Rebuilds nested dot calls from a list of symbols ordered from the last
  # accessed name to the first. The final (leftmost) symbol becomes a variable.
  defp dot_from_list([{atom, meta, nil}]), do: {atom, meta, Elixir}

  defp dot_from_list([{atom, meta, nil} | rest]),
    do: {{:., meta, [dot_from_list(rest), atom]}, [no_parens: true], []}

  # Applies `fun` to the payload of an `{:ok, term}` tuple; errors pass through.
  defp ok_do({:ok, term}, fun), do: {:ok, fun.(term)}
  defp ok_do({:error, _} = error, _fun), do: error
end