
defmodule Archeometer.Query.Term do
  @moduledoc """
  Restrict and adapt Elixir terms to a subset that is translatable to SQLite
  expressions. See the SQLite expression language reference for the target
  syntax.

  The following literals are allowed
  - Integers
  - Floating point numbers
  - Bitstrings
  - Booleans
  - Lists

  Plus some operators and functions
  - Boolean operators: `and`, `or`, `not`
  - Comparison operators: `==`, `!=`, `<`, `>`, `<=`, `>=`
  - Arithmetic operators: `+`, `-`, `*`, `/`
  - Arithmetic functions: `round`
  - Other functions: `length`, `replace`
  - Search functions: `like`
  - Null checking: `is_nil`, `not is_nil`
  - Aggregation functions: `avg`, `count`, `max`, `min`, `sum`
  - Subquery expressions: `exists`
  - `in` operator

  And any kind of dot operator: `m.some.key.or.attribute` to represent table
  lookup. For example

      max(10 * m.num_args) > 10

      like(m.name, "Archeometer.%") and m.name != "Archeometer.Query.Term"

  Tuples and maps are allowed but only as a top level construct.

      {m.name, m.num_lines, avg(m.functions.num_lines)} # this is valid
      [name, num_lines * 10] # this as well

      {m.name, {m.num_lines, m.functions.num_lines}} # this is not
      [m.name, {m.num_lines}] # neither is this
  """

  # Every builtin operator/function that may appear in a term, with the exact
  # number of arguments it must be called with.
  @valid_builtins_arity [
    is_nil: 1,
    not: 1,
    avg: 1,
    count: 1,
    max: 1,
    min: 1,
    sum: 1,
    exists: 1,
    length: 1,
    round: 2,
    like: 2,
    and: 2,
    or: 2,
    in: 2,
    *: 2,
    /: 2,
    +: 2,
    -: 2,
    ==: 2,
    !=: 2,
    <: 2,
    >: 2,
    <=: 2,
    >=: 2,
    replace: 3
  ]

  @valid_builtins Keyword.keys(@valid_builtins_arity)

  # How each builtin is rendered in SQL: `f(a, b)`, `OP a`, `OP (subquery)`,
  # `(a OP b)` or `a OP` respectively.
  @sqlite_functions [:avg, :count, :max, :min, :sum, :round, :length, :replace]
  @sqlite_prefix_operators [:not]
  @sqlite_subquery_operators [:exists]
  @sqlite_infix_operators [:like, :and, :or, :in, :*, :/, :+, :-, :==, :!=, :<, :>, :<=, :>=]
  @sqlite_postfix_operators [:is_nil, :not_is_nil]

  @doc """
  Takes an AST and returns a validated AST.

  For the most part the result is identical to the original, except that dot
  operators are replaced by annotated `{:symb, [], lookups}` nodes and list
  literals by `{:list, [], elements}` nodes.

  Returns `{:ok, validated_ast}` or `{:error, {reason, details}}`, where
  `reason` is one of `:too_many_args`, `:not_enough_args`, `:missing_args`,
  `:invalid_symb` or `:invalid_exprs`.
  """
  @spec validate(Macro.t()) :: {:ok, term()} | {:error, {atom(), term()}}
  def validate(ast)

  # literals
  def validate(i) when is_integer(i), do: {:ok, i}
  def validate(f) when is_number(f), do: {:ok, f}
  def validate(s) when is_bitstring(s), do: {:ok, s}
  def validate(p) when is_boolean(p), do: {:ok, p}

  # escaped fragment
  def validate({:^, _, [idx]} = ast) when is_integer(idx), do: {:ok, ast}

  # allow blocks only if they have a single element
  def validate({:__block__, _, [ast]}), do: validate(ast)

  def validate(ast) when is_list(ast) do
    with {:ok, exprs} <- Enum.reduce(ast, {:ok, []}, &validate_arg/2) do
      {:ok, {:list, [], Enum.reverse(exprs)}}
    end
  end

  # builtin operators
  def validate({op, _meta, args}) when op in @valid_builtins and is_list(args) do
    arity = Keyword.fetch!(@valid_builtins_arity, op)

    # Arguments are validated first, so an invalid argument is reported in
    # preference to an arity mismatch.
    with {:ok, exprs} <- Enum.reduce(args, {:ok, []}, &validate_arg/2) do
      case length(exprs) do
        n when n > arity -> {:error, {:too_many_args, {op, arity, args}}}
        n when n < arity -> {:error, {:not_enough_args, {op, arity, args}}}
        _ -> {:ok, {op, [], Enum.reverse(exprs)}}
      end
    end
  end

  # a builtin name used as a bare variable, i.e. without an argument list
  def validate({op, _meta, module}) when op in @valid_builtins and is_atom(module) do
    {:error, {:missing_args, op}}
  end

  # dot operators are left associative in Elixir, but we need right
  # associativity, so we need to do a little bit of juggling...
  def validate({{:., _, _}, _, []} = lookup), do: flatten_dots(lookup, [])

  def validate({atom, meta, module}) when is_atom(module) and is_atom(atom),
    do: {:ok, {:symb, [], [{atom, meta, nil}]}}

  def validate(other), do: {:error, {:invalid_exprs, other}}

  # Reducer step: validates `expr` and prepends it to the accumulated (reversed)
  # list, or propagates the first error untouched.
  defp validate_arg(expr, acc) do
    with {:ok, exprs} <- acc,
         {:ok, valid_expr} <- validate(expr) do
      {:ok, [valid_expr | exprs]}
    end
  end

  # Rebuilds a chain of dot operators. The list head becomes the OUTERMOST
  # field access, so the list must be ordered innermost-last.
  defp dot_from_list([{atom, meta, nil}]), do: {atom, meta, Elixir}

  defp dot_from_list([{atom, meta, nil} | rest]),
    do: {{:., meta, [dot_from_list(rest), atom]}, [no_parens: true], []}

  # Unrolls nested dot calls into a flat list ordered from the root variable
  # to the last field: `m.a.b` becomes `[m, a, b]`.
  defp flatten_dots({{:., meta, [subexpr, atom]}, _meta_outer, []}, symbs)
       when is_atom(atom),
       do: flatten_dots(subexpr, [{atom, meta, nil} | symbs])

  defp flatten_dots({atom, meta, module}, symbs) when is_atom(module) and is_atom(atom),
    do: {:ok, {:symb, [], [{atom, meta, nil} | symbs]}}

  defp flatten_dots(other, _), do: {:error, {:invalid_symb, other}}

  # Applies `fun` to the payload of an `{:ok, _}` tuple; errors pass through.
  defp ok_do({:ok, term}, fun), do: {:ok, fun.(term)}
  defp ok_do({:error, _} = error, _fun), do: error

  @doc """
  Takes a validated AST and tries to transform it into SQL-compatible iodata.

  The goal is to obtain valid SQL just by calling `IO.iodata_to_binary/1` on
  the result. Returns `{:ok, iodata}` or
  `{:error, {:invalid_operator, ast}}` for nodes with no SQL rendering.
  """
  @spec to_iodata(term()) :: {:ok, iodata()} | {:error, {:invalid_operator, term()}}
  def to_iodata(ast)

  def to_iodata(i) when is_integer(i), do: {:ok, Integer.to_string(i)}
  def to_iodata(f) when is_number(f), do: {:ok, Float.to_string(f)}

  # SQL string literals escape an embedded single quote by doubling it;
  # without this a value such as "it's" would yield broken (or injectable) SQL.
  def to_iodata(s) when is_bitstring(s),
    do: {:ok, "'" <> String.replace(s, "'", "''") <> "'"}

  def to_iodata(p) when is_boolean(p), do: {:ok, Atom.to_string(p)}

  def to_iodata({:symb, _meta, symbs}),
    do: {:ok, Enum.map_join(symbs, ".", &elem(&1, 0))}

  def to_iodata({:list, _meta, args}) do
    args
    |> to_iodata_arglist()
    |> ok_do(fn io_args -> ["(", io_args, ")"] end)
  end

  # NOT IS NULL is illegal SQL, so this extra case is hardcoded
  def to_iodata({:not, _, [{:is_nil, _, [arg]}]}),
    do: to_iodata({:not_is_nil, [], [arg]})

  def to_iodata({op, _meta, args}) when op in @sqlite_functions do
    args
    |> to_iodata_arglist()
    |> ok_do(fn io_args -> [builtin_str(op), "(", io_args, ")"] end)
  end

  def to_iodata({op, _meta, [arg]}) when op in @sqlite_prefix_operators do
    arg
    |> to_iodata()
    |> ok_do(fn io_arg -> [builtin_str(op), " ", io_arg] end)
  end

  # the subquery is expected to be already-rendered SQL, so it is embedded raw
  def to_iodata({op, _meta, [arg]})
      when op in @sqlite_subquery_operators and is_bitstring(arg) do
    {:ok, [builtin_str(op), " ", "(", arg, ")"]}
  end

  def to_iodata({op, _meta, [arg0, arg1]}) when op in @sqlite_infix_operators do
    with {:ok, io_arg0} <- to_iodata(arg0),
         {:ok, io_arg1} <- to_iodata(arg1) do
      {:ok, ["(", io_arg0, " ", builtin_str(op), " ", io_arg1, ")"]}
    end
  end

  def to_iodata({op, _meta, [arg]}) when op in @sqlite_postfix_operators do
    arg
    |> to_iodata()
    |> ok_do(fn io_arg -> [io_arg, " ", builtin_str(op)] end)
  end

  def to_iodata({_op, _meta, _args} = other_ast),
    do: {:error, {:invalid_operator, other_ast}}

  # Renders every argument and joins them with commas, stopping the
  # accumulation at the first error.
  defp to_iodata_arglist(args) do
    args
    |> Enum.reduce({:ok, []}, &to_iodata_arg/2)
    |> ok_do(fn io_args -> io_args |> Enum.reverse() |> Enum.intersperse(",") end)
  end

  defp to_iodata_arg(arg, {:ok, args}) do
    arg
    |> to_iodata()
    |> ok_do(fn io_arg -> [io_arg | args] end)
  end

  defp to_iodata_arg(_arg, {:error, _} = error), do: error

  # as far as possible, operators have the same representation
  # if not possible the string representation is hardcoded here
  defp builtin_str(:not), do: "NOT"
  defp builtin_str(:and), do: "AND"
  defp builtin_str(:or), do: "OR"
  defp builtin_str(:like), do: "LIKE"
  defp builtin_str(:is_nil), do: "ISNULL"
  defp builtin_str(:not_is_nil), do: "NOTNULL"
  defp builtin_str(:exists), do: "EXISTS"
  defp builtin_str(op) when op in @valid_builtins, do: Atom.to_string(op)

  @doc """
  Takes a validated AST and tries to transform it back into its original state.

  Most of the metadata is long gone by this point, but at least the original
  structure is preserved.
  """
  @spec to_ast(term()) :: Macro.t()
  def to_ast(validated_ast)

  def to_ast(i) when is_integer(i), do: i
  def to_ast(f) when is_number(f), do: f
  def to_ast(s) when is_bitstring(s), do: s
  def to_ast(p) when is_boolean(p), do: p
  def to_ast(a) when is_atom(a), do: a

  # Lookups are stored root-first (`[m, a, b]`), while `dot_from_list/1` treats
  # the head as the outermost field, hence the reversal before rebuilding.
  def to_ast({:symb, _, elems}) do
    elems
    |> Enum.reverse()
    |> dot_from_list()
  end

  def to_ast({:list, _, args}), do: Enum.map(args, &to_ast/1)

  def to_ast({op, meta, args}), do: {op, meta, Enum.map(args, &to_ast/1)}
  def to_ast({a0, a1}), do: {to_ast(a0), to_ast(a1)}
end