defmodule Archeometer.Query.Term do
@moduledoc """
Restrict and adapt Elixir terms to a subset that is translatable to SQLite
expressions. See [the language reference](https://sqlite.org/lang_expr.html)
The following literals are allowed
- Integers
- Floating point numbers
- Bitstrings
- Booleans
- Lists
Plus some operators and functions
- Boolean operators: `and`, `or`, `not`
- Comparison operators: `==`, `!=`, `<`, `>`, `<=`, `>=`
- Arithmetic operators: `+`, `-`, `*`, `/`
- Arithmetic functions: `round`
- Search functions: `like`
- String functions: `length`, `replace`
- Null checking: `is_nil`, `not is_nil`
- Aggregation functions: `avg`, `count`, `max`, `min`, `sum`
- Subquery expressions: `exists`
- `in` operator
And any kind of dot operator: `m.some.key.or.attribute` to represent table
lookup. For example
round(10 * m.num_lines / m.num_args, 2)
m.cc > 10
like(m.name, "Archeometer.%") and m.name != "Archeometer.Query.Term"
Tuples and maps are allowed but only as a top level construct.
{m.name, m.num_lines, avg(m.functions.cc)} # this is valid
[name, num_lines * 10] # this as well
{m.name, {m.functions.name, m.functions.num_lines}} # this is not
[m.name, {m.num_lines}] # neither is this
"""
@doc """
Validates a quoted expression against the supported query subset.

Returns `{:ok, validated_ast}` when `ast` only uses supported literals,
operators and functions, and `{:error, reason}` otherwise. The validated AST
mostly mirrors the input, except that list literals become
`{:list, [], elements}`, variables and dot lookups become
`{:symb, [], symbols}`, and the metadata of builtin calls is reset to `[]`.
"""
def validate(ast)

# Literals are accepted untouched.
def validate(literal)
    when is_number(literal) or is_bitstring(literal) or is_boolean(literal),
    do: {:ok, literal}

# An escaped fragment (`^` applied to an integer index) is kept verbatim.
def validate({:^, _meta, [index]} = fragment) when is_integer(index), do: {:ok, fragment}

# A block is accepted only when it wraps exactly one expression.
def validate({:__block__, _meta, [inner]}), do: validate(inner)

def validate(elements) when is_list(elements) do
  case Enum.reduce(elements, {:ok, []}, &validate_arg/2) do
    {:ok, reversed} -> {:ok, {:list, [], Enum.reverse(reversed)}}
    failure -> failure
  end
end

# Builtin operators and functions, together with the number of arguments
# each one must receive.
@valid_builtins_arity [
  # one argument
  is_nil: 1,
  not: 1,
  avg: 1,
  count: 1,
  max: 1,
  min: 1,
  sum: 1,
  exists: 1,
  length: 1,
  # two arguments
  round: 2,
  like: 2,
  and: 2,
  or: 2,
  in: 2,
  *: 2,
  /: 2,
  +: 2,
  -: 2,
  ==: 2,
  !=: 2,
  <: 2,
  >: 2,
  <=: 2,
  >=: 2,
  # three arguments
  replace: 3
]
@valid_builtins Keyword.keys(@valid_builtins_arity)

def validate({op, _meta, args}) when op in @valid_builtins and is_list(args) do
  arity = Keyword.fetch!(@valid_builtins_arity, op)

  case Enum.reduce(args, {:ok, []}, &validate_arg/2) do
    {:ok, reversed} ->
      # Arguments are validated first, so an invalid argument is reported
      # before any arity mismatch.
      case length(reversed) do
        given when given > arity -> {:error, {:too_many_args, {op, arity, args}}}
        given when given < arity -> {:error, {:not_enough_args, {op, arity, args}}}
        _exact -> {:ok, {op, [], Enum.reverse(reversed)}}
      end

    failure ->
      failure
  end
end

# A builtin name used as a plain variable, i.e. without an argument list.
def validate({op, _meta, context}) when op in @valid_builtins and is_atom(context),
  do: {:error, {:missing_args, op}}

# Dot operators are left associative in Elixir, but the symbol list has to be
# built from the innermost lookup outwards, hence the dedicated helper.
def validate({{:., _, _}, _, []} = lookup), do: flatten_dots(lookup, [])

# A plain variable becomes a single-element symbol.
def validate({name, meta, context}) when is_atom(context) and is_atom(name),
  do: {:ok, {:symb, [], [{name, meta, nil}]}}

def validate(unsupported), do: {:error, {:invalid_exprs, unsupported}}
# Reducer step: validates `expr` and prepends it to the accumulated list.
# Once the accumulator is no longer `{:ok, list}` it is passed along as is.
defp validate_arg(expr, {:ok, validated}) do
  case validate(expr) do
    {:ok, valid} -> {:ok, [valid | validated]}
    failure -> failure
  end
end

defp validate_arg(_expr, failure), do: failure
# Rebuilds a (left-nested) dot-call AST from a list of `{name, meta, nil}`
# symbols given outermost attribute first. The last symbol becomes the
# root variable.
defp dot_from_list([{name, meta, nil} | parents]) do
  case parents do
    [] ->
      {name, meta, Elixir}

    _ ->
      {{:., meta, [dot_from_list(parents), name]}, [no_parens: true], []}
  end
end
# Walks a chain of dot lookups from the outermost call inwards, collecting
# every attribute in `symbs`, until the root variable is reached. Anything
# that is neither a dot lookup nor a variable is rejected.
defp flatten_dots(expr, symbs) do
  case expr do
    {{:., meta, [inner, name]}, _outer_meta, []} when is_atom(name) ->
      flatten_dots(inner, [{name, meta, nil} | symbs])

    {name, meta, context} when is_atom(context) and is_atom(name) ->
      {:ok, {:symb, [], [{name, meta, nil} | symbs]}}

    unsupported ->
      {:error, {:invalid_symb, unsupported}}
  end
end
# Applies `fun` to the payload of an `{:ok, _}` tuple and re-wraps the result;
# `{:error, _}` tuples are returned unchanged.
defp ok_do({:error, _reason} = failure, _fun), do: failure
defp ok_do({:ok, value}, fun), do: {:ok, fun.(value)}
# Builtins rendered with call syntax: `name(arg, ...)`.
@sqlite_functions ~w(avg count max min sum round length replace)a
# Operators rendered before their single operand.
@sqlite_prefix_operators ~w(not)a
# Operators rendered in front of a parenthesised subquery string.
@sqlite_subquery_operators ~w(exists)a
# Binary operators rendered between their two operands.
@sqlite_infix_operators ~w(and or like in + - * / == != < > <= >=)a
# Operators rendered after their single operand.
@sqlite_postfix_operators ~w(is_nil not_is_nil)a
@doc """
Takes a validated AST and tries to transform it into SQL-compatible iodata.

The goal is to obtain valid SQL just by calling `IO.iodata_to_binary/1` on
the result.

Returns `{:ok, iodata}` on success. A three-element tuple that matches none
of the supported forms yields `{:error, {:invalid_operator, ast}}`, and an
error produced while rendering a sub-expression is returned as is.

String literals are wrapped in single quotes and every single quote they
contain is doubled, which is how SQLite escapes quotes inside string
literals.
"""
def to_iodata(ast)
def to_iodata(i) when is_integer(i), do: {:ok, Integer.to_string(i)}
def to_iodata(f) when is_number(f), do: {:ok, Float.to_string(f)}

# Embedded single quotes must be doubled, otherwise the literal would end
# early and the remainder of the string would be interpreted as SQL.
def to_iodata(s) when is_bitstring(s),
  do: {:ok, "'" <> String.replace(s, "'", "''") <> "'"}

def to_iodata(p) when is_boolean(p), do: {:ok, Atom.to_string(p)}

def to_iodata({:symb, _meta, symbs}),
  do: {:ok, Enum.map_join(symbs, ".", &elem(&1, 0))}

def to_iodata({:list, _meta, args}) do
  args
  |> to_iodata_arglist()
  |> ok_do(fn io_args -> ["(", io_args, ")"] end)
end

# NOT IS NULL is illegal SQL, so this extra case is hardcoded
def to_iodata({:not, _, [{:is_nil, _, [arg]}]}),
  do: to_iodata({:not_is_nil, [], [arg]})

def to_iodata({op, _meta, args}) when op in @sqlite_functions do
  args
  |> to_iodata_arglist()
  |> ok_do(fn io_args -> [builtin_str(op), "(", io_args, ")"] end)
end

def to_iodata({op, _meta, [arg]}) when op in @sqlite_prefix_operators do
  arg
  |> to_iodata()
  |> ok_do(fn io_arg -> [builtin_str(op), " ", io_arg] end)
end

# The subquery is expected to be already rendered SQL, so it is inserted
# verbatim instead of being quoted as a string literal.
def to_iodata({op, _meta, [arg]})
    when op in @sqlite_subquery_operators and is_bitstring(arg) do
  {:ok, [builtin_str(op), " ", "(", arg, ")"]}
end

def to_iodata({op, _meta, [arg0, arg1]}) when op in @sqlite_infix_operators do
  with {:ok, io_arg0} <- to_iodata(arg0),
       {:ok, io_arg1} <- to_iodata(arg1) do
    {:ok, ["(", io_arg0, " ", builtin_str(op), " ", io_arg1, ")"]}
  end
end

def to_iodata({op, _meta, [arg]}) when op in @sqlite_postfix_operators do
  arg
  |> to_iodata()
  |> ok_do(fn io_arg -> [io_arg, " ", builtin_str(op)] end)
end

def to_iodata({_op, _meta, _args} = other_ast),
  do: {:error, {:invalid_operator, other_ast}}
# Renders every argument and joins the pieces with commas. The first failing
# argument aborts the rendering and its error is returned.
defp to_iodata_arglist(args) do
  rendered = Enum.reduce(args, {:ok, []}, &to_iodata_arg/2)

  ok_do(rendered, fn reversed ->
    Enum.intersperse(Enum.reverse(reversed), ",")
  end)
end
# Reducer step: renders `arg` and prepends it to the accumulated pieces,
# unless a previous argument has already failed.
defp to_iodata_arg(_arg, {:error, _reason} = failure), do: failure

defp to_iodata_arg(arg, {:ok, rendered}) do
  ok_do(to_iodata(arg), fn piece -> [piece | rendered] end)
end
# Whenever possible an operator is rendered with its own name. The builtins
# whose SQL spelling differs are listed here and get one generated clause each.
for {op, sql} <- [
      not: "NOT",
      and: "AND",
      or: "OR",
      like: "LIKE",
      is_nil: "ISNULL",
      not_is_nil: "NOTNULL",
      exists: "EXISTS"
    ] do
  defp builtin_str(unquote(op)), do: unquote(sql)
end

defp builtin_str(op) when op in @valid_builtins, do: Atom.to_string(op)
@doc """
Takes a validated AST and rebuilds a regular Elixir AST from it.

Most of the original metadata is long gone by this point, but the structure
is preserved: `{:symb, _, symbols}` nodes become dot lookups again,
`{:list, _, elements}` nodes become plain lists, and every other node is
rebuilt with its arguments converted recursively.
"""
def to_ast(validated_ast)

# Numbers, bitstrings and atoms (booleans included) stand for themselves.
def to_ast(literal)
    when is_number(literal) or is_bitstring(literal) or is_atom(literal),
    do: literal

def to_ast({:symb, _meta, symbols}) do
  symbols
  |> Enum.reverse()
  |> dot_from_list()
end

def to_ast({:list, _meta, elements}) do
  for element <- elements, do: to_ast(element)
end

def to_ast({op, meta, args}) do
  {op, meta, for(arg <- args, do: to_ast(arg))}
end

def to_ast({left, right}), do: {to_ast(left), to_ast(right)}
end