lib/archeometer/query.ex

defmodule Archeometer.Query do
  @moduledoc """
  This module exposes a `from/2` macro to create queries for the Archeometer
  database. The recommended way to use it is to import the whole module. Let's
  see a simple example!

      import Archeometer.Query
      alias Archeometer.Schema.Module

      # then you can use it to make queries!
      Archeometer.Repo.all(
        from m in Module,
          select: m.name,
          where: m.num_lines > 50
      )

  The first argument must be an expression in the form of `m in Module`, where
  `m` will be the prefix used for all the fields in the query, and `Module` is
  a module implementing `Archeometer.Schema`.

  The available schemas are
  - `Archeometer.Schema.Module`
  - `Archeometer.Schema.Function`
  - `Archeometer.Schema.Macro`
  - `Archeometer.Schema.XRef`
  - `Archeometer.Schema.Application`

  Each schema specifies its fields and how it is related to the other schemas,
  like in an SQL Database!

  ## Query keywords

  The rest of the arguments specify how to construct the query. Each option is
  mapped to an SQL keyword. They are

  - `:select`
  - `:where`
  - `:order_by`
  - `:group_by`
  - `:having`
  - `:limit`

  Each option accepts either an `Archeometer.Query.Term` expression or an
  `Archeometer.Query.Term.Container` container (tuples, lists or maps).
  For keyword lists and maps, the keys can be used as aliases in the rest of
  the query.

  In the case of `order_by`, the keys are used to determine the order.

      import Archeometer.Query
      alias Archeometer.Schema.Module

      from m in Module,
        select: [app: m.application.name, avg_cc: avg(m.cc)],
        where: m.application == "archeometer",
        order_by: [desc: sum(m.num_lines)],
        group_by: m.application.name,
        having: avg_cc > 5,
        limit: 10

  ## Fields and tables

  Only fields can be selected. Trying to select a reference to another table
  will return an error. For example

      import Archeometer.Query
      alias Archeometer.Schema.Module

      # this will fail because `functions` is a table
      from m in Module, select: m.functions

      # this works because `name` is a field
      from m in Module, select: m.functions.name

  ## More Examples

  - `like` operator with named columns

          iex> from f in Archeometer.Schema.Function,
          ...> select: [name: f.name, arity: f.num_args],
          ...> where: arity > 3,
          ...> where: like(f.module.name, "Kamaji.Web.%")

  - Boolean operators

          iex> from m in Archeometer.Schema.Module,
          ...> where: like(m.name, "%") > 5 and sum(m.functions.cc) > 5 or not m.num_lines < 500,
          ...> select: [m.name, m.application.name]

  - Grouping with nested aggregation

          iex> from m in Archeometer.Schema.Module,
          ...> group_by: m.id,
          ...> having: max_cc > 5,
          ...> select: [module: m.name, max_cc: max(m.functions.cc)]

          iex> from m in Archeometer.Schema.Module,
          ...> group_by: name,
          ...> select: [name: m.name, num_deps: count(m.out_refs.callee.name)]

          iex> from m in Archeometer.Schema.Module,
          ...> group_by: name,
          ...> select: [name: m.name, num_usages: count(m.in_refs.caller.name)]

  - `not` and `is_nil`

          iex> from f in Archeometer.Schema.Function,
          ...> where: not is_nil(f.coverage),
          ...> select: [f.module.name, f.name, f.num_args]

  - A more complex query

          iex> from m in Archeometer.Schema.Module,
          ...> select: [name: m.name, acc_cc: sum(m.functions.cc)],
          ...> group_by: name,
          ...> where: m.num_lines > 100 and m.coverage < 0.9,
          ...> where: not exists(m.macros.name),
          ...> order_by: [desc: acc_cc],
          ...> limit: 10

  """

  alias Archeometer.Query.{JoinExpr, Builder}

  # Fields of a query under construction.
  defstruct [
    # Table the query starts from; `initial_query_for/2` stores the
    # `JoinExpr.Table` of the queried schema here.
    source: nil,
    # Clauses collected from the `from/2` options, one field per keyword.
    select: [],
    distinct: false,
    where: [],
    order_by: [],
    group_by: [],
    having: [],
    limit: nil,
    # Known tables, keyed by the alias as a string (`%{dest: table}` values).
    tables: %{},
    # Known aliases, keyed by the alias atom (e.g. `{:table_alias, opts}`).
    aliases: %{},
    # Bindings handed over by `from/2`.
    bindings: []
  ]

  @doc """
  Builds a query from a source expression and a keyword list of options.

  The first argument must have the form `prefix in SchemaModule`; `prefix` is
  the name used to reference the source table in the rest of the query. The
  options (`:select`, `:where`, ...) are escaped at compile time by
  `Archeometer.Query.Builder.escape/1` and applied at runtime by
  `Archeometer.Query.Builder.build/2`, whose result is the value of the
  expanded expression.

  The generated code references `Archeometer.Query.initial_query_for/2` by its
  full name, so the macro works when the module is imported as a whole, when
  only `from/2` is imported, and when it is called fully qualified after a
  `require`.
  """
  defmacro from({:in, _meta, [{mod_alias, _, _}, module]}, opts) do
    {escaped_opts, bindings} = Builder.escape(opts)

    quote do
      Archeometer.Query.Builder.build(
        {:ok,
         # Fully qualified on purpose: an unqualified call inside `quote` is
         # resolved in the caller's module, not in this one.
         Archeometer.Query.initial_query_for(
           unquote(module),
           alias: unquote(mod_alias),
           bindings: unquote(bindings)
         )},
        unquote(Macro.escape(escaped_opts))
      )
    end
  end

  @doc """
  Returns the empty query for the schema `module`.

  The option list must start with the `:alias` entry, which names the prefix
  used for the source table. The remaining options may carry `:bindings`
  (defaults to `[]`).

  The resulting struct has the schema table as `source`, registered both in
  `tables` (under the alias as a string) and in `aliases` (under the alias
  atom).
  """
  def initial_query_for(module, [{:alias, mod_alias} | other_opts]) do
    query_bindings = Keyword.get(other_opts, :bindings, [])
    first_alias_id = JoinExpr.next_alias_id(%{})

    source_table = %JoinExpr.Table{
      module: module,
      alias: first_alias_id,
      key: module.__archeometer_keys__()
    }

    table_name = Atom.to_string(mod_alias)
    alias_entry = {:table_alias, [alias?: true, key?: true, dest: module]}

    %__MODULE__{
      source: source_table,
      tables: %{table_name => %{dest: source_table}},
      aliases: %{mod_alias => alias_entry},
      bindings: query_bindings
    }
  end

  @doc """
  Returns the alias under which the source table of `query` was registered.

  Looks through `query.aliases` for the first table alias whose destination is
  the module of `query.source`. Returns `nil` when there is none.
  """
  def root_prefix(%__MODULE__{} = query) do
    root = query.source.module

    Enum.find_value(query.aliases, fn entry ->
      case entry do
        {prefix, {:table_alias, [alias?: true, key?: true, dest: ^root]}} -> prefix
        _other -> nil
      end
    end)
  end

  @doc """
  Intersects the rows of two query results.

  Both results must have equal `headers`; otherwise
  `{:error, "Columns are not the same"}` is returned. On success the return
  value is an `Archeometer.Repo.Result` with the headers of `result1` and the
  rows present in both results, without duplicates.
  """
  def intersection(result1, result2) do
    if result1.headers == result2.headers do
      common_rows =
        result1.rows
        |> MapSet.new()
        |> MapSet.intersection(MapSet.new(result2.rows))
        |> MapSet.to_list()

      %Archeometer.Repo.Result{headers: result1.headers, rows: common_rows}
    else
      {:error, "Columns are not the same"}
    end
  end

  defimpl Inspect do
    # Custom `Inspect` implementation: a query is rendered as the
    # `Archeometer.Query.from(...)` call-like text assembled in `inspect/2`
    # below, rather than as a raw struct dump.
    import Inspect.Algebra

    alias Archeometer.Query.Term
    alias Archeometer.Query

    # Replaces every map whose `:select` holds exactly one single-element list
    # (presumably a nested sub-query) with that lone selected expression, so
    # it can be rendered inline.
    defp replace_subqueries(ast) do
      Macro.prewalk(ast, fn
        %{select: [[sel]]} -> sel
        other -> other
      end)
    end

    # A `{name, expr}` pair is rendered as `name: <expr>`.
    defp inspect_expr({name, expr}) do
      space(concat(Atom.to_string(name), ":"), inspect_expr(expr))
    end

    # Anything else is converted to Elixir AST and printed as source text.
    defp inspect_expr(expr) do
      expr |> replace_subqueries() |> Term.to_ast() |> Macro.to_string()
    end

    # Renders a list of expressions as `[a, b, ...]`.
    defp inspect_expr_list(list) do
      content =
        list
        |> Enum.map(&inspect_expr/1)
        |> Enum.intersperse(", ")
        |> concat()

      concat(["[", content, "]"])
    end

    # Renders one `section: value` entry; list values are bracketed.
    defp inspect_section(section, terms) when is_list(terms) do
      space(
        concat(section, ":"),
        inspect_expr_list(terms)
      )
    end

    # An unset value yields no entry (the empty list vanishes when the parts
    # are flattened in `inspect/2`).
    defp inspect_section(_section, nil), do: []

    defp inspect_section(section, expr),
      do: space(concat(section, ":"), inspect_expr(expr))

    # Renders one entry per stored clause, since a section may occur several
    # times in a query (e.g. two `where:` options).
    defp inspect_sections(section, exprs),
      do: Enum.map(exprs, &inspect_section(section, &1))

    # Renders the `prefix in Module` head of the query.
    defp inspect_source(query) do
      space(
        space(query |> Query.root_prefix() |> Atom.to_string(), "in"),
        inspect(query.source.module)
      )
    end

    # Builds the final document: the source followed by every non-empty
    # section, separated by commas. The options argument is ignored.
    def inspect(query, _opts) do
      body =
        [
          [inspect_source(query)],
          inspect_sections("select", query.select),
          inspect_sections("where", query.where),
          inspect_sections("group_by", query.group_by),
          inspect_sections("having", query.having),
          # `order_by` used to be left out, so queries differing only in
          # their ordering were rendered identically.
          # NOTE(review): assumes `order_by` entries are stored in the same
          # shape as the other sections (e.g. `desc: expr` pairs) - confirm
          # against the builder.
          inspect_sections("order_by", query.order_by),
          [inspect_section("limit", query.limit)],
          [inspect_section("distinct", query.distinct)]
        ]
        |> List.flatten()
        |> Enum.intersperse(", ")
        |> concat()

      concat(["Archeometer.Query.from(", body, ")"])
    end
  end
end