lib/explorer/query.ex

defmodule Explorer.Query do
  @moduledoc """
  High-level query for Explorer.

  Queries convert regular Elixir code into efficient dataframe
  operations. Inside a query, only a limited set of Series
  operations is available, and identifiers, such as `strs` and
  `nums`, represent dataframe column names:

      iex> df = Explorer.DataFrame.new(strs: ["a", "b", "c"], nums: [1, 2, 3])
      iex> Explorer.DataFrame.filter(df, nums > 2)
      #Explorer.DataFrame<
        Polars[1 x 2]
        strs string ["c"]
        nums integer [3]
      >

  If you want to access variables defined outside of the query
  or get access to all Elixir constructs, you must use `^`:

      iex> min = 2
      iex> df = Explorer.DataFrame.new(strs: ["a", "b", "c"], nums: [1, 2, 3])
      iex> Explorer.DataFrame.filter(df, nums > ^min)
      #Explorer.DataFrame<
        Polars[1 x 2]
        strs string ["c"]
        nums integer [3]
      >

      iex> min = 2
      iex> df = Explorer.DataFrame.new(strs: ["a", "b", "c"], nums: [1, 2, 3])
      iex> Explorer.DataFrame.filter(df, nums < ^if(min > 0, do: 10, else: -10))
      #Explorer.DataFrame<
        Polars[3 x 2]
        strs string ["a", "b", "c"]
        nums integer [1, 2, 3]
      >

  All operations from `Explorer.Series` are imported inside queries.
  This module also provides operators to use in queries, which are
  also imported into queries.

  ## Implementation details

  Queries simply become lazy dataframe operations at runtime.
  For example, the following query

      Explorer.DataFrame.filter(df, nums > 2)

  is equivalent to

      Explorer.DataFrame.filter_with(df, fn df -> Explorer.Series.greater(df["nums"], 2) end)

  This means that, whenever you want to generate queries programmatically,
  you can fall back to the regular `_with` APIs.
  """

  # Every `{name, arity}` pair exported by Kernel, functions and macros alike.
  kernel_exports = Kernel.__info__(:functions) ++ Kernel.__info__(:macros)

  # Kernel entries that stay available inside a query once `query/1`
  # narrows the Kernel import.
  kernel_candidates = [
    @: 1,
    |>: 2,
    dbg: 0,
    dbg: 1,
    dbg: 2,
    sigil_c: 2,
    sigil_C: 2,
    sigil_D: 2,
    sigil_N: 2,
    sigil_s: 2,
    sigil_S: 2,
    sigil_w: 2,
    sigil_W: 2,
    tap: 2,
    then: 2
  ]

  # Intersect the candidates with what Kernel really exports, keeping the
  # order above, so the `import Kernel, only: ...` emitted by `query/1` only
  # ever names entries that exist in the Kernel this module is compiled against.
  @kernel_only Enum.filter(kernel_candidates, &Enum.member?(kernel_exports, &1))

  @doc """
  Turns a query expression into an anonymous function.

  `Explorer.DataFrame.filter/2` and friends use this macro as
  their entry point for converting queries into anonymous
  functions. The resulting function takes the dataframe as its
  single argument. See the moduledoc for more information.
  """
  defmacro query(expression) do
    frame = Macro.unique_var(:df, __MODULE__)
    {rewritten, pins} = traverse(expression, [], %{df: frame})

    # traverse/3 prepends each pin assignment as it finds it, so reverse the
    # accumulator to emit the assignments in source order.
    assignments = Enum.reverse(pins)

    quote do
      fn unquote(frame) ->
        # The pinned expressions are bound before the imports below, which
        # only take effect for the code that follows them.
        unquote_splicing(assignments)
        import Kernel, only: unquote(@kernel_only)
        import Explorer.Query, except: [query: 1]
        import Explorer.Series
        unquote(rewritten)
      end
    end
  end

  # Walks a quoted query and returns `{rewritten_ast, pins}`.
  #
  # `pins` is the accumulator of `pin = expr` assignments collected so far
  # (most recent first) and `state.df` is the variable holding the dataframe.

  # `^expr`: replace the pin with a fresh variable and record the assignment
  # that binds it. The pinned expression itself is left untouched.
  defp traverse({:^, meta, [pinned]}, pins, _state) do
    placeholder = Macro.unique_var(:pin, __MODULE__)
    assignment = {:=, meta, [placeholder, pinned]}
    {placeholder, [assignment | pins]}
  end

  # A bare identifier is a column name: rewrite it to
  # `Explorer.DataFrame.pull(df, name)`.
  defp traverse({name, meta, context}, pins, state)
       when is_atom(name) and is_atom(context) do
    pull = {:., meta, [Explorer.DataFrame, :pull]}
    {{pull, meta, [state.df, name]}, pins}
  end

  # Any other call: refuse forms flagged by special_form_defines_var?/2,
  # then rewrite the callee and the arguments, in that order.
  defp traverse({head, meta, tail}, pins, state) do
    unsupported? = is_atom(head) and is_list(tail) and special_form_defines_var?(head, tail)

    if unsupported? do
      raise ArgumentError, "#{head}/#{length(tail)} is not currently supported in Explorer.Query"
    end

    {head, pins} = traverse(head, pins, state)
    {tail, pins} = traverse(tail, pins, state)
    {{head, meta, tail}, pins}
  end

  # Two-element tuples are their own AST; visit both sides left to right by
  # reusing the list clause below.
  defp traverse({first, second}, pins, state) do
    {[first, second], pins} = traverse([first, second], pins, state)
    {{first, second}, pins}
  end

  # Lists are rewritten element by element, threading the pins through.
  defp traverse(items, pins, state) when is_list(items) do
    Enum.map_reduce(items, pins, fn item, acc -> traverse(item, acc, state) end)
  end

  # Everything else is returned unchanged.
  defp traverse(literal, pins, _state) do
    {literal, pins}
  end

  # Tells whether `form`, called with the given argument list, is one of the
  # constructs that traverse/3 rejects. Clauses are grouped by the shape of
  # the argument list: exactly two, exactly one, then one or more arguments.
  defp special_form_defines_var?(form, [_, _]) when form in [:=, :case], do: true
  defp special_form_defines_var?(form, [_]) when form in [:cond, :receive, :try], do: true
  defp special_form_defines_var?(form, [_ | _]) when form in [:for, :with], do: true
  defp special_form_defines_var?(_form, _args), do: false

  # Binary operators and the `Explorer.Series` function each one forwards to.
  # `and` and `or` are not listed: they are sent as is to queries.
  operator_delegates = [
    ==: :equal,
    !=: :not_equal,
    >: :greater,
    >=: :greater_equal,
    <: :less,
    <=: :less_equal,
    +: :add,
    -: :subtract,
    *: :multiply,
    /: :divide,
    **: :pow
  ]

  # Define one public two-argument function per operator at compile time.
  for {operator, series_fun} <- operator_delegates do
    @doc """
    Delegate to `Explorer.Series.#{series_fun}/2`.
    """
    def unquote(operator)(left, right) do
      Explorer.Series.unquote(series_fun)(left, right)
    end
  end

  @doc """
  Negates its argument.

  A number is negated with `Kernel.-/1`; a series is multiplied
  by `-1` through `Explorer.Series.multiply/2`.
  """
  def -number when is_number(number) do
    # Call Kernel explicitly: this module defines its own `-/1`.
    Kernel.-(number)
  end

  def -series when is_struct(series, Explorer.Series) do
    minus_one = Kernel.-(1)
    Explorer.Series.multiply(series, minus_one)
  end

  @doc """
  Returns its argument unchanged.

  Accepts a number or an `Explorer.Series` struct.
  """
  def +number when is_number(number) do
    number
  end

  def +series when is_struct(series, Explorer.Series) do
    series
  end
end