# lib/decant.ex

defmodule Decant do
  @moduledoc """
  Tokenized, multi-field `ILIKE`/`LIKE` search compiled to a composable
  `Ecto.Query.dynamic/2`.

  `Decant` turns a free-text search string into a boolean expression over a set
  of columns, then hands it back as a `dynamic` you splice into your own query:

      filter =
        Decant.dynamic(term,
          fields: [
            {:customer, :email},
            {:customer, :first_name},
            {:customer, :last_name},
            {:order, :display_id, cast: :string}
          ]
        )

      from q in query, where: ^filter

  ## The shape

  A search string is split into tokens (words). The default logic is:

      every token must match SOMEWHERE   (token_logic: :and)
        token matches if ANY field hits  (field_logic: :or)

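  so `"jane gmail"` matches a row where `jane` appears in at least one listed
  field and `gmail` appears in at least one listed field (not necessarily the
  same one) — e.g. an email containing `jane` and `gmail`, or a first name
  containing `jane` plus an email containing `gmail`. Flip `:token_logic` /
  `:field_logic` to get "or search" or "match-all-fields" behaviour.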

  ## Bindings

  `Decant` is binding-agnostic: it references columns through **named bindings**
  (`as:`), so the same field spec works no matter the join shape of the host
  query. Every searchable source must declare an `as:`:

      from o in Order, as: :order,
        join: c in assoc(o, :customer), as: :customer

  ## Options

    * `:fields` (required) — list of field specs. Each is `{binding, column}` or
      `{binding, column, field_opts}`. `field_opts`:
        * `:cast` — `:string` wraps the column in `CAST(? AS TEXT)` so non-text
          columns (integer ids, enums) are searchable.
        * `:match` — per-field override of the global `:match` mode.
    * `:match` — `:contains` (default), `:prefix`, `:suffix`, or `:exact`.
    * `:token_logic` — `:and` (default) or `:or`. How tokens combine.
    * `:field_logic` — `:or` (default) or `:and`. How fields combine per token.
    * `:case` — `:insensitive` (default, `ILIKE`) or `:sensitive` (`LIKE`).
    * `:escape` — escape `%`, `_`, `\\` in user input so they are matched
      literally instead of acting as wildcards. Defaults to `true`.
    * `:on_blank` — what a blank/no-token term resolves to: `:all` (default,
      `dynamic(true)` — don't constrain an empty search) or `:none`
      (`dynamic(false)` — match nothing).
    * `:tokenizer` — keyword opts forwarded to `Decant.Tokenizer.tokenize/2`
      (`:pattern`, `:trim`, `:drop_empty`, `:downcase`, `:max_tokens`).

  ## Empty input

  A `nil`, blank, or all-whitespace term tokenizes to `[]`. By default
  `dynamic/2` then returns `dynamic(true)` — an always-true filter — so callers
  can unconditionally write `where: ^Decant.dynamic(term, ...)` with no
  branching, and the planner discards the `WHERE true`. Pass `on_blank: :none`
  when an empty search should instead return no rows (`dynamic(false)`).
  """

  require Ecto.Query

  alias Decant.{Field, Tokenizer}

  @doc """
  Builds an `Ecto.Query.dynamic/2` filter from a search `term` and `opts`.

  See the module doc for the full option list.

  All options are validated before the term is tokenized, so an invalid option
  raises even when the term is blank. When the term yields no tokens the result
  is governed by `:on_blank`: `dynamic(true)` for `:all` (the default) or
  `dynamic(false)` for `:none`.

  Raises `KeyError` if `:fields` is missing, and `ArgumentError` if `:fields`
  is empty or if `:match`, `:token_logic`, `:field_logic`, `:case`, `:escape`,
  or `:on_blank` is given an unsupported value.
  """
  @spec dynamic(String.t() | nil, keyword()) :: Ecto.Query.dynamic_expr()
  def dynamic(term, opts) do
    fields =
      opts
      |> Keyword.fetch!(:fields)
      |> List.wrap()
      |> Enum.map(&Field.normalize/1)

    if fields == [] do
      raise ArgumentError, "Decant.dynamic/2 requires a non-empty :fields list"
    end

    match = validate_match!(Keyword.get(opts, :match, :contains))
    token_logic = validate_logic!(:token_logic, Keyword.get(opts, :token_logic, :and))
    field_logic = validate_logic!(:field_logic, Keyword.get(opts, :field_logic, :or))
    kase = validate_case!(Keyword.get(opts, :case, :insensitive))
    escape? = validate_escape!(Keyword.get(opts, :escape, true))
    on_blank = validate_on_blank!(Keyword.get(opts, :on_blank, :all))
    tokenizer_opts = Keyword.get(opts, :tokenizer, [])

    case Tokenizer.tokenize(term, tokenizer_opts) do
      [] ->
        blank_clause(on_blank)

      tokens ->
        tokens
        |> Enum.map(&token_clause(&1, fields, match, kase, escape?, field_logic))
        |> combine(token_logic)
    end
  end

  # `:escape` gates whether `%`, `_` and `\` in user input are neutralised, so
  # it must be an explicit boolean. `Keyword.get/3` only falls back to the
  # default when the key is absent: an explicit `escape: nil` would otherwise be
  # treated as "off" and let user-supplied wildcards through unnoticed.
  defp validate_escape!(e) when is_boolean(e), do: e

  defp validate_escape!(e),
    do: raise(ArgumentError, ":escape must be true or false, got: #{inspect(e)}")

  # Builds the clause for a single token: one LIKE/ILIKE test per field, joined
  # with `field_logic`. A field's own `:match` takes precedence over the global
  # `match` mode when it is set.
  defp token_clause(token, fields, match, kase, escape?, field_logic) do
    per_field =
      Enum.map(fields, fn %Field{match: override} = spec ->
        like_pattern = pattern(token, override || match, escape?)
        field_clause(spec, like_pattern, kase)
      end)

    combine(per_field, field_logic)
  end

  # Builds the LIKE/ILIKE test for one field against an already-built pattern.
  # The column is reached through the field's named binding. Clauses are split
  # on `cast` (`nil` compares the column directly, `:string` compares
  # `CAST(column AS TEXT)`) and on case mode (`:insensitive` uses `ilike`,
  # `:sensitive` uses `like`).
  defp field_clause(%Field{binding: src, column: col, cast: nil}, pat, :insensitive) do
    Ecto.Query.dynamic([{^src, row}], ilike(field(row, ^col), ^pat))
  end

  defp field_clause(%Field{binding: src, column: col, cast: nil}, pat, :sensitive) do
    Ecto.Query.dynamic([{^src, row}], like(field(row, ^col), ^pat))
  end

  defp field_clause(%Field{binding: src, column: col, cast: :string}, pat, :insensitive) do
    Ecto.Query.dynamic(
      [{^src, row}],
      ilike(fragment("CAST(? AS TEXT)", field(row, ^col)), ^pat)
    )
  end

  defp field_clause(%Field{binding: src, column: col, cast: :string}, pat, :sensitive) do
    Ecto.Query.dynamic(
      [{^src, row}],
      like(fragment("CAST(? AS TEXT)", field(row, ^col)), ^pat)
    )
  end

  # Constant filter used when the term produced no tokens: `:all` yields an
  # always-true expression (an empty search box constrains nothing), `:none`
  # yields an always-false one (no row passes).
  defp blank_clause(:all) do
    Ecto.Query.dynamic(true)
  end

  defp blank_clause(:none) do
    Ecto.Query.dynamic(false)
  end

  # Joins a non-empty list of dynamics into one expression, folding left to
  # right with `and` or `or`. A one-element list is returned untouched, whatever
  # the logic.
  defp combine([only], _logic) do
    only
  end

  defp combine([head | tail], :and) do
    List.foldl(tail, head, fn clause, joined -> Ecto.Query.dynamic(^joined and ^clause) end)
  end

  defp combine([head | tail], :or) do
    List.foldl(tail, head, fn clause, joined -> Ecto.Query.dynamic(^joined or ^clause) end)
  end

  # Turns a token into a LIKE pattern. The token is escaped first (when
  # `escape?` is truthy) and the wildcards are added afterwards, so the `%`
  # placed here is never escaped itself.
  defp pattern(token, match, escape?) do
    literal = if escape?, do: escape_like(token), else: token

    {lead, trail} =
      case match do
        :contains -> {"%", "%"}
        :prefix -> {"", "%"}
        :suffix -> {"%", ""}
        :exact -> {"", ""}
      end

    lead <> literal <> trail
  end

  # Escape LIKE/ILIKE metacharacters by prefixing each `\`, `%` and `_` with a
  # backslash. Backslash is PostgreSQL's default escape character, so no
  # explicit ESCAPE clause is needed. The replacement is done in a single pass
  # over the input, so backslashes introduced here are never escaped again.
  defp escape_like(str) do
    String.replace(str, ["\\", "%", "_"], fn metachar -> "\\" <> metachar end)
  end

  # Returns the match mode unchanged when it is supported; raises
  # `ArgumentError` otherwise.
  defp validate_match!(mode) do
    if mode in [:contains, :prefix, :suffix, :exact] do
      mode
    else
      raise ArgumentError,
            ":match must be one of :contains, :prefix, :suffix, :exact, got: #{inspect(mode)}"
    end
  end

  # Returns the combinator unchanged when it is `:and` or `:or`; otherwise
  # raises `ArgumentError`, naming the offending option via `key`.
  defp validate_logic!(key, logic) do
    case logic do
      :and -> :and
      :or -> :or
      _ -> raise ArgumentError, "#{inspect(key)} must be :and or :or, got: #{inspect(logic)}"
    end
  end

  # Returns the case mode unchanged when it is `:insensitive` or `:sensitive`;
  # raises `ArgumentError` otherwise.
  defp validate_case!(mode) do
    if mode in [:insensitive, :sensitive] do
      mode
    else
      raise ArgumentError, ":case must be :insensitive or :sensitive, got: #{inspect(mode)}"
    end
  end

  # Returns the blank-term policy unchanged when it is `:all` or `:none`;
  # raises `ArgumentError` otherwise.
  defp validate_on_blank!(:all), do: :all
  defp validate_on_blank!(:none), do: :none

  defp validate_on_blank!(other) do
    raise ArgumentError, ":on_blank must be :all or :none, got: #{inspect(other)}"
  end
end