# lib/bylaw/ecto/query/checks/deterministic_order.ex

defmodule Bylaw.Ecto.Query.Checks.DeterministicOrder do
  @moduledoc """
  Checks that a query with an `order_by` also orders by the primary key of its
  root schema.

  Sorting on a field that is not unique, for example `:inserted_at` or `:name`,
  does not pin down the relative position of rows that share the same value, so
  those rows are free to move between executions. That matters when callers
  page through ordered rows, or combine `Repo.one/2` with `Ecto.Query.first/2`
  or `Ecto.Query.last/2`. Adding a deterministic tie-breaker to the ordering
  removes the ambiguity.

  ## Examples

  Bad:

      from(Post, as: :post)
      |> order_by([post: p], desc: p.inserted_at)
      |> limit(10)

  Why this is bad:

  Nothing guarantees that `inserted_at` is unique. Two rows with the same
  timestamp can swap places from one execution to the next, so a paginated
  query may skip rows or return them twice.

  Better:

      from(Post, as: :post)
      |> order_by([post: p], desc: p.inserted_at)
      |> order_by([post: p], asc: p.id)
      |> limit(10)

  Why this is better:

  Ties in the visible sort key are broken by the root primary key, which gives
  every row a stable relative position.

  Better for a composite primary key (every primary key field has to be part of
  the ordering):

      from(Membership, as: :membership)
      |> order_by([membership: mem], asc: mem.inserted_at)
      |> order_by([membership: mem], asc: mem.organization_id)
      |> order_by([membership: mem], asc: mem.sequence)

  ## Notes

  For now the primary key of the root Ecto schema is the only tie-breaker this
  check trusts. Ecto schemas do not expose arbitrary unique database indexes,
  and the check deliberately does not ask callers to assert uniqueness that
  Bylaw has no way to verify. When a query is intentionally ordered by some
  other unique database key, use the explicit escape hatch until a DB-aware
  check can verify such constraints directly.

  The check is static: root schema primary keys are inferred through Ecto
  schema reflection. A schema-less query, or a schema without a primary key,
  can therefore never be proven deterministic, and an ordered query in either
  situation yields an issue unless validation is explicitly disabled.

  ## Options

    * `:validate` - explicit `false` disables the check. Defaults to `true`.

  ## Usage

  Include this module in the explicit check list handed to `Bylaw.Ecto.Query`.
  The full `c:Ecto.Repo.prepare_query/3` setup is described in
  `Bylaw.Ecto.Query`.
  """

  @behaviour Bylaw.Ecto.Query.Check

  alias Bylaw.Ecto.Query.CheckOptions
  alias Bylaw.Ecto.Query.Introspection
  alias Bylaw.Ecto.Query.Issue

  @typedoc false
  @type field_set :: [atom()]
  @typedoc false
  @type check_opts :: [{:validate, boolean()}]
  @typedoc false
  @type opts :: check_opts()

  @doc """
  Runs the deterministic-order check as the `Bylaw.Ecto.Query.Check` callback.

  Returns `:ok` when the check is disabled, when the query has no `order_by`,
  or when the ordering covers every field of the root primary key. Otherwise
  returns `{:error, [issue]}` with a single issue describing the problem.

  Raises `ArgumentError` when `opts` is not a list.
  """

  @impl Bylaw.Ecto.Query.Check
  @spec validate(Bylaw.Ecto.Query.Check.operation(), Bylaw.Ecto.Query.Check.query(), opts()) ::
          Bylaw.Ecto.Query.Check.result()
  def validate(operation, query, opts) when is_list(opts) do
    normalized = CheckOptions.normalize!(opts, [:validate])

    # `and` short-circuits, so the query is only inspected when the check is on.
    case CheckOptions.enabled?(normalized) and ordered?(query) do
      true -> check_tie_breaker(operation, query)
      false -> :ok
    end
  end

  def validate(_operation, _query, opts) do
    raise ArgumentError, "expected opts to be a keyword list, got: #{inspect(opts)}"
  end

  # Compares the root fields used for ordering against the root primary key and
  # turns a mismatch into a single-issue error tuple.
  defp check_tie_breaker(operation, query) do
    ordered_by = ordered_root_fields(query)
    pk_fields = root_primary_key(query)

    case covers_primary_key?(ordered_by, pk_fields) do
      true -> :ok
      false -> {:error, [build_issue(operation, ordered_by, pk_fields)]}
    end
  end

  # Primary key fields of the root schema, or `[]` when the root schema is unknown.
  defp root_primary_key(query) do
    case Introspection.root_schema(query) do
      :unknown ->
        []

      {:ok, schema_module} ->
        schema_module.__schema__(:primary_key)
    end
  end

  # A query counts as ordered only when it carries a non-empty `:order_bys` list.
  defp ordered?(%{order_bys: [_ | _]}), do: true
  defp ordered?(_query), do: false

  # Collects the root fields referenced by all `order_by` expressions of the
  # query, de-duplicated and sorted. Anything that is not a map yields `[]`.
  @spec ordered_root_fields(term()) :: field_set()
  defp ordered_root_fields(query) when is_map(query) do
    aliases = Introspection.root_aliases(query)
    order_bys = Map.get(query, :order_bys, [])

    collected =
      for order_by <- order_bys,
          field <- root_fields_of(Map.get(order_by, :expr, []), aliases) do
        field
      end

    collected
    |> Enum.uniq()
    |> Enum.sort()
  end

  defp ordered_root_fields(_query), do: []

  # Extracts the root fields from one `order_by` expression list. A value that
  # is not a list contributes no fields.
  defp root_fields_of(entries, aliases) when is_list(entries) do
    Enum.flat_map(entries, fn entry ->
      entry
      |> strip_direction()
      |> Introspection.root_fields(aliases)
    end)
  end

  defp root_fields_of(_other, _aliases), do: []

  # The sort direction is irrelevant here; only the ordered expression is kept.
  defp strip_direction({_direction, expr}), do: expr
  defp strip_direction(expr), do: expr

  # An unknown (empty) primary key can never be covered; otherwise every primary
  # key field must be among the ordered fields.
  @spec covers_primary_key?(field_set(), field_set()) :: boolean()
  defp covers_primary_key?(_ordered_by, []), do: false

  defp covers_primary_key?(ordered_by, pk_fields) do
    Enum.all?(pk_fields, &(&1 in ordered_by))
  end

  # Builds the issue reported for a non-deterministic ordering.
  @spec build_issue(Bylaw.Ecto.Query.Check.operation(), field_set(), field_set()) :: Issue.t()
  defp build_issue(operation, ordered_by, pk_fields) do
    meta = %{
      operation: operation,
      primary_key: pk_fields,
      found_order_keys: ordered_by
    }

    %Issue{check: __MODULE__, message: describe(pk_fields), meta: meta}
  end

  # Human-readable message; the wording depends on whether a primary key is known.
  defp describe([]) do
    "expected ordered query to include the root primary key, but no root primary key is known"
  end

  defp describe(pk_fields) do
    listed = Enum.map_join(pk_fields, ", ", &inspect/1)
    "expected ordered query to include the root primary key: #{listed}"
  end
end