lib/bylaw/ecto/query/checks/cartesian_joins.ex

defmodule Bylaw.Ecto.Query.Checks.CartesianJoins do
  @moduledoc """
  Validates that queries do not use explicit cartesian joins.

  This check catches join shapes that are easy to introduce accidentally and
  expensive to run.

  ## Examples

  Bad:

      from(Post, as: :post)
      |> join(:inner, [post: p], c in Comment,
        as: :comment,
        on: true
      )

  Why this is bad:

  `on: true` creates every possible pair of posts and comments. That can
  multiply rows, inflate aggregates, and produce a query that is much more
  expensive than intended.

  Better:

      from(Post, as: :post)
      |> join(:inner, [post: p], c in Comment,
        as: :comment,
        on: c.post_id == p.id
      )

  Why this is better:

  The join predicate states the relationship between the two tables, so each
  joined row is tied back to its post.

  Bad:

      from(Plan, as: :plan)
      |> join(:cross, [plan: pl], f in Feature, as: :feature)
      |> where([feature: f], f.enabled == true)

  ## Notes

  This check catches obvious cartesian joins: `cross_join`, uncorrelated
  `cross_lateral_join`, and non-association joins whose `on` expression is
  literally `true`. It does not parse SQL fragments or prove general SQL
  cardinality.

  It rejects `cross_join`, uncorrelated `cross_lateral_join`, and
  non-association joins whose `on` expression is literally `true`. Correlated
  lateral joins are treated as constrained when a supported subquery predicate
  depends on both a local subquery binding and a previous parent binding, or
  when a lateral fragment source exposes a previous parent binding reference.

  ## Options

    * `:validate` - explicit `false` disables the check. Defaults to `true`.

  Like Bylaw's other Ecto query checks, this intentionally inspects the query
  structure produced by Ecto's query macros. It supports the tested join and
  lateral subquery shapes exposed by the Ecto query API. Association joins are
  not considered literal `on: true` joins because Ecto stores their association
  predicate separately from the `on` expression.

  This check is a guardrail for obvious cartesian joins, not a full SQL
  cardinality proof. It does not parse fragment SQL. For lateral fragments, an
  Ecto-visible reference to a previous binding is treated as dependency
  evidence; opaque SQL that needs stricter review should be handled in the
  application query or by disabling the check for that call site.

  ## Usage

  Add this module to the explicit check list passed through `Bylaw.Ecto.Query`.
  See `Bylaw.Ecto.Query` for the full `c:Ecto.Repo.prepare_query/3` setup.
  """

  @behaviour Bylaw.Ecto.Query.Check

  alias Bylaw.Ecto.Query.CheckOptions
  alias Bylaw.Ecto.Query.Introspection
  alias Bylaw.Ecto.Query.Issue

  @typedoc false
  @type reason :: :cross_join | :cross_lateral_join | :literal_true_on
  @typedoc false
  @type check_opts :: list({:validate, boolean()})
  @typedoc false
  @type opts :: check_opts()
  @comparison_ops [:==, :!=, :>, :>=, :<, :<=]
  @lateral_quals [:cross_lateral, :inner_lateral, :left_lateral]

  @doc """
  Implements the `Bylaw.Ecto.Query.Check` validation callback.
  """

  @impl Bylaw.Ecto.Query.Check
  @spec validate(Bylaw.Ecto.Query.Check.operation(), Bylaw.Ecto.Query.Check.query(), opts()) ::
          Bylaw.Ecto.Query.Check.result()
  def validate(operation, query, opts) when is_list(opts) do
    check_opts = CheckOptions.normalize!(opts, [:validate])

    if CheckOptions.enabled?(check_opts) do
      validate_enabled(operation, query)
    else
      :ok
    end
  end

  def validate(_operation, _query, opts) do
    raise ArgumentError, "expected opts to be a keyword list, got: #{inspect(opts)}"
  end

  defp validate_enabled(operation, query) do
    case issues(operation, query) do
      [] -> :ok
      issues -> {:error, issues}
    end
  end

  defp issues(operation, query) when is_map(query) do
    aliases = Introspection.aliases(query)

    query
    |> Map.get(:joins, [])
    |> Enum.with_index()
    |> Enum.flat_map(fn {join, join_index} ->
      binding_index = join_index + 1

      case cartesian_reason(join, aliases, binding_index) do
        nil -> []
        reason -> [issue(operation, join, join_index, reason)]
      end
    end)
  end

  defp issues(_operation, _query), do: []

  defp cartesian_reason(join, aliases, binding_index) do
    cond do
      correlated_lateral_join?(join, aliases, binding_index) -> nil
      match?(%{qual: :cross}, join) -> :cross_join
      match?(%{qual: :cross_lateral}, join) -> :cross_lateral_join
      association_join?(join) -> nil
      true -> literal_true_on_reason(join)
    end
  end

  defp association_join?(%{assoc: nil}), do: false
  defp association_join?(%{assoc: _assoc}), do: true
  defp association_join?(_join), do: false

  defp correlated_lateral_join?(
         %{qual: qual, source: %Ecto.SubQuery{query: query}},
         aliases,
         binding_index
       )
       when qual in @lateral_quals do
    correlated_lateral_query?(query, aliases, binding_index)
  end

  defp correlated_lateral_join?(
         %{qual: qual, source: {:fragment, _meta, _parts} = source},
         aliases,
         binding_index
       )
       when qual in @lateral_quals do
    previous_binding_reference?(source, aliases, binding_index)
  end

  defp correlated_lateral_join?(_join, _aliases, _binding_index), do: false

  defp correlated_lateral_query?(query, parent_aliases, parent_binding_index)
       when is_map(query) do
    local_aliases = Introspection.aliases(query)

    query
    |> predicate_expressions()
    |> Enum.any?(&correlated_predicate?(&1, parent_aliases, parent_binding_index, local_aliases))
  end

  defp correlated_lateral_query?(_query, _parent_aliases, _parent_binding_index), do: false

  defp predicate_expressions(query) do
    where_expressions(query) ++ join_on_expressions(query)
  end

  defp where_expressions(query) do
    query
    |> Map.get(:wheres, [])
    |> Enum.flat_map(fn
      %{expr: expr} -> [expr]
      _where -> []
    end)
  end

  defp join_on_expressions(query) do
    query
    |> Map.get(:joins, [])
    |> Enum.flat_map(fn
      %{on: %{expr: expr}} -> [expr]
      _join -> []
    end)
  end

  defp correlated_predicate?({:and, _meta, [left, right]}, parent_aliases, binding_index, aliases) do
    correlated_predicate?(left, parent_aliases, binding_index, aliases) or
      correlated_predicate?(right, parent_aliases, binding_index, aliases)
  end

  defp correlated_predicate?({:or, _meta, [left, right]}, parent_aliases, binding_index, aliases) do
    correlated_predicate?(left, parent_aliases, binding_index, aliases) and
      correlated_predicate?(right, parent_aliases, binding_index, aliases)
  end

  defp correlated_predicate?({op, _meta, [left, right]}, parent_aliases, binding_index, aliases)
       when op in @comparison_ops do
    correlated_comparison?(left, right, parent_aliases, binding_index, aliases)
  end

  defp correlated_predicate?(
         {:fragment, _meta, _parts} = expr,
         parent_aliases,
         binding_index,
         aliases
       ) do
    correlated_expression?(expr, parent_aliases, binding_index, aliases)
  end

  defp correlated_predicate?(_expr, _parent_aliases, _binding_index, _aliases), do: false

  defp correlated_comparison?(left, right, parent_aliases, binding_index, aliases) do
    correlated_expression?([left, right], parent_aliases, binding_index, aliases)
  end

  defp correlated_expression?(expr, parent_aliases, binding_index, aliases) do
    expr
    |> expression_references(parent_aliases, binding_index, aliases)
    |> correlated_references?()
  end

  defp correlated_references?(%{local?: true, parent?: true}), do: true
  defp correlated_references?(_references), do: false

  defp expression_references(expr, parent_aliases, binding_index, aliases) do
    cond do
      parent_field?(expr, parent_aliases, binding_index) ->
        %{local?: false, parent?: true}

      local_field?(expr, aliases) ->
        %{local?: true, parent?: false}

      is_tuple(expr) ->
        expr
        |> Tuple.to_list()
        |> expression_references(parent_aliases, binding_index, aliases)

      is_list(expr) ->
        Enum.reduce(expr, empty_references(), fn item, references ->
          merge_references(
            references,
            expression_references(item, parent_aliases, binding_index, aliases)
          )
        end)

      true ->
        empty_references()
    end
  end

  defp empty_references, do: %{local?: false, parent?: false}

  defp merge_references(left, right) do
    %{
      local?: left.local? or right.local?,
      parent?: left.parent? or right.parent?
    }
  end

  defp parent_field?(expr, aliases, binding_index) do
    match?({_parent_index, _field}, parent_field(expr, aliases, binding_index))
  end

  defp parent_field({{:., _meta, [source, field]}, _call_meta, []}, aliases, binding_index)
       when is_atom(field) do
    parent_field(source, field, aliases, binding_index)
  end

  defp parent_field({:field, _meta, [source, field]}, aliases, binding_index)
       when is_atom(field) do
    parent_field(source, field, aliases, binding_index)
  end

  defp parent_field(_expr, _aliases, _binding_index), do: :unknown

  defp parent_field({:parent_as, _meta, [name]}, field, aliases, binding_index)
       when is_atom(name) do
    case Map.get(aliases, name) do
      index when is_integer(index) and index < binding_index -> {index, field}
      _index -> :unknown
    end
  end

  defp parent_field(_source, _field, _aliases, _binding_index), do: :unknown

  defp local_field?(expr, aliases) do
    match?({:ok, {_local_index, _field}}, Introspection.field(expr, aliases))
  end

  defp previous_binding_reference?(expr, aliases, binding_index) do
    previous_binding?(expr, aliases, binding_index) or
      previous_binding_field?(expr, aliases, binding_index) or
      nested_previous_binding_reference?(expr, aliases, binding_index)
  end

  defp previous_binding?(expr, aliases, binding_index) do
    case Introspection.binding_index(expr, aliases) do
      {:ok, index} when index < binding_index -> true
      _binding -> false
    end
  end

  defp previous_binding_field?(expr, aliases, binding_index) do
    case Introspection.field(expr, aliases) do
      {:ok, {index, _field}} when index < binding_index -> true
      _field -> false
    end
  end

  defp nested_previous_binding_reference?(expr, aliases, binding_index) when is_tuple(expr) do
    expr
    |> Tuple.to_list()
    |> previous_binding_reference?(aliases, binding_index)
  end

  defp nested_previous_binding_reference?(expr, aliases, binding_index) when is_list(expr) do
    Enum.any?(expr, &previous_binding_reference?(&1, aliases, binding_index))
  end

  defp nested_previous_binding_reference?(_expr, _aliases, _binding_index), do: false

  defp literal_true_on_reason(%{on: %{expr: true}}), do: :literal_true_on
  defp literal_true_on_reason(_join), do: nil

  @spec issue(Bylaw.Ecto.Query.Check.operation(), term(), non_neg_integer(), reason()) ::
          Issue.t()
  defp issue(operation, join, join_index, reason) do
    %Issue{
      check: __MODULE__,
      message: message(join_index, reason),
      meta: %{
        operation: operation,
        join_index: join_index,
        binding_index: join_index + 1,
        join_qual: Map.get(join, :qual),
        reason: reason
      }
    }
  end

  defp message(join_index, :cross_join) do
    "expected join #{join_index} not to be cartesian; found cross_join"
  end

  defp message(join_index, :cross_lateral_join) do
    "expected join #{join_index} not to be cartesian; found cross_lateral_join"
  end

  defp message(join_index, :literal_true_on) do
    "expected join #{join_index} not to be cartesian; found a literal true on expression"
  end
end