# lib/firebreak/blast_radius.ex

defmodule Firebreak.BlastRadius do
  @moduledoc """
  Tier-2 analysis — the part no existing tool does.

  The supervision tree tells you what a supervisor *contains*. It says nothing
  about who outside that subtree *depends* on the processes inside it. This pass
  takes the resolved coupling graph and, for each supervisor, finds the edges
  that cross *into* its subtree from the outside.

  Those external callers are exactly the processes that will observe `:noproc`
  or `:timeout` when the supervisor restarts a child — failures the tree alone
  would tell you are "contained".
  """

  alias Firebreak.{Edge, Finding}

  # A target called by at least this many distinct modules is treated as shared
  # infrastructure (PubSub, a Registry, a universally-used server). Coupling into
  # it is real but inherent — you can't restructure the tree to avoid it — so a
  # crossing that lands *only* on such targets is capped to :info to keep the
  # genuinely actionable couplings from drowning.
  @shared_infra_indegree 5

  @type ranking :: %{
          supervisor: module(),
          members: MapSet.t(),
          edges: [Edge.t()],
          external_callers: [module()]
        }

  @doc """
  Ranks supervisors by how many outside modules reach into their subtree.

  Every edge in `edges` is attributed to the tightest supervisor it crosses
  into. Supervisors that end up with no external callers are dropped, and the
  rest are ordered by their number of distinct external callers, largest first.

  Returns `{rankings, findings}`, where `findings` holds the findings produced
  for each ranking, in ranking order.
  """
  @spec analyze(Firebreak.Forest.t(), [Edge.t()]) :: {[ranking()], [Finding.t()]}
  def analyze(forest, edges) do
    ordered =
      edges
      |> assign_to_tightest(forest)
      |> Enum.flat_map(fn {sup, sup_edges} ->
        # Keep only supervisors that actually have someone calling in.
        case ranking(sup, forest, sup_edges) do
          %{external_callers: []} -> []
          entry -> [entry]
        end
      end)
      |> Enum.sort_by(fn entry -> length(entry.external_callers) end, :desc)

    # Graph-wide in-degree lets each finding recognise shared-infrastructure targets.
    fan_in = in_degree(edges)

    findings =
      for entry <- ordered, finding <- finding_for(entry, fan_in, forest), do: finding

    {ordered, findings}
  end

  # Maps every target module to the number of *distinct* source modules that
  # have an edge pointing at it, across the whole graph. Edges whose `to` is
  # nil contribute nothing.
  defp in_degree(edges) do
    edges
    |> Enum.reduce(%{}, fn edge, sources_by_target ->
      case edge.to do
        nil ->
          sources_by_target

        target ->
          Map.update(
            sources_by_target,
            target,
            MapSet.new([edge.from]),
            &MapSet.put(&1, edge.from)
          )
      end
    end)
    |> Map.new(fn {target, sources} -> {target, MapSet.size(sources)} end)
  end

  # Groups crossing edges by the *tightest* supervisor whose subtree holds the
  # edge's target but not its source. Because only that one supervisor is
  # credited, a deeply-nested target is reported once — at its enclosing
  # supervisor — rather than again at every ancestor up to the root (where
  # "crosses in" is degenerate: the root contains everything). Edges that cross
  # into no supervisor are left out.
  defp assign_to_tightest(edges, forest) do
    # Walk the edges back-to-front and prepend, so each supervisor's list ends
    # up in the original edge order.
    edges
    |> Enum.reverse()
    |> Enum.reduce(%{}, fn edge, by_supervisor ->
      case tightest_crossed(edge, forest) do
        nil -> by_supervisor
        sup -> Map.update(by_supervisor, sup, [edge], fn rest -> [edge | rest] end)
      end
    end)
  end

  # Returns the supervisor with the smallest subtree that contains the edge's
  # target but not its source, or nil when the edge has no target or crosses
  # into no supervisor. On equal subtree sizes the supervisor listed first in
  # `forest.supervisors` wins.
  defp tightest_crossed(%Edge{to: nil}, _forest), do: nil

  defp tightest_crossed(%Edge{to: target, from: source}, forest) do
    # Pair each crossed supervisor with its subtree size so the subtree is
    # looked up once per supervisor.
    sized_candidates =
      Enum.flat_map(forest.supervisors, fn sup ->
        subtree = members(forest, sup)

        if MapSet.member?(subtree, target) and not MapSet.member?(subtree, source) do
          [{sup, MapSet.size(subtree)}]
        else
          []
        end
      end)

    case sized_candidates do
      [] ->
        nil

      [_ | _] ->
        {sup, _size} = Enum.min_by(sized_candidates, fn {_sup, size} -> size end)
        sup
    end
  end

  # Subtree membership recorded for `sup` in `forest.subtree`; a supervisor
  # with no entry is treated as containing only itself.
  defp members(forest, sup) do
    case Map.fetch(forest.subtree, sup) do
      {:ok, subtree} -> subtree
      :error -> MapSet.new([sup])
    end
  end

  # Assembles the ranking map for one supervisor: its subtree members, the
  # edges attributed to it, and the distinct modules those edges come from
  # (in order of first appearance).
  defp ranking(sup, forest, sup_edges) do
    distinct_callers =
      sup_edges
      |> Enum.uniq_by(fn edge -> edge.from end)
      |> Enum.map(fn edge -> edge.from end)

    %{
      supervisor: sup,
      members: members(forest, sup),
      edges: sup_edges,
      external_callers: distinct_callers
    }
  end

  # Builds the `:cross_tree_coupling` finding for one ranking and returns it
  # wrapped in a one-element list.
  defp finding_for(r, indegree, forest) do
    caller_count = length(r.external_callers)

    # The crossing counts as synchronous only when at least one edge is both
    # flagged `sync?` and of kind `:call`.
    blocking? = Enum.any?(r.edges, fn edge -> edge.sync? and edge.kind == :call end)

    targets = r.edges |> Enum.uniq_by(& &1.to) |> Enum.map(& &1.to)

    # Targets whose graph-wide in-degree reaches the threshold are shared
    # infrastructure; everything else is a specific coupling.
    {infra_targets, specific_targets} =
      Enum.split_with(targets, fn target ->
        Map.get(indegree, target, 0) >= @shared_infra_indegree
      end)

    # A crossing that touches nothing but shared infrastructure is inherent
    # rather than a design smell, so it is capped below to avoid burying the
    # specific, fixable couplings.
    shared_only? = match?({[_ | _], []}, {infra_targets, specific_targets})

    # `:temporary` children are the sharpest case: once such a child exits its
    # supervisor never restarts it, so the caller is left with a *permanent*
    # :noproc instead of a transient restart window.
    temp_targets =
      for target <- specific_targets, restart_of(forest, target) == :temporary, do: target

    any_temporary? = temp_targets != []

    # Only a synchronous crossing makes the caller block and see
    # `:timeout`/`:noproc` during a restart, which is the actionable risk.
    # Async-only crossings are real coupling, but the caller never blocks, so
    # they are kept at :low or :info.
    severity =
      case {shared_only?, blocking?} do
        {true, _} -> :info
        {false, true} when caller_count >= 4 or any_temporary? -> :high
        {false, true} when caller_count >= 2 -> :medium
        {false, true} -> :low
        {false, false} when any_temporary? or caller_count >= 2 -> :low
        {false, false} -> :info
      end

    caller_list = r.external_callers |> Enum.map(&inspect/1) |> Enum.join(", ")

    summary =
      message(r, caller_count, caller_list, blocking?, shared_only?, infra_targets)

    finding = %Finding{
      check: :cross_tree_coupling,
      severity: severity,
      module: r.supervisor,
      line: nil,
      message: summary <> temp_note(temp_targets),
      details: %{
        external_callers: r.external_callers,
        targets: targets,
        synchronous: blocking?,
        edge_count: length(r.edges),
        shared_infra_targets: infra_targets,
        temporary_targets: temp_targets
      }
    }

    [finding]
  end

  # Restart policy stored under `:restart` in the child spec that
  # `forest.child_specs` holds for `mod`; nil when there is no spec or the
  # spec carries no `:restart` key.
  defp restart_of(forest, mod) do
    with %{restart: policy} <- Map.get(forest.child_specs, mod) do
      policy
    else
      _no_policy -> nil
    end
  end

  # Sentence appended to a finding's message when some targets are
  # `:temporary` children; empty when there are none. The wording agrees in
  # number with the list of targets.
  defp temp_note([]), do: ""

  defp temp_note(temp_targets) do
    {verb, pronoun} =
      case temp_targets do
        [_only] -> {"is", "it"}
        _several -> {"are", "them"}
      end

    names = temp_targets |> Enum.map(&inspect/1) |> Enum.join(", ")

    " #{names} #{verb} :temporary; once down, the supervisor never restarts #{pronoun}, " <>
      "so the caller's dependency becomes a permanent :noproc, not a transient window."
  end

  # Human-readable summary of a crossing. The clause is chosen by the two
  # boolean flags (synchronous?, shared-infrastructure-only?):
  #   * shared-only (sync flag ignored) — names the shared infrastructure targets
  #   * synchronous, not shared-only    — lists the callers, warns of :timeout/:noproc
  #   * asynchronous, not shared-only   — lists the callers, notes dropped messages
  defp message(r, n, _callers, _sync?, true, infra_targets) do
    shared = infra_targets |> Enum.map(&inspect/1) |> Enum.join(", ")

    outsiders(r, n) <>
      " depend on it, but only via shared infrastructure (#{shared}) that much of the " <>
      "app uses; this coupling is inherent and not fixable by restructuring the tree."
  end

  defp message(r, n, callers, true, false, _infra) do
    outsiders(r, n) <>
      " synchronously depend on processes inside it (#{callers}); " <>
      "restarting it can surface :timeout/:noproc in those callers - " <>
      "coupling the supervision tree does not show."
  end

  defp message(r, n, callers, false, false, _infra) do
    outsiders(r, n) <>
      " depend on processes inside it (#{callers}), " <>
      "but only asynchronously (cast/send/pubsub); " <>
      "a restart drops in-flight messages rather than blocking the callers - " <>
      "lower-risk coupling the supervision tree still doesn't show."
  end

  # Opening phrase shared by every message variant.
  defp outsiders(r, n), do: "#{n} module(s) outside #{inspect(r.supervisor)}'s subtree"
end