# lib/break_glass/rate_limiter.ex

defmodule BreakGlass.RateLimiter do
  @moduledoc """
  GenServer that owns the `:break_glass_rate_limit` ETS table and serialises
  all write operations to prevent TOCTOU races.

  Tracks failed authentication attempts per IP address. When the attempt count
  reaches `:max_attempts`, the IP is locked out for `:lockout_seconds`.

  ## Configuration

  Read at call time from the `:break_glass_ex` application environment:

  - `:max_attempts` — number of failures before lockout (default: `5`)
  - `:lockout_seconds` — lockout window duration in seconds (default: `900`)

  ## ETS Table

  Table name: `:break_glass_rate_limit`

  Record format: `{ip :: String.t(), attempt_count :: non_neg_integer(), locked_at :: integer() | nil}`

  - `locked_at` is `nil` until the attempt count reaches `:max_attempts`.
  - Once set, `locked_at` is not changed by further failures while the lockout
    is active — the lockout clock does not slide.
  - A row whose lockout window has elapsed is treated as absent by every read
    and write path, even if the periodic sweep has not physically removed it
    yet. The next failure for that IP starts again from a count of `1`.
  - `reset_attempts/1` deletes the row entirely, removing both the count and lockout.

  Hot-path reads (`locked_out?/1`, `remaining_attempts/1`) query ETS directly
  without a GenServer round-trip.
  """

  use GenServer

  require Logger

  @table :break_glass_rate_limit
  @default_max_attempts 5
  @default_lockout_seconds 900
  # Interval between sweeps of expired lockout rows, in milliseconds.
  @sweep_interval_ms 60_000

  # ---------------------------------------------------------------------------
  # Public API
  # ---------------------------------------------------------------------------

  @doc """
  Starts the RateLimiter GenServer and registers it under `#{inspect(__MODULE__)}`.

  `opts` is accepted for supervisor compatibility and is currently unused.
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @doc """
  Records a failed authentication attempt for `ip`.

  If the resulting attempt count reaches `:max_attempts`, a lockout timestamp
  is stored and a `Logger.warning` is emitted with the IP and count. Failures
  recorded while the IP is already locked out increase the count but neither
  move the lockout timestamp nor log again.

  Returns `:ok`.
  """
  @spec record_failed_attempt(ip :: String.t()) :: :ok
  def record_failed_attempt(ip) do
    GenServer.call(__MODULE__, {:record_failed_attempt, ip})
  end

  @doc """
  Resets the failed attempt counter and any lockout for `ip` by deleting the
  ETS row entirely.

  Returns `:ok`.
  """
  @spec reset_attempts(ip :: String.t()) :: :ok
  def reset_attempts(ip) do
    GenServer.call(__MODULE__, {:reset_attempts, ip})
  end

  @doc """
  Returns `true` if `ip` is currently locked out (lockout window still active),
  `false` otherwise.

  Reads ETS directly — no GenServer round-trip. Returns `false` when the table
  does not exist (for example, the server is not running).
  """
  @spec locked_out?(ip :: String.t()) :: boolean()
  def locked_out?(ip) do
    case active_entry(ip, System.system_time(:second)) do
      {_count, locked_at} when is_integer(locked_at) -> true
      _ -> false
    end
  rescue
    # :ets.lookup/2 raises ArgumentError when the named table is missing.
    ArgumentError -> false
  end

  @doc """
  Returns the number of remaining authentication attempts for `ip`.

  Reads ETS directly — no GenServer round-trip.

  Returns the configured `:max_attempts` (default `#{@default_max_attempts}`) when no
  failures have been recorded for `ip`, when a previous lockout has expired,
  or when the table does not exist. Returns `0` while `ip` is locked out.
  """
  @spec remaining_attempts(ip :: String.t()) :: non_neg_integer()
  def remaining_attempts(ip) do
    max_attempts = fetch_max_attempts()

    case active_entry(ip, System.system_time(:second)) do
      {_count, locked_at} when is_integer(locked_at) -> 0
      {count, nil} -> max(max_attempts - count, 0)
      nil -> max_attempts
    end
  rescue
    # :ets.lookup/2 raises ArgumentError when the named table is missing.
    ArgumentError -> fetch_max_attempts()
  end

  # ---------------------------------------------------------------------------
  # GenServer callbacks
  # ---------------------------------------------------------------------------

  @impl true
  def init(_opts) do
    # :protected — only this process writes; any process may read.
    :ets.new(@table, [:set, :protected, :named_table, read_concurrency: true])
    schedule_sweep()
    {:ok, %{}}
  end

  @impl true
  def handle_call({:record_failed_attempt, ip}, _from, state) do
    max_attempts = fetch_max_attempts()
    now = System.system_time(:second)

    # An expired lockout row counts as "no row": the IP starts over at 1.
    {previous_count, locked_at} = active_entry(ip, now) || {0, nil}
    new_count = previous_count + 1

    cond do
      is_integer(locked_at) ->
        # Already locked out: keep the original timestamp so the clock does not slide.
        :ets.insert(@table, {ip, new_count, locked_at})

      new_count >= max_attempts ->
        :ets.insert(@table, {ip, new_count, now})

        Logger.warning("[BreakGlass] IP #{ip} locked out after #{new_count} failed attempts")

      true ->
        :ets.insert(@table, {ip, new_count, nil})
    end

    {:reply, :ok, state}
  end

  @impl true
  def handle_call({:reset_attempts, ip}, _from, state) do
    :ets.delete(@table, ip)
    {:reply, :ok, state}
  end

  @impl true
  def handle_info(:sweep, state) do
    lockout_seconds = fetch_lockout_seconds()
    now = System.system_time(:second)

    # Delete every row whose lockout window has elapsed. The `=<` bound is the
    # exact complement of the `now < locked_at + lockout_seconds` test used for
    # "still locked out".
    #
    # NOTE(review): rows with `locked_at == nil` (fewer than :max_attempts
    # failures) are not matched here, so they stay until reset_attempts/1 is
    # called for that IP. Expiring them would need a per-row timestamp.
    :ets.select_delete(@table, [
      {{:_, :_, :"$1"},
       [
         {:is_integer, :"$1"},
         {:"=<", {:+, :"$1", lockout_seconds}, now}
       ], [true]}
    ])

    schedule_sweep()
    {:noreply, state}
  end

  # Ignore anything else: a crash here would destroy the ETS table this
  # process owns and silently clear every active lockout.
  def handle_info(_message, state) do
    {:noreply, state}
  end

  # ---------------------------------------------------------------------------
  # Private helpers
  # ---------------------------------------------------------------------------

  # Returns `{count, locked_at}` for `ip`, or `nil` when there is no row or the
  # row's lockout window has already elapsed at `now`.
  defp active_entry(ip, now) do
    case :ets.lookup(@table, ip) do
      [{^ip, count, locked_at}] when is_integer(locked_at) ->
        if now < locked_at + fetch_lockout_seconds(), do: {count, locked_at}, else: nil

      [{^ip, count, _not_locked}] ->
        {count, nil}

      [] ->
        nil
    end
  end

  defp fetch_max_attempts do
    Application.get_env(:break_glass_ex, :max_attempts, @default_max_attempts)
  end

  defp fetch_lockout_seconds do
    Application.get_env(:break_glass_ex, :lockout_seconds, @default_lockout_seconds)
  end

  defp schedule_sweep do
    Process.send_after(self(), :sweep, @sweep_interval_ms)
  end
end