# lib/snapcast.ex

defmodule Snapcast do
  @moduledoc """
  A pure-Elixir snapcast **server**: speak snapcast's binary protocol directly to
  snapclients, owning the audio clock and timestamping every chunk, so there is no
  external snapserver and no ffmpeg/snapserver pacing to fight.

  Public entry points + the stream format. The server currently uses PCM as the
  compatibility transport while encoded Snap transport (opus/flac) is developed
  behind an explicit per-`play/3` `:transport_codec` option.

  ## Configuration

  Settings are read from the `:snapcast` application environment, e.g.:

      config :snapcast,
        enabled: true,
        port: 1704,
        bind_ip: {0, 0, 0, 0},
        format: {48_000, 16, 2},
        listener: MyApp.SnapcastListener

  `format` is the default PCM output format `{sample_rate, bits_per_sample,
  channels}`. `play/3` can override it with a per-stream `:format`.

  ## Supervision

  Add `Snapcast.children()` to your supervision tree; it returns the server
  subtree when `enabled: true`, or `[]` otherwise.

  ## Lifecycle events

  Configure a `Snapcast.Listener` (via `:listener`) to receive client
  connect/disconnect and playback progress/ended notifications.
  """

  alias Snapcast.Server

  @typedoc "PCM stream format `{sample_rate, bits_per_sample, channels}`."
  @type format :: {pos_integer(), 16 | 24 | 32, pos_integer()}

  @typedoc "Transport codec names produced by `normalize_transport_codec/1`."
  @type transport_codec :: :pcm | :opus | :flac

  # Fallback PCM format used when `:format` is unset or does not normalize.
  @default_format {48_000, 16, 2}

  # Wire data rate of the 48kHz/16-bit/stereo reference stream (192_000 bytes/s):
  # 48_000 samples/s * 2 bytes/sample * 2 channels.
  @reference_bytes_per_sec 48_000 * 2 * 2

  @doc """
  Default PCM stream format `{rate, bits, channels}`.

  Reads `:format` from the application environment and normalizes it with
  `normalize_format/1`; an unset or invalid value falls back to `{48_000, 16, 2}`.
  """
  @spec format() :: format()
  def format do
    normalize_format(config(:format, @default_format)) || @default_format
  end

  @doc """
  Clamp a format's sample rate to `max_sample_rate/0` by repeated halving.

  Snapcast carries PCM uncompressed (24-bit is inflated to 32-bit on the wire), so a
  very high sample rate can exceed what a client link sustains and cause dropouts.
  Halving (192k→96k, 176.4k→88.2k) is an exact 2:1 decimation rather than a fractional
  resample; bit depth and channels are left untouched.

  Anything that is not a 3-tuple with an integer rate is returned unchanged.
  """
  @spec cap_format(term()) :: term()
  def cap_format({rate, bits, channels}) when is_integer(rate),
    do: {cap_rate(rate), bits, channels}

  def cap_format(format), do: format

  # Reads the configured limit once; a non-integer or non-positive limit disables capping.
  defp cap_rate(rate) do
    case max_sample_rate() do
      limit when is_integer(limit) and limit > 0 -> halve_until(rate, limit)
      _no_limit -> rate
    end
  end

  # Integer-halves `rate` until it no longer exceeds `limit`.
  defp halve_until(rate, limit) when rate > limit, do: halve_until(div(rate, 2), limit)
  defp halve_until(rate, _limit), do: rate

  @doc """
  Maximum Snapcast sample rate; higher-rate streams are halved down to it
  (see `cap_format/1`).
  """
  def max_sample_rate, do: config(:max_sample_rate, 96_000)

  @doc "Chunk duration in milliseconds."
  def chunk_ms, do: config(:chunk_ms, 20)

  @doc "End-to-end buffer (ms): clients play each chunk this long after its timestamp."
  def buffer_ms, do: config(:buffer_ms, 1000)

  @doc """
  End-to-end buffer (ms) scaled to a stream's wire data rate.

  Higher-bitrate streams (hi-res PCM) leave thinner network headroom, so they get a
  proportionally deeper buffer to ride out jitter — scaled linearly from `buffer_ms/0`
  against a 48kHz/16-bit/stereo reference (192_000 wire bytes/s) and capped at
  `max_buffer_ms/0`. A standard CD/48k stream is unchanged at `buffer_ms/0`.

  A format that is not a 3-tuple of integers gets the unscaled `buffer_ms/0`.
  """
  def buffer_ms_for({rate, bits, channels})
      when is_integer(rate) and is_integer(bits) and is_integer(channels) do
    base = buffer_ms()
    bytes_per_sec = rate * wire_sample_bytes(bits) * channels
    scaled = div(base * bytes_per_sec, @reference_bytes_per_sec)

    # Never go below the base buffer; never exceed the configured ceiling.
    scaled
    |> max(base)
    |> min(max_buffer_ms())
  end

  def buffer_ms_for(_format), do: buffer_ms()

  @doc "Upper bound for the data-rate-scaled `buffer_ms_for/1`."
  def max_buffer_ms, do: config(:max_buffer_ms, 4_000)

  @doc """
  Per-endpoint minimum buffer (ms), keyed by client id (e.g.
  `%{"living-room" => 3_000}`).

  An endpoint on a flaky link (typically Wi-Fi) can be given a deeper buffer than the
  default so it rides out longer network stalls. Empty by default — endpoints use the
  format-scaled `buffer_ms_for/1`.
  """
  def client_buffer_ms, do: config(:client_buffer_ms, %{})

  @doc """
  Effective buffer (ms) for a stream serving `client_ids`: the **larger** of the format's
  data-rate-scaled buffer (`buffer_ms_for/1`) and the deepest per-endpoint floor
  (`client_buffer_ms/0`) among those clients.

  Taking the max is what keeps a synchronized group coherent: every client in the group
  plays each chunk at `chunk_ts + bufferMs`, so they must share one `bufferMs` to stay in
  sync. A good endpoint streams at 1s solo, but inherits a grouped slow endpoint's 3s so
  the whole group stays locked together and the slow link still gets the depth it needs.
  """
  def effective_buffer_ms(format, client_ids) when is_list(client_ids) do
    floors = client_buffer_ms() |> Map.take(client_ids) |> Map.values()

    # The leading 0 keeps `Enum.max/1` defined when no listed client has a floor.
    max(buffer_ms_for(format), Enum.max([0 | floors]))
  end

  @doc """
  Per-client socket send-buffer size (bytes); default 1 MiB, `0` to leave the OS default.

  A transient link stall (typically Wi-Fi) fills the kernel send buffer and makes
  `gen_tcp.send` **block** — which freezes the session and starves that client's Time-sync
  replies (same process/mailbox), so the client resyncs its clock and **pops**, even with a
  deep audio buffer. A large send buffer lets the stall queue in the kernel and the write
  return immediately, keeping the session responsive. Applied as `sndbuf` plus the inet
  driver's `high_watermark`/`low_watermark` (send blocks only past the high watermark).
  """
  def send_buffer_bytes, do: config(:send_buffer_bytes, 1_048_576)

  # Snapcast carries 24-bit samples in a 32-bit word on the wire (see `Stream.wire_pcm/2`).
  # Any other unrecognized depth is costed as 2 bytes per sample.
  defp wire_sample_bytes(24), do: 4
  defp wire_sample_bytes(bits) when bits in [16, 32], do: div(bits, 8)
  defp wire_sample_bytes(_bits), do: 2

  @doc "Default bitrate used when transcoding speech/long-form sources to Opus."
  def opus_bitrate, do: config(:opus_bitrate, "96k")

  @doc """
  Opus frame duration in milliseconds (sets the per-packet / WireChunk cadence).

  Must be a valid Opus frame size — one of 2.5, 5, 10, 20, 40, 60 ms (integer ms here, so
  10/20/40/60). Small frames keep WireChunks tiny and latency low; 20ms is the default.
  """
  def opus_frame_ms, do: config(:opus_frame_ms, 20)

  @doc "FLAC compression level used by ffmpeg for Snapcast FLAC transport."
  def flac_compression_level, do: config(:flac_compression_level, 5)

  @doc """
  Whether ffmpeg reads its input at realtime speed (`-re`).

  Default `true`: ffmpeg is paced to 1× so the stream buffer stays bounded — without it
  a long source floods the buffer (the `buffer <> data` accumulation goes O(n²) and can
  stall the stream process). The trade-off is that frames are produced just-in-time, so
  an encode/scheduling hiccup can momentarily starve the pacer (more likely at high
  sample rates). Set `false` to let ffmpeg race ahead, which keeps the pacer reliably
  fed at the cost of an unbounded server-side buffer — use only for short sources.
  """
  def realtime_input?, do: config(:realtime_input, true)

  @doc """
  FLAC frame size (samples per frame) for the FLAC transport.

  Each WireChunk carries one whole FLAC frame, so the frame size sets the chunk cadence
  and end-to-end latency. Keep it small (snapcast's own server uses 1152, ~26ms at 48k)
  so chunks stay PCM-sized — large frames make each blocking socket write hold up the
  session's Time-sync replies, which can trip a client's sync timeout and make it
  reconnect.
  """
  def flac_frame_size, do: config(:flac_frame_size, 1152)

  @doc """
  Normalize a transport codec name to `:pcm | :opus | :flac` (or `nil`).

  Accepts an atom or a string; strings are trimmed and matched case-insensitively.
  Any other term, or an unrecognized name, yields `nil`.
  """
  @spec normalize_transport_codec(term()) :: transport_codec() | nil
  def normalize_transport_codec(codec) when is_atom(codec) do
    codec
    |> Atom.to_string()
    |> normalize_transport_codec()
  end

  def normalize_transport_codec(codec) when is_binary(codec) do
    case codec |> String.trim() |> String.downcase() do
      "pcm" -> :pcm
      "opus" -> :opus
      "flac" -> :flac
      _other -> nil
    end
  end

  def normalize_transport_codec(_codec), do: nil

  @doc """
  Normalize a PCM format to `{sample_rate, bits_per_sample, channels}`.

  Accepts a 3-tuple whose elements are integers or integer strings, or a
  `"rate:bits:channels"` string. Returns `nil` unless the rate and channel count are
  positive integers and the bit depth is 16, 24 or 32.
  """
  @spec normalize_format(term()) :: format() | nil
  def normalize_format({rate, bits, channels}) do
    with rate when is_integer(rate) and rate > 0 <- integer(rate),
         bits when bits in [16, 24, 32] <- integer(bits),
         channels when is_integer(channels) and channels > 0 <- integer(channels) do
      {rate, bits, channels}
    else
      _invalid -> nil
    end
  end

  def normalize_format(format) when is_binary(format) do
    # `parts: 3` leaves any extra ":" segment glued to the last field, which then
    # fails integer parsing — so over-long strings are rejected rather than truncated.
    case String.split(format, ":", parts: 3) do
      [rate, bits, channels] ->
        normalize_format({rate, bits, channels})

      _invalid ->
        nil
    end
  end

  def normalize_format(_format), do: nil

  @doc "TCP port to listen on (snapclients default to 1704)."
  def port, do: config(:port, 1704)

  @doc "TCP address to bind the server to."
  def bind_ip, do: config(:bind_ip, {0, 0, 0, 0})

  @doc "Whether the server is started in the supervision tree."
  def enabled?, do: config(:enabled, false)

  @doc "The configured `Snapcast.Listener` module, if any."
  def listener, do: config(:listener, nil)

  @doc """
  Whether to supervise a local snapclient for this machine.

  True only when `:local_client` is neither `false` nor `nil` (default `true`) **and**
  `snapclient_path/0` resolves to an executable path. Always returns a boolean.
  """
  @spec local_client_enabled?() :: boolean()
  def local_client_enabled? do
    # `and` needs a strict boolean on its left, so coerce the raw config value first;
    # a `nil` setting then disables the client instead of raising `BadBooleanError`.
    config(:local_client, true) not in [nil, false] and is_binary(snapclient_path())
  end

  @doc """
  Path to the local snapclient executable, if available.

  A non-empty `:snapclient_path` setting wins; otherwise `snapclient` is looked up on
  the `PATH`, giving `nil` when it is not found.
  """
  @spec snapclient_path() :: String.t() | nil
  def snapclient_path do
    case config(:snapclient_path, nil) do
      path when is_binary(path) and path != "" -> path
      _missing -> System.find_executable("snapclient")
    end
  end

  @doc "Stable host id for the supervised local snapclient."
  def local_client_id, do: config(:local_client_id, "snapcast-local")

  @doc "URL used by the supervised local snapclient."
  def local_client_url, do: config(:local_client_url, "tcp://127.0.0.1:#{port()}")

  @doc "snapclient log filter used by the supervised local client."
  def local_client_logfilter, do: config(:local_client_logfilter, "*:info")

  @doc "Whether to advertise the server over mDNS/DNS-SD."
  def advertise?, do: config(:advertise, true)

  @doc "mDNS service name advertised for the server."
  def advertise_name, do: config(:advertise_name, "Snapcast")

  @doc """
  Path to the DNS-SD publisher executable, if available.

  A non-empty `:dns_sd_path` setting wins; otherwise `dns-sd` is looked up on the
  `PATH`, giving `nil` when it is not found.
  """
  @spec dns_sd_path() :: String.t() | nil
  def dns_sd_path do
    case config(:dns_sd_path, nil) do
      path when is_binary(path) and path != "" -> path
      _missing -> System.find_executable("dns-sd")
    end
  end

  @doc """
  Path to the ffmpeg executable used to decode sources to PCM.

  A non-empty `:ffmpeg_path` setting wins; otherwise `ffmpeg` is looked up on the
  `PATH`, falling back to `"/usr/bin/ffmpeg"`. An empty-string setting is treated as
  unset, matching `snapclient_path/0` and `dns_sd_path/0`.
  """
  @spec ffmpeg_path() :: String.t()
  def ffmpeg_path do
    case config(:ffmpeg_path, nil) do
      path when is_binary(path) and path != "" -> path
      _missing -> System.find_executable("ffmpeg") || "/usr/bin/ffmpeg"
    end
  end

  @doc """
  Play a source to the given client ids.

  `source` is either a binary path/URL (decoded by ffmpeg) or a 0-arity function
  returning one, resolved lazily when the stream starts (e.g. for short-lived URLs).

  opts: `:position_ms`, `:endpoint` (opaque term echoed back in listener events),
  `:duration_ms`, `:transport_codec`, `:format`.
  """
  defdelegate play(source, client_ids, opts \\ []), to: Server

  @doc "Delegates to `Snapcast.Server.pause/0`."
  defdelegate pause(), to: Server

  @doc "Delegates to `Snapcast.Server.resume/0`."
  defdelegate resume(), to: Server

  @doc "Delegates to `Snapcast.Server.seek/2`; `playback_gen` defaults to `nil`."
  defdelegate seek(position_ms, playback_gen \\ nil), to: Server

  @doc "Stop the current stream."
  defdelegate stop_playback(), to: Server

  @doc "Delegates to `Snapcast.Server.set_volume/2`."
  defdelegate set_volume(client_id, volume), to: Server

  @doc "List connected clients."
  defdelegate clients(), to: Server

  @doc """
  The server's supervision subtree, gated by `enabled?/0`.

  Returns the `SessionSupervisor` + `Server` (plus the mDNS advertiser and a local
  snapclient when configured), or `[]` when not enabled.
  """
  @spec children() :: [{module(), keyword()} | module()]
  def children do
    if enabled?() do
      [
        {DynamicSupervisor, name: Snapcast.SessionSupervisor, strategy: :one_for_one},
        {Server, port: port()}
      ] ++
        optional_child(advertise?(), Snapcast.Advertiser) ++
        optional_child(local_client_enabled?(), Snapcast.LocalClient)
    else
      []
    end
  end

  # Wraps `child` in a list when `enabled` is truthy, so it can be appended with `++`.
  defp optional_child(enabled, child) do
    if enabled, do: [child], else: []
  end

  # Coerces an integer or a whitespace-padded integer string to an integer; anything
  # else (including strings with trailing non-digits) becomes `nil`.
  defp integer(value) when is_integer(value), do: value

  defp integer(value) when is_binary(value) do
    case Integer.parse(String.trim(value)) do
      {integer, ""} -> integer
      _invalid -> nil
    end
  end

  defp integer(_value), do: nil

  # Reads `key` from the `:snapcast` application environment at call time.
  defp config(key, default), do: Application.get_env(:snapcast, key, default)
end