defmodule AirPlay.V2.GroupPlayer do
@moduledoc """
Minimal AirPlay 2 multi-room group player.
This mirrors the working airplay2-rs group model:
* connect and setup every receiver independently
* use the first receiver as the primary PTP/BMCA clock source
* send SETPEERS to all receivers
* decode audio once
* share one RTP sequence/timestamp timeline across all receivers
* encrypt each audio packet with the receiver-specific stream key
"""
alias AirPlay.{Alac, Decoder, Rtp, Source}
alias AirPlay.V2.{Crypto, Pairing, Plist, PtpBmca, Rtsp2, Setup}
# PCM sample rate in Hz. Used to derive the per-packet duration and as the
# spacing (in RTP timestamp units) between periodic sync packets.
@sample_rate 44_100
# Samples carried per RTP audio packet; the RTP timestamp advances by this
# amount after every packet.
@samples_per_packet 352
# Minimum time, in microseconds, between feedback requests to the receivers.
@feedback_interval_us 2_000_000
# SSRC written into every RTP audio header and passed to the audio encryption call.
@ssrc 0
# Streaming decode (see `AirPlay.Decoder`): buffer a little audio before the
# first packet so receivers start cleanly, then pull frames on demand at ~1×
# real time instead of decoding the whole file up front.
# Default frame count passed to `Decoder.await_ready/3` (overridable via `:prebuffer_frames`).
@default_prebuffer_frames 125
# Default timeout passed to `Decoder.await_ready/3` (overridable via `:prebuffer_timeout_ms`).
@default_prebuffer_timeout_ms 5_000
# Number of frames requested from the decoder per `Decoder.take/2` call.
@take_batch 32
# Sleep, in milliseconds, before polling again when the decoder has no frames ready.
@idle_poll_ms 5
@doc """
Requests a volume change on a running AP2 group player process.

Sends a `{AirPlay.V2.GroupPlayer, :set_volume, volume}` message to `pid` and
returns `:ok` immediately; the player's send loop picks the message up between
audio frames and applies the volume to each receiver.
"""
@spec set_volume(pid(), number()) :: :ok
def set_volume(pid, volume) when is_pid(pid) and is_number(volume) do
  request = {__MODULE__, :set_volume, volume}
  _sent = send(pid, request)
  :ok
end
@doc """
Asks a running AP2 group player process to stop.

Sends a `{AirPlay.V2.GroupPlayer, :stop}` message to `pid` and returns `:ok`
immediately; the player's send loop halts when it next checks its mailbox.
"""
@spec stop(pid()) :: :ok
def stop(pid) when is_pid(pid) do
  request = {__MODULE__, :stop}
  _sent = send(pid, request)
  :ok
end
@doc """
Plays a local or HTTP audio source to a group of AirPlay 2 receivers.

`receivers` is a list of maps carrying the receiver address under `:host` or
`"host"` (a string), plus an optional `:port` / `"port"`. Entries without a
usable host are dropped.

Returns `{:error, :need_at_least_two_airplay2_receivers}` when fewer than two
usable receivers remain, the error from receiver preparation when one of them
cannot be set up, and otherwise the result of the playback session.
"""
@spec play_file([map()], String.t(), keyword()) :: {:ok, map()} | {:error, term()}
def play_file(receivers, path, opts \\ []) when is_list(receivers) do
  usable =
    Enum.flat_map(receivers, fn receiver ->
      case normalize_receiver(receiver) do
        nil -> []
        normalized -> [normalized]
      end
    end)

  case usable do
    [_first, _second | _rest] ->
      with {:ok, targets} <- prepare_targets(usable, opts) do
        play_prepared_targets(targets, path, opts)
      end

    _too_few ->
      {:error, :need_at_least_two_airplay2_receivers}
  end
end
# Runs one playback session over already-prepared targets.
#
# Starts the PTP/BMCA process against the first (primary) target, then sends
# SETPEERS, FLUSH and the optional initial volume to every target, starts the
# streaming decoder and sends the audio.
#
# Returns the `send_audio/4` result on success, or the first failing step's
# value. The targets and the PTP process are always released, and the decoder
# is always stopped once started, even when a step raises (for example the
# `:ok = :gen_udp.send(...)` assertions in the send loop).
defp play_prepared_targets([primary | _] = targets, path, opts) do
  case PtpBmca.start_link(primary.host_ip, local_ip: local_ip_tuple(primary.local_ip)) do
    {:ok, ptp} ->
      try do
        with :ok <- wait_for_ptp(Keyword.get(opts, :ptp_settle_ms, 500)),
             {:ok, targets} <- send_setpeers(targets, peer_addresses(targets)),
             {:ok, targets} <- flush_all(targets),
             {:ok, targets} <- maybe_set_volume_all(targets, Keyword.get(opts, :volume)),
             {:ok, decoder} <- start_decoder(path, opts) do
          # Stream-decode and send concurrently (see `AirPlay.V2.Player`): start
          # after a short prebuffer rather than waiting for a full-file decode.
          # The group's shared timeline comes from the primary's clock id
          # (captured at PtpBmca.start_link), so we do NOT additionally wait on
          # the PTP offset measurement — it can take many seconds (or never
          # complete) and isn't what keeps the speakers together here.
          try do
            send_audio(targets, ptp, decoder, opts)
          after
            Decoder.stop(decoder)
          end
        end
      after
        # `targets` here is the list passed into this function; the rebinding
        # inside `with` does not escape it.
        cleanup(targets, ptp)
      end

    {:error, _reason} = error ->
      cleanup(targets, nil)
      error
  end
end
# Reduces a caller-supplied receiver map to `%{host: host, port: port}`.
# Accepts atom or string keys; returns `nil` when there is no string host.
defp normalize_receiver(receiver) do
  case receiver do
    %{host: host} when is_binary(host) ->
      raw_port = Map.get(receiver, :port)
      %{host: host, port: normalize_port(raw_port)}

    %{"host" => host} when is_binary(host) ->
      raw_port = Map.get(receiver, "port")
      %{host: host, port: normalize_port(raw_port)}

    _unusable ->
      nil
  end
end
# Coerces a receiver port to a positive integer, falling back to 7000.
#
# Positive integers pass through. Strings must parse completely as a positive
# integer (so "0", "-1" and "7000abc" fall back to the default, matching the
# `> 0` rule of the integer clause). Anything else yields 7000.
defp normalize_port(port) when is_integer(port) and port > 0, do: port

defp normalize_port(port) when is_binary(port) do
  case Integer.parse(port) do
    {parsed, ""} when parsed > 0 -> parsed
    _invalid -> 7000
  end
end

defp normalize_port(_port), do: 7000
# Prepares every receiver in order, stopping at the first failure.
#
# Returns `{:ok, targets}` in receiver order. On failure, the targets prepared
# so far are cleaned up and `{:error, {:prepare_group_receiver, host, reason}}`
# is returned.
defp prepare_targets(receivers, opts) do
  outcome =
    Enum.reduce_while(receivers, [], fn receiver, prepared ->
      case prepare_target(receiver, opts) do
        {:ok, target} ->
          {:cont, [target | prepared]}

        {:error, reason} ->
          prepared |> Enum.reverse() |> cleanup(nil)
          {:halt, {:error, {:prepare_group_receiver, receiver.host, reason}}}
      end
    end)

  case outcome do
    prepared when is_list(prepared) -> {:ok, Enum.reverse(prepared)}
    failure -> failure
  end
end
# Connects to one receiver and runs its setup sequence: transient pairing,
# session SETUP, event channel, control socket, stream SETUP, RECORD, and the
# audio socket.
#
# Returns `{:ok, target}` where `target` holds the sockets, the latest pairing
# state and the negotiated ports. On failure the failing step's value is
# returned unchanged, after releasing whatever had already been acquired (RTSP
# connection, event socket, control socket), so a half-prepared receiver does
# not leak resources.
defp prepare_target(%{host: host, port: port} = receiver, opts) do
  with {:ok, host_ip} <- parse_host(host),
       {:ok, pairing} <- Pairing.transient(host, port: port) do
    base = %{host: host, port: port, receiver: receiver, host_ip: host_ip, pairing: pairing}

    base
    |> open_target_session(opts)
    |> release_on_failure(base)
  end
end

# Session SETUP plus the event channel; hands over to stream setup on success.
defp open_target_session(base, opts) do
  with {:ok, local_ip} <- local_ip(base.pairing),
       {:ok, session_body, pairing} <- setup_session(base.pairing, local_ip),
       {:ok, event_sock} <- connect_event(base.host, session_body) do
    session =
      Map.merge(base, %{
        local_ip: local_ip,
        session_body: session_body,
        pairing: pairing,
        event_sock: event_sock
      })

    session
    |> open_target_stream(opts)
    |> release_on_failure(%{event_sock: event_sock})
  end
end

# Opens the local control socket, then negotiates the stream using its port.
defp open_target_stream(session, opts) do
  with {:ok, control_sock, control_port} <- open_control_socket() do
    session
    |> Map.put(:control_sock, control_sock)
    |> negotiate_target_stream(control_port, opts)
    |> release_on_failure(%{control_sock: control_sock})
  end
end

# Stream SETUP, RECORD, port extraction and the audio socket. The audio socket
# is the last acquisition, so nothing here needs releasing on failure.
defp negotiate_target_stream(target, control_port, opts) do
  with {:ok, stream_body, pairing} <- setup_stream(target.pairing, control_port, opts),
       {:ok, pairing} <- record(pairing),
       {:ok, data_port, remote_control_port} <- stream_ports(stream_body),
       {:ok, audio_sock} <- :gen_udp.open(0, [:binary, active: false]) do
    {:ok,
     Map.merge(target, %{
       stream_body: stream_body,
       pairing: pairing,
       audio_sock: audio_sock,
       data_port: data_port,
       remote_control_port: remote_control_port
     })}
  end
end

# Passes a successful result through; otherwise releases the resources listed
# in `acquired` (a partial target map understood by `cleanup_target/1`) and
# returns the failure unchanged.
defp release_on_failure({:ok, _target} = ok, _acquired), do: ok

defp release_on_failure(failure, acquired) do
  cleanup_target(acquired)
  failure
end
# Issues the session SETUP, advertising `local_ip` and timing port 319.
# Returns `{:ok, body, pairing}` on a 200 reply, `{:error, {:setup_session,
# status, body}}` on any other status, and any other result unchanged.
defp setup_session(pairing, local_ip) do
  reply = Setup.session(pairing, local_addresses: [local_ip], timing_port: 319)

  with {:ok, status, _headers, body, updated_pairing} <- reply do
    if status === 200 do
      {:ok, body, updated_pairing}
    else
      {:error, {:setup_session, status, body}}
    end
  end
end
# Issues the stream SETUP for `control_port`, with latency bounds taken from
# `opts` (`:latency_min` default 22_050, `:latency_max` default 88_200).
# Returns `{:ok, body, pairing}` on a 200 reply, `{:error, {:setup_stream,
# status, body}}` on any other status, and any other result unchanged.
defp setup_stream(pairing, control_port, opts) do
  latency = [
    latency_min: Keyword.get(opts, :latency_min, 22_050),
    latency_max: Keyword.get(opts, :latency_max, 88_200)
  ]

  with {:ok, status, _headers, body, updated_pairing} <-
         Setup.stream(pairing, control_port, latency) do
    if status === 200 do
      {:ok, body, updated_pairing}
    else
      {:error, {:setup_stream, status, body}}
    end
  end
end
# Issues RECORD with sequence 0 / timestamp 0. Returns `{:ok, pairing}` on any
# 2xx reply, `{:error, {:record, status, body}}` on other statuses, and any
# other result unchanged.
defp record(pairing) do
  with {:ok, status, _headers, body, updated_pairing} <- Setup.record(pairing, 0, 0) do
    if status in 200..299 do
      {:ok, updated_pairing}
    else
      {:error, {:record, status, body}}
    end
  end
end
# Sends SETPEERS with `addresses` to every target, stopping at the first
# failure. Returns `{:ok, targets}` with refreshed pairing state.
defp send_setpeers(targets, addresses) do
  map_targets(targets, &set_peers_on(&1, addresses))
end

# SETPEERS for a single target; a non-2xx reply becomes
# `{:error, {:set_peers, host, status, body}}`.
defp set_peers_on(target, addresses) do
  with {:ok, status, _headers, body, pairing} <- Setup.set_peers(target.pairing, addresses) do
    if status in 200..299 do
      {:ok, %{target | pairing: pairing}}
    else
      {:error, {:set_peers, target.host, status, body}}
    end
  end
end
# Sends FLUSH (sequence 0, timestamp 0) to every target, stopping at the first
# failure. Returns `{:ok, targets}` with refreshed pairing state.
defp flush_all(targets) do
  map_targets(targets, fn target ->
    target.pairing
    |> Setup.flush(0, 0)
    |> flush_outcome(target)
  end)
end

# Translates one FLUSH reply into the `{:ok, target}` / `{:error, reason}`
# shape; a non-2xx reply becomes `{:error, {:flush, host, status, body}}`.
defp flush_outcome({:ok, status, _headers, _body, pairing}, target) when status in 200..299,
  do: {:ok, %{target | pairing: pairing}}

defp flush_outcome({:ok, status, _headers, body, _pairing}, target),
  do: {:error, {:flush, target.host, status, body}}

defp flush_outcome(other, _target), do: other
# Applies the initial volume to every target when one was requested; a `nil`
# volume leaves the targets untouched.
defp maybe_set_volume_all(targets, volume) do
  if is_nil(volume) do
    {:ok, targets}
  else
    map_targets(targets, &set_target_volume(&1, volume))
  end
end
# Sets the volume on one target. Returns `{:ok, target}` with refreshed pairing
# state on a 2xx reply, `{:error, {:set_volume, host, status, body}}` on other
# statuses, and any other result unchanged.
defp set_target_volume(target, volume) do
  with {:ok, status, _headers, body, pairing} <- Setup.set_volume(target.pairing, volume) do
    if status in 200..299 do
      {:ok, %{target | pairing: pairing}}
    else
      {:error, {:set_volume, target.host, status, body}}
    end
  end
end
# Applies `fun` to each target in order, collecting the updated targets.
# Stops at the first `{:error, reason}` and returns it; otherwise returns
# `{:ok, updated_targets}` in the original order.
defp map_targets(targets, fun) do
  outcome =
    Enum.reduce_while(targets, [], fn target, updated ->
      case fun.(target) do
        {:ok, next} -> {:cont, [next | updated]}
        {:error, reason} -> {:halt, {:error, reason}}
      end
    end)

  case outcome do
    updated when is_list(updated) -> {:ok, Enum.reverse(updated)}
    failure -> failure
  end
end
# Sleeps for `milliseconds` when it is a positive integer; any other value is
# a no-op. Always returns `:ok`.
defp wait_for_ptp(milliseconds) do
  if is_integer(milliseconds) and milliseconds > 0 do
    Process.sleep(milliseconds)
  end

  :ok
end
# Builds the SETPEERS address list: each target's host followed by the local
# address used to reach it, with `nil`s dropped and duplicates removed
# (first occurrence wins).
defp peer_addresses(targets) do
  for target <- targets,
      address <- [target.host, target.local_ip],
      not is_nil(address),
      uniq: true,
      do: address
end
# Opens the TCP event channel advertised as "eventPort" in the session SETUP
# reply. This is best-effort: a missing or invalid port, or a failed connect
# (2 s timeout), yields `{:ok, nil}` rather than an error.
defp connect_event(host, session_body) do
  decoded = Plist.decode!(session_body)

  case Map.get(decoded, "eventPort") do
    event_port when is_integer(event_port) and event_port > 0 ->
      address = String.to_charlist(host)

      case :gen_tcp.connect(address, event_port, [:binary, active: false], 2_000) do
        {:ok, sock} -> {:ok, sock}
        {:error, _reason} -> {:ok, nil}
      end

    _missing_or_invalid ->
      {:ok, nil}
  end
end
# Closes the event channel socket; `nil` (no event channel) is a no-op.
defp close_event(sock) do
  case sock do
    nil -> :ok
    open -> :gen_tcp.close(open)
  end
end
# Opens a passive binary UDP socket on an OS-assigned port and reports that
# port. Returns `{:ok, socket, port}` or the failing call's result.
defp open_control_socket do
  case :gen_udp.open(0, [:binary, active: false]) do
    {:ok, sock} ->
      case :inet.port(sock) do
        {:ok, port} -> {:ok, sock, port}
        failure -> failure
      end

    failure ->
      failure
  end
end
# Drives the send loop: pulls decoded frames lazily and fans each one out to
# every target until the stream ends or a stop request arrives.
#
# The clock id comes from the PTP process, else the primary's session reply,
# else all zeroes. `:render_delay_ms` (default 200) is converted to
# nanoseconds for the sync packets.
#
# Returns `{:ok, stats}` with the target count, packets sent, the final sync
# sequence counter, whether playback was stopped, and the PTP offset.
defp send_audio(targets, ptp, decoder, opts) do
  delay_ns = Keyword.get(opts, :render_delay_ms, 200) * 1_000_000
  group_clock_id = ptp.clock_id || session_clock_id(hd(targets).session_body) || <<0::64>>
  per_frame_us = div(@samples_per_packet * 1_000_000, @sample_rate)
  t0_us = System.monotonic_time(:microsecond)

  context = %{
    clock_id: group_clock_id,
    frame_duration_us: per_frame_us,
    ptp: ptp,
    render_delay_ns: delay_ns,
    started_us: t0_us
  }

  # {seq, rtp, sync_seq, last_sync, targets, last_feedback_us, stopped?, packets}
  initial = {0, 0, 0, nil, targets, t0_us, false, 0}

  final =
    decoder
    |> decoder_frame_stream()
    |> Stream.with_index()
    |> Enum.reduce_while(initial, &send_group_audio_frame(&1, &2, context))

  {_seq, _rtp, sync_seq, _last_sync, final_targets, _last_feedback_us, stopped?, packets} = final

  stats = %{
    targets: length(final_targets),
    packets: packets,
    sync_packets: sync_seq,
    stopped?: stopped?,
    ptp: PtpBmca.offset(ptp)
  }

  {:ok, stats}
end
# Reducer step for the send loop. Handles pending control messages first: a
# stop request halts the loop with `stopped?` set to true; otherwise the frame
# is sent to the group.
defp send_group_audio_frame({frame, index}, state, context) do
  {seq, rtp, sync_seq, last_sync, targets, last_feedback_us, _stopped?, count} = state

  case receive_controls(targets) do
    {:cont, live_targets} ->
      active_state = {seq, rtp, sync_seq, last_sync, live_targets, last_feedback_us, count}
      send_active_group_audio_frame(frame, index, active_state, context)

    {:stop, live_targets} ->
      {:halt, {seq, rtp, sync_seq, last_sync, live_targets, last_feedback_us, true, count}}
  end
end
# Sends one frame to the whole group: an optional sync packet, the audio
# packets, optional feedback, then a pacing sleep. Returns `{:cont, state}`
# with the sequence number advanced modulo 2^16 and the RTP timestamp
# advanced by one packet of samples modulo 2^32.
defp send_active_group_audio_frame(frame, index, state, context) do
  {seq, rtp, sync_seq, last_sync, targets, last_feedback_us, count} = state
  first? = index == 0

  {next_sync_seq, next_last_sync} =
    maybe_send_group_sync(first?, rtp, sync_seq, last_sync, targets, context)

  send_group_audio_packets(frame, first?, seq, rtp, targets)
  {next_targets, next_feedback_us} = maybe_send_group_feedback(targets, last_feedback_us)
  pace_audio_frame(index, context)

  next_seq = rem(seq + 1, 0x1_0000)
  next_rtp = rem(rtp + @samples_per_packet, 0x1_0000_0000)

  {:cont,
   {next_seq, next_rtp, next_sync_seq, next_last_sync, next_targets, next_feedback_us, false,
    count + 1}}
end
# Sends a feedback request to every target once at least
# `@feedback_interval_us` has elapsed since the previous round. Failed
# requests are ignored and leave that target's pairing state unchanged.
# Returns `{targets, last_feedback_us}`.
defp maybe_send_group_feedback(targets, last_feedback_us) do
  now_us = System.monotonic_time(:microsecond)
  elapsed_us = now_us - last_feedback_us

  if elapsed_us < @feedback_interval_us do
    {targets, last_feedback_us}
  else
    {Enum.map(targets, &refresh_target_feedback/1), now_us}
  end
end

# Feedback for a single target; keeps the old target on any non-2xx/error reply.
defp refresh_target_feedback(target) do
  case Setup.feedback(target.pairing) do
    {:ok, status, _headers, _body, pairing} when status in 200..299 ->
      %{target | pairing: pairing}

    _failed ->
      target
  end
end
# Sends a PTP sync packet to every target's control port when one is due: on
# the first frame, when none has been sent yet, or once at least
# `@sample_rate` RTP timestamp units (one second of audio) have passed since
# the previous sync.
#
# The elapsed distance is computed modulo 2^32 because the caller wraps the
# RTP timestamp at 2^32; a plain subtraction would go negative after the wrap
# and suppress every further periodic sync.
#
# Returns `{sync_seq, last_sync}`: the next sync sequence number (wrapping at
# 2^16) and the RTP timestamp of the most recent sync.
defp maybe_send_group_sync(first?, rtp, sync_seq, last_sync, targets, context) do
  if first? or is_nil(last_sync) or
       Integer.mod(rtp - last_sync, 4_294_967_296) >= @sample_rate do
    ptp_time = PtpBmca.receiver_time_ns(context.ptp) + context.render_delay_ns
    next_rtp = rtp + @samples_per_packet
    sync = Rtp.ptp_sync(sync_seq, rtp, ptp_time, next_rtp, context.clock_id, sync_seq == 0)

    Enum.each(targets, fn target ->
      :ok = :gen_udp.send(target.control_sock, target.host_ip, target.remote_control_port, sync)
    end)

    {rem(sync_seq + 1, 65_536), rtp}
  else
    {sync_seq, last_sync}
  end
end
# Encodes the frame once, then sends it to every target. All targets share
# the same RTP header (sequence number, timestamp, SSRC); only the payload
# encryption differs, using each target's own audio key. The header's second
# byte is 0xE0 for the first packet and 0x60 afterwards.
defp send_group_audio_packets(frame, first?, seq, rtp, targets) do
  encoded = Alac.encode_stereo16(frame)
  second_byte = if first?, do: 0xE0, else: 0x60
  rtp_header = <<0x80, second_byte, seq::16, rtp::32, @ssrc::32>>

  for target <- targets do
    encrypted = Crypto.audio_encrypt(target.pairing.audio_key, rtp, @ssrc, seq, encoded)
    :ok = :gen_udp.send(target.audio_sock, target.host_ip, target.data_port, rtp_header <> encrypted)
  end

  :ok
end
# Sleeps until the moment frame `index + 1` is due, measured from
# `context.started_us`, so packets leave at the real-time rate.
#
# The deadline is derived from the total sample count rather than from
# `index * frame_duration_us`: the per-frame duration is truncated to whole
# microseconds (7981 instead of ~7981.86), and multiplying that truncated
# value accumulates the error, letting the sender drift ahead of real time on
# long streams. Sleeps shorter than 1 ms are skipped.
defp pace_audio_frame(index, context) do
  due_offset_us = div((index + 1) * @samples_per_packet * 1_000_000, @sample_rate)
  sleep_us = context.started_us + due_offset_us - System.monotonic_time(:microsecond)
  if sleep_us > 1_000, do: Process.sleep(div(sleep_us, 1_000))
end
# Drains pending control messages without blocking. Volume requests are
# applied to every target (per-target failures are ignored) and draining
# continues; a stop request returns `{:stop, targets}`. With nothing pending,
# returns `{:cont, targets}`.
defp receive_controls(targets) do
  receive do
    {__MODULE__, :stop} ->
      {:stop, targets}

    {__MODULE__, :set_volume, volume} ->
      targets
      |> apply_group_volume(volume)
      |> receive_controls()
  after
    0 -> {:cont, targets}
  end
end

# Sets the volume on each target, keeping the previous target on failure.
defp apply_group_volume(targets, volume) do
  Enum.map(targets, fn target ->
    case set_target_volume(target, volume) do
      {:ok, updated} -> updated
      _failed -> target
    end
  end)
end
# Extracts the integer "dataPort" and "controlPort" of the first stream in a
# stream SETUP reply. Returns `{:ok, data_port, control_port}`, or
# `{:error, {:missing_stream_ports, decoded}}` when they are absent or not
# integers.
defp stream_ports(stream_body) do
  decoded = Plist.decode!(stream_body)

  with %{"streams" => [%{"dataPort" => data_port, "controlPort" => control_port} | _]} <-
         decoded,
       true <- is_integer(data_port) and is_integer(control_port) do
    {:ok, data_port, control_port}
  else
    _unusable -> {:error, {:missing_stream_ports, decoded}}
  end
end
# Reads the integer "ClockID" under "timingPeerInfo" from a session SETUP
# reply and returns it as an 8-byte binary, or `nil` when it is not present.
defp session_clock_id(session_body) do
  with %{"timingPeerInfo" => %{"ClockID" => clock_id}} <- Plist.decode!(session_body),
       true <- is_integer(clock_id) do
    <<clock_id::64>>
  else
    _absent -> nil
  end
end
# Start a streaming ffmpeg decoder and wait for a small prebuffer, so the first
# RTP packets can go out almost immediately. `Source.stream_args/2` adds `-re`
# (read input at native rate), bounding the decoded PCM buffer for long files.
#
# Returns `{:ok, decoder}` once the prebuffer is ready. If the prebuffer does
# not arrive, the decoder is stopped and `{:error, {:decoder_not_ready,
# reason}}` is returned. A failed start is returned unchanged.
defp start_decoder(path, opts) do
  decoder_opts = [args: Source.stream_args(path, opts), ffmpeg: Keyword.get(opts, :ffmpeg)]

  with {:ok, decoder} <- Decoder.start_link(decoder_opts) do
    await_prebuffer(decoder, opts)
  end
end

# Waits for `:prebuffer_frames` frames within `:prebuffer_timeout_ms`.
defp await_prebuffer(decoder, opts) do
  wanted_frames = Keyword.get(opts, :prebuffer_frames, @default_prebuffer_frames)
  timeout_ms = Keyword.get(opts, :prebuffer_timeout_ms, @default_prebuffer_timeout_ms)

  case Decoder.await_ready(decoder, wanted_frames, timeout_ms) do
    {:ok, _frames} ->
      {:ok, decoder}

    {:error, reason} ->
      Decoder.stop(decoder)
      {:error, {:decoder_not_ready, reason}}
  end
end
# Lazily pull decoded frames so `send_audio` can pace them out one at a time.
# `{[], false}` means the decoder is momentarily behind (not yet at end of
# stream), so idle briefly and retry rather than spin.
defp decoder_frame_stream(decoder) do
  Stream.resource(
    fn -> :ok end,
    fn :ok -> pull_decoder_frames(decoder) end,
    fn :ok -> :ok end
  )
end

# One pull from the decoder, shaped as a `Stream.resource/3` step: halts at
# end of stream, emits nothing after a short sleep when no frames are ready,
# and otherwise emits the batch.
defp pull_decoder_frames(decoder) do
  case Decoder.take(decoder, @take_batch) do
    {[], true} ->
      {:halt, :ok}

    {[], false} ->
      Process.sleep(@idle_poll_ms)
      {[], :ok}

    {batch, _eos?} ->
      {batch, :ok}
  end
end
# Stops the PTP process (when one was started) and releases every target's
# sockets and RTSP connection. Always returns `:ok`.
defp cleanup(targets, ptp) do
  case ptp do
    absent when absent in [nil, false] -> :ok
    running -> PtpBmca.stop(running)
  end

  for target <- targets, do: cleanup_target(target)
  :ok
end
# Releases one target's resources: event socket, audio socket, control socket,
# then the RTSP connection. Missing keys are tolerated, so partial targets are
# fine. Anything raised, thrown or exited along the way is swallowed and
# `:ok` is returned.
defp cleanup_target(target) do
  try do
    target |> Map.get(:event_sock) |> close_event()
    target |> Map.get(:audio_sock) |> close_udp()
    target |> Map.get(:control_sock) |> close_udp()

    case target do
      %{pairing: %{rtsp: rtsp}} -> Rtsp2.close(rtsp)
      _no_rtsp -> :ok
    end
  catch
    _kind, _reason -> :ok
  end
end
# Closes a UDP socket; `nil` (socket never opened) is a no-op.
defp close_udp(sock) do
  case sock do
    nil -> :ok
    open -> :gen_udp.close(open)
  end
end
# Returns `{:ok, ip_string}` for the local address of the pairing's RTSP
# socket, or the `:inet.sockname/1` failure unchanged.
defp local_ip(pairing) do
  with {:ok, {address, _port}} <- :inet.sockname(pairing.rtsp.sock) do
    {:ok, ip_to_string(address)}
  end
end
# Parses a textual IP address into its tuple form. Raises `MatchError` when
# the string is not a valid address.
defp local_ip_tuple(ip) when is_binary(ip) do
  {:ok, address} = :inet.parse_address(String.to_charlist(ip))
  address
end
# Renders an IP address tuple as text using `:inet.ntoa/1`, so both IPv4
# 4-tuples ("192.168.1.5") and IPv6 8-tuples ("::1") produce a string that
# `:inet.parse_address/1` can read back. Joining the tuple elements with "."
# only worked for IPv4.
defp ip_to_string(ip), do: ip |> :inet.ntoa() |> List.to_string()
# Turns a host into an IP tuple. A 4-tuple is returned as-is; a string is
# parsed as an IP literal with `:inet.parse_address/1` (no name resolution),
# so a non-literal string yields `{:error, :einval}`.
defp parse_host(host) when is_binary(host) or (is_tuple(host) and tuple_size(host) == 4) do
  if is_binary(host) do
    :inet.parse_address(String.to_charlist(host))
  else
    {:ok, host}
  end
end
end