# examples/voice_cloning.exs
#
# Voice Cloning example — clone a voice from reference audio.
#
# Usage:
# mix run examples/voice_cloning.exs --reference path/to/voice.wav
# mix run examples/voice_cloning.exs --reference speaker.wav --text "你好世界"
# mix run examples/voice_cloning.exs --reference speaker.wav --text "Bonjour!" --device mps
#
# For ultimate cloning (highest fidelity), also provide the transcript:
# mix run examples/voice_cloning.exs --reference speaker.wav --prompt-text "transcript here"
# Parse the command-line switches.
#
# `strict:` is used (rather than `switches:`) so that unknown switches and
# values that fail type conversion (e.g. `--steps ten`) are returned in the
# third tuple element instead of being silently dropped.
{opts, _args, invalid} =
  OptionParser.parse(System.argv(),
    strict: [
      reference: :string,
      text: :string,
      prompt_text: :string,
      device: :string,
      output: :string,
      steps: :integer,
      cfg: :float,
      denoise: :boolean
    ]
  )

# Abort on bad switches rather than quietly falling back to the defaults.
if invalid != [] do
  Enum.each(invalid, fn
    {switch, nil} ->
      IO.puts(:stderr, "Error: unknown option or missing value: #{switch}")

    {switch, value} ->
      IO.puts(:stderr, "Error: invalid value #{inspect(value)} for option #{switch}")
  end)

  System.halt(1)
end
# Validate the mandatory --reference switch before any model work starts.
# Each failure prints a message to stderr and exits with status 1.
reference = opts[:reference]

cond do
  is_nil(reference) ->
    IO.puts(:stderr, "Error: --reference path/to/voice.wav is required")
    System.halt(1)

  not File.exists?(reference) ->
    IO.puts(:stderr, "Error: reference audio not found: #{reference}")
    System.halt(1)

  # File.exists?/1 is also true for directories, so reject those explicitly.
  File.dir?(reference) ->
    IO.puts(:stderr, "Error: reference audio is a directory, not a file: #{reference}")
    System.halt(1)

  true ->
    :ok
end
# Resolve every optional switch, substituting the script default when the
# switch was not given on the command line.
text = Keyword.get(opts, :text, "This is a cloned voice generated by VoxCPM2 through Elixir.")
device = Keyword.get(opts, :device, "cuda")
output = Keyword.get(opts, :output, "voice_clone.wav")
steps = Keyword.get(opts, :steps, 10)
cfg = Keyword.get(opts, :cfg, 2.0)
denoise = Keyword.get(opts, :denoise, false)

# No default: stays nil unless --prompt-text was supplied.
prompt_text = Keyword.get(opts, :prompt_text)
# Echo the effective settings, one line each.
[
  "==> Voice Cloning",
  "==> Reference: #{reference}",
  "==> Text: #{text}",
  "==> Device: #{device}"
]
|> Enum.each(&IO.puts/1)

# The denoiser is only requested when --denoise was passed.
{:ok, pid} = VoxCPMEx.start_link(device: device, load_denoiser: denoise)

# Second argument is presumably a timeout in milliseconds — confirm against
# VoxCPMEx.await_ready/2.
ready_timeout = 120_000
:ok = VoxCPMEx.await_ready(pid, ready_timeout)
IO.puts("==> Model ready!")
# Extra options for "ultimate cloning": when a transcript was supplied, the
# reference file is passed again as :prompt_wav_path together with
# :prompt_text. Without a transcript nothing extra is added.
transcript_opts =
  if prompt_text do
    IO.puts("==> Ultimate cloning mode (with transcript)")
    [prompt_wav_path: reference, prompt_text: prompt_text]
  else
    []
  end

# Final generation options: transcript options (if any) come first, followed
# by the options that are always sent.
gen_opts =
  transcript_opts ++
    [
      audio_prompt: reference,
      inference_timesteps: steps,
      cfg_value: cfg,
      denoise: denoise
    ]
# Generate the audio and write it out. Any result other than the matched
# success shape raises a MatchError and stops the script.
{:ok, audio} = pid |> VoxCPMEx.generate(text, gen_opts)
:ok = audio |> VoxCPMEx.save(output)

# Size is computed only after the save has succeeded.
saved_bytes = byte_size(audio)
IO.puts("==> Audio saved to #{output} (#{saved_bytes} bytes)")
IO.puts("==> Done! 🎙️")