# examples/voice_design.exs
#
# Voice Design example — generate a novel voice from a natural-language description.
# No reference audio needed!
#
# Usage:
#   mix run examples/voice_design.exs
#   mix run examples/voice_design.exs --text "你好，欢迎来到语音合成演示"
#   mix run examples/voice_design.exs --control "A young woman, gentle and sweet voice"
#   mix run examples/voice_design.exs --device cpu

# Parse the command-line flags accepted by this example.
#
# `strict:` is used instead of `switches:` so that unknown flags and values of
# the wrong type (e.g. `--steps abc`) are reported in `invalid` rather than
# being dropped, which would make the script quietly run with its defaults.
# Positional arguments are not used by this script.
{opts, _args, invalid} =
  OptionParser.parse(System.argv(),
    strict: [
      text: :string,
      control: :string,
      device: :string,
      output: :string,
      steps: :integer,
      cfg: :float
    ]
  )

# Abort before loading the model if any flag was rejected by the parser.
if invalid != [] do
  Enum.each(invalid, fn
    # A nil value means the switch is unknown or was given without a value.
    {flag, nil} ->
      IO.puts(:stderr, "==> Unknown or incomplete option: #{flag}")

    # Otherwise the switch is known but its value failed type conversion.
    {flag, value} ->
      IO.puts(:stderr, "==> Invalid value for #{flag}: #{inspect(value)}")
  end)

  System.halt(1)
end

# Resolve every setting from the parsed flags, using the built-in default
# whenever a flag was not supplied.
voice_description =
  Keyword.get(opts, :control, "A warm, professional female voice, calm and clear")

spoken_text =
  Keyword.get(
    opts,
    :text,
    "Hello, welcome to VoxCPM2 voice design! This voice was created purely from a text description."
  )

target_device = Keyword.get(opts, :device, "cuda")
output_path = Keyword.get(opts, :output, "voice_design.wav")
step_count = Keyword.get(opts, :steps, 15)
guidance = Keyword.get(opts, :cfg, 2.0)

# Voice Design prompt format: the control description goes first, wrapped in
# parentheses, followed by a space and the text to be spoken.
prompt = "(" <> voice_description <> ") " <> spoken_text

# Echo the effective configuration before doing any work.
Enum.each(
  [
    "==> Voice Design",
    "==> Control: #{voice_description}",
    "==> Text: #{spoken_text}",
    "==> Device: #{target_device}"
  ],
  &IO.puts/1
)

# Start the VoxCPMEx process and block until it reports ready.
# NOTE(review): 120_000 is presumably a timeout in milliseconds — confirm
# against VoxCPMEx.await_ready/2.
{:ok, server} = VoxCPMEx.start_link(device: target_device)
:ok = VoxCPMEx.await_ready(server, 120_000)
IO.puts("==> Model ready!")

# Generate audio for the combined prompt with the chosen sampling settings.
generation_opts = [inference_timesteps: step_count, cfg_value: guidance]
{:ok, waveform} = VoxCPMEx.generate(server, prompt, generation_opts)

# Write the result to disk and report its size; byte_size/1 requires the
# generated audio to be a binary.
:ok = VoxCPMEx.save(waveform, output_path)
IO.puts("==> Audio saved to #{output_path} (#{byte_size(waveform)} bytes)")
IO.puts("==> Done! 🔊")

# Try different voice descriptions:
#
#   "A young woman, gentle and sweet voice"
#   "A deep male voice, authoritative and confident"
#   "An elderly person, wise and slow-paced"
#   "A cheerful child, energetic and bright"
#   "A calm narrator, suitable for audiobooks"
#   "A robot voice, mechanical and precise"
#   "A news anchor, professional and articulate"