# lib/planck/ai/models/llama_cpp.ex

defmodule Planck.AI.Models.LlamaCpp do
  @moduledoc """
  Factory for llama.cpp models via its OpenAI-compatible HTTP server.

  Unlike other catalog modules, this provides a factory function rather than
  a static list because the available model depends on what the user has loaded
  into their local llama.cpp server. `all/1` asks the server which models it
  currently exposes; `model/2` builds a model struct without any network call.

  ## Examples

      iex> Planck.AI.Models.LlamaCpp.model("llama3.2")
      %Planck.AI.Model{provider: :llama_cpp, base_url: "http://localhost:8080", ...}

      iex> Planck.AI.Models.LlamaCpp.model("mistral", base_url: "http://10.0.0.5:8080", context_window: 32_768)

  """

  @behaviour Planck.AI.ModelProvider

  require Logger

  alias Planck.AI.Model

  @default_base_url "http://localhost:8080"
  @default_context_window 4_096
  @default_max_tokens 2_048

  @doc """
  Lists the models exposed by a llama.cpp server.

  Sends a GET request to `<base_url>/models` through the configured HTTP client
  and builds one `Planck.AI.Model` for every entry of the response's `"data"`
  list that carries an `"id"`. Entries without an `"id"` are skipped.

  This function is best-effort: it returns `[]` and logs a warning when the
  server cannot be reached, answers with a non-200 status, or answers with a
  200 response whose body does not contain a `"data"` list.

  ## Options

  - `:base_url` — base URL of the llama.cpp server. Defaults to `#{@default_base_url}`.
  - `:api_key` — when set, sent as a bearer token on the request and stored on
    every returned model.
  - `:context_window`, `:max_tokens`, `:supports_thinking`, `:input_types`,
    `:default_opts` — applied to every returned model, with the same defaults
    as `model/2`.

  The `:name` option is ignored here: each model's name is its id.
  """
  @spec all() :: [Model.t()]
  @spec all(keyword()) :: [Model.t()]
  @impl Planck.AI.ModelProvider
  def all(opts \\ []) do
    base_url = opts[:base_url] || @default_base_url
    req_opts = if api_key = opts[:api_key], do: [auth: {:bearer, api_key}], else: []

    case http_client().get(models_url(base_url), req_opts) do
      {:ok, %{status: 200, body: %{"data" => models}}} when is_list(models) ->
        # The generator pattern silently drops entries that have no "id":
        # one malformed entry must not take down the whole listing.
        for %{"id" => id} <- models, do: build_model(id, id, base_url, opts)

      {:ok, %{status: 200}} ->
        # Distinct message so a successful status with an undecodable or
        # unexpected payload is not reported as an HTTP failure.
        Logger.warning(
          "[Planck.AI] llama.cpp returned an unexpected response body from #{base_url}"
        )

        []

      {:ok, %{status: status}} ->
        Logger.warning("[Planck.AI] llama.cpp returned HTTP #{status} from #{base_url}")
        []

      {:error, reason} ->
        Logger.warning("[Planck.AI] llama.cpp unreachable at #{base_url}: #{inspect(reason)}")
        []
    end
  end

  @doc """
  Builds a `Planck.AI.Model` for a llama.cpp-hosted model.

  No request is made; the struct is assembled purely from `id` and `opts`.

  ## Options

  - `:name` — display name of the model. Defaults to `id`.
  - `:base_url` — base URL of the llama.cpp server. Defaults to `#{@default_base_url}`.
  - `:api_key` — API key stored on the model. Defaults to `nil`.
  - `:context_window` — context window size. Defaults to `#{@default_context_window}`.
  - `:max_tokens` — max tokens to generate. Defaults to `#{@default_max_tokens}`.
  - `:supports_thinking` — whether the model supports thinking blocks. Defaults to `false`.
  - `:input_types` — list of supported input modalities. Defaults to `[:text]`.
  - `:default_opts` — inference parameters applied on every call unless overridden by the
    caller (e.g. `[temperature: 1.0, top_p: 0.95, top_k: 40, min_p: 0.01]`). Defaults to `[]`.
  """
  @spec model(String.t()) :: Model.t()
  @spec model(String.t(), keyword()) :: Model.t()
  def model(id, opts \\ []) do
    build_model(id, opts[:name] || id, opts[:base_url] || @default_base_url, opts)
  end

  # Single place where the struct and its option defaults are assembled, so
  # `all/1` and `model/2` cannot drift apart.
  defp build_model(id, name, base_url, opts) do
    %Model{
      id: id,
      name: name,
      provider: :llama_cpp,
      base_url: base_url,
      api_key: opts[:api_key],
      context_window: opts[:context_window] || @default_context_window,
      max_tokens: opts[:max_tokens] || @default_max_tokens,
      supports_thinking: opts[:supports_thinking] || false,
      input_types: opts[:input_types] || [:text],
      default_opts: opts[:default_opts] || []
    }
  end

  # Builds the model-listing URL. A trailing "/" on the base URL is dropped so
  # the request path never becomes "//models".
  defp models_url(base_url) do
    String.trim_trailing(to_string(base_url), "/") <> "/models"
  end

  # Resolves the HTTP client module at runtime from the `:planck_ai`
  # application environment, falling back to `Planck.AI.ReqHTTPClient`.
  defp http_client do
    Application.get_env(:planck_ai, :http_client, Planck.AI.ReqHTTPClient)
  end
end