# lib/llm.ex
defmodule LLM do
  @moduledoc """
  Lightweight Elixir client for LLM APIs.

  Supports 4 adapters covering 99% of providers:
  - **OpenAI Chat Completions** — `/v1/chat/completions` (also works with Ollama, Groq, OpenRouter, DeepSeek, xAI, etc.)
  - **OpenAI Responses** — `/v1/responses`
  - **Anthropic Messages** — `/v1/messages`
  - **Gemini** — `/v1beta/models/{model}:generateContent`

  ## Quick Start

      # Simple text generation
      {:ok, response} = LLM.generate("What is Elixir?",
        provider: :openai,
        model: "gpt-4"
      )
      response.message.content  #=> "Elixir is a..."

      # Streaming
      {:ok, response} = LLM.stream("Tell me a story",
        provider: :anthropic,
        model: "claude-sonnet-4-5-20250514",
        on_chunk: &IO.write/1
      )

      # Using provider modules
      {:ok, response} = LLM.generate("Hello",
        provider: LLM.Provider.OpenAI,
        model: "gpt-4"
      )

      # With explicit API key
      {:ok, response} = LLM.generate("Hello",
        provider: {LLM.Provider.OpenAI, api_key: "sk-..."},
        model: "gpt-4"
      )

      # Custom provider (runtime)
      {:ok, response} = LLM.generate("Hello",
        provider: %{
          adapter: LLM.Adapter.Anthropic,
          base_url: "https://my-proxy.com",
          api_key: "sk-ant-..."
        },
        model: "claude-sonnet-4-5-20250514"
      )

      # With tools
      {:ok, response} = LLM.generate("Read mix.exs",
        provider: :openai,
        model: "gpt-4",
        tools: [MyApp.ReadFileTool]
      )

  ## Configuration

      # config/config.exs
      config :llm, :providers,
        openai: [api_key: "sk-..."],
        anthropic: [api_key: "sk-ant-..."]

      # Or at runtime (kept in the calling process's dictionary, so only that process sees it)
      LLM.put_key(:openai, "sk-...")

  ## Provider

  A provider can be:
  - An atom preset (`:openai`, `:anthropic`, `:gemini`, `:openrouter`, `:openai_responses`)
  - A provider module (`LLM.Provider.OpenAI`, `LLM.Provider.Anthropic`, etc.)
  - A tuple `{module, opts}` to supply options such as the API key at runtime (`{LLM.Provider.OpenAI, api_key: "sk-..."}`)
  - A map with `:adapter`, `:base_url`, and optionally `:api_key`
  """

  @typedoc """
  Provider selector accepted by the `:provider` option: a preset atom, a
  provider module, a `{module, opts}` tuple, or a map with `:adapter`,
  `:base_url` and optionally `:api_key`.
  """
  @type provider :: atom() | module() | {module(), keyword()} | map()

  @typedoc """
  Option understood by `stream/3`.

  `:on_chunk` and `:on_message` may also be given in the separate callbacks
  argument of `stream/3`.
  """
  @type stream_option ::
          {:provider, provider()}
          | {:model, String.t()}
          | {:max_tokens, non_neg_integer()}
          | {:temperature, float()}
          | {:thinking, atom() | map()}
          | {:tools, [module() | LLM.Tool.t() | {module(), map()}]}
          | {:auto_tools, boolean()}
          | {:max_rounds, non_neg_integer()}
          | {:on_chunk, (term() -> any())}
          | {:on_message, (LLM.Message.t() -> any())}
          | {:system, String.t()}
          | {:messages, [LLM.Message.t()]}
          | {:schema, map()}

  @typedoc """
  Option understood by `generate/3`.

  `:tool_timeout` is the per-tool timeout used while tool calls are executed
  automatically (default `30_000` ms).
  """
  @type generate_option ::
          {:provider, provider()}
          | {:model, String.t()}
          | {:max_tokens, non_neg_integer()}
          | {:temperature, float()}
          | {:thinking, atom() | map()}
          | {:tools, [module() | LLM.Tool.t() | {module(), map()}]}
          | {:auto_tools, boolean()}
          | {:max_rounds, non_neg_integer()}
          | {:tool_timeout, timeout()}
          | {:on_message, (LLM.Message.t() -> any())}
          | {:system, String.t()}
          | {:messages, [LLM.Message.t()]}
          | {:schema, map()}

  @doc """
  Streams a prompt and returns the final, fully collected response.

  `prompt` is either a string or an `LLM.Context`. Returns `{:ok, response}`
  on success and `{:error, reason}` on failure.

  ## Options

  `opts` is forwarded to `LLM.Stream.start/2`. These keys are also read here:

    * `:auto_tools` — forwarded to `LLM.Stream.collect/2` (default `true`);
      forced to `false` when `:schema` is set, matching `generate/3`
    * `:max_rounds` — forwarded to `LLM.Stream.collect/2` (default `10`)
    * `:schema` — when set, the structured output is parsed into
      `response.parsed` and onto the last message
    * `:system`, `:messages`, `:tools` — used to build the context when
      `prompt` is a string; when `prompt` is an `LLM.Context` only `:tools`
      is applied (prepended to the context's tools)

  ## Callbacks

  Callbacks can be placed in `opts` (2nd arg) or passed separately as a third
  keyword argument (which takes precedence):

      # Callbacks in opts
      {:ok, response} = LLM.stream("Tell me a story",
        provider: :openai,
        model: "gpt-4",
        on_chunk: &IO.write/1
      )

      # Callbacks as separate third argument
      {:ok, response} = LLM.stream("Tell me a story",
        [provider: :openai, model: "gpt-4"],
        on_chunk: &IO.write/1
      )

    * `:on_chunk` — called for each chunk, `fn chunk -> ... end`
    * `:on_message` — called once per completed `LLM.Message`,
      `fn message -> ... end`

  For manual stream control use `LLM.Stream.start/2` and
  `LLM.Stream.collect/2` directly.
  """
  @spec stream(String.t() | LLM.Context.t(), keyword(), keyword()) ::
          {:ok, LLM.Response.t()} | {:error, term()}
  def stream(prompt, opts \\ [], callbacks \\ []) do
    # A structured-output response is data, not a tool round-trip, so tool
    # auto-execution is switched off exactly as generate/3 does.
    opts = if opts[:schema], do: Keyword.put(opts, :auto_tools, false), else: opts

    context = to_context(prompt, opts)

    collect_opts = [
      auto_tools: Keyword.get(opts, :auto_tools, true),
      max_rounds: Keyword.get(opts, :max_rounds, 10),
      # The dedicated callbacks argument wins over callbacks given in opts.
      on_chunk: callbacks[:on_chunk] || opts[:on_chunk],
      on_message: callbacks[:on_message] || opts[:on_message]
    ]

    with {:ok, stream} <- LLM.Stream.start(context, opts),
         {:ok, response} <- LLM.Stream.collect(stream, collect_opts) do
      {:ok, maybe_parse_schema(response, opts)}
    end
  end

  @doc """
  Same as `stream/3`, but returns the bare response and raises a
  `RuntimeError` when streaming fails.
  """
  @spec stream!(String.t() | LLM.Context.t(), keyword(), keyword()) :: LLM.Response.t()
  def stream!(prompt, opts \\ [], callbacks \\ []) do
    outcome = stream(prompt, opts, callbacks)

    case outcome do
      {:error, reason} ->
        raise "LLM streaming failed: #{inspect(reason)}"

      {:ok, response} ->
        response
    end
  end

  @doc """
  Generates a response using regular (non-streaming) HTTP requests.

  `prompt` is either a string or an `LLM.Context`. The provider's complete
  response is decoded at once. When the response contains tool calls and
  `:auto_tools` is enabled (the default), the tools are executed and the
  conversation continues with follow-up regular HTTP requests, bounded by
  `:max_rounds` (default `10`).

  Returns `{:ok, response}` on success and `{:error, reason}` on failure.

  ## Callbacks

  Callbacks can be placed in `opts` (2nd arg) or passed separately as a third
  keyword argument (which takes precedence):

      # on_message in opts
      {:ok, response} = LLM.generate("What is Elixir?",
        provider: :openai,
        model: "gpt-4",
        on_message: fn msg -> IO.inspect(msg.role) end
      )

      # on_message as separate third argument
      {:ok, response} = LLM.generate("What is Elixir?",
        [provider: :openai, model: "gpt-4"],
        on_message: fn msg -> IO.inspect(msg.role) end
      )

    * `:on_message` — called once per completed `LLM.Message`,
      `fn message -> ... end`

  ## Structured output

  When `schema:` is set, the model is asked to return JSON matching the given
  schema. On success, `response.parsed` contains the decoded map and the last
  assistant message in `response.messages` also carries `:parsed`. Tool
  auto-execution is disabled for that request since the response is
  structured data, not a tool call round-trip.

      {:ok, response} = LLM.generate("Extract the name and age.",
        provider: :openai,
        model: "gpt-4o",
        schema: %{
          "name" => "person",
          "schema" => %{
            "type" => "object",
            "properties" => %{
              "name" => %{"type" => "string"},
              "age" => %{"type" => "integer"}
            },
            "required" => ["name", "age"]
          }
        }
      )
      response.parsed  #=> %{"name" => "Alice", "age" => 30}

  Pass a bare JSON Schema map to use `"output"` as the default name:

      schema: %{"type" => "object", "properties" => %{...}}

  The schema is passed through to the provider unchanged — it must be valid
  for the target provider's structured-output feature (schema requirements
  vary by provider).
  """
  @spec generate(String.t() | LLM.Context.t(), [generate_option], keyword()) ::
          {:ok, LLM.Response.t()} | {:error, term()}
  def generate(prompt, opts \\ [], callbacks \\ []) do
    # Structured output turns tool auto-execution off for this request.
    opts =
      case opts[:schema] do
        unset when unset in [nil, false] -> opts
        _schema -> Keyword.put(opts, :auto_tools, false)
      end

    # A callback given in the third argument wins over one given in opts.
    opts =
      case callbacks[:on_message] || opts[:on_message] do
        unset when unset in [nil, false] -> opts
        callback -> Keyword.put(opts, :on_message, callback)
      end

    case do_generate(to_context(prompt, opts), opts, 0, %LLM.Usage{}) do
      {:ok, response} -> {:ok, maybe_parse_schema(response, opts)}
      failure -> failure
    end
  end

  @doc """
  Same as `generate/3`, but returns the bare response and raises a
  `RuntimeError` when generation fails.
  """
  @spec generate!(String.t() | LLM.Context.t(), [generate_option], keyword()) :: LLM.Response.t()
  def generate!(prompt, opts \\ [], callbacks \\ []) do
    outcome = generate(prompt, opts, callbacks)

    case outcome do
      {:error, reason} ->
        raise "LLM generation failed: #{inspect(reason)}"

      {:ok, response} ->
        response
    end
  end

  @doc """
  Returns the provider presets reported by
  `LLM.Provider.Resolver.list_providers/0`.
  """
  @spec providers() :: [atom()]
  def providers do
    LLM.Provider.Resolver.list_providers()
  end

  @doc """
  List available models from a provider.

  Returns `{:ok, models}` on success, where `models` is a list of model info maps.
  Returns `{:error, reason}` on failure, and `{:error, :not_supported}` when
  the provider's adapter does not export `list_models/1`.

  ## Options

    * `:provider` - provider preset atom, module, or config map (defaults to `:openai`)

  ## Examples

      {:ok, models} = LLM.models(provider: :openai)
      {:ok, models} = LLM.models(provider: :anthropic)

  """
  @spec models(keyword()) :: {:ok, [LLM.Adapter.model_info()]} | {:error, term()}
  def models(opts \\ []) do
    provider = LLM.Provider.Resolver.resolve(opts[:provider] || :openai)
    adapter = provider.adapter

    # Probe for the optional callback instead of rescuing
    # UndefinedFunctionError: a rescue would also swallow genuine
    # undefined-function bugs raised from inside the adapter's list_models/1
    # and misreport them as :not_supported.
    if Code.ensure_loaded?(adapter) and function_exported?(adapter, :list_models, 1) do
      adapter.list_models(provider)
    else
      {:error, :not_supported}
    end
  end

  @doc """
  Stores `api_key` for `provider_name` at runtime.

  The key is written to the calling process's dictionary, so `get_key/1`
  only finds it when called from the same process. Always returns `:ok`.
  """
  @spec put_key(atom(), String.t()) :: :ok
  def put_key(provider_name, api_key) do
    storage_key = {__MODULE__, :provider_key, provider_name}
    _previous = Process.put(storage_key, api_key)
    :ok
  end

  @doc """
  Looks up the API key for `provider_name`.

  A key stored in the calling process's dictionary (see `put_key/2`) wins.
  Otherwise the `:providers` entry of the `:llm` application environment is
  consulted; it may be a keyword list or a map keyed by provider name.
  Returns `nil` when no key is found.
  """
  @spec get_key(atom()) :: String.t() | nil
  def get_key(provider_name) do
    with nil <- Process.get({__MODULE__, :provider_key, provider_name}) do
      providers = Application.get_env(:llm, :providers, %{})

      # Any other shape of configuration is treated as "no key configured".
      if is_list(providers) or is_map(providers) do
        providers[provider_name][:api_key]
      else
        nil
      end
    end
  end

  # --- Private ---

  # Builds the LLM.Context for a request.
  #
  # A string prompt becomes a fresh context: `opts[:messages]` (if any)
  # followed by the prompt as a new message, with `opts[:system]` and the
  # normalized `opts[:tools]`.
  defp to_context(prompt, opts) when is_binary(prompt) do
    normalized_tools = normalize_tools(opts[:tools] || [])
    history = opts[:messages] || []
    prompt_message = LLM.Message.new(prompt)

    %LLM.Context{
      system: opts[:system],
      messages: history ++ [prompt_message],
      tools: normalized_tools,
      provider_state: %{}
    }
  end

  # An existing context is kept as-is except that the normalized
  # `opts[:tools]` are placed in front of the tools it already carries.
  defp to_context(%LLM.Context{tools: existing_tools} = ctx, opts) do
    extra_tools = normalize_tools(opts[:tools] || [])
    %{ctx | tools: extra_tools ++ existing_tools}
  end

  # Passes every entry of `tools` through LLM.Tool.normalize/1.
  defp normalize_tools(tools) do
    for tool <- tools, do: LLM.Tool.normalize(tool)
  end

  # Drives the request / tool-execution loop behind generate/3.
  #
  # `rounds` counts the tool rounds already completed and `accumulated_usage`
  # holds the usage of those earlier rounds. Once `rounds` reaches
  # `:max_rounds` (default 10) one last request is sent and its response is
  # returned with the stop reason forced to `:max_rounds`.
  defp do_generate(context, opts, rounds, accumulated_usage) do
    round_limit = Keyword.get(opts, :max_rounds, 10)

    if rounds < round_limit do
      with {:ok, response} <- request_once(context, opts) do
        run_tools? = Keyword.get(opts, :auto_tools, true) and has_tool_calls?(response)

        if run_tools? do
          {:ok, next_context} = execute_tool_calls(response.message, context, opts)
          spent_so_far = LLM.Usage.add(accumulated_usage, response.usage || %LLM.Usage{})
          do_generate(next_context, opts, rounds + 1, spent_so_far)
        else
          final =
            finalize_generate_response(
              response,
              context,
              nil,
              accumulated_usage,
              opts[:on_message]
            )

          {:ok, final}
        end
      end
    else
      case request_once(context, opts) do
        {:ok, response} ->
          final =
            finalize_generate_response(
              response,
              context,
              :max_rounds,
              accumulated_usage,
              opts[:on_message]
            )

          {:ok, final}

        {:error, _} = failure ->
          failure
      end
    end
  end

  # Sends one non-streaming POST request for `context` and decodes the reply
  # with the provider's adapter.
  #
  # The provider defaults to `:openai`; a missing `:model` raises
  # ArgumentError. Only HTTP 200 counts as success; any other status is
  # returned as `{:error, %{status: status, body: body}}`.
  defp request_once(context, opts) do
    provider = LLM.Provider.Resolver.resolve(opts[:provider] || :openai)
    adapter = provider.adapter
    model = opts[:model] || raise ArgumentError, "model is required"

    # Prepended so these two entries are found before any duplicates in opts.
    request_opts = [model: model, stream: false] ++ opts
    payload = adapter.build_request(context, request_opts)
    path = non_stream_path(adapter, model)
    endpoint = LLM.Stream.build_url(provider.base_url, path)
    headers = LLM.Stream.build_headers(provider, request_opts)

    http_result =
      [url: endpoint, method: :post, headers: headers, json: payload]
      |> Req.new()
      |> LLM.HTTPClient.request()

    case http_result do
      {:error, _} = failure ->
        failure

      {:ok, %Req.Response{status: 200, body: body}} when is_map(body) ->
        adapter.decode_response(body)

      # The body arrived as a raw string, so it has to be JSON-decoded here.
      {:ok, %Req.Response{status: 200, body: body}} when is_binary(body) ->
        with {:ok, json} <- Jason.decode(body),
             {:ok, decoded_response} <- adapter.decode_response(json) do
          {:ok, decoded_response}
        else
          {:error, %Jason.DecodeError{} = error} -> {:error, {:invalid_json, error}}
          {:error, _} = failure -> failure
        end

      {:ok, %Req.Response{status: 200, body: body}} ->
        {:error, {:unexpected_response_body, body}}

      {:ok, %Req.Response{status: status, body: body} = http_response}
      when status != 200 ->
        {:error, %{status: status, body: LLM.Stream.normalize_error_body(body, http_response)}}
    end
  end

  # Resolves the request path for a non-streaming call: the adapter's
  # non_stream_path/0 when it exports one, otherwise its stream_path/0, run
  # through LLM.Stream.build_path/2 together with `model`.
  defp non_stream_path(adapter, model) do
    # function_exported?/3 does not load modules, so load the adapter first.
    Code.ensure_loaded(adapter)

    path_template =
      case function_exported?(adapter, :non_stream_path, 0) do
        true -> adapter.non_stream_path()
        false -> adapter.stream_path()
      end

    LLM.Stream.build_path(path_template, model)
  end

  # True when the response's message carries a non-empty list of tool calls.
  defp has_tool_calls?(%LLM.Response{message: %LLM.Message{tools: [_ | _]}}), do: true
  defp has_tool_calls?(%LLM.Response{message: %LLM.Message{}}), do: false

  # Runs every tool call of `assistant_message` concurrently and returns
  # `{:ok, context}` with the assistant message plus one `:tool` result message
  # per call appended to `context.messages`.
  #
  # Failures never abort the round; each is reported to the model as an
  # "Error: ..." tool result: unknown tool, `{:error, reason}` from the tool,
  # an exception, a throw/exit inside the tool, or a timeout
  # (`opts[:tool_timeout]`, default 30_000 ms).
  #
  # `opts[:on_message]`, when set, is called with the assistant message and
  # then with each tool result message, after all tools have finished.
  defp execute_tool_calls(%LLM.Message{} = assistant_message, context, opts) do
    on_message = opts[:on_message]
    tools = Enum.map(context.tools, &LLM.Tool.normalize/1)
    calls = assistant_message.tools

    outcomes =
      calls
      |> Task.async_stream(
        fn call -> run_tool_call(call, tools, context.messages) end,
        ordered: true,
        timeout: Keyword.get(opts, :tool_timeout, 30_000),
        # The default (:exit) would exit the caller when a tool times out;
        # :kill_task makes the stream emit {:exit, :timeout} for that call.
        on_timeout: :kill_task
      )
      |> Enum.to_list()

    # `ordered: true` keeps outcomes aligned with `calls`, so each result
    # message keeps the id and name of its call even when the task died.
    tool_result_messages =
      calls
      |> Enum.zip(outcomes)
      |> Enum.map(fn {call, outcome} ->
        content =
          case outcome do
            {:ok, text} -> text
            {:exit, reason} -> "Error: #{inspect(reason)}"
          end

        %LLM.Message{role: :tool, tool_call_id: call.id, name: call.name, content: content}
      end)

    if on_message do
      on_message.(assistant_message)
      Enum.each(tool_result_messages, on_message)
    end

    {:ok, %{context | messages: context.messages ++ [assistant_message] ++ tool_result_messages}}
  end

  # Executes one tool call and returns the text to send back to the model.
  defp run_tool_call(call, tools, messages) do
    case Enum.find(tools, fn tool -> tool.name == call.name end) do
      nil ->
        "Error: Unknown tool #{inspect(call.name)}"

      tool ->
        try do
          case tool.execute.(call.args, %{messages: messages}) do
            {:ok, result} -> to_string(result)
            {:error, err} -> "Error: #{format_tool_error(err)}"
          end
        rescue
          e -> "Error: #{Exception.message(e)}"
        catch
          # throw/exit inside a tool would otherwise crash the linked task
          # and, through the link, the calling process.
          kind, reason -> "Error: #{inspect({kind, reason})}"
        end
    end
  end

  # Renders a tool error reason; terms without a String.Chars implementation
  # (tuples, maps, ...) are inspected instead of raising a protocol error.
  defp format_tool_error(err) do
    if String.Chars.impl_for(err), do: to_string(err), else: inspect(err)
  end

  # Turns the last provider response into the value returned by generate/3.
  #
  # The final message gets the round's usage when it has none of its own,
  # `on_message` (if set) is called with it, and the response is updated with
  # the full message list, the usage summed over all rounds, and
  # `forced_stop_reason` when one is given.
  defp finalize_generate_response(
         response,
         context,
         forced_stop_reason,
         accumulated_usage,
         on_message
       ) do
    round_usage = response.usage || %LLM.Usage{}
    combined_usage = LLM.Usage.add(accumulated_usage, round_usage)

    reply = response.message
    reply = %{reply | usage: reply.usage || round_usage}

    if on_message do
      on_message.(reply)
    end

    stop_reason = forced_stop_reason || response.stop_reason

    %{
      response
      | message: reply,
        messages: context.messages ++ [reply],
        usage: combined_usage,
        stop_reason: stop_reason
    }
  end

  # Attaches parsed structured output to the response when `opts[:schema]` is
  # anything other than nil; otherwise returns the response untouched.
  defp maybe_parse_schema(response, opts) do
    if is_nil(opts[:schema]) do
      response
    else
      put_parsed_on_response_and_message(response)
    end
  end

  # Stores the parsed structured output on the response, on its message, and
  # on the last entry of `response.messages`.
  defp put_parsed_on_response_and_message(response) do
    parsed = parse_schema(response)
    annotated_message = %{response.message | parsed: parsed}
    history = update_last_message(response.messages, annotated_message)

    %{response | parsed: parsed, message: annotated_message, messages: history}
  end

  # Replaces the last element of `messages` with `message`; a nil or empty
  # list is returned unchanged.
  defp update_last_message(messages, _message) when messages in [nil, []], do: messages

  defp update_last_message(messages, message) do
    List.replace_at(messages, -1, message)
  end

  # Extracts the structured-output payload from a response; returns the
  # decoded map, or nil when nothing usable is found.
  #
  # Anthropic tool-forcing: the result is in the __structured_output__ tool
  # call args. If that call is present but its args are not a map the result
  # is nil; if no such call exists the message content is tried instead.
  defp parse_schema(%LLM.Response{message: %LLM.Message{tools: tools} = msg})
       when is_list(tools) do
    # A single lookup with plain field access: going through Access
    # (`call[:name]`) would raise for struct tool calls.
    case Enum.find(tools, &(&1.name == "__structured_output__")) do
      nil -> parse_schema(%LLM.Response{message: %{msg | tools: nil}})
      %{args: args} when is_map(args) -> args
      _ -> nil
    end
  end

  # OpenAI / Gemini: the result is JSON in message content. Only a JSON
  # object counts; any other JSON value or invalid JSON yields nil.
  defp parse_schema(%LLM.Response{message: %LLM.Message{content: content}})
       when is_binary(content) do
    case Jason.decode(content) do
      {:ok, parsed} when is_map(parsed) -> parsed
      _ -> nil
    end
  end

  defp parse_schema(_), do: nil
end