defmodule ALLM do
@moduledoc """
Top-level facade for the ALLM library — provider-neutral LLM execution with
first-class streaming and serializable conversation state.
ALLM is organized into four conceptual layers (see
`steering/allm_engine_session_streaming_spec_v0_2.md` §2):
* **Layer A — Serializable data.** Plain structs (`ALLM.Message`,
`ALLM.Request`, `ALLM.Response`, `ALLM.Thread`, `ALLM.Session`, …) that
round-trip through `:erlang.term_to_binary/1` and JSON via
`ALLM.Serializer`. No PIDs, refs, funs, or API keys.
* **Layer B — Runtime.** `ALLM.Engine` plus the `ALLM.Adapter`,
`ALLM.StreamAdapter`, `ALLM.ToolExecutor`, and `ALLM.ToolResultEncoder`
behaviours. Holds the non-serializable dependencies (modules, funs,
Finch names, keys resolved at call time).
* **Layer C — Stateless execution.** `generate/3`, `stream_generate/3`,
`step/3`, `stream_step/3`, `chat/3`, `stream/3` on this module. Each
call takes an engine explicitly.
* **Layer D — Stateful continuation.** `ALLM.Session` operations over a
persisted `%ALLM.Session{}`.
Phase 1 shipped Layer A: the data structs, `ALLM.Validate`,
`ALLM.Serializer`, and the constructors on this facade. Later phases added
Layers B/C/D (engines, adapters, streaming, chat orchestration, image
generation, sessions) — the Layer C entry points are live on this module.
## Getting Started
Drive a `chat/3` round-trip against the deterministic `ALLM.Providers.Fake`
adapter — no API key, no network. Parallel to the README's Getting Started
snippet (kept in sync by visual review):
iex> engine = ALLM.Engine.new(adapter: ALLM.Providers.Fake, adapter_opts: [script: [{:text, "Hello, ALLM!"}, {:finish, :stop}]])
iex> {:ok, %ALLM.ChatResult{final_response: %ALLM.Response{output_text: text}}} = ALLM.chat(engine, [ALLM.user("Hi.")])
iex> text
"Hello, ALLM!"
## Example
iex> messages = [ALLM.system("Be helpful."), ALLM.user("Name three primes.")]
iex> req = ALLM.request(messages, model: "fake:gpt-test")
iex> :ok = ALLM.Validate.request(req)
iex> json = ALLM.Serializer.to_json!(req)
iex> {:ok, ^req} = ALLM.Serializer.from_json(json)
See `steering/allm_engine_session_streaming_spec_v0_2.md` §4 for the full
public API surface.
"""
alias ALLM.{
ChatResult,
Engine,
Image,
ImageRequest,
ImageResponse,
Message,
Request,
StepResult,
Thread,
Tool
}
alias ALLM.Error.{AdapterError, EngineError, ImageAdapterError, ValidationError}
@doc """
Build a system-role `%ALLM.Message{}` from a text string.

Raises `FunctionClauseError` when `text` is not a binary — enforces the
`String.t()` contract declared in the `@spec`, matching the guard
precedent set by `image_request/2`.

## Examples

    iex> ALLM.system("be helpful")
    %ALLM.Message{role: :system, content: "be helpful", name: nil, tool_call_id: nil, metadata: %{}}

"""
@spec system(String.t()) :: Message.t()
def system(text) when is_binary(text), do: %Message{role: :system, content: text}
@doc """
Build a user-role `%ALLM.Message{}` from a text string.

Raises `FunctionClauseError` when `text` is not a binary — enforces the
`String.t()` contract declared in the `@spec`, matching the guard
precedent set by `image_request/2`.

## Examples

    iex> ALLM.user("hi")
    %ALLM.Message{role: :user, content: "hi", name: nil, tool_call_id: nil, metadata: %{}}

"""
@spec user(String.t()) :: Message.t()
def user(text) when is_binary(text), do: %Message{role: :user, content: text}
@doc """
Build an assistant-role `%ALLM.Message{}` from a text string.

Raises `FunctionClauseError` when `text` is not a binary — enforces the
`String.t()` contract declared in the `@spec`, matching the guard
precedent set by `image_request/2`.

## Examples

    iex> ALLM.assistant("hello")
    %ALLM.Message{role: :assistant, content: "hello", name: nil, tool_call_id: nil, metadata: %{}}

"""
@spec assistant(String.t()) :: Message.t()
def assistant(text) when is_binary(text), do: %Message{role: :assistant, content: text}
@doc """
Build a tool-role `%ALLM.Message{}` carrying a tool-call result.

`tool_call_id` must match the `:id` of the `ALLM.ToolCall` that produced
this result so the provider can match results to calls. `content` is either
a binary or a JSON-serializable map; any other shape raises
`FunctionClauseError` — the guard enforces the contract this doc already
stated, at the constructor boundary.

## Examples

    iex> msg = ALLM.tool_result("call_abc", %{ok: true})
    iex> {msg.role, msg.tool_call_id, msg.content}
    {:tool, "call_abc", %{ok: true}}

"""
@spec tool_result(String.t(), String.t() | map()) :: Message.t()
def tool_result(tool_call_id, content)
    when is_binary(tool_call_id) and (is_binary(content) or is_map(content)) do
  %Message{role: :tool, tool_call_id: tool_call_id, content: content}
end
@doc """
Build an `%ALLM.Tool{}` from keyword opts.

Thin delegation to `ALLM.Tool.new/1`. `:name`, `:description`, and
`:schema` are required — omitting any raises `ArgumentError`. `:handler`
is optional.

## Examples

    iex> tool = ALLM.tool(name: "weather", description: "weather by city", schema: %{"type" => "object"})
    iex> {tool.name, tool.description}
    {"weather", "weather by city"}

"""
@spec tool(keyword()) :: Tool.t()
defdelegate tool(opts), to: Tool, as: :new
@doc """
Build the canonical tagged map for a JSON-schema response format (spec §5.4).

Returns `%{type: :json_schema, name: name, schema: schema, strict: boolean}`.
`:strict` defaults to `true`; pass `strict: false` when provider-side schema
enforcement should be relaxed.

## Examples

    iex> ALLM.json_schema("person", %{"type" => "object"})
    %{type: :json_schema, name: "person", schema: %{"type" => "object"}, strict: true}

    iex> ALLM.json_schema("person", %{"type" => "object"}, strict: false)
    %{type: :json_schema, name: "person", schema: %{"type" => "object"}, strict: false}

"""
@spec json_schema(String.t(), map(), keyword()) :: map()
def json_schema(name, schema, opts \\ []) do
  # Only `:strict` is consulted; all other opts are ignored.
  strict? = Keyword.get(opts, :strict, true)

  %{type: :json_schema, name: name, schema: schema, strict: strict?}
end
@doc """
Build an `%ALLM.ImageRequest{}` from a prompt and keyword opts.

Delegates to `ALLM.ImageRequest.new/1` after putting `:prompt` last in
the opts list — the positional `prompt` is authoritative.

Does **not** validate — call `ALLM.Validate.image_request/1` to check
operation-arity and field rules. Mirrors `request/2`'s no-validate
precedent (Phase 13.3 design Decision #7). Unknown opts raise `KeyError`
via `struct!/2`.

Callers wanting `:variation` (which forbids a non-empty `:prompt`) should
build the struct directly via `ALLM.ImageRequest.new/1`.

## Examples

    iex> req = ALLM.image_request("a kestrel")
    iex> {req.operation, req.prompt, req.n, req.response_format}
    {:generate, "a kestrel", 1, :binary}

    iex> req = ALLM.image_request("a watercolor kestrel", model: "gpt-image-1", size: {1024, 1024}, n: 2)
    iex> :ok = ALLM.Validate.image_request(req)
    iex> json = ALLM.Serializer.to_json!(req)
    iex> {:ok, ^req} = ALLM.Serializer.from_json(json)
    iex> {req.model, req.size, req.n}
    {"gpt-image-1", {1024, 1024}, 2}

"""
@spec image_request(String.t(), keyword()) :: ALLM.ImageRequest.t()
def image_request(prompt, opts \\ []) when is_binary(prompt) and is_list(opts) do
  opts
  |> Keyword.put(:prompt, prompt)
  |> ImageRequest.new()
end
@doc """
Build an `%ALLM.Request{}` from a list of messages and keyword opts.
Delegates to `ALLM.Request.new/2`.

Does **not** validate — validation runs at the adapter boundary (Phase 5)
or via an explicit `ALLM.Validate.request/1` call. Keeping construction
composable matches Non-obvious Decision #7 of the Phase 1 design:
`request/2` returns a `%Request{}` directly, not `{:ok | :error}`.

## Examples

    iex> req = ALLM.request([ALLM.user("hi")])
    iex> {length(req.messages), req.stream, req.tools}
    {1, false, []}

    iex> req = ALLM.request([ALLM.user("hi")], model: "gpt-4.1-mini", response_format: %{type: :json_object})
    iex> {req.model, req.response_format}
    {"gpt-4.1-mini", %{type: :json_object}}

"""
@spec request([Message.t()], keyword()) :: Request.t()
def request(messages, opts \\ []) do
  Request.new(messages, opts)
end
@doc """
Open a streaming generation against the engine's adapter. See spec §4 and
§10.2.

Returns `{:ok, enumerable}` where the enumerable is a lazy stream of
`ALLM.Event` values (no event fires until the caller reduces), or
`{:error, struct}` on a synchronous pre-flight failure (missing adapter,
invalid request, adapter-reported pre-flight error).

## Options

In addition to any provider-specific opts the adapter honours, the
following Phase 5 streaming-layer keys are consumed by this function:

* `:emit_text_deltas` — `true` (default) keeps `:text_delta` events in
  the stream; `false` drops them. `:text_completed` and
  `:message_completed` are unaffected.
* `:emit_tool_deltas` — `true` (default) keeps `:tool_call_delta`
  events; `false` drops them.
* `:include_raw_chunks` — `false` (default) drops `:raw_chunk` events
  EXCEPT those with payload `{:usage, _}`, which always pass so
  `%Response.usage` can be populated downstream.
* `:on_event` — a 1-arity function invoked for every event BEFORE the
  filters apply. Exceptions raised inside the callback surface in the
  consumer's reducing process, not at this call site.

Phase 7 orchestration opts (`:mode`, `:max_turns`, `:halt_when`) are
silently stripped here; `stream_generate/3` is single-request.

## Examples

    iex> engine = ALLM.Engine.new(
    ...>   adapter: ALLM.Providers.Fake,
    ...>   adapter_opts: [script: [{:text, "hi"}, {:finish, :stop}]]
    ...> )
    iex> req = ALLM.request([ALLM.user("say hi")])
    iex> {:ok, stream} = ALLM.stream_generate(engine, req)
    iex> Enum.any?(Enum.to_list(stream), &match?({:message_completed, _}, &1))
    true

"""
@spec stream_generate(Engine.t(), Request.t(), keyword()) ::
        {:ok, Enumerable.t()}
        | {:error, EngineError.t() | AdapterError.t() | ValidationError.t()}
def stream_generate(engine, request, opts \\ []) do
  ALLM.StreamRunner.run(engine, request, opts)
end
@doc """
Execute a non-streaming generation against the engine's adapter. See
spec §4 and §10.1.

Implemented as a reducer over `stream_generate/3` (spec §3) — the
streaming path is the primitive. A mid-stream adapter error folds into
`response.finish_reason == :error` with the error struct under
`response.metadata.error`; pre-flight errors surface directly as
`{:error, struct}`.

## Options

Accepts the same options as `stream_generate/3`. `:include_raw_chunks`
defaults to `false` but `{:usage, _}` raw chunks always survive the
filter so `response.usage` is populated regardless.

See `ALLM.Runner` for the full mid-stream error contract and the
stream-first reducer rationale.

## Examples

    iex> engine = ALLM.Engine.new(
    ...>   adapter: ALLM.Providers.Fake,
    ...>   adapter_opts: [script: [{:text, "hi"}, {:finish, :stop}]]
    ...> )
    iex> req = ALLM.request([ALLM.user("say hi")])
    iex> {:ok, response} = ALLM.generate(engine, req)
    iex> {response.output_text, response.finish_reason}
    {"hi", :stop}

"""
@spec generate(Engine.t(), Request.t(), keyword()) ::
        {:ok, ALLM.Response.t()}
        | {:error, EngineError.t() | AdapterError.t() | ValidationError.t()}
def generate(engine, request, opts \\ []) do
  ALLM.Runner.run(engine, request, opts)
end
@doc """
Execute a single chat step (one adapter round-trip plus any auto-executed
tool calls) and return a `%ALLM.StepResult{}`. See spec §4 and §10.3.

`thread_or_messages` is either an `%ALLM.Thread{}` or a list of
`%ALLM.Message{}` (normalised via `ALLM.Thread.from_messages/1`). The
thread is validated via `ALLM.Validate.thread/1` at entry. Pure one-line
delegation to `ALLM.Chat.step/3`; see that module for the full behaviour
contract (mode dispatch, on_tool_error policy, halt metadata).

## Options

In addition to any provider-specific opts the adapter honours:

* `:mode` — `:auto` (default) executes tool calls; `:manual` returns
  them for the caller to submit results.
* `:tool_timeout` — milliseconds per tool (default 30_000).
* `:on_tool_error` — `:continue` (default) or `:halt`.
* `:tool_executor`, `:tool_result_encoder` — module overrides.
* Phase 5 stream filter opts are accepted but have no effect on this
  non-streaming path.

## Examples

    iex> engine = ALLM.Engine.new(
    ...>   adapter: ALLM.Providers.Fake,
    ...>   adapter_opts: [
    ...>     script: [
    ...>       {:tool_call, id: "call_0", name: "weather", arguments: %{"city" => "NYC"}},
    ...>       {:finish, :tool_calls}
    ...>     ]
    ...>   ],
    ...>   tools: [ALLM.tool(
    ...>     name: "weather",
    ...>     description: "forecast by city",
    ...>     schema: %{"type" => "object"},
    ...>     handler: fn %{"city" => c} -> {:ok, %{forecast: "sunny", city: c}} end
    ...>   )]
    ...> )
    iex> {:ok, sr} = ALLM.step(engine, [ALLM.user("weather in NYC?")])
    iex> {sr.done?, length(sr.tool_results)}
    {false, 1}

"""
@spec step(Engine.t(), Thread.t() | [Message.t()], keyword()) ::
        {:ok, StepResult.t()}
        | {:error, EngineError.t() | AdapterError.t() | ValidationError.t()}
def step(engine, thread_or_messages, opts \\ []) do
  ALLM.Chat.step(engine, thread_or_messages, opts)
end
@doc """
Execute a single chat step as a lazy stream of `ALLM.Event` values. See
spec §4 and §10.4.

`thread_or_messages` is either an `%ALLM.Thread{}` or a list of
`%ALLM.Message{}`. The returned stream is open — no events fire until
the caller reduces. Events are emitted in this order: all adapter
events (pass-through from `stream_generate/3`), then zero-to-N
tool-execution event groups (per tool: `:tool_execution_started` →
`:tool_execution_completed` → `:tool_result_encoded` /
`:ask_user_requested` / `:tool_halt`), then exactly one terminal
`:step_completed` event.

Pure one-line delegation to `ALLM.Chat.stream_step/3`; see that module
for the three-phase `Stream.resource/3` state machine and the
unknown-tool error-in-stream contract.

## Options

Same as `step/3`. Additionally accepts the Phase 5 streaming filter
opts (`:emit_text_deltas`, `:emit_tool_deltas`, `:include_raw_chunks`,
`:on_event`) — they apply to the adapter-stream pass-through phase.

## Examples

    iex> engine = ALLM.Engine.new(
    ...>   adapter: ALLM.Providers.Fake,
    ...>   adapter_opts: [
    ...>     script: [
    ...>       {:tool_call, id: "call_0", name: "weather", arguments: %{"city" => "NYC"}},
    ...>       {:finish, :tool_calls}
    ...>     ]
    ...>   ],
    ...>   tools: [ALLM.tool(
    ...>     name: "weather",
    ...>     description: "forecast by city",
    ...>     schema: %{"type" => "object"},
    ...>     handler: fn %{"city" => c} -> {:ok, %{forecast: "sunny", city: c}} end
    ...>   )]
    ...> )
    iex> {:ok, stream} = ALLM.stream_step(engine, [ALLM.user("weather in NYC?")])
    iex> events = Enum.to_list(stream)
    iex> Enum.any?(events, &match?({:step_completed, _}, &1))
    true

"""
@spec stream_step(Engine.t(), Thread.t() | [Message.t()], keyword()) ::
        {:ok, Enumerable.t()}
        | {:error, EngineError.t() | AdapterError.t() | ValidationError.t()}
def stream_step(engine, thread_or_messages, opts \\ []) do
  ALLM.Chat.stream_step(engine, thread_or_messages, opts)
end
@doc """
Run a multi-turn chat loop against the engine and return a
`%ALLM.ChatResult{}`. See spec §4 and §10.5.

`thread_or_messages` is either an `%ALLM.Thread{}` or a list of
`%ALLM.Message{}` (normalised via `ALLM.Thread.from_messages/1`). The
thread is validated via `ALLM.Validate.thread/1` at entry. Pure one-line
delegation to `ALLM.Chat.run/3`; see that module for the full multi-turn
loop semantics, the seven-entry terminal-condition ordering, and the
`%ChatResult{}` shape.

## Mode

* `:auto` (default) — the loop executes tool calls automatically.
  Each step appends tool-result messages to the thread before the
  next adapter call. Halt reasons follow the table below.
* `:manual` — the FIRST step whose response carries
  `finish_reason: :tool_calls` halts with `halted_reason:
  :manual_tool_calls`. The caller submits tool results via a fresh
  `chat/3` call with the augmented thread (no executor runs).
  Pure-text steps under `:manual` continue normally.

## `:max_turns` precedence

The loop bound resolves at entry through this chain (call opts wins
on the left):

    call opts > engine.params[:max_turns] > Application.get_env(:allm, :max_turns) > library default 8

Per Phase 7 design Non-obvious Decision #9. `max_turns` must be a
`pos_integer()`; non-positive integers raise `ArgumentError`.

## `:halt_when` semantics

`:halt_when` is a `(StepResult.t() -> boolean())` callback invoked
AFTER the step's thread mutation has been applied (Phase 7 design
Non-obvious Decision #11). It is the LAST per-step gate consulted —
ask-user, handler `{:halt, _, _}`, `on_tool_error: :halt`,
`:manual_tool_calls`, and adapter `finish_reason ∈ {:stop, :error,
:length, :content_filter}` all preempt it. Exceptions raised inside
`halt_when` propagate to the caller of `chat/3`; they are NOT caught.

## `:on_tool_error`

Atom forms `:continue` (default) and `:halt` behave as in Phase 6.
The function form `(ToolCall.t(), term() -> {:continue, term()} |
:halt)` was deferred from Phase 6 and lands in Phase 7. The function
is invoked synchronously inside the per-tool task after the handler's
return / encoder failure resolves to an error term (Phase 7
Non-obvious Decision #8): `{:continue, replacement}` encodes
`replacement` as the tool-result content; `:halt` halts the batch with
`halted_reason: :tool_error`. An invalid return shape or a raise from
inside the function is wrapped as
`%ALLM.Error.ToolError{reason: :invalid_return}` and treated as `:halt`.

## `:on_event` scope

Inherits the Phase 5 contract: `:on_event` observes only
adapter-emitted events (text deltas, tool-call deltas, message
bookends, `:raw_chunk`, adapter-emitted `:error`). Phase 6 / Phase 7
chat-layer events (`:tool_execution_*`, `:tool_result_encoded`,
`:ask_user_requested`, `:tool_halt`, `:step_completed`,
`:chat_completed`) are NOT delivered to `:on_event` — they fire
outside `ALLM.StreamRunner`. Per Phase 7 Non-obvious Decision #13.

## Halt-reason table

| Reason | Fires when | `metadata` keys populated |
|--------|------------|---------------------------|
| `:completed` | Adapter `finish_reason ∈ {:stop, :length, :content_filter}` | `%{}` |
| `:error` | Adapter `finish_reason: :error` (mid-stream error folds in) | `%{error: error_struct}` (when present) |
| `:max_turns` | `step_index + 1 >= max_turns` after a non-halting step | `%{max_turns: N}` |
| `:halt_when` | `halt_when.(step_result)` returned `true` | `%{halt_when_step_index: idx}` |
| `:ask_user` | Handler returned `{:ask_user, _}` / `{:ask_user, _, _}` | `%{pending_question: q, pending_tool_call_id: id, ask_user_opts: opts}` (also on top-level `%ChatResult{}`) |
| `:tool_error` | `on_tool_error: :halt`, fun returned `:halt`, or fun raised | `%{halt_tool_call_id: id}` (plus `:on_tool_error_exception` if fun raised) |
| `:manual_tool_calls` | `mode: :manual` and step's `response.finish_reason == :tool_calls`, OR (Phase 18) `mode: :auto` and one or more called tools have `manual: true` | `%{manual_turn_index: idx}` (whole-loop) — additionally `%{manual_tool_calls: [%ToolCall{}, ...]}` (per-tool, only the manual bucket) |
| atom() (user) | Handler returned `{:halt, reason, result}` not in the above set | `%{halt_tool_call_id: id, halt_result: result}` |

## Mixed-bucket re-issue (Phase 18 per-tool manual)

When `mode: :auto` and at least one called tool has `manual: true`, the
loop halts with `halted_reason: :manual_tool_calls` after running the
auto-bucket tools. The returned `result.thread` carries the assistant
message AND the auto-bucket `:tool` messages — but **NOT** placeholder
messages for the manual ids. Naively re-issuing `chat/3` on
`result.thread` sends a malformed request to the provider (assistant
tool_calls with no matching `:tool` messages for the manual ids),
surfacing as `%ALLM.Error.AdapterError{reason: :invalid_request}`.
Callers MUST append a `:tool` message for each id in
`result.metadata.manual_tool_calls` before re-issuing:

    {:ok, result} = ALLM.chat(engine, [ALLM.user("...")])
    # result.halted_reason == :manual_tool_calls
    # result.metadata.manual_tool_calls == [%ToolCall{id: "cm", ...}]
    # Resolve each manual call out-of-band, then append a :tool message.
    tool_msg = %ALLM.Message{
      role: :tool,
      content: "approved",
      tool_call_id: "cm"
    }
    augmented = ALLM.Thread.add_message(result.thread, tool_msg)
    {:ok, final} = ALLM.chat(engine, augmented)

The `ALLM.Session` API (`Session.start/3` + `submit_tool_result/3`)
enforces this discipline automatically; raw `chat/3` callers must guard
by hand. Whole-loop `mode: :manual` callers are unaffected — every tool
call surfaces on `result.final_response.tool_calls`, no auto bucket
exists.

## Examples

    iex> engine = ALLM.Engine.new(
    ...>   adapter: ALLM.Providers.Fake,
    ...>   adapter_opts: [
    ...>     scripts: [
    ...>       [{:tool_call, id: "c0", name: "echo", arguments: %{"x" => 1}},
    ...>        {:finish, :tool_calls}],
    ...>       [{:text, "done"}, {:finish, :stop}]
    ...>     ]
    ...>   ],
    ...>   tools: [ALLM.tool(
    ...>     name: "echo",
    ...>     description: "",
    ...>     schema: %{},
    ...>     handler: fn args -> {:ok, args} end
    ...>   )]
    ...> )
    iex> {:ok, %ALLM.ChatResult{} = result} = ALLM.chat(engine, [ALLM.user("echo please")])
    iex> {result.halted_reason, length(result.steps)}
    {:completed, 2}

"""
@spec chat(Engine.t(), Thread.t() | [Message.t()], keyword()) ::
        {:ok, ChatResult.t()}
        | {:error, EngineError.t() | AdapterError.t() | ValidationError.t()}
def chat(engine, thread_or_messages, opts \\ []) do
  ALLM.Chat.run(engine, thread_or_messages, opts)
end
@doc """
Stream a multi-turn chat loop as a lazy enumerable of `ALLM.Event`
values terminating in exactly one `:chat_completed` event. See spec
§4 and §10.6.

`thread_or_messages` is either an `%ALLM.Thread{}` or a list of
`%ALLM.Message{}`. The returned stream is open — no events fire until
the caller reduces. Pure one-line delegation to `ALLM.Chat.stream/3`;
see that module for the two-phase `Stream.resource/3` state machine and
the cleanup chain.

## Single terminal `:chat_completed`

A naturally-terminating stream emits adapter events plus tool events
for each turn, one `:step_completed` per turn, and exactly one trailing
`{:chat_completed, %{result: %ChatResult{}}}` event (Phase 7
Non-obvious Decision #3). Both `chat/3` and
`stream/3 |> ALLM.StreamCollector.to_chat_result/1` produce the SAME
`%ChatResult{}` for identical inputs because both paths construct it
via the same `ALLM.Chat.build_chat_result/1` helper (Phase 7
Non-obvious Decision #4).

Consumer halts (`Enum.take/2`, `Stream.take_while/2`) produce NO
`:chat_completed` event; callers needing a final `%ChatResult{}` for a
cancelled stream collect events and call
`ALLM.StreamCollector.to_chat_result/1` on the partial state — the
fallback path returns `halted_reason: :cancelled`.

## Stream-first

`chat/3` is a reducer over this stream (per spec §3). The streaming
path is the primitive; the non-streaming variant exists so callers who
don't need event-level visibility get a synchronous result.

## Ask-user thread asymmetry

When a step's handler returns `{:ask_user, _}`, the streamed
`:step_completed.thread` does NOT include the assistant question
message — only the terminal `:chat_completed.result.thread` does
(Phase 7 Invariant 8). Consumers persisting thread state across turns
must read `ChatResult.thread`, never `:step_completed.thread`.

## `:on_event` scope

Same as `chat/3` and `stream_generate/3`: `:on_event` observes only
adapter-emitted events. Chat-layer events (`:tool_execution_*`,
`:tool_result_encoded`, `:ask_user_requested`, `:tool_halt`,
`:step_completed`, `:chat_completed`) are NOT delivered to `:on_event`
— they fire outside `ALLM.StreamRunner`. Per Phase 7 Non-obvious
Decision #13.

## Options

Same options as `chat/3`. The Phase 5 streaming filter opts
(`:emit_text_deltas`, `:emit_tool_deltas`, `:include_raw_chunks`,
`:on_event`) apply to each turn's adapter pass-through.

## Examples

    iex> engine = ALLM.Engine.new(
    ...>   adapter: ALLM.Providers.Fake,
    ...>   adapter_opts: [
    ...>     scripts: [
    ...>       [{:tool_call, id: "c0", name: "echo", arguments: %{"x" => 1}},
    ...>        {:finish, :tool_calls}],
    ...>       [{:text, "done"}, {:finish, :stop}]
    ...>     ]
    ...>   ],
    ...>   tools: [ALLM.tool(
    ...>     name: "echo",
    ...>     description: "",
    ...>     schema: %{},
    ...>     handler: fn args -> {:ok, args} end
    ...>   )]
    ...> )
    iex> {:ok, stream} = ALLM.stream(engine, [ALLM.user("echo please")])
    iex> events = Enum.to_list(stream)
    iex> Enum.count(events, &match?({:chat_completed, _}, &1))
    1

"""
@spec stream(Engine.t(), Thread.t() | [Message.t()], keyword()) ::
        {:ok, Enumerable.t()}
        | {:error, EngineError.t() | AdapterError.t() | ValidationError.t()}
def stream(engine, thread_or_messages, opts \\ []) do
  ALLM.Chat.stream(engine, thread_or_messages, opts)
end
@doc """
Generate one or more images against the engine's `:image_adapter`. See
spec §35.4, §35.5.

Layer C façade. Two input shapes:

* Binary `prompt` — sugar over `ALLM.image_request/2`. Opts merge into
  the built `%ALLM.ImageRequest{operation: :generate}`.
* Pre-built `%ALLM.ImageRequest{}` — dispatched verbatim.

## Adapter-presence gate

Returns `{:error, %ALLM.Error.EngineError{reason: :no_image_adapter}}`
when `engine.image_adapter == nil`. This is the first gate; no other
validation runs (per Phase 14.2 design Decision #5).

## Validation policy (Decision #13)

The façade does NOT call `ALLM.Validate.image_request/1`. Caller-opt-in
only — mirrors `request/2`'s no-validate precedent. A manually-built
request that the validator would reject (e.g., empty prompt for
`:generate`) still dispatches.

## `request_id` precedence (Decision #7)

`opts[:request_id]` wins over an auto-generated id from
`ALLM.Telemetry.request_id/0`. The id is forwarded to the adapter via
`opts[:request_id]`. After the call, `response.request_id` is filled
from the forwarded id IFF the adapter left it `nil`; an
adapter-populated `:request_id` (e.g. provider's `x-request-id` header)
is preserved.

## `:stream` opt is silently dropped

Image generation is non-streaming in v0.3 (phasing principle #2).
Passing `stream: true` does not error — the opt is ignored.

## Unknown opts

Forwarded to the adapter via `opts` (matches the chat-side
`Engine.resolve_params/2` pass-through pattern).

## Examples

    iex> img = ALLM.Image.from_binary(<<137, 80, 78, 71>>, "image/png")
    iex> engine = ALLM.Engine.new(
    ...>   image_adapter: ALLM.Providers.FakeImages,
    ...>   adapter_opts: [image_script: [{:ok, [img]}]]
    ...> )
    iex> {:ok, %ALLM.ImageResponse{images: [_]}} = ALLM.generate_image(engine, "a kestrel")
    iex> :ok
    :ok

    iex> engine = ALLM.Engine.new()
    iex> {:error, %ALLM.Error.EngineError{reason: :no_image_adapter}} =
    ...>   ALLM.generate_image(engine, "a kestrel")
    iex> :ok
    :ok

"""
@spec generate_image(Engine.t(), String.t() | ImageRequest.t(), keyword()) ::
        {:ok, ImageResponse.t()}
        | {:error, EngineError.t() | ValidationError.t() | ImageAdapterError.t()}
def generate_image(engine, prompt_or_request, opts \\ [])

def generate_image(%Engine{} = engine, prompt, opts) when is_binary(prompt) do
  # Struct-colliding call-control keys are stripped before the request is
  # built; the full opts still travel to the dispatch path.
  built = ALLM.image_request(prompt, drop_request_opts(opts))
  do_generate_image(engine, built, opts)
end

def generate_image(%Engine{} = engine, %ImageRequest{} = request, opts) do
  do_generate_image(engine, request, opts)
end
@doc """
Edit a base image (optionally with a mask) against the engine's
`:image_adapter`. See spec §35.4, §35.5.

Three call shapes (Phase 14.2 design Decision #6):

* `edit_image(engine, base_image, prompt)` — single base, no mask;
  builds `%ImageRequest{operation: :edit, input_images: [base], mask: nil}`.
* `edit_image(engine, [base, mask], prompt)` — 2-element list; both
  images become `:input_images`, `:mask` stays `nil`. The list form
  does NOT auto-promote the second element to `:mask` — use the
  explicit `mask:` keyword for that.
* `edit_image(engine, base, prompt, mask: mask)` — explicit mask
  keyword; builds `input_images: [base], mask: mask`.

Returns `{:error, %EngineError{reason: :no_image_adapter}}` when the
engine has no image adapter (first gate, before any other validation).

Forwards opts (n, size, quality, etc.) onto the request struct via
`ALLM.ImageRequest.new/1`. See `generate_image/3` for the full
`request_id` and `:stream`-drop semantics — they apply identically.

## Examples

    iex> img = ALLM.Image.from_binary(<<137, 80, 78, 71>>, "image/png")
    iex> engine = ALLM.Engine.new(
    ...>   image_adapter: ALLM.Providers.FakeImages,
    ...>   adapter_opts: [image_script: [{:ok, [img]}]]
    ...> )
    iex> base = ALLM.Image.from_binary(<<1, 2, 3>>, "image/png")
    iex> {:ok, %ALLM.ImageResponse{images: [_]}} =
    ...>   ALLM.edit_image(engine, base, "make sky pink")
    iex> :ok
    :ok

"""
@spec edit_image(Engine.t(), Image.t() | [Image.t()], String.t(), keyword()) ::
        {:ok, ImageResponse.t()}
        | {:error, EngineError.t() | ValidationError.t() | ImageAdapterError.t()}
def edit_image(engine, image_or_list, prompt, opts \\ [])

def edit_image(%Engine{} = engine, %Image{} = base, prompt, opts) when is_binary(prompt) do
  # :mask is a call-site concern, consumed here before request construction.
  {mask, remaining} = Keyword.pop(opts, :mask)

  request =
    remaining
    |> drop_request_opts()
    |> Keyword.merge(operation: :edit, prompt: prompt, input_images: [base], mask: mask)
    |> ImageRequest.new()

  do_generate_image(engine, request, opts)
end

def edit_image(%Engine{} = engine, images, prompt, opts)
    when is_list(images) and is_binary(prompt) do
  request =
    opts
    |> drop_request_opts()
    |> Keyword.merge(operation: :edit, prompt: prompt, input_images: images, mask: nil)
    |> ImageRequest.new()

  do_generate_image(engine, request, opts)
end
@doc """
Build variations of a single input image against the engine's
`:image_adapter`. See spec §35.4, §35.5.

Builds `%ImageRequest{operation: :variation, input_images: [image],
prompt: nil}` and forwards opts. Returns
`{:error, %EngineError{reason: :no_image_adapter}}` when the engine has
no image adapter (first gate).

See `generate_image/3` for the full `request_id` and `:stream`-drop
semantics.

## Examples

    iex> img = ALLM.Image.from_binary(<<137, 80, 78, 71>>, "image/png")
    iex> engine = ALLM.Engine.new(
    ...>   image_adapter: ALLM.Providers.FakeImages,
    ...>   adapter_opts: [image_script: [{:ok, [img]}]]
    ...> )
    iex> input = ALLM.Image.from_binary(<<1, 2, 3>>, "image/png")
    iex> {:ok, %ALLM.ImageResponse{images: [_]}} = ALLM.image_variations(engine, input)
    iex> :ok
    :ok

"""
@spec image_variations(Engine.t(), Image.t(), keyword()) ::
        {:ok, ImageResponse.t()}
        | {:error, EngineError.t() | ValidationError.t() | ImageAdapterError.t()}
def image_variations(engine, image, opts \\ [])

def image_variations(%Engine{} = engine, %Image{} = image, opts) do
  request =
    opts
    |> drop_request_opts()
    |> Keyword.merge(operation: :variation, input_images: [image], prompt: nil)
    |> ImageRequest.new()

  do_generate_image(engine, request, opts)
end
# ---------------------------------------------------------------------------
# Internals — Phase 14.3 telemetry + preflight + retry wrap (§35.9, §6.1).
# ---------------------------------------------------------------------------
# Image-side retryable reason atoms (Phase 14.3 design Decision #4 +
# Decision #9 augmentation). Engaged by both `augment_image_retry_policy/1`
# (extends the chat-side `default_policy/0.retry_on` at the call site) and
# `dispatch_image_attempt/3` (the per-attempt closure passed to
# `Retry.run/3`). Placed at the top of the image-internals block per the
# codebase convention (module attributes live above their consumer
# cluster, not interleaved between private helpers).
# NOTE(review): neither consumer is visible in this chunk — confirm both
# still read this attribute before changing its contents.
@retryable_image_reasons [:rate_limited, :provider_unavailable, :timeout, :network_error]
# Strip call-control opts that would collide with `ImageRequest` struct
# fields when merged into `ImageRequest.new/1`. These are call/dispatch-site
# concerns, not request fields — `struct!/2` inside `ImageRequest.new/1`
# would raise `KeyError` on them. `:mask` is consumed at the `edit_image/4`
# boundary; the others travel to the adapter via `dispatch_opts`.
defp drop_request_opts(opts) when is_list(opts) do
  Enum.reject(opts, fn {key, _value} ->
    key in [
      :request_id,
      :stream,
      :mask,
      :adapter_opts,
      :request_timeout,
      :retry,
      :api_key,
      :telemetry_metadata
    ]
  end)
end
# Phase 14.3: the whole body runs inside `Telemetry.span(:image, ...)` so the
# `:start` event ALWAYS fires — even when the adapter is missing or preflight
# rejects — keeping observability uniform. Inside the span, the body in
# `do_generate_image_body/5` runs in this order (per design Decision #15):
#
#   (1) adapter-presence gate (`engine.image_adapter != nil`) — FIRST
#   (2) `Capability.preflight_image/2` (no-op when catalog absent)
#   (3) `Retry.run/3`-wrapped dispatch
defp do_generate_image(%Engine{} = engine, %ImageRequest{} = request, opts) do
  # Honour a caller-supplied :request_id; otherwise mint a fresh one.
  req_id = Keyword.get(opts, :request_id) || ALLM.Telemetry.request_id()
  model = Engine.resolve_model(engine, opts)

  span_metadata = %{
    request_id: req_id,
    engine: engine,
    model: model,
    operation: request.operation,
    n: request.n
  }

  ALLM.Telemetry.span(:image, span_metadata, fn ->
    outcome = do_generate_image_body(engine, request, opts, req_id, model)
    # `:stop`-event split per Decision #8: numeric image_count is a
    # measurement; usage/response/error are metadata.
    {extras, measurements} = image_stop_extras(outcome)
    {outcome, measurements, extras}
  end)
end
defp do_generate_image_body(%Engine{image_adapter: nil}, _request, _opts, _req_id, _model) do
  # The missing-adapter gate fires FIRST — per design Decision #15 and spec
  # line 817 it short-circuits BEFORE preflight, so a missing adapter plus a
  # tools-disabled model surfaces `:no_image_adapter`, never
  # `:unsupported_capability`.
  {:error, EngineError.new(:no_image_adapter)}
end
defp do_generate_image_body(
       %Engine{image_adapter: adapter} = engine,
       %ImageRequest{} = request,
       opts,
       request_id,
       resolved_model
     ) do
  case ALLM.Capability.preflight_image(resolved_model, request) do
    :ok ->
      # Stamp the engine-resolved model onto the request (an explicitly-set
      # request model wins) so adapters that gate / shape the wire body off
      # `request.model` — e.g. OpenAI's multipart `:edit` / `:variation`,
      # which require `model` on the wire — see it. Mirrors the chat-side
      # `StreamRunner.resolve_request_model/3` at
      # `lib/allm/stream_runner.ex:209-213`.
      request = %{request | model: request.model || resolved_model}

      # Engine adapter_opts are CONCATENATED ahead of the call-site ones:
      # `Keyword.get/2` returns the FIRST occurrence of a duplicated key, so
      # the engine wins on collision. Mirrors the chat-side
      # `StreamRunner.build_dispatch_opts/2` concat at
      # `lib/allm/stream_runner.ex:229-230` — NOT `Keyword.merge/2`, whose
      # precedence is the OPPOSITE (call-site wins).
      merged_adapter_opts = engine.adapter_opts ++ Keyword.get(opts, :adapter_opts, [])

      dispatch_opts =
        opts
        |> Keyword.drop([:stream])
        |> Keyword.put(:request_id, request_id)
        |> Keyword.put(:adapter_opts, merged_adapter_opts)

      # Handed to `Retry.run/3` so any `[:allm, :adapter, :retry]` event
      # shares the surrounding `:image` span's correlation id (Decision #15).
      retry_metadata = %{
        request_id: request_id,
        model: resolved_model,
        operation: request.operation
      }

      # Extend the engine's retry policy with the image-side retryable
      # reason atoms. v0.2's `default_policy/0.retry_on` is HTTP-status-coded
      # (`[429, 500, 502, 503, 504, :timeout]`); image adapters surface
      # closed-enum atoms (`:rate_limited`, `:provider_unavailable`,
      # `:timeout`, `:network_error`) via `%ImageAdapterError{}`. Augmenting
      # `retry_on` at the call site lets `ALLM.Retry.error_matches?/2`
      # recognise image atoms without modifying the chat-side default
      # (Decision #9 — "default policy reused unchanged").
      policy = augment_image_retry_policy(engine.retry)
      attempt = fn -> dispatch_image_attempt(adapter, request, dispatch_opts) end

      # Backfill the span's request_id onto responses the adapter left
      # unstamped; an adapter-set request_id is preserved.
      case ALLM.Retry.run(policy, retry_metadata, attempt) do
        {:ok, %ImageResponse{request_id: nil} = response} ->
          {:ok, %{response | request_id: request_id}}

        {:ok, %ImageResponse{} = response} ->
          {:ok, response}

        {:error, _} = error ->
          error
      end

    other ->
      # Same fall-through as the original `with`: any non-`:ok` preflight
      # result is surfaced to the caller unchanged.
      other
  end
end
# Materialize the engine's retry config and append the image-side retryable
# reason atoms (`@retryable_image_reasons`) to its `retry_on` list, so
# `ALLM.Retry.error_matches?/2` recognises `%ImageAdapterError{}` reasons
# without modifying the chat-side default policy (Decision #9 — "default
# policy reused unchanged"). `retry: false` on the engine short-circuits to
# `:no_retry`, as does a config `ALLM.Retry.materialize/1` maps to
# `:no_retry`.
defp augment_image_retry_policy(false), do: :no_retry

defp augment_image_retry_policy(retry_config) do
  case ALLM.Retry.materialize(retry_config) do
    :no_retry ->
      :no_retry

    %{retry_on: existing} = policy ->
      # Append image atoms to the chat-side retry_on. Idempotent —
      # `Enum.uniq/1` keeps the list stable on repeat calls.
      %{policy | retry_on: Enum.uniq(existing ++ @retryable_image_reasons)}
  end
end
# Per-attempt closure body for `Retry.run/3`. A retryable
# `%ImageAdapterError{}` (see `@retryable_image_reasons` at the top of this
# internals block) re-enters the retry loop with the provider-suggested
# delay (`retry_after_ms`, defaulting to 0); every other success or error is
# surfaced verbatim with NO retry attempt (per design Decision #4
# error-table + Phase 14.3 spec line 358).
defp dispatch_image_attempt(adapter, request, dispatch_opts) do
  case adapter.generate(request, dispatch_opts) do
    {:error, %ImageAdapterError{reason: reason, retry_after_ms: after_ms} = error}
    when reason in @retryable_image_reasons ->
      {:retry, after_ms || 0, error}

    {:ok, _} = success ->
      success

    {:error, _} = failure ->
      failure
  end
end
# Compute `:stop`-event extras per design Decision #8: `image_count` is a
# MEASUREMENT (numeric — the actual number of images on success, `0` on
# error), while `:usage` / `:response` / `:error` are METADATA (structured
# context). Returns `{metadata_extras, extra_measurements}`.
defp image_stop_extras({:ok, %ImageResponse{} = response}) do
  {%{usage: response.usage, response: response, error: nil},
   %{image_count: length(response.images)}}
end

defp image_stop_extras({:error, error}) do
  {%{usage: nil, response: nil, error: error}, %{image_count: 0}}
end
end