# SPDX-FileCopyrightText: 2024 Isaak Tsalicoglou <isaak@waseigo.com>
# SPDX-License-Identifier: Apache-2.0
defmodule Ollamex do
@moduledoc """
Ollamex is an Elixir wrapper around the REST API of [ollama](https://ollama.ai/). This is the main module, containing the high-level functions a user will typically interact with.

Ollamex is written against the [ollama REST API documentation](https://github.com/jmorganca/ollama/blob/main/docs/api.md) and covers the following endpoints:

* [List Local Models](https://github.com/jmorganca/ollama/blob/main/docs/api.md#list-local-models)
* [Generate a completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion)
* [Generate a chat completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-chat-completion)
* [Generate embeddings](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-embeddings)

Tested with ollama version 0.1.20.

## Examples

### API initialization
```elixir
iex> api = Ollamex.API.new()
%Ollamex.API{
uri: "http://localhost:11434/api",
models: [
%Ollamex.LLModel{
name: "llama2:latest",
digest: "78e26419b4469263f75331927a00a0284ef6544c1975b826b15abdaef17bb962",
modified_at: "2024-01-09T22:24:14.925918123+02:00",
size: 3826793677,
details: %{
"families" => ["llama"],
"family" => "llama",
"format" => "gguf",
"parameter_size" => "7B",
"quantization_level" => "Q4_0"
},
modelfile: nil,
parameters: nil,
template: nil
},
%Ollamex.LLModel{
name: "mistral:latest",
digest: "61e88e884507ba5e06c49b40e6226884b2a16e872382c2b44a42f2d119d804a5",
modified_at: "2024-01-08T17:49:54.570542101+02:00",
size: 4109865159,
details: %{
"families" => ["llama"],
"family" => "llama",
"format" => "gguf",
"parameter_size" => "7B",
"quantization_level" => "Q4_0"
},
modelfile: nil,
parameters: nil,
template: nil
}
],
timeout: 120000,
errors: []
}
iex> Ollamex.API.list_models(api)
["llama2:latest", "mistral:latest"]
```
### Generate a completion (`/generate` endpoint)
```elixir
iex> p = %Ollamex.PromptRequest{model: "mistral:latest", prompt: "Explain using a simple paragraph like I'm 5 years old: Why is the sky not black like space?"}
%Ollamex.PromptRequest{
model: "mistral:latest",
prompt: "Explain using a simple paragraph like I'm 5 years old: Why is the sky not black like space?",
raw: false,
format: nil,
stream: true,
options: nil,
images: []
}
iex> Ollamex.generate_with_timeout(p, api)
{:ok,
%Ollamex.LLMResponse{
context: [733, 16289, 28793, ...],
created_at: "2024-01-10T19:23:12.943599755Z",
done: true,
eval_count: 100,
eval_duration: 16850322000,
model: "mistral:latest",
prompt_eval_count: 33,
prompt_eval_duration: 2865358000,
response: " The sky isn't black like space because it has [...]
pretty colors, and nighttime with stars and the moon!",
total_duration: 24862993618,
message: nil,
errors: nil
}}
```
### Generate a chat completion (`/chat` endpoint)
```elixir
iex> messages =
...>   []
...>   |> Ollamex.ChatMessage.append("user", "why is the sky blue?")
...>   |> Ollamex.ChatMessage.append("assistant", "due to rayleigh scattering!")
...>   |> Ollamex.ChatMessage.append("user", "how is that different to Mie scattering?")
...>   |> Enum.map(&Map.from_struct(&1))
[
%{content: "why is the sky blue?", images: [], role: "user"},
%{content: "due to rayleigh scattering!", images: [], role: "assistant"},
%{
content: "how is that different to Mie scattering?",
images: [],
role: "user"
}
]
iex> cr = %Ollamex.ChatRequest{messages: messages, model: "llama2", stream: true}
%Ollamex.ChatRequest{
model: "llama2",
messages: [
%{content: "why is the sky blue?", images: [], role: "user"},
%{content: "due to rayleigh scattering!", images: [], role: "assistant"},
%{
content: "how is that different to Mie scattering?",
images: [],
role: "user"
}
],
format: nil,
options: nil,
template: nil,
stream: true
}
iex> Ollamex.chat_with_timeout(cr, api)
{:ok,
%Ollamex.LLMResponse{
context: nil,
created_at: "2024-01-10T19:29:05.771371091Z",
done: true,
eval_count: 515,
eval_duration: 83246108000,
model: "llama2",
prompt_eval_count: 61,
prompt_eval_duration: 7234332000,
response: nil,
total_duration: 95606709630,
message: %{
content: "Mie scattering is [...] while Rayleigh scattering
is responsible for the reddening of sunlight at sunrise
and sunset.",
role: "assistant"
},
errors: nil
}}
```
### Generate embeddings (`/embeddings` endpoint)
```elixir
iex> p = %Ollamex.PromptRequest{model: "llama2", prompt: "Explain the main features and benefits of the Elixir programming language in a single, concise paragraph."}
%Ollamex.PromptRequest{
model: "llama2",
prompt: "Explain the main features and benefits of the Elixir programming language in a single, concise paragraph.",
raw: false,
format: nil,
stream: true,
options: nil,
images: []
}
iex> Ollamex.embeddings(p, api)
%Ollamex.LLMResponse{
context: nil,
created_at: nil,
done: nil,
eval_count: nil,
eval_duration: nil,
model: "llama2",
prompt_eval_count: nil,
prompt_eval_duration: nil,
response: nil,
total_duration: nil,
message: nil,
embedding: [-1.6268974542617798, -1.4279855489730835, -0.46105068922042847,
0.7557640671730042, -0.17748284339904785, ...],
errors: nil
}
```
"""
@moduledoc since: "0.1.0"
alias Ollamex.{Helpers, API, LLMResponse, PromptRequest, ChatRequest}
defp prompt(request, endpoint, %API{} = api)
     when is_struct(request) and is_bitstring(endpoint) do
  req = Req.new(base_url: api.uri)

  # POST the request struct as JSON to the given endpoint, collecting any
  # streamed chunks into a list (`into: []`).
  results =
    Req.post(req,
      url: Path.join("/", endpoint),
      json: Map.from_struct(request),
      receive_timeout: api.timeout,
      into: []
    )

  case results do
    {:error, reason} ->
      %LLMResponse{errors: reason}

    {:ok, %Req.Response{status: 200, body: body}} ->
      Helpers.handle_response(body)

    # Surface unexpected HTTP status codes as errors instead of raising a
    # CaseClauseError.
    {:ok, %Req.Response{status: status, body: body}} ->
      %LLMResponse{errors: [status: status, body: body]}
  end
end
@doc """
[Generate a completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion) using the `/generate` endpoint of the ollama API.
Note that this does not guard against a model that keeps generating output forever, such as an endless stream of newlines or tab characters; use `generate_with_timeout/3` to enforce a hard time limit.
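
## Example

A minimal sketch; the model name and prompt are illustrative, and the returned struct is abbreviated:

```elixir
iex> api = Ollamex.API.new()
iex> p = %Ollamex.PromptRequest{model: "mistral:latest", prompt: "Why is the sky blue?"}
iex> Ollamex.generate(p, api)
%Ollamex.LLMResponse{model: "mistral:latest", done: true, response: "...", errors: nil, ...}
```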
"""
@doc since: "0.1.0"
def generate(%PromptRequest{} = request, %API{} = api) do
prompt(request, "generate", api)
end
@doc """
Same functionality as `generate/2`, but shuts down the underlying task after the provided `timeout` in milliseconds (default: `120_000`).
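
## Example

A sketch with a custom 30-second timeout; the model name and prompt are illustrative, and on success the response is wrapped in an `{:ok, _}` tuple:

```elixir
iex> api = Ollamex.API.new()
iex> p = %Ollamex.PromptRequest{model: "mistral:latest", prompt: "Summarize Elixir in one sentence."}
iex> Ollamex.generate_with_timeout(p, api, 30_000)
{:ok, %Ollamex.LLMResponse{model: "mistral:latest", done: true, response: "...", ...}}
```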
"""
@doc since: "0.1.0"
def generate_with_timeout(%PromptRequest{} = request, %API{} = api, timeout \\ 120_000)
when is_integer(timeout) do
Helpers.create_task(&generate/2, [request, api])
|> Helpers.yield_or_timeout_and_shutdown(timeout)
end
@doc """
[Generate a chat completion](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-chat-completion) using the `/chat` endpoint of the ollama API.
Note that this does not guard against a model that keeps generating output forever, such as an endless stream of newlines or tab characters; use `chat_with_timeout/3` to enforce a hard time limit.
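
## Example

A minimal sketch; the model and messages are illustrative, and the returned struct is abbreviated:

```elixir
iex> api = Ollamex.API.new()
iex> messages =
...>   []
...>   |> Ollamex.ChatMessage.append("user", "Why is the sky blue?")
...>   |> Enum.map(&Map.from_struct(&1))
iex> cr = %Ollamex.ChatRequest{messages: messages, model: "llama2", stream: true}
iex> Ollamex.chat(cr, api)
%Ollamex.LLMResponse{model: "llama2", done: true, message: %{role: "assistant", content: "..."}, ...}
```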
"""
@doc since: "0.1.0"
def chat(%ChatRequest{} = request, %API{} = api) do
prompt(request, "chat", api)
end
@doc """
Same functionality as `chat/2`, but shuts down the underlying task after the provided `timeout` in milliseconds (default: `120_000`).
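
## Example

Building on the `chat/2` example, with a custom 60-second timeout (`cr` and `api` are assumed to be built as shown there):

```elixir
iex> Ollamex.chat_with_timeout(cr, api, 60_000)
{:ok, %Ollamex.LLMResponse{model: "llama2", done: true, message: %{role: "assistant", content: "..."}, ...}}
```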
"""
@doc since: "0.1.0"
def chat_with_timeout(%ChatRequest{} = request, %API{} = api, timeout \\ 120_000)
when is_integer(timeout) do
Helpers.create_task(&chat/2, [request, api])
|> Helpers.yield_or_timeout_and_shutdown(timeout)
end
@doc """
[Generate embeddings](https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-embeddings) from an LLM using the `/embeddings` endpoint of the ollama API.
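
## Example

A minimal sketch; the model and prompt are illustrative, and the embedding vector is elided:

```elixir
iex> api = Ollamex.API.new()
iex> p = %Ollamex.PromptRequest{model: "llama2", prompt: "Elixir is a dynamic, functional language."}
iex> Ollamex.embeddings(p, api)
%Ollamex.LLMResponse{model: "llama2", embedding: [...], errors: nil, ...}
```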
"""
@doc since: "0.2.0"
def embeddings(%PromptRequest{} = request, %API{} = api) do
  # The embeddings response does not echo the model name back, so copy it
  # over from the request when the call succeeded.
  case prompt(request, "embeddings", api) do
    %LLMResponse{errors: nil} = r -> %{r | model: request.model}
    %LLMResponse{} = r -> r
  end
end
@doc """
Same functionality as `embeddings/2`, but shuts down the underlying task after the provided `timeout` in milliseconds (default: `120_000`).
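
## Example

Same call as in the `embeddings/2` example, with a custom 60-second timeout; on success the response is wrapped in an `{:ok, _}` tuple:

```elixir
iex> Ollamex.embeddings_with_timeout(p, api, 60_000)
{:ok, %Ollamex.LLMResponse{model: "llama2", embedding: [...], errors: nil, ...}}
```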
"""
@doc since: "0.2.0"
def embeddings_with_timeout(%PromptRequest{} = request, %API{} = api, timeout \\ 120_000)
when is_integer(timeout) do
Helpers.create_task(&embeddings/2, [request, api])
|> Helpers.yield_or_timeout_and_shutdown(timeout)
end
end