lib/ex_llama.ex

defmodule ExLLama do
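  @moduledoc """
  Thin convenience wrappers around `ExLLama.Model`, `ExLLama.Session`, and
  `ExLLama.ChatTemplate` for loading models and running completions.

  A minimal usage sketch; the model path is a placeholder, and the `{:ok, _}`
  return shapes of `load_model/1` and `advance_context/2` are assumed rather
  than confirmed here:

      {:ok, model} = ExLLama.load_model("priv/models/model.gguf")
      {:ok, session} = ExLLama.create_session(model)
      {:ok, _} = ExLLama.advance_context(session, "Hello")
      {:ok, %{content: text}} = ExLLama.completion(session, 128, nil)
  """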
  def load_model(path), do: ExLLama.Model.load_from_file(path)
  def load_model(path, %ExLLama.ModelOptions{} = opts), do: ExLLama.Model.load_from_file(path, opts)

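  # Sessions wrap a loaded model for incremental inference: advance the
  # context with new text, then sample completions from it.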
  def create_session(model), do: ExLLama.Model.create_session(model)
  def create_session(model, options), do: ExLLama.Model.create_session(model, options)

  def advance_context(session, content), do: ExLLama.Session.advance_context(session, content)
  def completion(session, max_tokens, stop), do: ExLLama.Session.completion(session, max_tokens, stop)

  @default_choices 1
  @default_max_tokens 512

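  @doc """
  Runs a chat completion for `thread` against `model`.

  Recognized `options` (keyword list):

    * `:session_options` - options passed to `ExLLama.SessionOptions.new/1`
    * `:seed` - RNG seed; takes precedence over a seed in `:session_options`
    * `:add_generation_prompt` - defaults to `true`
    * `:choices` - number of completions to sample (default `#{@default_choices}`)
    * `:max_tokens` - token budget per choice (default `#{@default_max_tokens}`)

  A hedged usage sketch; the message shape is an assumption about what
  `ExLLama.ChatTemplate.to_context/3` accepts:

      thread = [%{role: :user, content: "Summarize GGUF in one sentence."}]
      ExLLama.chat_completion(model, thread, max_tokens: 64, seed: 42)
  """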
  def chat_completion(model, thread, options) do
    # Build session options; an explicit :seed takes precedence over any seed
    # already present in :session_options.
    so =
      cond do
        x = options[:session_options] ->
          if seed = options[:seed], do: put_in(x, [:seed], seed), else: x

        x = options[:seed] ->
          [seed: x]

        true ->
          nil
      end
    # Default :add_generation_prompt to true unless the caller set it.
    options =
      update_in(options || [], [:add_generation_prompt], fn
        nil -> true
        x -> x
      end)
    session_options = ExLLama.SessionOptions.new(so)
    choices = options[:choices] || @default_choices
    max_tokens = options[:max_tokens] || @default_max_tokens
    # Prime a fresh session with the templated thread, then record the prompt
    # size before sampling.
    with {:ok, session} <- ExLLama.create_session(model, session_options),
         {:ok, thread_context} <- ExLLama.ChatTemplate.to_context(thread, model, options),
         {:ok, _} <- ExLLama.Session.set_context(session, thread_context),
         {:ok, prompt_tokens} <- ExLLama.Session.context_size(session) do
      # Sample `choices` completions, keeping successes as {token_length, content}.
      choices =
        Enum.flat_map(1..choices, fn _ ->
          case ExLLama.Session.completion(session, max_tokens, nil) do
            {:ok, %{content: result, token_length: l}} -> [{l, result}]
            _ -> []
          end
        end)

      # Expose the prompt token count to extract_response via options.
      options = put_in(options, [:prompt_tokens], prompt_tokens)
      ExLLama.ChatTemplate.extract_response(choices, model, options)
    end
  end
end