Skip to main content

lib/agent_sea/embeddings.ex

defmodule AgentSea.Embeddings do
  @moduledoc """
  Ties an `AgentSea.Embedder` to an `AgentSea.VectorStore`: embed-and-index text
  documents, then semantic-search by text.

  A handle (`t:t/0`) bundles the store module, the store reference and the
  embedder module. `index/2` and `search/4` call `embedder.embed/2`,
  `store_mod.upsert/2` and `store_mod.query/4` on those modules.

  If the embedder returns a different number of vectors than the number of
  texts it was given, both `index/2` and `search/4` return
  `{:error, {:embedding_count_mismatch, expected, got}}` instead of indexing or
  querying with partial data.

  ## Example

      {:ok, store} = AgentSea.VectorStore.Memory.start_link()

      handle =
        AgentSea.Embeddings.new(
          store_mod: AgentSea.VectorStore.Memory,
          store: store,
          embedder: AgentSea.Embedder.Hashing
        )

      AgentSea.Embeddings.index(handle, [
        %{id: "a", text: "the cat sat on the mat"},
        %{id: "b", text: "quarterly revenue grew 12%"}
      ])

      [%{id: "a"} | _] = AgentSea.Embeddings.search(handle, "where is the cat", 1)
  """

  @enforce_keys [:store_mod, :store, :embedder]
  defstruct [:store_mod, :store, :embedder, embed_opts: []]

  @type t :: %__MODULE__{
          store_mod: module(),
          store: GenServer.server(),
          embedder: module(),
          embed_opts: keyword()
        }

  @type entry :: %{
          required(:id) => term(),
          required(:text) => String.t(),
          optional(:metadata) => map()
        }

  @doc """
  Build a handle bundling a store + embedder.

  ## Options

    * `:store_mod` (required) - module whose `upsert/2` and `query/4` are called.
    * `:store` (required) - store reference passed as first argument to `store_mod`.
    * `:embedder` (required) - module whose `embed/2` is called.
    * `:embed_opts` - options forwarded to every `embed/2` call. Defaults to `[]`.

  Raises `KeyError` when a required option is missing.
  """
  @spec new(keyword()) :: t()
  def new(opts) do
    %__MODULE__{
      store_mod: Keyword.fetch!(opts, :store_mod),
      store: Keyword.fetch!(opts, :store),
      embedder: Keyword.fetch!(opts, :embedder),
      embed_opts: Keyword.get(opts, :embed_opts, [])
    }
  end

  @doc """
  Embed each entry's text and upsert it into the store.

  Every entry must have `:id` and `:text` keys (a missing key raises
  `KeyError`); `:metadata` defaults to `%{}`. Each record handed to
  `store_mod.upsert/2` has the keys `:id`, `:vector`, `:metadata` and `:text`.

  Returns the result of `store_mod.upsert/2` on success. An `{:error, reason}`
  from the embedder is returned unchanged. If the embedder returns a number of
  vectors different from the number of entries, nothing is upserted and
  `{:error, {:embedding_count_mismatch, expected, got}}` is returned.

  An empty list is a no-op that returns `:ok` without calling the embedder or
  the store.
  """
  @spec index(t(), [entry()]) :: :ok | {:error, term()}
  def index(%__MODULE__{}, []), do: :ok

  def index(%__MODULE__{} = handle, entries) do
    texts = Enum.map(entries, &Map.fetch!(&1, :text))

    case handle.embedder.embed(texts, handle.embed_opts) do
      {:ok, vectors} ->
        case build_records(entries, texts, vectors) do
          {:ok, records} -> handle.store_mod.upsert(handle.store, records)
          {:error, _reason} = error -> error
        end

      {:error, _reason} = error ->
        error
    end
  end

  @doc """
  Embed the query text and return the `k` most similar records.

  `opts` is forwarded untouched to `store_mod.query/4`, whose result is
  returned as-is. An `{:error, reason}` from the embedder is returned
  unchanged. If the embedder does not return exactly one vector for the single
  query text, `{:error, {:embedding_count_mismatch, 1, got}}` is returned.
  """
  @spec search(t(), String.t(), pos_integer(), keyword()) ::
          [AgentSea.VectorStore.hit()] | {:error, term()}
  def search(%__MODULE__{} = handle, query, k, opts \\ []) do
    case handle.embedder.embed([query], handle.embed_opts) do
      {:ok, [vector]} ->
        handle.store_mod.query(handle.store, vector, k, opts)

      # One text in must yield one vector out; anything else is reported
      # rather than crashing with a CaseClauseError.
      {:ok, vectors} when is_list(vectors) ->
        {:error, {:embedding_count_mismatch, 1, length(vectors)}}

      {:error, _reason} = error ->
        error
    end
  end

  # Pairs every entry with its vector, refusing to proceed when the counts
  # differ: zipping stops at the shorter input, which would silently drop
  # entries from the index.
  defp build_records(entries, texts, vectors) do
    expected = length(texts)

    case Enum.count(vectors) do
      ^expected ->
        records =
          [entries, texts, vectors]
          |> Enum.zip()
          |> Enum.map(fn {entry, text, vector} ->
            %{
              id: Map.fetch!(entry, :id),
              vector: vector,
              metadata: Map.get(entry, :metadata, %{}),
              text: text
            }
          end)

        {:ok, records}

      got ->
        {:error, {:embedding_count_mismatch, expected, got}}
    end
  end
end