# lib/indexes/vectorstore_server.ex

defmodule LangChain.VectorStore do
  @moduledoc """
  A GenServer that fronts a vector database provider and exposes storing and
  similarity-searching of vectors.

  The provider is chosen when the server is started, so an application can run
  several `LangChain.VectorStore` servers side by side, each backed by a
  different provider.

  ## Options

    * `:provider` - the vector db provider to use; must implement the
      `LangChain.VectorStore.Provider` protocol. When omitted, a warning is
      emitted and `Application.get_env(:lang_chain, :vector_store_provider)`
      is used instead.
    * `:embed_documents` - optional 2-arity function, called as
      `fun.(documents, provider)`, that embeds a list of documents. Defaults
      to a function that returns `[]`.
    * `:embed_query` - optional 2-arity function, called as
      `fun.(query, provider)`, that embeds a single string query. Defaults to
      a function that returns `[]`.

  Every other option (for example `:name`) is forwarded to
  `GenServer.start_link/3`.
  """

  use GenServer
  alias LangChain.VectorStore.Provider
  require Logger

  # Options consumed by this module; they are not GenServer options and are
  # therefore stripped before delegating to GenServer.start_link/3.
  @own_options [:provider, :embed_documents, :embed_query]

  @doc """
  Starts a vector store server linked to the calling process.

  See the module documentation for the supported options.
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts \\ []) do
    # `||` (rather than a Keyword default) so an explicit `nil` also falls back.
    provider = Keyword.get(opts, :provider) || default_provider()
    embed_documents = Keyword.get(opts, :embed_documents) || default_embedder()
    embed_query = Keyword.get(opts, :embed_query) || default_embedder()

    GenServer.start_link(
      __MODULE__,
      {provider, embed_documents, embed_query},
      Keyword.drop(opts, @own_options)
    )
  end

  # Public API

  @doc """
  Embeds `document_list` with the configured `:embed_documents` function and
  adds the resulting vectors to the vector store.

  Returns the same values as `add_vectors/2`.
  """
  @spec add_documents(GenServer.server(), term()) :: {:ok, term()} | {:error, term()}
  def add_documents(pid, document_list) do
    GenServer.call(pid, {:add_documents, document_list})
  end

  @doc """
  Adds a list of vectors to the vector store.

  Returns `{:ok, result}` where `result` is whatever the provider returned, or
  `{:error, exception}` when the provider raised. An `{:ok, _}` or
  `{:error, _}` tuple returned by the provider is passed through unchanged.
  """
  @spec add_vectors(GenServer.server(), term()) :: {:ok, term()} | {:error, term()}
  def add_vectors(pid, vector_list) do
    GenServer.call(pid, {:add_vectors, vector_list})
  end

  @doc """
  Performs a similarity search on the vector store.

  A string `query` is first run through the configured `:embed_query`
  function; a list `query` is handed to the provider as is.
  """
  @spec similarity_search(GenServer.server(), String.t() | list(), term(), term()) :: term()
  def similarity_search(pid, query, k, filter) when is_binary(query) do
    GenServer.call(pid, {:similarity_search_string, query, k, filter})
  end

  def similarity_search(pid, query, k, filter) when is_list(query) do
    GenServer.call(pid, {:similarity_search, query, k, filter})
  end

  @doc """
  Performs a similarity search on the vector store and returns scores.

  A string `query` is first run through the configured `:embed_query`
  function; a list `query` is handed to the provider as is.
  """
  @spec similarity_search_with_score(GenServer.server(), String.t() | list(), term(), term()) ::
          term()
  def similarity_search_with_score(pid, query, k, filter) when is_binary(query) do
    GenServer.call(pid, {:similarity_search_with_score_string, query, k, filter})
  end

  def similarity_search_with_score(pid, query, k, filter) when is_list(query) do
    GenServer.call(pid, {:similarity_search_with_score, query, k, filter})
  end

  @doc """
  Loads a vector store from a directory.

  Replies `:ok` once the provider's `load` call has returned.
  """
  @spec load(GenServer.server(), term(), term()) :: :ok
  def load(pid, directory, embeddings) do
    GenServer.call(pid, {:load, directory, embeddings})
  end

  # Callbacks

  @impl GenServer
  def init({provider, embed_documents, embed_query}) do
    {:ok, %{provider: provider, embed_documents: embed_documents, embed_query: embed_query}}
  end

  @impl GenServer
  def handle_call({:add_documents, document_list}, _from, state) do
    embeddings = state.embed_documents.(document_list, state.provider)
    {:reply, store_vectors(state.provider, embeddings), state}
  end

  def handle_call({:add_vectors, vector_list}, _from, state) do
    {:reply, store_vectors(state.provider, vector_list), state}
  end

  def handle_call({:similarity_search_string, query, k, filter}, _from, state) do
    embedding = state.embed_query.(query, state.provider)
    {:reply, Provider.similarity_search(state.provider, embedding, k, filter), state}
  end

  def handle_call({:similarity_search_with_score_string, query, k, filter}, _from, state) do
    embedding = state.embed_query.(query, state.provider)
    {:reply, Provider.similarity_search_with_score(state.provider, embedding, k, filter), state}
  end

  def handle_call({:similarity_search_with_score, query, k, filter}, _from, state) do
    {:reply, Provider.similarity_search_with_score(state.provider, query, k, filter), state}
  end

  def handle_call({:similarity_search, query, k, filter}, _from, state) do
    {:reply, Provider.similarity_search(state.provider, query, k, filter), state}
  end

  def handle_call({:load, directory, embeddings}, _from, state) do
    # NOTE(review): whatever Provider.load/3 returns becomes the ENTIRE server
    # state. Later calls read state.provider / state.embed_query, so this only
    # works if load/3 returns a map of that shape. Possibly the intent was
    # `%{state | provider: loaded}` - confirm against the Provider
    # implementations before changing it; behaviour is preserved here.
    new_state = Provider.load(state.provider, directory, embeddings)
    {:reply, :ok, new_state}
  end

  # Private API

  # Fallback used when no :provider option is given.
  defp default_provider do
    IO.warn(
      "No :provider option specified, will fallback to default provider from the application environment defined in :vector_store_provider."
    )

    Application.get_env(:lang_chain, :vector_store_provider)
  end

  # Fallback for both :embed_documents and :embed_query: ignores its input and
  # yields an empty list.
  defp default_embedder do
    fn _input, _provider -> [] end
  end

  # Hands the vectors to the provider and turns the outcome into a tagged
  # tuple. A raised exception is logged and reported to the caller as
  # `{:error, exception}` instead of being silently swallowed, so a failed
  # insert can no longer be mistaken for a successful one.
  defp store_vectors(provider, vector_list) do
    case Provider.add_vectors(provider, vector_list) do
      {:ok, _} = ok -> ok
      {:error, _} = error -> error
      other -> {:ok, other}
    end
  rescue
    exception ->
      Logger.error("Error occurred: #{inspect(exception)}")
      {:error, exception}
  end
end