Skip to main content

lib/vettore.ex

defmodule Vettore do
  @moduledoc """
  Vettore public API.

  `Vettore.new/1` creates an ETS-backed collection. The older
  `Vettore.new/0` database-style API remains available for compatibility.
  """

  alias Vettore.{Collection, Distance, Embedding, Result}

  @doc """
  Builds the database handle used by the older compatibility API.

  A fresh, unnamed, public ETS `:set` table is created with read and write
  concurrency enabled, then wrapped in a `Vettore.DB` struct.

  ## Examples

      iex> db = Vettore.new()
      iex> match?(%Vettore.DB{}, db)
      true
  """
  @spec new() :: Vettore.DB.t()
  def new do
    ets_options = [:set, :public, read_concurrency: true, write_concurrency: true]

    %Vettore.DB{table: :ets.new(:vettore_db, ets_options)}
  end

  @doc """
  Builds an ETS-backed vector collection from a keyword list of options.

  The options are handed unchanged to `Vettore.Collection.new/1`. New code
  should start here; `Vettore.new/0` exists only for the older compatibility
  database API.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :cosine)
      iex> collection.metric
      :cosine
  """
  @spec new(keyword()) :: {:ok, Collection.t()} | {:error, term()}
  def new(opts) when is_list(opts) do
    Collection.new(opts)
  end

  @doc """
  Writes a snapshot of a collection to `path`.

  The work is done by the collection's store module. Only canonical ETS state
  is written; native index state is rebuilt when the snapshot is loaded.

  Returns `{:error, :invalid_snapshot}` when the first argument is not a
  collection or `path` is not a binary.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> path = Path.join(System.tmp_dir!(), "vettore_snapshot_example.ets")
      iex> Vettore.snapshot(collection, path)
      :ok
      iex> File.rm(path)
      :ok
  """
  @spec snapshot(Collection.t(), Path.t()) :: :ok | {:error, term()}
  def snapshot(%Collection{} = collection, path) when is_binary(path) do
    %Collection{store_mod: store, store_state: state} = collection
    store.snapshot(state, path)
  end

  def snapshot(_collection, _path) do
    {:error, :invalid_snapshot}
  end

  @doc """
  Restores a collection from a snapshot file.

  Both arguments are forwarded to `Vettore.Collection.load_snapshot/2`. Options
  such as `index: :flat` or `index: :hnsw` rebuild the loaded collection with a
  different index; `opts` defaults to `[]`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> :ok = Vettore.put(collection, %{id: "a", vector: [1.0, 2.0]})
      iex> path = Path.join(System.tmp_dir!(), "vettore_load_example.ets")
      iex> :ok = Vettore.snapshot(collection, path)
      iex> {:ok, loaded} = Vettore.load_snapshot(path)
      iex> {:ok, embedding} = Vettore.get(loaded, "a")
      iex> File.rm(path)
      iex> embedding.id
      "a"
  """
  @spec load_snapshot(Path.t(), keyword()) :: {:ok, Collection.t()} | {:error, term()}
  def load_snapshot(path, opts \\ []) do
    Collection.load_snapshot(path, opts)
  end

  @doc """
  Stores a single embedding in a collection.

  The embedding may be a `Vettore.Embedding` struct or a plain map; it is
  passed straight to `Vettore.Collection.put/2`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> Vettore.put(collection, %{id: "a", vector: [0.0, 0.0]})
      :ok
  """
  @spec put(Collection.t(), Embedding.t() | map()) :: :ok | {:error, term()}
  def put(%Collection{} = collection, embedding) do
    Collection.put(collection, embedding)
  end

  @doc """
  Stores a list of embeddings in a collection with one call.

  The list is passed straight to `Vettore.Collection.put_many/2`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> Vettore.put_many(collection, [
      ...>   %{id: "a", vector: [0.0, 0.0]},
      ...>   %{id: "b", vector: [1.0, 1.0]}
      ...> ])
      :ok
  """
  @spec put_many(Collection.t(), [Embedding.t() | map()]) :: :ok | {:error, term()}
  def put_many(%Collection{} = collection, embeddings) do
    Collection.put_many(collection, embeddings)
  end

  @doc """
  Looks up a single embedding by its id.

  The lookup is answered by the collection's store module. `id` must be a
  binary.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> :ok = Vettore.put(collection, %{id: "a", vector: [0.0, 0.0]})
      iex> {:ok, %Vettore.Embedding{id: "a"}} = Vettore.get(collection, "a")
  """
  @spec get(Collection.t(), String.t()) :: {:ok, Embedding.t()} | {:error, term()}
  def get(%Collection{} = collection, id) when is_binary(id) do
    %Collection{store_mod: store, store_state: state} = collection
    store.get(state, id)
  end

  @doc """
  Removes a single embedding from a collection by id.

  The embedding is deleted from the store first. Only when the store answers
  `:ok` is the index asked to delete the same id; any other store result is
  returned unchanged.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> :ok = Vettore.put(collection, %{id: "a", vector: [0.0, 0.0]})
      iex> Vettore.delete(collection, "a")
      :ok
  """
  @spec delete(Collection.t(), String.t()) :: :ok | {:error, term()}
  def delete(%Collection{} = collection, id) when is_binary(id) do
    case collection.store_mod.delete(collection.store_state, id) do
      :ok -> collection.index_mod.delete(collection, id)
      store_result -> store_result
    end
  end

  @doc """
  Lists every embedding held by a collection.

  The list is produced by the collection's store module.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> :ok = Vettore.put(collection, %{id: "a", vector: [0.0, 0.0]})
      iex> {:ok, [%Vettore.Embedding{id: "a"}]} = Vettore.all(collection)
  """
  @spec all(Collection.t()) :: {:ok, [Embedding.t()]} | {:error, term()}
  def all(%Collection{} = collection) do
    %Collection{store_mod: store, store_state: state} = collection
    store.all(state)
  end

  @doc """
  Searches a collection through its configured index.

  The collection, query, and options are handed to the collection's index
  module. `opts` defaults to `[]`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2)
      iex> :ok = Vettore.put(collection, %{id: "near", vector: [0.0, 0.0]})
      iex> {:ok, [%Vettore.Result{id: "near"}]} = Vettore.search(collection, [0.0, 0.0], limit: 1)
  """
  @spec search(Collection.t(), [number()], keyword()) :: {:ok, [Result.t()]} | {:error, term()}
  def search(%Collection{} = collection, query, opts \\ []) do
    %Collection{index_mod: index} = collection
    index.search(collection, query, opts)
  end

  @doc """
  Performs a Matryoshka-style funnel search.

  Every stage scores its candidate set using a prefix of the vector; the final
  results are then reranked against the full stored vectors. The call is
  forwarded to `Vettore.Collection.funnel_search/3`, and `opts` defaults to
  `[]`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 3, metric: :l2, index: :flat)
      iex> :ok =
      ...>   Vettore.put_many(collection, [
      ...>     %{id: "near", vector: [1.0, 0.0, 0.0]},
      ...>     %{id: "far", vector: [-1.0, 0.0, 0.0]}
      ...>   ])
      iex> {:ok, [%Vettore.Result{id: "near"}]} =
      ...>   Vettore.funnel_search(collection, [1.0, 0.0, 0.0],
      ...>     stages: [1, 3],
      ...>     candidates: 2,
      ...>     limit: 1
      ...>   )
  """
  @spec funnel_search(Collection.t(), [number()], keyword()) ::
          {:ok, [Result.t()]} | {:error, term()}
  def funnel_search(%Collection{} = collection, query, opts \\ []) do
    Collection.funnel_search(collection, query, opts)
  end

  @doc """
  Performs a binary sign-bit candidate search, then reranks exactly.

  The call is forwarded to `Vettore.Collection.quantized_search/3`, and `opts`
  defaults to `[]`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2, index: :flat)
      iex> :ok =
      ...>   Vettore.put_many(collection, [
      ...>     %{id: "near", vector: [1.0, 1.0]},
      ...>     %{id: "far", vector: [-1.0, -1.0]}
      ...>   ])
      iex> {:ok, [%Vettore.Result{id: "near"}]} =
      ...>   Vettore.quantized_search(collection, [1.0, 1.0],
      ...>     candidates: 2,
      ...>     limit: 1
      ...>   )
  """
  @spec quantized_search(Collection.t(), [number()], keyword()) ::
          {:ok, [Result.t()]} | {:error, term()}
  def quantized_search(%Collection{} = collection, query, opts \\ []) do
    Collection.quantized_search(collection, query, opts)
  end

  @doc """
  Performs ColBERT-style late interaction over multi-vector records.

  `query_vectors` is a list of query vectors. The call is forwarded to
  `Vettore.Collection.multi_vector_search/3`, and `opts` defaults to `[]`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :inner_product)
      iex> :ok =
      ...>   Vettore.put_many(collection, [
      ...>     %{id: "both_axes", vectors: [[1.0, 0.0], [0.0, 1.0]]},
      ...>     %{id: "one_axis", vectors: [[1.0, 0.0], [-1.0, 0.0]]}
      ...>   ])
      iex> {:ok, [%Vettore.Result{id: "both_axes", score: 2.0} | _]} =
      ...>   Vettore.multi_vector_search(
      ...>     collection,
      ...>     [[1.0, 0.0], [0.0, 1.0]],
      ...>     limit: 1
      ...>   )
  """
  @spec multi_vector_search(Collection.t(), [[number()]], keyword()) ::
          {:ok, [Result.t()]} | {:error, term()}
  def multi_vector_search(%Collection{} = collection, query_vectors, opts \\ []) do
    Collection.multi_vector_search(collection, query_vectors, opts)
  end

  @doc """
  Combines several candidate generators and reranks their union.

  The `:generators` option takes atoms or keyword entries. These generators
  are supported:

    * `:funnel` - Matryoshka-style prefix candidate search
    * `:quantized` - binary sign-bit candidate search
    * `:search` - the collection's configured index
    * `:hnsw` - alias for `:search` when the collection uses `index: :hnsw`

  Unless told otherwise, the final reranker is exact vector scoring. Pass
  `rerank: {:multi_vector, query_vectors}` to rerank the union of generated
  candidates with ColBERT-style late interaction instead.

  The call is forwarded to `Vettore.Collection.hybrid_search/3`, and `opts`
  defaults to `[]`.

  ## Examples

      iex> {:ok, collection} = Vettore.new(dimensions: 2, metric: :l2, index: :flat)
      iex> :ok =
      ...>   Vettore.put_many(collection, [
      ...>     %{id: "near", vector: [1.0, 1.0]},
      ...>     %{id: "far", vector: [-1.0, -1.0]}
      ...>   ])
      iex> {:ok, [%Vettore.Result{id: "near"}]} =
      ...>   Vettore.hybrid_search(collection, [1.0, 1.0],
      ...>     generators: [
      ...>       funnel: [stages: [1, 2], candidates: 2],
      ...>       quantized: [candidates: 2]
      ...>     ],
      ...>     limit: 1
      ...>   )
  """
  @spec hybrid_search(Collection.t(), [number()], keyword()) ::
          {:ok, [Result.t()]} | {:error, term()}
  def hybrid_search(%Collection{} = collection, query, opts \\ []) do
    Collection.hybrid_search(collection, query, opts)
  end

  @doc """
  Checks a query vector against a collection and returns its normalized form.

  The call is forwarded to `Vettore.Collection.prepare_query/2`.
  """
  @spec prepare_query(Collection.t(), [number()]) :: {:ok, [float()]} | {:error, term()}
  def prepare_query(%Collection{} = collection, query) do
    Collection.prepare_query(collection, query)
  end

  @doc """
  Creates a collection inside a compatibility database and registers it by name.

  Metric aliases are translated first: `:euclidean` becomes `:l2`, `:binary`
  becomes `:hamming`, and `:dot` becomes `:inner_product`. Passing `:hnsw` as
  the metric builds an `:l2` collection whose default index is `:hnsw`.

  Registering a name that is already present replaces the previous entry in
  the database table.

  ## Options

    * `:index` - defaults to `:flat` (`:hnsw` when the metric is `:hnsw`)
    * `:store` - defaults to `:ets`
    * `:normalize` - defaults to `:l2` for `:cosine`, otherwise `:none`
    * `:score` - defaults to `:similarity`

  Returns `{:ok, name}` on success, the error from `Vettore.Collection.new/1`
  when the collection cannot be built, and `{:error, :invalid_arguments}` when
  the database, name, dimensions, or options have the wrong shape.

  ## Examples

      iex> db = Vettore.new()
      iex> Vettore.create_collection(db, "docs", 2, :cosine)
      {:ok, "docs"}

      iex> Vettore.create_collection(:bad_db, "docs", 2, :cosine)
      {:error, :invalid_arguments}

      iex> Vettore.create_collection(Vettore.new(), "docs", 2, :cosine, :not_a_list)
      {:error, :invalid_arguments}
  """
  @spec create_collection(Vettore.DB.t(), String.t(), pos_integer(), atom(), keyword()) ::
          {:ok, String.t()} | {:error, term()}
  def create_collection(db, name, dimensions, metric, opts \\ [])

  # `is_list(opts)` keeps a malformed options argument on the
  # `{:error, :invalid_arguments}` path instead of raising inside `Keyword.get/3`.
  def create_collection(%Vettore.DB{} = db, name, dimensions, metric, opts)
      when is_binary(name) and is_integer(dimensions) and dimensions > 0 and is_list(opts) do
    metric = normalize_metric(metric)

    # The index default must be derived before `:hnsw` is rewritten to `:l2`.
    index = Keyword.get(opts, :index, if(metric == :hnsw, do: :hnsw, else: :flat))
    metric = if metric == :hnsw, do: :l2, else: metric

    collection_opts = [
      name: name,
      dimensions: dimensions,
      metric: metric,
      index: index,
      store: Keyword.get(opts, :store, :ets),
      normalize: Keyword.get(opts, :normalize, default_normalize(metric)),
      score: Keyword.get(opts, :score, :similarity)
    ]

    with {:ok, collection} <- Collection.new(collection_opts) do
      true = :ets.insert(db.table, {{:collection, name}, collection})
      {:ok, name}
    end
  end

  def create_collection(_db, _name, _dimensions, _metric, _opts), do: {:error, :invalid_arguments}

  @doc """
  Removes a collection's entry from a compatibility database.

  Only the `{:collection, name}` entry in the database table is deleted. The
  call answers `{:ok, name}` whether or not such an entry existed, because
  `:ets.delete/2` returns `true` in both cases.

  Returns `{:error, :invalid_arguments}` when the database is not a
  `Vettore.DB` or the name is not a binary.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :cosine)
      iex> Vettore.delete_collection(db, "docs")
      {:ok, "docs"}

      iex> Vettore.delete_collection(:bad_db, "docs")
      {:error, :invalid_arguments}
  """
  @spec delete_collection(Vettore.DB.t(), String.t()) ::
          {:ok, String.t()} | {:error, :invalid_arguments}
  def delete_collection(%Vettore.DB{} = db, name) when is_binary(name) do
    true = :ets.delete(db.table, {:collection, name})
    {:ok, name}
  end

  def delete_collection(_db, _name) do
    {:error, :invalid_arguments}
  end

  @doc """
  Adds one embedding to a named collection via the compatibility API.

  The collection is looked up by name and the embedding is stored with
  `put/2`. On success the embedding's `id` is returned, falling back to its
  `value` field when `id` is `nil` or `false`. A failed lookup or a failed
  `put/2` is returned unchanged.

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB`, a binary collection name, and a `Vettore.Embedding` struct.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :cosine)
      iex> embedding = %Vettore.Embedding{id: "a", vector: [1.0, 0.0]}
      iex> Vettore.insert(db, "docs", embedding)
      {:ok, "a"}

      iex> Vettore.insert(:bad_db, "docs", %Vettore.Embedding{id: "a", vector: [1.0, 0.0]})
      {:error, :invalid_arguments}
  """
  @spec insert(Vettore.DB.t(), String.t(), Embedding.t()) ::
          {:ok, String.t() | nil} | {:error, term()}
  def insert(%Vettore.DB{} = db, collection_name, %Embedding{} = embedding)
      when is_binary(collection_name) do
    case fetch_collection(db, collection_name) do
      {:ok, collection} ->
        case put(collection, embedding) do
          :ok -> {:ok, embedding.id || embedding.value}
          put_failure -> put_failure
        end

      lookup_failure ->
        lookup_failure
    end
  end

  def insert(_db, _collection_name, _embedding) do
    {:error, :invalid_arguments}
  end

  @doc """
  Inserts many embeddings through the compatibility API.

  The collection is looked up by name and the whole list is stored with
  `put_many/2`. On success each entry contributes its `:id`, falling back to
  its `:value` when `:id` is missing, `nil`, or `false`; an entry with neither
  contributes `nil`. A failed lookup or a failed `put_many/2` is returned
  unchanged.

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB`, a binary collection name, and a list.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :l2)
      iex> embeddings = [
      ...>   %Vettore.Embedding{id: "a", vector: [0.0, 0.0]},
      ...>   %Vettore.Embedding{id: "b", vector: [1.0, 1.0]}
      ...> ]
      iex> Vettore.batch(db, "docs", embeddings)
      {:ok, ["a", "b"]}

      iex> Vettore.batch(:bad_db, "docs", embeddings)
      {:error, :invalid_arguments}
  """
  @spec batch(Vettore.DB.t(), String.t(), [Embedding.t() | map()]) ::
          {:ok, [String.t() | nil]} | {:error, term()}
  def batch(%Vettore.DB{} = db, collection_name, embeddings)
      when is_binary(collection_name) and is_list(embeddings) do
    with {:ok, collection} <- fetch_collection(db, collection_name),
         :ok <- put_many(collection, embeddings) do
      # `Map.get/2` instead of dot access: `put_many/2` is specced to take plain
      # maps, and a map lacking `:id` or `:value` must not raise `KeyError`
      # after the write has already succeeded.
      ids =
        Enum.map(embeddings, fn entry ->
          Map.get(entry, :id) || Map.get(entry, :value)
        end)

      {:ok, ids}
    end
  end

  def batch(_db, _collection_name, _embeddings), do: {:error, :invalid_arguments}

  @doc """
  Looks up one embedding by id in a named collection via the compatibility API.

  The collection is resolved by name and the id is then passed to `get/2`. A
  failed collection lookup is returned unchanged.

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB`, a binary collection name, and a binary id.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :cosine)
      iex> {:ok, "a"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "a", vector: [1.0, 0.0]})
      iex> {:ok, %Vettore.Embedding{id: "a"}} = Vettore.get_by_value(db, "docs", "a")

      iex> Vettore.get_by_value(db, "docs", "missing")
      {:error, :not_found}
  """
  @spec get_by_value(Vettore.DB.t(), String.t(), String.t()) ::
          {:ok, Embedding.t()} | {:error, term()}
  def get_by_value(%Vettore.DB{} = db, collection_name, id)
      when is_binary(collection_name) and is_binary(id) do
    case fetch_collection(db, collection_name) do
      {:ok, collection} -> get(collection, id)
      lookup_failure -> lookup_failure
    end
  end

  def get_by_value(_db, _collection_name, _id) do
    {:error, :invalid_arguments}
  end

  @doc """
  Fetches the first embedding whose stored vector equals the prepared query.

  The query is first passed through `prepare_query/2`; the result is compared
  with `==` against the `vector` of every embedding returned by `all/1`, so
  the lookup reads the whole collection. An invalid query is rejected before
  any embeddings are read.

  Returns `{:error, :not_found}` when no stored vector matches, the error from
  the collection lookup, `prepare_query/2`, or `all/1` when one of them fails,
  and `{:error, :invalid_arguments}` unless the arguments are a `Vettore.DB`,
  a binary collection name, and a list.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :cosine)
      iex> {:ok, "a"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "a", vector: [1.0, 0.0]})
      iex> {:ok, %Vettore.Embedding{id: "a"}} = Vettore.get_by_vector(db, "docs", [1.0, 0.0])

      iex> Vettore.get_by_vector(db, "docs", [0.0, 1.0])
      {:error, :not_found}
  """
  @spec get_by_vector(Vettore.DB.t(), String.t(), [number()]) ::
          {:ok, Embedding.t()} | {:error, term()}
  def get_by_vector(%Vettore.DB{} = db, collection_name, vector)
      when is_binary(collection_name) and is_list(vector) do
    # Validate the query before `all/1` so a bad vector never pays for reading
    # every stored embedding.
    with {:ok, collection} <- fetch_collection(db, collection_name),
         {:ok, prepared} <- prepare_query(collection, vector),
         {:ok, embeddings} <- all(collection) do
      case Enum.find(embeddings, &(&1.vector == prepared)) do
        nil -> {:error, :not_found}
        embedding -> {:ok, embedding}
      end
    end
  end

  def get_by_vector(_db, _collection_name, _vector), do: {:error, :invalid_arguments}

  @doc """
  Removes one embedding by id from a named collection via the compatibility API.

  The collection is resolved by name and the id is passed to `delete/2`. On
  success the id is echoed back as `{:ok, id}`; a failed lookup or a failed
  `delete/2` is returned unchanged.

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB`, a binary collection name, and a binary id.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :l2)
      iex> {:ok, "a"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "a", vector: [0.0, 0.0]})
      iex> Vettore.delete(db, "docs", "a")
      {:ok, "a"}

      iex> Vettore.delete(:bad_db, "docs", "a")
      {:error, :invalid_arguments}
  """
  @spec delete(Vettore.DB.t(), String.t(), String.t()) ::
          {:ok, String.t()} | {:error, term()}
  def delete(%Vettore.DB{} = db, collection_name, id)
      when is_binary(collection_name) and is_binary(id) do
    case fetch_collection(db, collection_name) do
      {:ok, collection} ->
        case delete(collection, id) do
          :ok -> {:ok, id}
          delete_failure -> delete_failure
        end

      lookup_failure ->
        lookup_failure
    end
  end

  def delete(_db, _collection_name, _id) do
    {:error, :invalid_arguments}
  end

  @doc """
  Lists every record of a named collection as legacy tuples.

  Each embedding returned by `all/1` is converted to an
  `{id, vector, metadata}` tuple. A failed lookup or a failed `all/1` is
  returned unchanged.

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB` and a binary collection name.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :l2)
      iex> {:ok, "a"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "a", vector: [0.0, 0.0], metadata: %{kind: :origin}})
      iex> {:ok, [{"a", [0.0, 0.0], %{kind: :origin}}]} = Vettore.get_all(db, "docs")

      iex> Vettore.get_all(:bad_db, "docs")
      {:error, :invalid_arguments}
  """
  @spec get_all(Vettore.DB.t(), String.t()) ::
          {:ok, [{String.t(), [float()], map() | nil}]} | {:error, term()}
  def get_all(%Vettore.DB{} = db, collection_name) when is_binary(collection_name) do
    with {:ok, collection} <- fetch_collection(db, collection_name),
         {:ok, embeddings} <- all(collection) do
      legacy_tuples =
        for embedding <- embeddings do
          {embedding.id, embedding.vector, embedding.metadata}
        end

      {:ok, legacy_tuples}
    end
  end

  def get_all(_db, _collection_name) do
    {:error, :invalid_arguments}
  end

  @doc """
  Searches a named collection via the compatibility API.

  The collection is resolved by name, `search/3` is run with the given query
  and options, and each result is reduced to an `{id, score}` tuple. A failed
  lookup or a failed search is returned unchanged. `opts` defaults to `[]`.

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB`, a binary collection name, a list query, and a list of options.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :cosine)
      iex> {:ok, "a"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "a", vector: [1.0, 0.0]})
      iex> {:ok, [{"a", score}]} = Vettore.similarity_search(db, "docs", [1.0, 0.0], limit: 1)
      iex> score
      1.0

      iex> Vettore.similarity_search(:bad_db, "docs", [1.0, 0.0])
      {:error, :invalid_arguments}
  """
  @spec similarity_search(Vettore.DB.t(), String.t(), [number()], keyword()) ::
          {:ok, [{String.t(), float()}]} | {:error, term()}
  def similarity_search(db, collection_name, query, opts \\ [])

  def similarity_search(%Vettore.DB{} = db, collection_name, query, opts)
      when is_binary(collection_name) and is_list(query) and is_list(opts) do
    with {:ok, collection} <- fetch_collection(db, collection_name),
         {:ok, results} <- search(collection, query, opts) do
      scored = for result <- results, do: {result.id, result.score}
      {:ok, scored}
    end
  end

  def similarity_search(_db, _collection_name, _query, _opts) do
    {:error, :invalid_arguments}
  end

  @doc """
  Reranks compatibility search results with MMR.

  Every embedding in the named collection is loaded and paired as
  `{id, vector}`. Those pairs, the `initial` results, the collection's metric,
  and the two options below are then handed to
  `Vettore.Distance.mmr_rerank/5`, whose result is returned as-is. A failed
  lookup or a failed `all/1` is returned unchanged.

  ## Options

    * `:limit` - defaults to `10`
    * `:alpha` - defaults to `0.5`

  Returns `{:error, :invalid_arguments}` unless the arguments are a
  `Vettore.DB`, a binary collection name, a list of initial results, and a
  list of options.

  ## Examples

      iex> db = Vettore.new()
      iex> {:ok, "docs"} = Vettore.create_collection(db, "docs", 2, :cosine)
      iex> {:ok, "a"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "a", vector: [1.0, 0.0]})
      iex> {:ok, "b"} = Vettore.insert(db, "docs", %Vettore.Embedding{id: "b", vector: [0.0, 1.0]})
      iex> Vettore.rerank(db, "docs", [{"a", 0.9}, {"b", 0.8}], limit: 1)
      {:ok, [{"a", 0.9}]}

      iex> Vettore.rerank(:bad_db, "docs", [{"a", 0.9}])
      {:error, :invalid_arguments}
  """
  @spec rerank(Vettore.DB.t(), String.t(), [{String.t(), float()}], keyword()) ::
          {:ok, [{String.t(), float()}]} | {:error, term()}
  def rerank(db, collection_name, initial, opts \\ [])

  def rerank(%Vettore.DB{} = db, collection_name, initial, opts)
      when is_binary(collection_name) and is_list(initial) and is_list(opts) do
    max_results = Keyword.get(opts, :limit, 10)
    mmr_alpha = Keyword.get(opts, :alpha, 0.5)

    with {:ok, collection} <- fetch_collection(db, collection_name),
         {:ok, embeddings} <- all(collection) do
      id_vector_pairs = for embedding <- embeddings, do: {embedding.id, embedding.vector}

      Distance.mmr_rerank(initial, id_vector_pairs, collection.metric, mmr_alpha, max_results)
    end
  end

  def rerank(_db, _collection_name, _initial, _opts) do
    {:error, :invalid_arguments}
  end

  # Resolves a collection registered in the compatibility database under
  # `{:collection, name}`; answers `{:error, :collection_not_found}` when the
  # table holds no such entry.
  @spec fetch_collection(Vettore.DB.t(), String.t()) ::
          {:ok, Collection.t()} | {:error, :collection_not_found}
  defp fetch_collection(%Vettore.DB{} = db, name) do
    registry_key = {:collection, name}

    case :ets.lookup(db.table, registry_key) do
      [] -> {:error, :collection_not_found}
      [{{:collection, ^name}, collection}] -> {:ok, collection}
    end
  end

  # Translates legacy metric aliases to their canonical names. Anything that
  # is not an alias, including `:hnsw`, is returned untouched.
  @spec normalize_metric(atom()) :: atom()
  defp normalize_metric(metric) do
    case metric do
      :euclidean -> :l2
      :binary -> :hamming
      :dot -> :inner_product
      canonical -> canonical
    end
  end

  # Picks the normalization used when the caller gives none: `:l2` for the
  # `:cosine` metric, `:none` for every other metric.
  @spec default_normalize(atom()) :: :l2 | :none
  defp default_normalize(metric) do
    if metric == :cosine, do: :l2, else: :none
  end
end