lib/fuzler.ex

defmodule Fuzler do
  # use Rustler, otp_app: :fuzler, crate: "fuzler"

  @moduledoc """
  A lightweight, reusable cache built on an ETS table, wrapped in a `GenServer`.

  ## Highlights

  * **Named ETS** – give any atom as `:table`.
  * **Public reads / protected writes** – `:public`, `read_concurrency:` &
    `write_concurrency:` enabled; only the owner process mutates the table.
  * **O(1) table lookup** – mapping from server‐name → table stored in
    `:persistent_term`, avoiding a `GenServer.call/2` round‑trip for every
    public API invocation.
  * **Hot reload, insert, get, predicate stream** – as before.
  * **Fuzzy full‑text search on keys** – `text_search/3` uses
    `String.jaro_distance/2` (or any custom scorer) and thresholding.
  """

  use GenServer

  @enforce_keys [:table, :loader]
  defstruct [:table, :loader]

  alias Fuzler.TopHeap

  @type key :: term()
  @type value :: term()
  @opaque t :: %__MODULE__{table: atom(), loader: (-> Enumerable.t())}

  version = Mix.Project.config()[:version]

  use RustlerPrecompiled,
    otp_app: :fuzler,
    crate: "fuzler",
    base_url: "https://github.com/elchemista/fuzler/releases/download/v#{version}",
    force_build: System.get_env("RUSTLER_PRECOMPILATION_EXAMPLE_BUILD") in ["1", "true"],
    version: version

  # Public API

  @doc """
  Starts the cache.

  Options:

    * `:table`  – **required** atom, the ETS table name.
    * `:loader` – **required** `() -> Enumerable.t()` that yields `{key, value}`.
    * `:ets_opts` – extra ETS options merged with sensible defaults
      `[:named_table, :public, read_concurrency: true, write_concurrency: true]`.
    * `:name` – process name (defaults to the module itself).
  """
  @spec start_link(keyword()) :: GenServer.on_start()
  def start_link(opts) when is_list(opts) do
    name = Keyword.get(opts, :name, __MODULE__)
    GenServer.start_link(__MODULE__, opts, name: name)
  end

  @doc """
  Reloads the cache by wiping the table and invoking the loader again.
  """
  @spec reload(GenServer.server()) :: :ok
  def reload(server \\ __MODULE__), do: GenServer.call(server, :reload)

  @doc """
  Inserts a `{key, value}` tuple into the cache.
  """
  @spec insert({key, value}, GenServer.server()) :: :ok
  def insert(tuple, server \\ __MODULE__), do: GenServer.cast(server, {:insert, tuple})

  @doc """
  Fetches `value` for `key`, or `nil` if not present.
  """
  @spec get(key, GenServer.server()) :: value | nil
  def get(key, server \\ __MODULE__) do
    case :ets.lookup(table_name(server), key) do
      [{^key, value}] -> value
      [] -> nil
    end
  end

  @doc """
  Returns a lazy stream of `{key, value}` pairs whose value satisfies
  `predicate.(value)`. When no predicate given, returns every entry.
  """
  @spec stream((value -> as_boolean(term)) | nil, GenServer.server()) :: Enumerable.t()
  def stream(predicate \\ fn _ -> true end, server \\ __MODULE__)
      when is_function(predicate, 1) do
    table = table_name(server)

    Stream.resource(
      fn -> :ets.first(table) end,
      fn
        :"$end_of_table" ->
          {:halt, nil}

        key ->
          [{^key, value}] = :ets.lookup(table, key)
          next = :ets.next(table, key)
          if predicate.(value), do: {[{key, value}], next}, else: {[], next}
      end,
      fn _ -> :ok end
    )
  end

  @key_count_threshold 999

  @doc """
  Top‐N fuzzy search selecting the scorer based on table size:

    * If the cache has fewer than `@key_count_threshold` entries, uses
      pure‐Elixir `String.jaro_distance/2`.
    * Otherwise uses the SIMD‐accelerated `nif_similarity_score/2`.

  * `query` – the search string.
  * `opts`  – `:limit` (default 15).

  Returns a list of `{key, value, score}` sorted by descending similarity.
  """
  @spec text_search(String.t(), keyword(), GenServer.server()) :: [{key, value, float()}]
  def text_search(query, opts \\ [], server \\ __MODULE__)
      when is_binary(query) and is_list(opts) do
    limit = Keyword.get(opts, :limit, 15)
    table = table_name(server)

    # decide scorer based on number of entries
    count = :ets.info(table, :size)

    scorer =
      if count < @key_count_threshold,
        do: &String.jaro_distance/2,
        else: &nif_similarity_score/2

    :ets.foldl(
      fn {k, v}, heap_acc ->
        score = scorer.(query, to_string(k))

        if score < 0.10,
          do: heap_acc,
          else: TopHeap.push_top({k, v, score}, heap_acc)
      end,
      TopHeap.new(limit),
      table
    )
    |> TopHeap.to_desc_list()
  end

  # GenServer callbacks

  @impl true
  def init(opts) do
    table = Keyword.fetch!(opts, :table)
    loader = Keyword.fetch!(opts, :loader)
    ets_opt = Keyword.get(opts, :ets_opts, [])
    name = Keyword.get(opts, :name, __MODULE__)

    :ets.new(
      table,
      [:named_table, :public, read_concurrency: true, write_concurrency: true] ++ ets_opt
    )

    load(loader, table)

    # O(1) lookup from server name/ PID → ETS table via persistent_term
    :persistent_term.put(pterm_key(name), table)

    {:ok, %__MODULE__{table: table, loader: loader}}
  end

  @impl true
  def handle_call(:reload, _from, %__MODULE__{table: table, loader: loader} = state) do
    :ets.delete_all_objects(table)
    load(loader, table)
    {:reply, :ok, state}
  end

  @impl true
  def handle_call(:table_name, _from, %__MODULE__{table: table} = state),
    do: {:reply, table, state}

  @impl true
  def handle_cast({:insert, {key, value}}, %__MODULE__{table: table} = state) do
    :ets.insert(table, {key, value})
    {:noreply, state}
  end

  # Helpers

  # Constant‑time ETS table lookup using persistent_term when possible.
  defp table_name(server) when is_atom(server) do
    case :persistent_term.get(pterm_key(server), :undefined) do
      :undefined -> GenServer.call(server, :table_name)
      table -> table
    end
  end

  defp table_name(server) when is_pid(server), do: GenServer.call(server, :table_name)

  defp pterm_key(name), do: {__MODULE__, name}

  defp load(loader, table) do
    loader.() |> Enum.each(fn {k, v} -> :ets.insert(table, {k, v}) end)
  end

  # NIF
  @spec nif_similarity_score(String.t(), String.t()) :: float()
  def nif_similarity_score(_q, _t), do: :erlang.nif_error(:nif_not_loaded)
end