defmodule CubDB do
@moduledoc """
`CubDB` is an embedded key-value database for the Elixir language. It is
designed for robustness and for minimal resource consumption.
## Features
- Both keys and values can be any Elixir (or Erlang) term.
- Basic `get/3`, `put/3`, and `delete/2` operations, and selection of ranges
of entries sorted by key with `select/2`.
- Atomic, Consistent, Isolated, Durable (ACID) transactions.
- Multi-version concurrency control (MVCC), allowing concurrent read
operations that neither block nor are blocked by writes.
- Unexpected shutdowns or crashes won't corrupt the database or break
atomicity of transactions.
- Manual or automatic compaction to reclaim disk space.
To ensure consistency, performance, and robustness to data corruption, the
`CubDB` database file uses an append-only, immutable B-tree data structure.
Entries are never changed in place, and read operations are performed on
zero-cost immutable snapshots.
More information can be found in the following sections:
- [Frequently Asked Questions](faq.html)
- [How To](howto.html)
## Usage
Start `CubDB` by specifying a directory for its database file (if not existing,
it will be created):
{:ok, db} = CubDB.start_link("my/data/directory")
Alternatively, to specify more options, a keyword list can be passed:
{:ok, db} = CubDB.start_link(data_dir: "my/data/directory", auto_compact: true)
_Important: avoid starting multiple `CubDB` processes on the same data
directory. Only one `CubDB` process should use a specific data directory at any
time._
`CubDB` functions can be called concurrently from different processes, but it
is important that only one `CubDB` process is started on the same data
directory.
The `get/2`, `put/3`, and `delete/2` functions work as you probably expect:
CubDB.put(db, :foo, "some value")
#=> :ok
CubDB.get(db, :foo)
#=> "some value"
CubDB.delete(db, :foo)
#=> :ok
CubDB.get(db, :foo)
#=> nil
Both keys and values can be any Elixir (or Erlang) term:
CubDB.put(db, {"some", 'tuple', :key}, %{foo: "a map value"})
#=> :ok
CubDB.get(db, {"some", 'tuple', :key})
#=> %{foo: "a map value"}
Multiple operations can be performed atomically with the `transaction/2`
function and the `CubDB.Tx` module:
# Swapping `:a` and `:b` atomically:
CubDB.transaction(db, fn tx ->
a = CubDB.Tx.get(tx, :a)
b = CubDB.Tx.get(tx, :b)
tx = CubDB.Tx.put(tx, :a, b)
tx = CubDB.Tx.put(tx, :b, a)
{:commit, tx, :ok}
end)
#=> :ok
Alternatively, it is possible to use `put_multi/2`, `delete_multi/2`, and the
other `[...]_multi` functions, which also guarantee atomicity:
CubDB.put_multi(db, [a: 1, b: 2, c: 3, d: 4, e: 5, f: 6, g: 7, h: 8])
#=> :ok
Ranges of entries sorted by key are retrieved using `select/2`:
CubDB.select(db, min_key: :b, max_key: :e) |> Enum.to_list()
#=> [b: 2, c: 3, d: 4, e: 5]
The `select/2` function can select entries in normal or reverse order, and returns
a lazy stream, so one can use functions in the `Stream` and `Enum` modules to
map, filter, and transform the result, only fetching from the database the
relevant entries:
# Take the sum of the last 3 even values
# (Integer.is_even/1 is a guard macro, so it requires `require Integer`):
require Integer
CubDB.select(db, reverse: true) # select entries in reverse order
|> Stream.map(fn {_key, value} -> value end) # discard the key and keep only the value
|> Stream.filter(fn value -> is_integer(value) && Integer.is_even(value) end) # keep only even integers
|> Stream.take(3) # take the first 3 values
|> Enum.sum() # sum the values
#=> 18
Read-only snapshots are useful when one needs to perform several reads or
selects, ensuring isolation from concurrent writes, but without blocking them.
When nothing needs to be written, using a snapshot is preferable to using a
transaction, because it will not block writes.
Snapshots come at no cost: nothing is actually copied or written on disk or in
memory, apart from some small internal bookkeeping. After obtaining a snapshot
with `with_snapshot/2`, one can read from it using the functions in the
`CubDB.Snapshot` module:
# the key of y depends on the value of x, so we ensure consistency by getting
# both entries from the same snapshot, isolating from the effects of concurrent
# writes
{x, y} = CubDB.with_snapshot(db, fn snap ->
x = CubDB.Snapshot.get(snap, :x)
y = CubDB.Snapshot.get(snap, x)
{x, y}
end)
Functions that read multiple entries, like `get_multi/2` and `select/2`,
internally use a snapshot, so they always ensure consistency and isolation
from concurrent writes, implementing multi-version concurrency control (MVCC).
Because `CubDB` uses an immutable, append-only data structure, write
operations cause the data file to grow. When necessary, `CubDB` runs a
compaction operation to optimize the file size and reclaim disk space.
Compaction runs in the background, without blocking other operations. By
default, `CubDB` runs compaction automatically when necessary (see
documentation of `set_auto_compact/2` for details). Alternatively, it can be
started manually by calling `compact/1`.
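For example, here is how one can customize the auto-compaction thresholds and
trigger a compaction manually (the threshold values below are illustrative):
CubDB.set_auto_compact(db, {200, 0.5})
#=> :ok
CubDB.compact(db)
#=> :ok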
"""
@doc """
Returns a specification to start this module under a supervisor.
The default options listed in `Supervisor` are used.
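## Example
# A minimal sketch: the keyword list accepted by `start_link/1` is given as
# the child start argument (the data directory path is illustrative)
children = [{CubDB, [data_dir: "my/data/directory"]}]
Supervisor.start_link(children, strategy: :one_for_one)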
"""
use GenServer
alias CubDB.Btree
alias CubDB.CleanUp
alias CubDB.Compactor
alias CubDB.Reader
alias CubDB.Store
alias CubDB.Snapshot
alias CubDB.Tx
@db_file_extension ".cub"
@compaction_file_extension ".compact"
@auto_compact_defaults {100, 0.25}
@type key :: any
@type value :: any
@type entry :: {key, value}
@type option :: {:auto_compact, {pos_integer, number} | boolean} | {:auto_file_sync, boolean}
@type select_option ::
{:min_key, any}
| {:max_key, any}
| {:min_key_inclusive, boolean}
| {:max_key_inclusive, boolean}
| {:reverse, boolean}
defmodule State do
@moduledoc false
@type t :: %CubDB.State{
btree: Btree.t(),
data_dir: String.t(),
task_supervisor: pid,
compactor: pid | nil,
clean_up: pid,
clean_up_pending: boolean,
old_btrees: [Btree.t()],
readers: %{required(reference) => String.t()},
auto_compact: {pos_integer, number} | false,
auto_file_sync: boolean,
subs: list(pid),
writer: GenServer.from() | nil,
write_queue: :queue.queue()
}
@enforce_keys [:btree, :data_dir, :clean_up]
defstruct [
:task_supervisor,
btree: nil,
data_dir: nil,
compactor: nil,
clean_up: nil,
clean_up_pending: false,
old_btrees: [],
readers: %{},
auto_compact: true,
auto_file_sync: true,
subs: [],
writer: nil,
write_queue: :queue.new()
]
end
@spec start_link(
String.t()
| [option | {:data_dir, String.t()} | GenServer.option()]
) :: GenServer.on_start()
@doc """
Starts the `CubDB` database process linked to the current process.
The argument is a keyword list of options:
- `data_dir`: the directory path where the database files will be stored.
This option is required. If the directory does not exist, it will be
created. Only one `CubDB` instance can run per directory, so if you run
several databases, they should each use their own separate data directory.
- `auto_compact`: whether to perform compaction automatically. It defaults
to `true`. See `set_auto_compact/2` for the possible values.
- `auto_file_sync`: whether to force flush the disk buffer on each write. It
defaults to `true`. If set to `false`, write performance is faster, but
durability of writes is not strictly guaranteed. See `set_auto_file_sync/2`
for details.
`GenServer` options like `name` and `timeout` can also be given, and are
forwarded to `GenServer.start_link/3` as the third argument.
If only the `data_dir` is specified, it is possible to pass it as a single
string argument.
## Examples
# Passing only the data dir
{:ok, db} = CubDB.start_link("some/data/dir")
# Passing data dir and other options
{:ok, db} = CubDB.start_link(data_dir: "some/data/dir", auto_compact: true, name: :db)
"""
def start_link(data_dir_or_options) do
case split_options(data_dir_or_options) do
{:ok, {data_dir, options, gen_server_options}} ->
GenServer.start_link(__MODULE__, [data_dir, options], gen_server_options)
error ->
error
end
end
def start_link(data_dir, options) do
start_link(Keyword.merge(options, data_dir: data_dir))
end
@spec start(String.t() | [option | {:data_dir, String.t()} | GenServer.option()]) ::
GenServer.on_start()
@doc """
Starts the `CubDB` database without a link.
See `start_link/2` for more information about options.
"""
def start(data_dir_or_options) do
case split_options(data_dir_or_options) do
{:ok, {data_dir, options, gen_server_options}} ->
GenServer.start(__MODULE__, [data_dir, options], gen_server_options)
error ->
error
end
end
def start(data_dir, options) do
start(Keyword.merge(options, data_dir: data_dir))
end
@spec stop(GenServer.server(), term(), timeout()) :: :ok
@doc """
Synchronously stops the `CubDB` database.
See `GenServer.stop/3` for details.
"""
def stop(db, reason \\ :normal, timeout \\ :infinity) do
GenServer.stop(db, reason, timeout)
end
@spec get(GenServer.server(), key, value) :: value
@doc """
Gets the value associated with `key` from the database.
If no value is associated with `key`, `default` is returned (which is `nil`,
unless specified otherwise).
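## Examples
Assuming `:missing` is not a key in the database:
CubDB.put(db, :a, 42)
CubDB.get(db, :a)
#=> 42
CubDB.get(db, :missing)
#=> nil
CubDB.get(db, :missing, 0)
#=> 0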
"""
def get(db, key, default \\ nil) do
with_snapshot(db, fn %Snapshot{btree: btree} ->
Reader.get(btree, key, default)
end)
end
@spec fetch(GenServer.server(), key) :: {:ok, value} | :error
@doc """
Fetches the value for the given `key` in the database, or returns `:error` if
`key` is not present.
If the database contains an entry with the given `key` and value `value`, it
returns `{:ok, value}`. If `key` is not found, it returns `:error`.
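## Example
Assuming `:missing` is not a key in the database:
CubDB.put(db, :a, 42)
CubDB.fetch(db, :a)
#=> {:ok, 42}
CubDB.fetch(db, :missing)
#=> :error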
"""
def fetch(db, key) do
with_snapshot(db, fn %Snapshot{btree: btree} ->
Reader.fetch(btree, key)
end)
end
@spec has_key?(GenServer.server(), key) :: boolean
@doc """
Returns whether an entry with the given `key` exists in the database.
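## Example
Note that an entry with a `nil` value still counts as present:
CubDB.put(db, :a, nil)
CubDB.has_key?(db, :a)
#=> true
CubDB.has_key?(db, :missing)
#=> false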
"""
def has_key?(db, key) do
with_snapshot(db, fn %Snapshot{btree: btree} ->
Reader.has_key?(btree, key)
end)
end
@spec select(GenServer.server(), [select_option]) :: Enumerable.t()
@doc """
Selects a range of entries from the database, returning a lazy stream.
The returned lazy stream can be filtered, mapped, and transformed with
standard functions in the `Stream` and `Enum` modules. The actual database
read is deferred to when the stream is iterated or evaluated.
## Options
The `min_key` and `max_key` options specify the range of entries that are
selected. By default, the range is inclusive, so all entries that have a key
greater than or equal to `min_key` and less than or equal to `max_key` are
selected:
# Select all entries where "a" <= key <= "d"
CubDB.select(db, min_key: "b", max_key: "d")
The range boundaries can be excluded by setting `min_key_inclusive` or
`max_key_inclusive` to `false`:
# Select all entries where "a" <= key < "d"
CubDB.select(db, min_key: "b", max_key: "d", max_key_inclusive: false)
Either of `min_key` and `max_key` can be omitted, to leave the range
open-ended.
# Select entries where key <= "a"
CubDB.select(db, max_key: "a")
Since `nil` is a valid key, setting `min_key` or `max_key` to `nil` *does NOT*
leave the range open-ended:
# Select entries where nil <= key <= "a"
CubDB.select(db, min_key: nil, max_key: "a")
The `reverse` option, when set to true, causes the entries to be selected and
traversed in reverse order. This is more efficient than selecting them in
normal ascending order and then reversing the resulting collection.
Note that, when selecting a key range, specifying `min_key` and/or `max_key`
is more performant than filtering out-of-range entries with functions in
`Enum` or `Stream`, because `min_key` and `max_key` avoid loading unnecessary
entries from disk entirely.
## Examples
To select all entries with keys between `:a` and `:c` as a stream of `{key,
value}` entries we can do:
entries = CubDB.select(db, min_key: :a, max_key: :c)
Since `select/2` returns a lazy stream, at this point nothing has been fetched
from the database yet. We can turn the stream into a list, performing the
actual query:
Enum.to_list(entries)
If we want to get all entries with keys between `:a` and `:c`, with `:c`
excluded, we can do:
entries =
CubDB.select(db,
min_key: :a,
max_key: :c,
max_key_inclusive: false
)
|> Enum.to_list()
To select the last 3 entries, we can do:
entries = CubDB.select(db, reverse: true) |> Enum.take(3)
If we want to obtain the sum of the first 10 positive numeric values
associated to keys from `:a` to `:f`, we can do:
sum =
CubDB.select(db,
min_key: :a,
max_key: :f
)
|> Stream.map(fn {_key, value} -> value end) # map values
|> Stream.filter(fn n -> is_number(n) and n > 0 end) # only positive numbers
|> Stream.take(10) # take only the first 10 entries in the range
|> Enum.sum() # sum the selected values
Using functions from the `Stream` module for mapping and filtering ensures
that we do not fetch unnecessary items from the database. In the example
above, for example, after fetching the first 10 entries satisfying the filter
condition, no further entry is fetched from the database.
"""
def select(db, options \\ []) when is_list(options) do
Stream.resource(
fn ->
snap = CubDB.snapshot(db, :infinity)
%Snapshot{btree: btree} = snap
stream = Reader.select(btree, options)
step = fn val, _acc -> {:suspend, val} end
next = &Enumerable.reduce(stream, &1, step)
{snap, next}
end,
fn {snap, next} ->
case next.({:cont, nil}) do
{:done, _} ->
{:halt, {snap, nil}}
{:suspended, value, next} ->
{[value], {snap, next}}
end
end,
fn {snap, _} -> CubDB.release_snapshot(snap) end
)
end
@spec size(GenServer.server()) :: non_neg_integer
@doc """
Returns the number of entries present in the database.
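## Example
Assuming an initially empty database:
CubDB.put_multi(db, a: 1, b: 2, c: 3)
#=> :ok
CubDB.size(db)
#=> 3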
"""
def size(db) do
with_snapshot(db, fn %Snapshot{btree: btree} ->
Reader.size(btree)
end)
end
@spec snapshot(GenServer.server(), timeout) :: Snapshot.t()
@doc """
Returns a snapshot of the database in its current state.
_Note: it is usually better to use `with_snapshot/2` instead of `snapshot/2`,
as the former automatically manages the snapshot life cycle, even in case of
crashes._
A snapshot is an immutable, read-only representation of the database at a
specific point in time. Getting a snapshot is basically zero-cost: nothing
needs to be copied or written, apart from some small in-memory bookkeeping.
The only cost of a snapshot is that it delays cleanup of old files after
compaction for as long as it is in use. For this reason, a snapshot has a
timeout, configurable as the optional second argument of `snapshot/2`
(defaulting to 5000, or 5 seconds, if not specified). After such timeout
elapses, the snapshot cannot be used anymore, and any pending cleanup is
performed.
It is possible to pass `:infinity` as the timeout, but then one must manually
call `release_snapshot/1` to release the snapshot after use.
Using `with_snapshot/2` is often a better alternative to `snapshot/2`, as it
does not require choosing an arbitrary timeout, and automatically ensures
that the snapshot is released after use, even in case of a crash.
After obtaining a snapshot, it is possible to read from it using the functions
in `CubDB.Snapshot`, which work the same way as the functions in `CubDB` with
the same name, such as `CubDB.Snapshot.get/3`, `CubDB.Snapshot.get_multi/2`,
`CubDB.Snapshot.fetch/2`, `CubDB.Snapshot.has_key?/2`,
`CubDB.Snapshot.select/2`.
It is *not* possible to write on a snapshot.
## Example
CubDB.put(db, :a, 123)
snap = CubDB.snapshot(db)
CubDB.put(db, :a, 0)
# Getting a value from the snapshot returns the value of the entry at the
# time the snapshot was obtained, even if the entry has changed in the
# meanwhile
CubDB.Snapshot.get(snap, :a)
# => 123
# Getting the same value from the database returns the latest value
CubDB.get(db, :a)
# => 0
"""
def snapshot(db, timeout \\ 5000) do
GenServer.call(db, {:snapshot, timeout}, :infinity)
end
@spec release_snapshot(Snapshot.t()) :: :ok
@doc """
Releases a snapshot when it is not needed anymore, freeing up the related
resources.
This allows `CubDB` to perform cleanup operations after compaction that are
otherwise blocked by the snapshot. When creating a snapshot with a timeout, it
is not necessary to call `release_snapshot/1`, as it will be automatically
released after the timeout elapses. When getting a snapshot with a timeout of
`:infinity` though, one has to manually call `release_snapshot/1` once the
snapshot is not needed anymore.
In most cases, using `with_snapshot/2` is a better alternative to manually
calling `snapshot/2` and `release_snapshot/1`.
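## Example
A sketch of manually managing the snapshot life cycle (this is essentially
what `with_snapshot/2` does automatically; the key is illustrative):
snap = CubDB.snapshot(db, :infinity)
try do
  CubDB.Snapshot.get(snap, :some_key)
after
  CubDB.release_snapshot(snap)
end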
"""
def release_snapshot(snapshot) do
%Snapshot{db: db} = snapshot
GenServer.call(db, {:release_snapshot, snapshot}, :infinity)
end
@spec with_snapshot(GenServer.server(), (Snapshot.t() -> result)) :: result when result: any
@doc """
Calls `fun` passing a snapshot, and automatically releases the snapshot when
the function returns.
It returns the value returned by the function `fun`.
A snapshot is an immutable, read-only representation of the database at a
specific point in time, isolated from writes. It is basically zero-cost:
nothing needs to be copied or written, apart from some small in-memory
bookkeeping.
Calling `with_snapshot/2` is equivalent to obtaining a snapshot with
`snapshot/2` using a timeout of `:infinity`, calling `fun`, then manually
releasing the snapshot with `release_snapshot/1`, but `with_snapshot/2`
automatically manages the snapshot life cycle, also in case an exception is
raised, a value is thrown, or the process exits. This makes `with_snapshot/2`
usually a better choice than `snapshot/2`.
After obtaining a snapshot, it is possible to read from it using the functions
in `CubDB.Snapshot`, which work the same way as the functions in `CubDB` with
the same name, such as `CubDB.Snapshot.get/3`, `CubDB.Snapshot.get_multi/2`,
`CubDB.Snapshot.fetch/2`, `CubDB.Snapshot.has_key?/2`,
`CubDB.Snapshot.select/2`.
It is *not* possible to write on a snapshot.
## Example
Assume that we have two entries in the database, and the key of the second
entry depends on the value of the first (so the value of the first entry
"points" to the other entry). In this case, we want to get both entries from
the same snapshot, to avoid inconsistencies due to concurrent writes. Here's
how that can be done with `with_snapshot/2`:
{x, y} = CubDB.with_snapshot(db, fn snap ->
x = CubDB.Snapshot.get(snap, :x)
y = CubDB.Snapshot.get(snap, x)
{x, y}
end)
"""
def with_snapshot(db, fun) do
snap = snapshot(db, :infinity)
try do
fun.(snap)
after
release_snapshot(snap)
end
end
@spec transaction(GenServer.server(), (Tx.t() -> {:commit, Tx.t(), result} | {:cancel, result})) ::
result
when result: any
@doc """
Starts a write transaction, passes it to the given function, and commits or
cancels it depending on the return value.
The transaction blocks other writers until the function returns, but does not
block concurrent readers. When the need is to only read inside a transaction,
and not perform any write, using a snapshot is a better choice, as it does not
block writers (see `with_snapshot/2`).
The module `CubDB.Tx` contains functions to perform read and write operations
within the transaction. The function `fun` is called with the transaction as
argument, and should return `{:commit, tx, result}` to commit the transaction
`tx` and return `result`, or `{:cancel, result}` to cancel the transaction and
return `result`.
If an exception is raised, a value is thrown, or the process exits while
inside a transaction, the transaction is cancelled.
Only use `CubDB.Tx` functions to write when inside a transaction (like
`CubDB.Tx.put/3` or `CubDB.Tx.delete/2`). Using functions in the `CubDB`
module to perform a write when inside a transaction (like `CubDB.put/3` or
`CubDB.delete/2`) raises an exception. Note that write functions in `CubDB.Tx`
have a functional API: they return a modified transaction rather than mutating
it in place.
The transaction value passed to `fun` should not be used outside of the
function.
## Example
Suppose the keys `:a` and `:b` map to balances, and we want to transfer 5 from
`:a` to `:b`, if `:a` has enough balance:
CubDB.transaction(db, fn tx ->
a = CubDB.Tx.get(tx, :a)
b = CubDB.Tx.get(tx, :b)
if a >= 5 do
tx = CubDB.Tx.put(tx, :a, a - 5)
tx = CubDB.Tx.put(tx, :b, b + 5)
{:commit, tx, :ok}
else
{:cancel, :not_enough_balance}
end
end)
The read functions in `CubDB.Tx` read the in-transaction state, as opposed to
the live database state, so they see writes performed inside the transaction
even before they are committed:
# Assuming we start from an empty database
CubDB.transaction(db, fn tx ->
tx = CubDB.Tx.put(tx, :a, 123)
# CubDB.Tx.get sees the in-transaction value
CubDB.Tx.get(tx, :a)
# => 123
# CubDB.get instead does not see the uncommitted write
CubDB.get(db, :a)
# => nil
{:commit, tx, nil}
end)
# After the transaction is committed, CubDB.get sees the write
CubDB.get(db, :a)
# => 123
"""
def transaction(db, fun) do
tx = start_transaction(db)
returned =
try do
fun.(tx)
rescue
exception ->
cancel_transaction(db)
reraise(exception, __STACKTRACE__)
catch
:throw, value ->
cancel_transaction(db)
throw(value)
:exit, value ->
cancel_transaction(db)
exit(value)
end
case returned do
{:commit, %Tx{} = tx, result} ->
commit_transaction(db, tx)
result
{:cancel, result} ->
cancel_transaction(db)
result
_ ->
raise "Wrong return value from CubDB.transaction/2 function, only {:commit, transaction, result} or {:cancel, result} are allowed"
end
end
@spec start_transaction(GenServer.server()) :: Tx.t()
defp start_transaction(db) do
case GenServer.call(db, :start_transaction, :infinity) do
{:error, :already_in_transaction} ->
raise "Cannot start nested write transaction. You might be using CubDB instead of CubDB.Tx to perform writes inside of a transaction"
%Tx{} = tx ->
tx
end
end
@spec cancel_transaction(GenServer.server()) :: :ok
defp cancel_transaction(db) do
GenServer.call(db, :cancel_transaction, :infinity)
end
@spec commit_transaction(GenServer.server(), Tx.t()) :: :ok
defp commit_transaction(db, tx) do
case GenServer.call(db, {:commit_transaction, tx}, :infinity) do
{:error, :invalid_owner} ->
raise "Attempt to commit a transaction started by a different owner"
:ok ->
:ok
end
end
@spec dirt_factor(GenServer.server()) :: float
@doc """
Returns the dirt factor.
The dirt factor is a number, ranging from 0 to 1, giving an indication about
the amount of overhead disk space (or "dirt") that can be cleaned up with a
compaction operation. A value of 0 means that there is no overhead, so a
compaction would have no benefit. The closer to 1 the dirt factor is, the more
can be cleaned up in a compaction operation.
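## Example
The value returned here is purely illustrative:
CubDB.dirt_factor(db)
#=> 0.37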
"""
def dirt_factor(db) do
GenServer.call(db, :dirt_factor, :infinity)
end
@spec put(GenServer.server(), key, value) :: :ok
@doc """
Writes an entry in the database, associating `key` to `value`.
If `key` was already present, it is overwritten.
"""
def put(db, key, value) do
transaction(db, fn tx ->
{:commit, Tx.put(tx, key, value), :ok}
end)
end
@spec put_new(GenServer.server(), key, value) :: :ok | {:error, :exists}
@doc """
Writes an entry in the database, associating `key` to `value`, only if `key`
is not yet in the database.
If `key` is already present, it does not change it, and returns `{:error,
:exists}`.
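## Example
Assuming `:a` is not initially present:
CubDB.put_new(db, :a, 1)
#=> :ok
CubDB.put_new(db, :a, 2)
#=> {:error, :exists}
CubDB.get(db, :a)
#=> 1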
"""
def put_new(db, key, value) do
transaction(db, fn tx ->
case Tx.put_new(tx, key, value) do
{:error, :exists} = reply ->
{:cancel, reply}
tx ->
{:commit, tx, :ok}
end
end)
end
@spec delete(GenServer.server(), key) :: :ok
@doc """
Deletes the entry associated to `key` from the database.
If `key` was not present in the database, nothing is done.
"""
def delete(db, key) do
transaction(db, fn tx ->
{:commit, Tx.delete(tx, key), :ok}
end)
end
@spec update(GenServer.server(), key, value, (value -> value)) :: :ok
@doc """
Updates the entry corresponding to `key` using the given function.
If `key` is present in the database, `fun` is invoked with the corresponding
`value`, and the result is set as the new value of `key`. If `key` is not
found, `initial` is inserted as the value of `key`.
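## Example
A sketch of using `update/4` as a counter, assuming `:counter` is not
initially present:
CubDB.update(db, :counter, 1, &(&1 + 1))
#=> :ok
CubDB.get(db, :counter)
#=> 1
CubDB.update(db, :counter, 1, &(&1 + 1))
#=> :ok
CubDB.get(db, :counter)
#=> 2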
"""
def update(db, key, initial, fun) do
get_and_update_multi(db, [key], fn entries ->
case Map.fetch(entries, key) do
:error ->
{:ok, %{key => initial}, []}
{:ok, value} ->
{:ok, %{key => fun.(value)}, []}
end
end)
end
@spec get_and_update(GenServer.server(), key, (value -> {any, value} | :pop)) :: any
@doc """
Gets the value corresponding to `key` and updates it, in one atomic transaction.
`fun` is called with the current value associated to `key` (or `nil` if not
present), and must return a two element tuple: the result value to be
returned, and the new value to be associated to `key`. `fun` may also return
`:pop`, in which case the current value is deleted and returned.
Note that in case the value to update returned by `fun` is the same as the
original value, no write is performed to disk.
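## Example
CubDB.put(db, :a, 1)
CubDB.get_and_update(db, :a, fn value -> {value, value + 1} end)
#=> 1
CubDB.get(db, :a)
#=> 2
# Returning :pop deletes the entry and returns its value
CubDB.get_and_update(db, :a, fn _value -> :pop end)
#=> 2
CubDB.has_key?(db, :a)
#=> false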
"""
def get_and_update(db, key, fun) do
get_and_update_multi(db, [key], fn entries ->
value = Map.get(entries, key, nil)
case fun.(value) do
{result, ^value} -> {result, [], []}
{result, new_value} -> {result, %{key => new_value}, []}
:pop -> {value, [], [key]}
end
end)
end
@spec get_and_update_multi(
GenServer.server(),
[key],
(%{optional(key) => value} -> {any, %{optional(key) => value} | nil, [key] | nil})
) :: any
@doc """
Gets and updates or deletes multiple entries in an atomic transaction.
Gets all values associated with keys in `keys_to_get`, and passes them as a
map of `%{key => value}` entries to `fun`. If a key is not found, it won't be
added to the map passed to `fun`. Updates the database and returns a result
according to the return value of `fun`.
The function `fun` should return a tuple of three elements: `{return_value,
entries_to_put, keys_to_delete}`, where `return_value` is an arbitrary value
to be returned, `entries_to_put` is a map of `%{key => value}` entries to be
written to the database, and `keys_to_delete` is a list of keys to be deleted.
The read and write operations are executed as an atomic transaction, so they
will either all succeed, or all fail. Note that `get_and_update_multi/3`
blocks other write operations until it completes.
## Example
Assuming a database of names as keys, and integer monetary balances as values,
and we want to transfer 10 units from `"Anna"` to `"Joy"`, returning their
updated balance:
{anna, joy} = CubDB.get_and_update_multi(db, ["Anna", "Joy"], fn entries ->
anna = Map.get(entries, "Anna", 0)
joy = Map.get(entries, "Joy", 0)
if anna < 10, do: raise(RuntimeError, message: "Anna's balance is too low")
anna = anna - 10
joy = joy + 10
{{anna, joy}, %{"Anna" => anna, "Joy" => joy}, []}
end)
Or, if we want to transfer all of the balance from `"Anna"` to `"Joy"`,
deleting `"Anna"`'s entry, and returning `"Joy"`'s resulting balance:
joy = CubDB.get_and_update_multi(db, ["Anna", "Joy"], fn entries ->
anna = Map.get(entries, "Anna", 0)
joy = Map.get(entries, "Joy", 0)
joy = joy + anna
{joy, %{"Joy" => joy}, ["Anna"]}
end)
"""
def get_and_update_multi(db, keys_to_get, fun) do
transaction(db, fn %Tx{btree: btree} = tx ->
key_values = Reader.get_multi(btree, keys_to_get)
{result, entries_to_put, keys_to_delete} = fun.(key_values)
case do_put_and_delete_multi(tx, entries_to_put, keys_to_delete) do
{:cancel, :ok} ->
{:cancel, result}
{:commit, tx, :ok} ->
{:commit, tx, result}
end
end)
end
@spec put_and_delete_multi(GenServer.server(), %{key => value} | [entry], [key]) :: :ok
@doc """
Writes and deletes multiple entries all at once, atomically.
Entries to put are passed as a map of `%{key => value}` or a list of `{key,
value}`. Keys to delete are passed as a list of keys.
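## Example
# Write :a and :b, and delete :c and :d, in one atomic write
CubDB.put_and_delete_multi(db, %{a: 1, b: 2}, [:c, :d])
#=> :ok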
"""
def put_and_delete_multi(db, entries_to_put, keys_to_delete) do
transaction(db, fn tx ->
do_put_and_delete_multi(tx, entries_to_put, keys_to_delete)
end)
end
@spec do_put_and_delete_multi(Tx.t(), %{key => value} | [entry], [key]) ::
{:commit, Tx.t(), :ok} | {:cancel, :ok}
defp do_put_and_delete_multi(_tx, [], []), do: {:cancel, :ok}
defp do_put_and_delete_multi(_tx, entries_to_put, []) when entries_to_put == %{},
do: {:cancel, :ok}
defp do_put_and_delete_multi(tx, entries_to_put, keys_to_delete) do
tx =
Enum.reduce(entries_to_put || [], tx, fn {key, value}, tx ->
Tx.put(tx, key, value)
end)
tx =
Enum.reduce(keys_to_delete || [], tx, fn key, tx ->
Tx.delete(tx, key)
end)
{:commit, tx, :ok}
end
@spec get_multi(GenServer.server(), [key]) :: %{key => value}
@doc """
Gets multiple entries corresponding to the given keys all at once, atomically.
The keys to get are passed as a list. The result is a map of key/value entries
corresponding to the given keys. Keys that are not present in the database
won't be in the result map.
## Example
CubDB.put_multi(db, a: 1, b: 2, c: nil)
CubDB.get_multi(db, [:a, :b, :c, :x])
# => %{a: 1, b: 2, c: nil}
"""
def get_multi(db, keys) do
with_snapshot(db, fn %Snapshot{btree: btree} ->
Reader.get_multi(btree, keys)
end)
end
@spec put_multi(GenServer.server(), %{key => value} | [entry]) :: :ok
@doc """
Writes multiple entries all at once, atomically.
Entries are passed as a map of `%{key => value}` or a list of `{key, value}`.
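## Example
Entries can be given either as a map or as a keyword list:
CubDB.put_multi(db, %{a: 1, b: 2})
#=> :ok
CubDB.put_multi(db, c: 3, d: 4)
#=> :ok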
"""
def put_multi(db, entries) do
put_and_delete_multi(db, entries, [])
end
@spec delete_multi(GenServer.server(), [key]) :: :ok
@doc """
Deletes multiple entries corresponding to the given keys all at once, atomically.
The `keys` to be deleted are passed as a list.
"""
def delete_multi(db, keys) do
put_and_delete_multi(db, %{}, keys)
end
@spec clear(GenServer.server()) :: :ok
@doc """
Deletes all entries, resulting in an empty database.
The deletion is atomic, and is much more performant than deleting each entry
manually.
The operation preserves the consistency guarantees offered to other concurrent
operations. For example, if `select/2` was called before the call to `clear/1`
and is running concurrently, the `select/2` will still see all the entries.
If a compaction is in progress when `clear/1` is called, the compaction is
halted, and a new one started immediately after. The new compaction should be
very fast, as the database is empty as a result of the `clear/1` call.
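## Example
Assuming the database contains no entries other than the ones written below:
CubDB.put_multi(db, a: 1, b: 2)
#=> :ok
CubDB.clear(db)
#=> :ok
CubDB.size(db)
#=> 0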
"""
def clear(db) do
transaction(db, fn tx ->
{:commit, Tx.clear(tx), :ok}
end)
end
@spec compact(GenServer.server()) :: :ok | {:error, term}
@doc """
Runs a database compaction.
As write operations are performed on a database, its file grows. Occasionally,
a compaction operation can be run to shrink the file to its optimal size.
Compaction runs in the background and does not block operations.
Only one compaction operation can run at any time, therefore if this function
is called when a compaction is already running, it returns `{:error,
:pending_compaction}`.
When compacting, `CubDB` will create a new data file, and eventually switch to
it and remove the old one once the compaction succeeds. For this reason, during
a compaction, there should be enough disk space for a second copy of the
database file.
Compaction can create disk contention, so it should not be performed
unnecessarily often.
"""
def compact(db) do
GenServer.call(db, :compact, :infinity)
end
@spec set_auto_compact(GenServer.server(), boolean | {integer, integer | float}) ::
:ok | {:error, String.t()}
@doc """
Configures whether to perform automatic compaction, and how.
If set to `false`, no automatic compaction is performed. If set to `true`,
auto-compaction is performed after a write operation if at least 100 write
operations have occurred since the last compaction, and the dirt factor is at
least 0.25. These values can be customized by setting the `auto_compact`
option to `{min_writes, min_dirt_factor}`.
It returns `:ok`, or `{:error, reason}` if `setting` is invalid.
Compaction is performed in the background and does not block other operations,
but can create disk contention, so it should not be performed unnecessarily
often. When writing a lot into the database, such as when importing data from
an external source, it is advisable to turn off auto compaction, and manually
run compaction at the end of the import.
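## Example
# Auto-compact when at least 10_000 writes occurred since the last compaction
# and the dirt factor is at least 0.5 (the values are illustrative)
CubDB.set_auto_compact(db, {10_000, 0.5})
#=> :ok
# Disable auto-compaction entirely
CubDB.set_auto_compact(db, false)
#=> :ok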
"""
def set_auto_compact(db, setting) do
GenServer.call(db, {:set_auto_compact, setting}, :infinity)
end
@spec halt_compaction(GenServer.server()) :: :ok | {:error, :no_compaction_running}
@doc """
Stops a running compaction.
If a compaction operation is running, it is halted, and the function returns
`:ok`. Otherwise it returns `{:error, :no_compaction_running}`. If a new
compaction is started (manually or automatically), it will start from scratch;
the halted compaction is completely discarded.
This function can be useful if one wants to make sure that no compaction
operation is running in a certain moment, for example to perform some
write-intensive workload without incurring additional load. In this case
one can pause auto compaction, and call `halt_compaction/1` to stop any
running compaction.
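## Example
A sketch of making sure that no compaction runs during a write-intensive
workload:
CubDB.set_auto_compact(db, false)
#=> :ok
CubDB.halt_compaction(db)
#=> :ok, or {:error, :no_compaction_running} if none was running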
"""
def halt_compaction(db) do
GenServer.call(db, :halt_compaction, :infinity)
end
@spec compacting?(GenServer.server()) :: boolean
@doc """
Returns `true` if a compaction operation is currently running, `false` otherwise.
"""
def compacting?(db) do
GenServer.call(db, :compacting?, :infinity)
end
@spec file_sync(GenServer.server()) :: :ok
@doc """
Performs an `fsync`, forcing any data that might be buffered by the OS to be
flushed to disk.
Calling this function ensures that all writes up to this point are committed
to disk, and will be available after a restart.
If `CubDB` is started with the option `auto_file_sync: true`, calling this
function is not necessary, as every write operation will be automatically
flushed to the storage device.
If this function is NOT called, the operating system will control when the
file buffer is flushed to the storage device, which leads to better write
performance, but might affect durability of recent writes in case of a sudden
shutdown.
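## Example
A sketch of trading some durability for write performance, by starting with
`auto_file_sync: false` and syncing explicitly at meaningful points (the data
directory path and entries are illustrative):
{:ok, db} = CubDB.start_link(data_dir: "my/data/directory", auto_file_sync: false)
CubDB.put_multi(db, a: 1, b: 2)
CubDB.file_sync(db)
#=> :ok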
"""
def file_sync(db) do
GenServer.call(db, :file_sync, :infinity)
end
@spec set_auto_file_sync(GenServer.server(), boolean) :: :ok
@doc """
Configures whether to automatically force file sync upon each write operation.
If set to `false`, no automatic file sync is performed. That improves write
performance, but leaves the decision of when to flush disk buffers to the
operating system. This means that recent writes might not be durable in case
of a sudden machine shutdown. In any case,
atomicity of multi operations is preserved, and partial writes will not
corrupt the database.
If set to `true`, the file buffer will be forced to flush upon every write
operation, ensuring durability even in case of sudden machine shutdowns, but
decreasing write performance.
"""
def set_auto_file_sync(db, bool) do
GenServer.call(db, {:set_auto_file_sync, bool}, :infinity)
end
@spec data_dir(GenServer.server()) :: String.t()
@doc """
Returns the path of the data directory, as given when the `CubDB` process was
started.
## Example
{:ok, db} = CubDB.start_link("some/data/directory")
CubDB.data_dir(db)
#=> "some/data/directory"
"""
def data_dir(db) do
GenServer.call(db, :data_dir, :infinity)
end
@spec current_db_file(GenServer.server()) :: String.t()
@doc """
Returns the path of the current database file.
The current database file will change after a compaction operation.
## Example
{:ok, db} = CubDB.start_link("some/data/directory")
CubDB.current_db_file(db)
#=> "some/data/directory/0.cub"
"""
def current_db_file(db) do
GenServer.call(db, :current_db_file, :infinity)
end
@spec back_up(GenServer.server(), Path.t()) :: :ok | {:error, term}
@doc """
Creates a backup of the database into the target directory path.
The directory is created upon calling `back_up/2`, and an error tuple is
returned if it already exists.
The function will block until the backup is completed, then return `:ok`. The
backup does not block other readers or writers, and reflects the database
state at the time it was started, without any later write.
After the backup completes successfully, it is possible to open it by starting
a `CubDB` process using the target path as its data directory.
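## Example
The target directory path is illustrative, and must not already exist:
CubDB.back_up(db, "backup/data/directory")
#=> :ok
# The backup can be opened as a separate database
{:ok, copy} = CubDB.start_link("backup/data/directory")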
"""
def back_up(db, target_path) do
with_snapshot(db, fn snapshot ->
Snapshot.back_up(snapshot, target_path)
end)
end
@spec cubdb_file?(String.t()) :: boolean
@doc false
def cubdb_file?(file_name) do
file_extensions = [@db_file_extension, @compaction_file_extension]
basename = Path.basename(file_name, Path.extname(file_name))
Enum.member?(file_extensions, Path.extname(file_name)) &&
Regex.match?(~r/^[\da-fA-F]+$/, basename)
end
@spec compaction_file?(String.t()) :: boolean
@doc false
def compaction_file?(file_name) do
Path.extname(file_name) == @compaction_file_extension
end
@doc false
def subscribe(db) do
GenServer.call(db, {:subscribe, self()}, :infinity)
end
@doc false
def file_name_to_n(file_name) do
base_name = Path.basename(file_name, Path.extname(file_name))
String.to_integer(base_name, 16)
end
# OTP callbacks
@doc false
def init([data_dir, options]) do
auto_compact = parse_auto_compact!(Keyword.get(options, :auto_compact, true))
auto_file_sync = Keyword.get(options, :auto_file_sync, true)
with file_name when is_binary(file_name) or is_nil(file_name) <- find_db_file(data_dir),
{:ok, store} <-
Store.File.create(Path.join(data_dir, file_name || "0#{@db_file_extension}")),
{:ok, clean_up} <- CleanUp.start_link(data_dir),
{:ok, task_supervisor} <- Task.Supervisor.start_link() do
{:ok,
%State{
btree: Btree.new(store),
task_supervisor: task_supervisor,
data_dir: data_dir,
clean_up: clean_up,
auto_compact: auto_compact,
auto_file_sync: auto_file_sync
}}
else
{:error, reason} ->
{:stop, reason}
end
end
@doc false
def terminate(_reason, %State{btree: btree}) do
Btree.stop(btree)
end
def handle_call({:snapshot, ttl}, _, state) do
%State{btree: btree} = state
ref = make_ref()
state = checkin_reader(ref, btree, state)
snapshot = %Snapshot{db: self(), btree: btree, reader_ref: ref}
case ttl do
:infinity ->
{:reply, snapshot, state}
ttl when is_integer(ttl) ->
Process.send_after(self(), {:snapshot_timeout, ref}, ttl)
{:reply, snapshot, state}
end
end
def handle_call({:release_snapshot, snapshot}, _, state) do
%Snapshot{reader_ref: ref} = snapshot
{:reply, :ok, checkout_reader(ref, state)}
end
def handle_call({:extend_snapshot, snapshot}, _, state) do
%Snapshot{reader_ref: ref, btree: btree} = snapshot
%State{readers: readers} = state
if Map.has_key?(readers, ref) do
new_ref = make_ref()
state = checkin_reader(new_ref, btree, state)
snapshot = %Snapshot{db: self(), btree: btree, reader_ref: new_ref}
{:reply, {:ok, snapshot}, state}
else
{:reply, {:error, :invalid}, state}
end
end
def handle_call(:dirt_factor, _, state = %State{btree: btree}) do
{:reply, Btree.dirt_factor(btree), state}
end
def handle_call(:start_transaction, from, state = %State{writer: nil}) do
%State{btree: btree} = state
tx = %Tx{
btree: btree,
compacting: compaction_running?(state),
owner: from,
db: self()
}
{:reply, tx, %State{state | writer: from}}
end
def handle_call(:start_transaction, {pid, _}, state = %State{writer: {pid, _}}) do
{:reply, {:error, :already_in_transaction}, state}
end
def handle_call(:start_transaction, from, state) do
%State{write_queue: queue} = state
{:noreply, %State{state | write_queue: :queue.in(from, queue)}}
end
def handle_call(:cancel_transaction, {pid, _}, state = %State{writer: {pid, _}}) do
{:reply, :ok, advance_write_queue(state)}
end
def handle_call({:commit_transaction, %Tx{owner: owner}}, _, state = %State{writer: writer})
when owner != writer do
{:reply, {:error, :invalid_owner}, state}
end
def handle_call({:commit_transaction, tx}, {pid, _}, state = %State{writer: {pid, _}}) do
%Tx{btree: btree, recompact: recompact} = tx
%State{btree: current_btree, old_btrees: old_btrees} = state
btree = if btree != current_btree, do: maybe_sync(Btree.commit(btree), state), else: btree
%Btree{store: current_store} = current_btree
%Btree{store: new_store} = btree
# If store changed, this write is completing a compaction
state =
if new_store != current_store do
trigger_clean_up(%State{
state
| btree: finalize_compaction(btree),
old_btrees: [current_btree | old_btrees]
})
else
%State{state | btree: btree}
end
state = advance_write_queue(state)
state =
if recompact do
state = do_halt_compaction(state)
trigger_compaction(state)
state
else
maybe_auto_compact(state)
end
{:reply, :ok, state}
end
def handle_call({:validate_transaction, %Tx{owner: owner}}, _, state = %State{writer: owner}) do
{:reply, :ok, state}
end
def handle_call({:validate_transaction, _}, _, state) do
{:reply, :error, state}
end
def handle_call(:compact, _, state) do
case trigger_compaction(state) do
{:ok, compactor} ->
{:reply, :ok, %State{state | compactor: compactor}}
error ->
{:reply, error, state}
end
end
def handle_call({:set_auto_compact, setting}, _, state) do
case parse_auto_compact(setting) do
{:ok, setting} -> {:reply, :ok, %State{state | auto_compact: setting}}
{:error, reason} -> {:reply, {:error, reason}, state}
end
end
def handle_call({:set_auto_file_sync, bool}, _, state) do
{:reply, :ok, %State{state | auto_file_sync: bool}}
end
def handle_call({:subscribe, pid}, _, state = %State{subs: subs}) do
{:reply, :ok, %State{state | subs: [pid | subs]}}
end
def handle_call(:file_sync, _, state = %State{btree: btree}) do
btree = Btree.sync(btree)
{:reply, :ok, %State{state | btree: btree}}
end
def handle_call(:data_dir, _, state = %State{data_dir: data_dir}) do
{:reply, data_dir, state}
end
def handle_call(:current_db_file, _, state = %State{btree: btree}) do
%Btree{store: store} = btree
%Store.File{file_path: file_path} = store
{:reply, file_path, state}
end
def handle_call(:halt_compaction, _, state) do
if compaction_running?(state) do
state = state |> do_halt_compaction() |> trigger_clean_up()
{:reply, :ok, state}
else
{:reply, {:error, :no_compaction_running}, state}
end
end
def handle_call(:compacting?, _, state) do
{:reply, compaction_running?(state), state}
end
def handle_info(message, state)
when message == :compaction_completed or message == :catch_up_completed do
for pid <- state.subs, do: send(pid, message)
{:noreply, state}
end
def handle_info({:snapshot_timeout, ref}, state) do
{:noreply, checkout_reader(ref, state)}
end
def handle_info({:DOWN, _ref, :process, pid, _reason}, state = %State{compactor: pid}) do
{:noreply, %State{state | compactor: nil}}
end
def handle_info({:DOWN, _ref, :process, _pid, _reason}, state) do
{:noreply, state}
end
@spec maybe_sync(Btree.t(), State.t()) :: Btree.t()
defp maybe_sync(btree, %State{auto_file_sync: false}), do: btree
defp maybe_sync(btree, %State{auto_file_sync: true}), do: Btree.sync(btree)
@spec checkin_reader(reference, Btree.t(), State.t()) :: State.t()
defp checkin_reader(ref, btree, state) do
%State{readers: readers} = state
%Btree{store: %Store.File{file_path: file_path}} = btree
%State{state | readers: Map.put(readers, ref, file_path)}
end
@spec checkout_reader(reference, State.t()) :: State.t()
defp checkout_reader(ref, state) do
%State{readers: readers} = state
case Map.pop(readers, ref) do
{nil, _readers} ->
state
{_, readers} ->
if state.clean_up_pending == true do
trigger_clean_up(%State{state | readers: readers})
else
%State{state | readers: readers}
end
end
end
@spec find_db_file(String.t()) :: String.t() | nil | {:error, any}
defp find_db_file(data_dir) do
with :ok <- File.mkdir_p(data_dir),
{:ok, files} <- File.ls(data_dir) do
files
|> Enum.filter(fn file_name ->
cubdb_file?(file_name) && String.ends_with?(file_name, @db_file_extension)
end)
|> Enum.sort_by(&file_name_to_n/1)
|> List.last()
end
end
@spec trigger_compaction(State.t()) :: {:ok, pid} | {:error, any}
defp trigger_compaction(state = %State{data_dir: data_dir, clean_up: clean_up}) do
case compaction_running?(state) do
false ->
for pid <- state.subs, do: send(pid, :compaction_started)
{:ok, store} = new_compaction_store(data_dir)
CleanUp.clean_up_old_compaction_files(clean_up, store)
with result <-
Task.Supervisor.start_child(state.task_supervisor, Compactor, :run, [
self(),
store
]),
{:ok, pid} <- result do
Process.monitor(pid)
result
end
true ->
{:error, :pending_compaction}
end
end
@spec finalize_compaction(Btree.t()) :: Btree.t()
defp finalize_compaction(btree = %Btree{store: compacted_store}) do
Btree.sync(btree)
Store.close(compacted_store)
new_path =
String.replace_suffix(
compacted_store.file_path,
@compaction_file_extension,
@db_file_extension
)
:ok = File.rename(compacted_store.file_path, new_path)
{:ok, store} = Store.File.create(new_path)
Btree.new(store)
end
@spec new_compaction_store(String.t()) :: {:ok, Store.t()} | {:error, any}
defp new_compaction_store(data_dir) do
with {:ok, file_names} <- File.ls(data_dir) do
new_filename =
file_names
|> Enum.filter(&cubdb_file?/1)
|> Enum.map(&file_name_to_n/1)
|> Enum.sort()
|> List.last()
|> (&(&1 + 1)).()
|> Integer.to_string(16)
|> (&(&1 <> @compaction_file_extension)).()
Store.File.create(Path.join(data_dir, new_filename))
end
end
@spec compaction_running?(State.t()) :: boolean
defp compaction_running?(%State{compactor: nil}), do: false
defp compaction_running?(_), do: true
@spec do_halt_compaction(State.t()) :: State.t()
defp do_halt_compaction(state = %State{compactor: nil}), do: state
defp do_halt_compaction(state = %State{compactor: pid}) do
# The nil-compactor case is matched by the previous clause, so pid is a pid here
Process.exit(pid, :halt)
%State{state | compactor: nil}
end
@spec trigger_clean_up(State.t()) :: State.t()
defp trigger_clean_up(state) do
if can_clean_up?(state),
do: clean_up_now(state),
else: clean_up_when_possible(state)
end
@spec can_clean_up?(State.t()) :: boolean
defp can_clean_up?(%State{btree: %Btree{store: store}, readers: readers}) do
%Store.File{file_path: file_path} = store
Enum.all?(readers, fn {_reader, file} ->
file == file_path
end)
end
@spec clean_up_now(State.t()) :: State.t()
defp clean_up_now(state = %State{btree: btree, clean_up: clean_up}) do
for old_btree <- state.old_btrees do
if Btree.alive?(old_btree), do: :ok = Btree.stop(old_btree)
end
:ok = CleanUp.clean_up(clean_up, btree)
for pid <- state.subs, do: send(pid, :clean_up_started)
%State{state | clean_up_pending: false, old_btrees: []}
end
@spec clean_up_when_possible(State.t()) :: State.t()
defp clean_up_when_possible(state) do
%State{state | clean_up_pending: true}
end
@spec maybe_auto_compact(State.t()) :: State.t()
defp maybe_auto_compact(state) do
if should_auto_compact?(state) do
case trigger_compaction(state) do
{:ok, compactor} ->
%State{state | compactor: compactor}
{:error, _} ->
state
end
else
state
end
end
@spec should_auto_compact?(State.t()) :: boolean
defp should_auto_compact?(%State{auto_compact: false}), do: false
defp should_auto_compact?(%State{btree: btree, auto_compact: auto_compact}) do
{min_writes, min_dirt_factor} = auto_compact
%Btree{dirt: dirt} = btree
dirt_factor = Btree.dirt_factor(btree)
dirt >= min_writes and dirt_factor >= min_dirt_factor
end
@spec advance_write_queue(State.t()) :: State.t()
defp advance_write_queue(state) do
%State{write_queue: queue, btree: btree} = state
{writer, queue} =
case :queue.out(queue) do
{{:value, next}, queue} ->
GenServer.reply(next, %Tx{
btree: btree,
compacting: compaction_running?(state),
owner: next,
db: self()
})
{next, queue}
{:empty, queue} ->
{nil, queue}
end
%State{state | writer: writer, write_queue: queue}
end
@spec parse_auto_compact(any) :: {:ok, false | {pos_integer, number}} | {:error, any}
defp parse_auto_compact(setting) do
case setting do
false ->
{:ok, false}
true ->
{:ok, @auto_compact_defaults}
{min_writes, min_dirt_factor} when is_integer(min_writes) and is_number(min_dirt_factor) ->
if min_writes >= 0 and min_dirt_factor >= 0 and min_dirt_factor <= 1,
do: {:ok, {min_writes, min_dirt_factor}},
else: {:error, "invalid auto compact setting"}
_ ->
{:error, "invalid auto compact setting"}
end
end
@spec parse_auto_compact!(any) :: false | {pos_integer, number}
defp parse_auto_compact!(setting) do
case parse_auto_compact(setting) do
{:ok, setting} -> setting
{:error, reason} -> raise(ArgumentError, message: reason)
end
end
@spec split_options(
[option | {:data_dir, String.t()} | GenServer.option()]
| String.t()
) :: {:ok, {String.t(), [option], GenServer.options()}} | {:error, term}
defp split_options(data_dir) when is_binary(data_dir) do
{:ok, {data_dir, [], []}}
end
defp split_options(data_dir_or_options) do
case Keyword.pop(data_dir_or_options, :data_dir) do
{nil, data_dir_or_options} ->
try do
{:ok, {to_string(data_dir_or_options), [], []}}
rescue
ArgumentError ->
{:error, "Options must include :data_dir"}
Protocol.UndefinedError ->
{:error, "data_dir must be a string (or implement String.Chars)"}
end
{data_dir, options} ->
{gen_server_opts, opts} =
Keyword.split(options, [:name, :timeout, :spawn_opt, :hibernate_after, :debug])
try do
{:ok, {to_string(data_dir), opts, gen_server_opts}}
rescue
Protocol.UndefinedError ->
{:error, "data_dir must be a string (or implement String.Chars)"}
end
end
end
end