defmodule ExBuffer do
@moduledoc """
An `ExBuffer` is a process that maintains a collection of items and flushes
them once certain conditions have been met.
An `ExBuffer` can flush based on a timeout, a maximum length (item count), a
maximum byte size, or a combination of the three. When multiple conditions are
used, the `ExBuffer` will flush when the **first** condition is met.
`ExBuffer` also includes a number of helpful tools for testing and debugging.
"""
alias ExBuffer.Partition
@supervisor_fields [:name, :partitioner, :partitions]
################################
# Callbacks
################################
@doc """
Invoked to flush an `ExBuffer`.
The first argument (`data`) is a list of items inserted into the `ExBuffer` and the
second argument (`opts`) is a keyword list of flush options. See the `:flush_callback`
and `:flush_meta` options for `ExBuffer.start_link/2` for more information.
This callback can return any term as the return value is disregarded by the `ExBuffer`.
This callback is required.
"""
# Module-based counterpart of the `:flush_callback` option: when the module handed
# to `start_link/2` exports `handle_flush/2`, that function replaces whatever
# `:flush_callback` was passed in the options.
@callback handle_flush(data :: list(), opts :: keyword()) :: term()
@doc """
Invoked to determine the size of an inserted item.
The only argument (`item`) is any term that was inserted into the `ExBuffer`.
This callback must return a non-negative integer representing the item's byte size.
This callback is optional. See the `:size_callback` option for `ExBuffer.start_link/2`
for information about the default implementation.
"""
# Module-based counterpart of the `:size_callback` option: when the module handed
# to `start_link/2` exports `handle_size/1`, that function replaces whatever
# `:size_callback` was passed in the options.
@callback handle_size(item :: term()) :: non_neg_integer()
# `handle_size/1` is optional; a module that omits it leaves `:size_callback` untouched.
@optional_callbacks handle_size: 1
################################
# Public API
################################
@doc false
@spec child_spec(keyword()) :: map()
def child_spec(opts) do
  # The child is identified by this module and started through `start_link/1`
  # with the given options as the sole argument.
  start = {__MODULE__, :start_link, [opts]}
  %{id: __MODULE__, start: start}
end
@doc """
Starts an `ExBuffer` process linked to the current process.
The first argument (`module`) is optional. It is intended to be used when calling
this function from a module that implements the `ExBuffer` behaviour. When a module
is passed, it may interact with the options that were passed in:
* If the module implements the `handle_flush/2` callback, it will override the
`:flush_callback` option.
* If the module implements the `handle_size/1` callback, it will override the
`:size_callback` option.
* If a `:name` option is not present, the module name will be used.
## Options
An `ExBuffer` can be started with the following options:
* `:flush_callback` - The function that will be invoked to handle a flush.
This function should expect two parameters: a list of items and a keyword
list of flush opts. The flush opts include the size and length of the buffer
at the time of the flush, the partition index of the flushed buffer, and any
provided metadata (see `:flush_meta` for more information). This function can
return any term as the return value is disregarded by the `ExBuffer`. (Required)
* `:buffer_timeout` - A non-negative integer representing the maximum time
(in ms) allowed between flushes of the `ExBuffer`. Once this amount of time
has passed, the `ExBuffer` will be flushed. By default, an `ExBuffer` does not
have a timeout. (Optional)
* `:flush_meta` - A term to be included in the flush opts under the `meta` key.
By default, this value will be `nil`. (Optional)
* `:jitter_rate` - A float between 0 and 1 that is used to offset the limits of
`ExBuffer` partitions. Limits are **decreased** by a random rate between 0 and this
value. By default, no jitter is applied to an `ExBuffer`. (Optional)
* `:max_length` - A non-negative integer representing the maximum allowed
length (item count) of the `ExBuffer`. Once the limit is hit, the `ExBuffer` will
be flushed. By default, an `ExBuffer` does not have a max length. (Optional)
* `:max_size` - A non-negative integer representing the maximum allowed size
(in bytes) of the `ExBuffer`. Once the limit is hit (or exceeded), the `ExBuffer`
will be flushed. The `:size_callback` option determines how item size is computed.
By default, an `ExBuffer` does not have a max size. (Optional)
* `:name` - The registered name for the `ExBuffer`. This must be either an atom or a
`:via` tuple. By default (when an implementation module is not used), the name of an
`ExBuffer` is `ExBuffer`. (Optional)
* `:partitioner` - The strategy for assigning items to a partition. The partitioner
can be either `:rotating` or `:random`. The former assigns items to partitions in a
round-robin fashion and the latter assigns items randomly. By default, an `ExBuffer`
uses a `:rotating` partitioner. (Optional)
* `:partitions` - The number of partitions for the `ExBuffer`. By default, an `ExBuffer`
has 1 partition. (Optional)
* `:size_callback` - The function that will be invoked to determine the size of an item.
This function should expect a single parameter representing an item and should return
a single non-negative integer representing that item's byte size. The default
`ExBuffer` size callback is `Kernel.byte_size/1` (`:erlang.term_to_binary/1` is used
to convert non-bitstring inputs to binary if necessary). (Optional)
Additionally, an `ExBuffer` can be started with any `GenServer` options.
"""
@spec start_link(module() | nil, keyword()) :: Supervisor.on_start()
def start_link(module \\ nil, opts) do
  opts = maybe_update_opts(opts, module)

  # Validation failures fall straight through the `with` and are returned as
  # `{:error, reason}`; so is any non-`{:ok, _}` result from the supervisor.
  with {:ok, partition_count} <- validate_partitions(opts),
       {:ok, strategy} <- validate_partitioner(opts),
       {:ok, _pid} = started <- do_start_link(opts) do
    # Record the partitioner and partition count under the buffer's name so the
    # public API functions can look them up later.
    opts
    |> Keyword.get(:name)
    |> put_buffer(build_partitioner(partition_count, strategy), partition_count)

    started
  end
end
@doc """
Dumps the contents of the given `ExBuffer` to a list, bypassing a flush
callback and resetting the buffer.
While this functionality may occasionally be desirable in a production environment,
it is intended to be used primarily for testing and debugging.
## Options
An `ExBuffer` can be dumped with the following options:
* `:partition` - A non-negative integer representing the specific partition index to
dump. By default, this function dumps all partitions and concatenates the results
together. (Optional)
## Examples
iex> ExBuffer.insert(ExBuffer, "foo")
iex> ExBuffer.insert(ExBuffer, "bar")
iex> ExBuffer.dump(ExBuffer)
["foo", "bar"]
iex> ExBuffer.insert(ExBuffer, "foo")
iex> ExBuffer.insert(ExBuffer, "bar")
iex> ExBuffer.dump(ExBuffer, partition: 0)
["foo"]
"""
@spec dump(PartitionSupervisor.name(), keyword()) :: list()
def dump(buffer, opts \\ []) do
  # Raises `ArgumentError` when the buffer is unknown or the `:partition`
  # option does not address an existing partition.
  with {:ok, {_, parts}} <- fetch_buffer(buffer),
       {:ok, part} <- validate_partition(opts, parts) do
    # Concatenate the per-partition dumps in partition order. `Enum.flat_map/2`
    # builds the result in a single pass, whereas `acc ++ items` inside a reduce
    # re-copies everything accumulated so far for each partition.
    parts
    |> enumerate_parts(part)
    |> Enum.flat_map(fn index -> do_part(buffer, index, &Partition.dump/1) end)
  else
    {:error, reason} -> raise(ArgumentError, to_message(reason))
  end
end
@doc """
Flushes the given `ExBuffer`, regardless of whether or not the flush conditions
have been met.
While this functionality may occasionally be desirable in a production environment,
it is intended to be used primarily for testing and debugging.
## Options
An `ExBuffer` can be flushed with the following options:
* `:mode` - A value denoting whether the flush will be synchronous or asynchronous.
Possible values are `:sync` and `:async`. By default, this value is `:async`.
(Optional)
* `:partition` - A non-negative integer representing the specific partition index to
flush. By default, this function flushes all partitions. (Optional)
## Example
iex> ExBuffer.insert(ExBuffer, "foo")
iex> ExBuffer.insert(ExBuffer, "bar")
...>
...> # Invokes flush callback on ["foo"] and then on ["bar"]
iex> ExBuffer.flush(ExBuffer)
:ok
iex> ExBuffer.insert(ExBuffer, "foo")
iex> ExBuffer.insert(ExBuffer, "bar")
...>
...> # Invokes flush callback on ["foo"]
iex> ExBuffer.flush(ExBuffer, partition: 0)
:ok
"""
@spec flush(GenServer.server(), keyword()) :: :ok
def flush(buffer, opts \\ []) do
  with {:ok, {_partitioner, count}} <- fetch_buffer(buffer),
       {:ok, target} <- validate_partition(opts, count) do
    # The caller's options are forwarded unchanged to every partition flush.
    flush_one = &Partition.flush(&1, opts)

    count
    |> enumerate_parts(target)
    |> Enum.each(&do_part(buffer, &1, flush_one))
  else
    {:error, reason} -> raise(ArgumentError, to_message(reason))
  end
end
@doc """
Returns information about the given `ExBuffer`.
This function returns a map per partition with the following keys:
* `:length` - The number of items in the `ExBuffer` partition.
* `:max_length` - The maximum length of the `ExBuffer` partition after applying the
`:jitter_rate`.
* `:max_size` - The maximum byte-size of the `ExBuffer` partition after applying the
`:jitter_rate`.
* `:next_flush` - The amount of time (in ms) until the next scheduled flush of the
`ExBuffer` partition (or `nil` if the `ExBuffer` was started without a time limit).
* `:partition` - The index of the `ExBuffer` partition.
* `:size` - The byte-size of the `ExBuffer` partition.
* `:timeout` - The maximum amount of time (in ms) allowed between flushes of the
`ExBuffer` partition after applying the `:jitter_rate`.
While this functionality may occasionally be desirable in a production environment,
it is intended to be used primarily for testing and debugging.
## Options
The information about an `ExBuffer` can be retrieved with the following options:
* `:partition` - A non-negative integer representing the specific partition index to
retrieve information for. By default, this function retrieves information for all
partitions. (Optional)
## Examples
iex> ExBuffer.insert(ExBuffer, "foo")
iex> [%{length: length}, %{}] = ExBuffer.info(ExBuffer)
iex> length
1
iex> ExBuffer.insert(ExBuffer, "foo")
iex> [%{length: length, partition: 0}] = ExBuffer.info(ExBuffer, partition: 0)
iex> length
1
"""
@spec info(GenServer.server(), keyword()) :: [map()]
def info(buffer, opts \\ []) do
  with {:ok, {_partitioner, count}} <- fetch_buffer(buffer),
       {:ok, target} <- validate_partition(opts, count) do
    # One info entry per selected partition, in partition order.
    for index <- enumerate_parts(count, target) do
      do_part(buffer, index, &Partition.info/1)
    end
  else
    {:error, reason} -> raise(ArgumentError, to_message(reason))
  end
end
@doc """
Inserts the given item into the given `ExBuffer` based on the partitioner that the
given `ExBuffer` was started with.
## Example
iex> ExBuffer.insert(ExBuffer, "foo")
:ok
"""
@spec insert(GenServer.server(), term()) :: :ok
def insert(buffer, item) do
  buffer
  |> fetch_buffer()
  |> case do
    {:error, reason} ->
      raise(ArgumentError, to_message(reason))

    {:ok, {next_partition, _count}} ->
      # The stored partitioner decides which partition receives the item.
      do_part(buffer, next_partition.(), &Partition.insert(&1, item))
  end
end
@doc """
Inserts the given batch of items into the given `ExBuffer` based on the partitioner that
the given `ExBuffer` was started with. This function returns the number of items that were
inserted.
All items in the batch are inserted into the same partition.
> #### Tip {: .tip}
>
> When inserting multiple items into an `ExBuffer`, this function will be far more performant
> than calling `ExBuffer.insert/2` for each one. As such, whenever items become available in
> batches, this function should be preferred.
## Options
A batch of items can be inserted into an `ExBuffer` with the following options:
* `:flush_mode` - A value denoting how the buffer will be flushed (if applicable).
Possible values are `:sync` and `:async`. By default, this value is `:sync`, meaning
that, if a flush condition is met while inserting items, the `ExBuffer` partition will
synchronously flush before continuing to insert items. If this value is set to `:async`,
all items will be inserted before checking if any flush conditions have been met.
Afterwards, if a flush condition has been met, the `ExBuffer` partition will be flushed
asynchronously. (Optional)
## Example
iex> ExBuffer.insert_batch(ExBuffer, ["foo", "bar", "baz"])
3
"""
@spec insert_batch(GenServer.server(), Enumerable.t(), keyword()) :: non_neg_integer()
def insert_batch(buffer, items, opts \\ []) do
  lookup = fetch_buffer(buffer)

  case lookup do
    {:error, reason} ->
      raise(ArgumentError, to_message(reason))

    {:ok, {next_partition, _count}} ->
      # The partitioner is consulted once, so the whole batch goes to one partition.
      insert_all = &Partition.insert_batch(&1, items, opts)
      do_part(buffer, next_partition.(), insert_all)
  end
end
@doc false
@spec __using__(keyword()) :: Macro.t()
defmacro __using__(_opts) do
  quote location: :keep do
    @behaviour ExBuffer

    # Only attach the default documentation when the using module has not
    # already set a `@doc` for the upcoming definition.
    if is_nil(Module.get_attribute(__MODULE__, :doc)) do
      @doc """
      Returns a specification to start this ExBuffer under a supervisor.
      See `Supervisor`.
      """
    end

    def child_spec(opts) do
      %{id: __MODULE__, start: {__MODULE__, :start_link, [opts]}}
    end

    # Using modules may replace the generated child spec with their own.
    defoverridable child_spec: 1
  end
end
################################
# Private API
################################
# Guard: `part` addresses either every partition (`:all`) or one zero-based
# partition index strictly below `parts`.
#
# The `is_integer/1` check is required because the comparison operators work
# across types: without it a float such as `0.5` or `1.0` satisfies both bounds
# and would be accepted as a partition index.
defguardp is_valid_part(part, parts)
          when part == :all or (is_integer(part) and part >= 0 and part < parts)
# Fills in option defaults. Without an implementation module only the default
# name (this module) is added, and only if no `:name` was given.
defp maybe_update_opts(opts, module) when is_nil(module) do
  Keyword.put_new(opts, :name, __MODULE__)
end

# With an implementation module, the module doubles as the default name and
# its exported callbacks take precedence over the corresponding options.
defp maybe_update_opts(opts, module) do
  named = Keyword.put_new(opts, :name, module)
  with_flush = maybe_update_flush_callback(named, module)
  maybe_update_size_callback(with_flush, module)
end
# Overrides `:flush_callback` with `module.handle_flush/2` when the module
# exports it; otherwise returns `opts` unchanged.
defp maybe_update_flush_callback(opts, module) do
  # `function_exported?/3` never loads code, so a module that has not been
  # loaded yet would look as if it lacked the callback. Load it first.
  if Code.ensure_loaded?(module) and function_exported?(module, :handle_flush, 2) do
    Keyword.put(opts, :flush_callback, &module.handle_flush/2)
  else
    opts
  end
end
# Overrides `:size_callback` with `module.handle_size/1` when the module
# exports it; otherwise returns `opts` unchanged.
defp maybe_update_size_callback(opts, module) do
  # `function_exported?/3` never loads code, so a module that has not been
  # loaded yet would look as if it lacked the callback. Load it first.
  if Code.ensure_loaded?(module) and function_exported?(module, :handle_size, 1) do
    Keyword.put(opts, :size_callback, &module.handle_size/1)
  else
    opts
  end
end
# Reads the `:partitioner` option (default `:rotating`) and accepts only the
# two supported strategies.
defp validate_partitioner(opts) do
  strategy = Keyword.get(opts, :partitioner, :rotating)

  if strategy in [:random, :rotating] do
    {:ok, strategy}
  else
    {:error, :invalid_partitioner}
  end
end
# Reads the `:partitions` option (default 1) and accepts only positive integers.
defp validate_partitions(opts) do
  count = Keyword.get(opts, :partitions, 1)

  if is_integer(count) and count > 0 do
    {:ok, count}
  else
    {:error, :invalid_partitions}
  end
end
# Reads the `:partition` option (default `:all`) and checks it against the
# number of partitions the buffer was started with.
defp validate_partition(opts, partitions) do
  opts
  |> Keyword.get(:partition, :all)
  |> case do
    selected when is_valid_part(selected, partitions) -> {:ok, selected}
    _invalid -> {:error, :invalid_partition}
  end
end
# Starts the `PartitionSupervisor` that owns the buffer's partitions.
defp do_start_link(opts) do
  # Supervisor-level options stay with the supervisor; everything else is
  # handed to each `Partition` child.
  {supervisor_opts, partition_opts} = Keyword.split(opts, @supervisor_fields)

  # Every child receives the shared options plus its own `:partition` value.
  inject_partition = fn [child_opts], index ->
    [Keyword.put(child_opts, :partition, index)]
  end

  PartitionSupervisor.start_link(
    Keyword.merge(supervisor_opts,
      with_arguments: inject_partition,
      child_spec: {Partition, partition_opts}
    )
  )
end
# Builds the zero-arity function that returns the (zero-based) partition index
# for the next insert.
#
# With a single partition the answer is always 0, whatever the strategy.
defp build_partitioner(1, _), do: fn -> 0 end

# `:random` picks uniformly from `0..partitions - 1`; `:rand.uniform/1` is
# 1-based, hence the `- 1`.
defp build_partitioner(partitions, :random) do
  fn -> :rand.uniform(partitions) - 1 end
end

# `:rotating` cycles through 0, 1, ..., partitions - 1, 0, 1, ...
#
# A single atomic increment followed by a modulo keeps the rotation even and
# free of races. Resetting the counter to 0 on overflow while also returning 0
# would make the following call return 0 again (double load on partition 0),
# and a separate read-then-reset is not atomic across concurrent callers.
defp build_partitioner(partitions, :rotating) do
  counter = :atomics.new(1, [])

  fn ->
    # `Integer.mod/2` (unlike `rem/2`) never returns a negative result, so the
    # index stays valid even if the signed 64-bit counter eventually wraps.
    Integer.mod(:atomics.add_get(counter, 1, 1) - 1, partitions)
  end
end
# Stores the buffer's partitioner function and partition count in
# `:persistent_term`, keyed by the buffer name.
defp put_buffer(buffer, partitioner, partitions) do
  :persistent_term.put(build_key(buffer), {partitioner, partitions})
end
# Looks up the `{partitioner, partitions}` entry stored for the buffer name.
# Returns `{:error, :buffer_not_found}` when nothing is stored under that name.
defp fetch_buffer(buffer) do
  case :persistent_term.get(build_key(buffer), nil) do
    nil -> {:error, :buffer_not_found}
    entry -> {:ok, entry}
  end
end
# Namespaces the `:persistent_term` key with this module.
defp build_key(buffer) do
  {__MODULE__, buffer}
end
# Expands a partition selection into the indices to visit: every index for
# `:all`, otherwise just the selected one.
defp enumerate_parts(parts, selection) do
  case selection do
    :all -> 0..(parts - 1)
    index -> [index]
  end
end
# Applies `fun` to the via-name that addresses the given partition of the buffer.
defp do_part(buffer, partition, fun) do
  fun.(partition_name(buffer, partition))
end
# Builds the `:via` tuple that addresses one partition through `PartitionSupervisor`.
defp partition_name(buffer, partition) do
  routing_key = {buffer, partition}
  {:via, PartitionSupervisor, routing_key}
end
# Turns an error reason such as `:buffer_not_found` into a readable message
# by replacing underscores with spaces.
defp to_message(reason) do
  reason
  |> to_string()
  |> String.replace("_", " ")
end
end