# lib/ex_zstd.ex

defmodule ExZstd do
  @moduledoc """
  ExZstd aims to expose ZSTD stable interfaces in an idiomatic Elixir way.
  If you have any doubts the official documentation resides here: https://github.com/facebook/zstd/blob/v1.5.5/lib/zstd.h.
  The naming used in this wrapper library aims to reflect upstream closely where possible.

  ### Simple API
  99% of users only need this API. It is composed of two functions, `compress` and `decompress`, that do what they say on the tin.
  Note that decompress reads the final size from the header, so only pass trusted input to this function. To decompress untrusted input see
  `decompress_stream` under the stream API section.

  ### Context API
  This API allows the caller to reuse the same context for multiple compressions and to specify advanced parameters, see the `zstd.h` header to
  know more. Your code will probably look similar to the following:
  ```elixir
  alias ExZstd.CCtx
  alias ExZstd.Cparam
  alias ExZstd.Cparam.Strategy
  alias ExZstd.DCtx

  data = :crypto.strong_rand_bytes(16)

  cctx =
    CCtx.new()
    |> CCtx.set_cparam(Cparam.compression_level(), 3)
    |> CCtx.set_cparam(Cparam.strategy(), Strategy.btopt())

  compressed_data = CCtx.compress!(cctx, data)

  dctx = DCtx.new()
  original_data = DCtx.decompress!(dctx, compressed_data)

  data == original_data
  ```

  *Important note*: the various context APIs that set/get parameters will modify the underlying context object, *not* create a new one. So the two following snippets both modify `cctx`:
  ```elixir
  # first
  cctx = CCtx.new()
  CCtx.set_cparam(cctx, Cparam.compression_level(), 3)

  # second
  cctx = CCtx.new() |> CCtx.set_cparam(Cparam.compression_level(), 3)
  ```

  ### Threads
  - 1 context = 1 thread AND no simultaneous compressions
  - If you pass the right parameters you'll be able to use the multithreading capabilities of zstd. The thread pool would live outside the BEAM so avoid it unless absolutely necessary.

  ### Stream API
  The stream API allows you to compress and decompress data in multiple frames lazily. Use this API if:
  - you want to compress complex data (e.g. iolists) without transforming it into a binary first.
  - you want to decompress untrusted data
  - you want to control how the input is split into multiple frames

  Example:

  ```elixir
  alias ExZstd.CCtx
  alias ExZstd.DCtx
  alias ExZstd.Cparam
  alias ExZstd.Dparam

  cctx = CCtx.new()
  dctx = DCtx.new()

  # compress the data
  first_chunk = CCtx.compress_stream!(cctx, Fixtures.data(), CCtx.stream_continue())
  second_chunk = CCtx.compress_stream!(cctx, Fixtures.data2(), CCtx.stream_end())

  # retrieve the input
  DCtx.decompress_stream!(dctx, first_chunk) <> DCtx.decompress_stream!(dctx, second_chunk)
  ```

  ### Dictionary API

  You can use the dictionary API to further improve compression size.
  Excerpts from the `zdict.h` header:
  - In general, it's recommended to provide a few thousands samples, though this can vary a lot.
  - It's recommended that total size of all samples be about ~x100 times the target size of dictionary.

  You can train the dictionaries by using the `zstd --train` interface of the CLI. At the moment this interface is *not* tested as part of `ex_zstd` test suite.

  Example:

  ```elixir
  alias ExZstd.CCtx
  alias ExZstd.Cparam
  alias ExZstd.Cparam.Strategy
  alias ExZstd.DCtx

  data = :crypto.strong_rand_bytes(16)
  dictionary = File.read!("my_dictionary.bin")

  cctx = CCtx.new()
  CCtx.load_dictionary(cctx, dictionary)

  compressed_data = CCtx.compress!(cctx, data)

  dctx = DCtx.new()
  DCtx.load_dictionary(dctx, dictionary)
  original_data = DCtx.decompress!(dctx, compressed_data)

  data == original_data
  ```

  """

  alias ExZstd.CCtx
  alias ExZstd.Cparam
  alias ExZstd.DCtx
  alias ExZstd.Nif

  # Result wrapper shared by the non-raising variants (`compress/2`, `decompress/1`).
  @typep maybe(t, err) :: {:ok, t} | {:error, err}

  @typedoc """
  Failure reasons reported by `decompress/1`: either a generic error message or
  one of the two content-size atoms.
  """
  @type decompression_error() ::
          error()
          | :content_size_unknown
          | :content_size_error

  @typedoc """
  Human-readable error message, taken from the message of the rescued exception.
  """
  @type error() :: String.t()

  @typedoc """
  Integer value of a reset directive, as returned by `reset_session_only/0`,
  `reset_parameters/0` and `reset_session_and_parameters/0`.
  """
  @type reset_directive() :: 1 | 2 | 3

  @doc """
  Returns the reset directive `1`. The name follows `ZSTD_reset_session_only` in `zstd.h`.
  """
  @spec reset_session_only() :: 1
  def reset_session_only, do: 1

  @doc """
  Returns the reset directive `2`. The name follows `ZSTD_reset_parameters` in `zstd.h`.
  """
  @spec reset_parameters() :: 2
  def reset_parameters, do: 2

  @doc """
  Returns the reset directive `3`. The name follows `ZSTD_reset_session_and_parameters` in `zstd.h`.
  """
  @spec reset_session_and_parameters() :: 3
  def reset_session_and_parameters, do: 3

  @doc """
  Compresses a chunk of data.

  A fresh compression context is created for every call and configured with the
  given compression `level`, which defaults to `default_compression_level/0`.

  Raises if the compression fails; see `compress/2` for a non-raising variant.
  """
  @spec compress!(binary(), integer()) :: binary()
  def compress!(data, level \\ default_compression_level()) do
    CCtx.new()
    |> CCtx.set_cparam(Cparam.compression_level(), level)
    |> CCtx.compress!(data)
  end

  @doc """
  Identical to `compress!/2`. Wraps the result in a maybe instead of raising.

  On failure the error is the message of the raised exception.
  """
  @spec compress(binary(), integer()) :: maybe(binary(), error())
  def compress(data, level \\ default_compression_level()) do
    {:ok, compress!(data, level)}
  rescue
    # Deliberately broad: the contract of this function is to convert any
    # failure of `compress!/2` into an `{:error, message}` tuple.
    error -> {:error, Exception.message(error)}
  end

  @doc """
  Returns the compression level used by `compress/2` and `compress!/2` when none is given.
  """
  @spec default_compression_level() :: 3
  def default_compression_level, do: 3

  @doc """
  Decompresses a chunk of data using a fresh decompression context.

  Raises if the decompression fails. `decompress/1` is the non-raising variant: it
  maps the `ExZstd.ContentSizeUnknown` and `ExZstd.ContentSizeError` exceptions to
  the atoms `:content_size_unknown` and `:content_size_error`. When the content
  size is unknown, stream decompression can still be attempted.

  Note: this function reads the header to find out the size of the decompressed data. Only feed it trusted input!
  """
  @spec decompress!(binary()) :: binary()
  def decompress!(data) do
    DCtx.decompress!(DCtx.new(), data)
  end

  @doc """
  Identical to `decompress!/1`. Wraps the result in a maybe instead of raising.

  If `{:error, :content_size_unknown}` is returned stream decompression can be still attempted.

  Note: this function reads the header to find out the size of the decompressed data. Only feed it trusted input!
  """
  @spec decompress(binary()) :: maybe(binary(), decompression_error())
  def decompress(data) do
    {:ok, decompress!(data)}
  rescue
    # The two content-size failures get dedicated atoms so callers can match on
    # them; every other exception is reported through its message.
    ExZstd.ContentSizeError -> {:error, :content_size_error}
    ExZstd.ContentSizeUnknown -> {:error, :content_size_unknown}
    error -> {:error, Exception.message(error)}
  end

  @doc """
  Returns the boundaries for a compression parameter as a tuple of two integers.
  """
  @spec get_cparam_bounds(pos_integer()) :: {integer(), integer()}
  defdelegate get_cparam_bounds(cparam), to: Nif

  @doc """
  Returns the boundaries of a decompression parameter as a tuple of two integers.
  """
  @spec get_dparam_bounds(pos_integer()) :: {integer(), integer()}
  defdelegate get_dparam_bounds(dparam), to: Nif

  @doc """
  Returns ZSTD_CONTENTSIZE_UNKNOWN.
  """
  @spec content_size_unknown() :: pos_integer()
  defdelegate content_size_unknown, to: Nif

  # `zstd.h` documents 0 as a valid result of ZSTD_getFrameContentSize (valid but
  # empty frame), hence `non_neg_integer()`.
  # NOTE(review): assumes the NIF forwards the upstream value unchanged - confirm.
  @doc false
  @spec get_frame_content_size(binary()) :: non_neg_integer()
  defdelegate get_frame_content_size(data), to: Nif
end