# lib/bumblebee/text/blenderbot_tokenizer.ex

defmodule Bumblebee.Text.BlenderbotTokenizer do
  @moduledoc """
  Tokenizer for Blenderbot models.

  The module also implements the `Bumblebee.Text.Conversation` behaviour,
  rendering a conversation history as a single piece of text.
  """

  @behaviour Bumblebee.Text.Conversation

  import Bumblebee.Shared

  tokenizer_impl(
    special_tokens: %{
      unk: "<unk>",
      bos: "<s>",
      eos: "</s>",
      pad: "<pad>",
      sep: "</s>",
      cls: "<s>",
      mask: "<mask>"
    }
  )

  # Text placed between two consecutive conversation turns.
  @turn_separator "  "

  # Renders `history`, a list of `{:user, text}` and `{:generated, text}`
  # entries, as one string. Entries are emitted in the reverse of the order
  # they are given in (presumably the history is kept most-recent-first,
  # confirm against the caller). The tokenizer argument is not used.
  @impl true
  def conversation_history_to_text(_tokenizer, history) do
    history
    |> Enum.reverse()
    |> Enum.map_join(@turn_separator, &turn_to_text/1)
  end

  # Text generated by the model already starts with a space, so user input
  # gets one prepended to look the same; generated text is passed through.
  defp turn_to_text({:user, text}), do: " " <> text
  defp turn_to_text({:generated, text}), do: text
end