# lib/bumblebee/text/xlm_roberta_tokenizer.ex

# Tokenizer module for XLM-RoBERTa.
#
# The module body defines no functions by hand: it only imports
# `Bumblebee.Shared` and invokes `tokenizer_impl/1` with the table of
# special-token strings used by this tokenizer.
defmodule Bumblebee.Text.XlmRobertaTokenizer do
  @moduledoc """
  XLM-RoBERTa tokenizer.
  """

  # NOTE(review): `tokenizer_impl/1` is not defined in this file; it is
  # presumably a macro brought into scope by this import -- confirm in
  # `Bumblebee.Shared`. The import is left unrestricted (no `only:`) because
  # the expansion may rely on other names from that module -- TODO confirm.
  import Bumblebee.Shared

  # Special-token strings keyed by role. Note that two pairs of roles share
  # the same string: `bos` and `cls` are both "<s>", and `eos` and `sep` are
  # both "</s>".
  # NOTE(review): presumably these must match the strings present in the
  # loaded tokenizer vocabulary -- verify against the upstream model files.
  tokenizer_impl(
    special_tokens: %{
      # beginning-of-sequence marker (same string as `cls`)
      bos: "<s>",
      # end-of-sequence marker (same string as `sep`)
      eos: "</s>",
      # placeholder for tokens not found in the vocabulary
      unk: "<unk>",
      # separator marker (same string as `eos`)
      sep: "</s>",
      # padding token
      pad: "<pad>",
      # classification marker (same string as `bos`)
      cls: "<s>",
      # mask token
      mask: "<mask>"
    }
  )
end