# lib/bumblebee/text/llama_tokenizer.ex
defmodule Bumblebee.Text.LlamaTokenizer do
@moduledoc """
Llama tokenizer.
"""
# `tokenizer_impl` is called unqualified below and this is the only import in
# the module, so it is expected to come from `Bumblebee.Shared`.
import Bumblebee.Shared
# The module body consists solely of this call; everything the module exposes
# comes from whatever `tokenizer_impl` produces for the given options.
# NOTE(review): presumably a macro that generates the tokenizer struct and
# implementation from these options - confirm in `Bumblebee.Shared`.
#
# `:special_tokens` maps a token role to its literal text. `:eos`, `:sep` and
# `:pad` all map to the same "</s>" token, while `:unk` maps to "<unk>". No
# other roles are declared here.
tokenizer_impl(
special_tokens: %{
eos: "</s>",
unk: "<unk>",
sep: "</s>",
# Llama doesn't originally have a pad token, however when necessary
# we pad with the EOS token
pad: "</s>"
}
)
end