defmodule Cldr.AcceptLanguage do
@moduledoc """
Tokenizer and parser for HTTP `Accept-Language` header values as defined in
[rfc2616](https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4).
The Accept-Language request-header field is similar to Accept, but restricts
the set of natural languages that are preferred as a response to the request.
Functions for working with language tags are provided in `Cldr.LanguageTag`.
The format of an `Accept-Language` header is as follows in `ABNF` format:
Accept-Language = "Accept-Language" ":"
1#( language-range [ ";" "q" "=" qvalue ] )
language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
Each language-range MAY be given an associated quality value which represents an
estimate of the user's preference for the languages specified by that range. The
quality value defaults to "q=1". For example,
Accept-Language: da, en-gb;q=0.8, en;q=0.7
would mean: "I prefer Danish, but will accept British English and other types of English."
"""
alias Cldr.Locale
alias Cldr.LanguageTag
# Quality assigned to a language range that carries no `;q=` parameter
# (RFC 2616 defines the default as q=1).
@default_quality 1.0
# Fallback quality used when a `;q=` value cannot be parsed as a float, or
# when a language range contains more than one `;q=` parameter.
@low_quality 0.2
@doc """
Splits the language ranges of an `Accept-Language` header
value into tuples `{quality, language}`.

## Arguments

* `accept_language` is any string in the format defined by
  [rfc2616](https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4)

## Returns

* A list of `{quality, language}` tuples in header order. The
  language is downcased. Wildcard (`*`) ranges and empty ranges
  (for example those produced by a doubled or trailing comma) are
  dropped, so the list may be empty.

## Example

    iex> Cldr.AcceptLanguage.tokenize "da,zh-TW;q=0.3"
    [{1.0, "da"}, {0.3, "zh-tw"}]

    iex> Cldr.AcceptLanguage.tokenize "da,,zh-TW;q=0.3,"
    [{1.0, "da"}, {0.3, "zh-tw"}]

"""
@spec tokenize(String.t()) :: [{float(), String.t()}]
@language_separator ","
def tokenize(accept_language) do
  accept_language
  |> String.downcase()
  |> remove_whitespace()
  # `String.split/3` never yields `nil`, only empty strings; `trim: true`
  # discards those so an empty range is not turned into a bogus token.
  |> String.split(@language_separator, trim: true)
  # Wildcard ranges carry no language information and are ignored.
  |> Enum.reject(&String.starts_with?(&1, "*"))
  |> Enum.map(&token_tuple/1)
end
@quality_separator ";q="
# Converts a single language range such as "en-gb;q=0.8" into a
# `{quality, language}` tuple.
defp token_tuple(language_range) do
  parts = String.split(language_range, @quality_separator)

  case parts do
    # No quality parameter: the default quality applies.
    [tag] ->
      {@default_quality, tag}

    # Exactly one quality parameter.
    [tag, quality_string] ->
      {parse_quality(quality_string), tag}

    # More than one `;q=` in the range: keep the tag but rank it low.
    [tag | _extra] ->
      {@low_quality, tag}
  end
end
@doc """
Parses an `Accept-Language` header value in its string
or tokenized form to return a tuple of the form
`{:ok, [{quality, %Cldr.LanguageTag{}}, ...]}` sorted by quality.
## Arguments
* `accept-language` is any string in the format defined by
[rfc2616](https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4)
* `backend` is any module that includes `use Cldr` and therefore
is a `Cldr` backend module
## Returns
* `{:ok, [{quality, language_tag}, ...]}` or
* `{:error, {Cldr.AcceptLanguageError, String.t}}`
If at least one valid language tag is found but errors are also
detected on one or more tags, an `{:ok, list}` tuple is returned
with an error tuple for each invalid tag added at the end of the list.
## Example
iex> Cldr.AcceptLanguage.parse("da,zh-TW;q=0.3", TestBackend.Cldr)
{:ok,
[
{1.0,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "da",
cldr_locale_name: :da,
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "da",
locale: %{},
private_use: [],
rbnf_locale_name: :da,
requested_locale_name: "da",
script: :Latn,
territory: :DK,
transform: %{},
language_variants: []
}},
{0.3,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "zh-TW",
cldr_locale_name: :"zh-Hant",
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "zh",
locale: %{},
private_use: [],
rbnf_locale_name: :"zh-Hant",
requested_locale_name: "zh-TW",
script: :Hant,
territory: :TW,
transform: %{},
language_variants: []
}}
]}
iex> Cldr.AcceptLanguage.parse("invalid_tag", TestBackend.Cldr)
{:error,
{Cldr.LanguageTag.ParseError,
"Expected a BCP47 language tag. Could not parse the remaining \\"g\\" starting at position 11"}}
iex> Cldr.AcceptLanguage.parse("da,zh-TW;q=0.3,invalid_tag", TestBackend.Cldr)
{:ok,
[
{1.0,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "da",
cldr_locale_name: :da,
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "da",
locale: %{},
private_use: [],
rbnf_locale_name: :da,
requested_locale_name: "da",
script: :Latn,
territory: :DK,
transform: %{},
language_variants: []
}},
{0.3,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "zh-TW",
cldr_locale_name: :"zh-Hant",
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "zh",
locale: %{},
private_use: [],
rbnf_locale_name: :"zh-Hant",
requested_locale_name: "zh-TW",
script: :Hant,
territory: :TW,
transform: %{},
language_variants: []
}},
{:error,
{Cldr.LanguageTag.ParseError,
"Expected a BCP47 language tag. Could not parse the remaining \\"g\\" starting at position 11"}}
]}
"""
@spec parse([{float(), String.t()}] | String.t(), Cldr.backend()) ::
        {:ok, [{float(), LanguageTag.t()} | {:error, {module(), String.t()}}]}
        | {:error, {module(), String.t()}}
def parse(tokens, backend) when is_list(tokens) do
  language_tags = parse_language_tags(tokens, backend)

  # Partition the unsorted list so that, when nothing parsed, the error
  # reported is the first invalid tag in header order.
  case Enum.split_with(language_tags, &match?({:error, _}, &1)) do
    # Every tag was invalid (one or many): there is no usable result, so
    # surface an error instead of `{:ok, [only error tuples]}`.
    {[{:error, reason} | _], []} ->
      {:error, reason}

    # At least one valid tag, or no tokens at all: return the sorted list.
    _ ->
      {:ok, sort_by_quality(language_tags)}
  end
end

# String form: tokenize the header value, then parse the tokens.
def parse(string, backend) when is_binary(string) do
  string
  |> tokenize()
  |> parse(backend)
end
@doc """
Parses an `Accept-Language` header value in its string
or tokenized form to produce a list of tuples of the form
`[{quality, %Cldr.LanguageTag{}}, ...]` sorted by quality
in descending order.
* `accept-language` is any string in the format defined by [rfc2616](https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4)
Returns:
* `[{quality, language_tag}, ...]` or
* raises the exception returned by `parse/2` (for example `Cldr.LanguageTag.ParseError`)
If at least one valid language tag is found but errors are also
detected on one or more tags, the list is returned
with an error tuple for each invalid tag added at the end of the list.
## Example
iex> Cldr.AcceptLanguage.parse!("da,zh-TW;q=0.3", TestBackend.Cldr)
[
{1.0,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "da",
cldr_locale_name: :da,
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "da",
locale: %{},
private_use: [],
rbnf_locale_name: :da,
requested_locale_name: "da",
script: :Latn,
territory: :DK,
transform: %{},
language_variants: []
}},
{0.3,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "zh-TW",
cldr_locale_name: :"zh-Hant",
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "zh",
locale: %{},
private_use: [],
rbnf_locale_name: :"zh-Hant",
requested_locale_name: "zh-TW",
script: :Hant,
territory: :TW,
transform: %{},
language_variants: []
}}
]
Cldr.AcceptLanguage.parse!("invalid_tag", TestBackend.Cldr)
** (Cldr.LanguageTag.ParseError) Expected a BCP47 language tag. Could not parse the remaining "g" starting at position 11
(ex_cldr) lib/cldr/accept_language.ex:304: Cldr.AcceptLanguage.parse!/2
iex> Cldr.AcceptLanguage.parse!("da,zh-TW;q=0.3,invalid_tag", TestBackend.Cldr)
[
{1.0,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "da",
cldr_locale_name: :da,
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "da",
locale: %{},
private_use: [],
rbnf_locale_name: :da,
requested_locale_name: "da",
script: :Latn,
territory: :DK,
transform: %{},
language_variants: []
}},
{0.3,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "zh-TW",
cldr_locale_name: :"zh-Hant",
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "zh",
locale: %{},
private_use: [],
rbnf_locale_name: :"zh-Hant",
requested_locale_name: "zh-TW",
script: :Hant,
territory: :TW,
transform: %{},
language_variants: []
}},
{:error,
{Cldr.LanguageTag.ParseError,
"Expected a BCP47 language tag. Could not parse the remaining \\"g\\" starting at position 11"}}
]
"""
# Bang variant of `parse/2`: unwraps a successful result and raises the
# exception module carried in the error tuple otherwise.
def parse!(accept_language, backend) do
  accept_language
  |> parse(backend)
  |> case do
    {:error, {exception, reason}} -> raise exception, reason
    {:ok, parse_result} -> parse_result
  end
end
@doc """
Parse an `Accept-Language` string and return the best match for
a configured `Cldr` locale.
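* `accept_language` is a string representing an `Accept-Language` header value
* `backend` is any module that includes `use Cldr` and therefore is a `Cldr` backend module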
Returns:
* `{:ok, language_tag}` or
* `{:error, reason}`
## Examples
iex> Cldr.AcceptLanguage.best_match("da;q=0.1,zh-TW;q=0.3", TestBackend.Cldr)
{:ok,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "zh-TW",
cldr_locale_name: :"zh-Hant",
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "zh",
locale: %{},
private_use: [],
rbnf_locale_name: :"zh-Hant",
requested_locale_name: "zh-TW",
script: :Hant,
territory: :TW,
transform: %{},
language_variants: []
}}
iex> Cldr.AcceptLanguage.best_match("da;q=0.1,zh-TW;q=0.3", TestBackend.Cldr)
{:ok,
%Cldr.LanguageTag{
backend: TestBackend.Cldr,
canonical_locale_name: "zh-TW",
cldr_locale_name: :"zh-Hant",
language_subtags: [],
extensions: %{},
gettext_locale_name: nil,
language: "zh",
locale: %{},
private_use: [],
rbnf_locale_name: :"zh-Hant",
requested_locale_name: "zh-TW",
script: :Hant,
territory: :TW,
transform: %{},
language_variants: []
}}
iex> Cldr.AcceptLanguage.best_match("xx,yy;q=0.3", TestBackend.Cldr)
{:error,
{Cldr.NoMatchingLocale,
"No configured locale could be matched to \\"xx,yy;q=0.3\\""}}
iex> Cldr.AcceptLanguage.best_match("invalid_tag", TestBackend.Cldr)
{:error, {Cldr.LanguageTag.ParseError,
"Expected a BCP47 language tag. Could not parse the remaining \\"g\\" starting at position 11"}}
"""
@spec best_match(String.t(), Cldr.backend()) ::
        {:ok, LanguageTag.t()} | {:error, {module(), String.t()}}
def best_match(accept_language, backend) when is_binary(accept_language) do
  # A failed parse does not match `{:ok, _}` and is returned unchanged by `with`.
  with {:ok, languages} <- parse(accept_language, backend) do
    # `languages` is already sorted by descending quality, so the first entry
    # with a non-nil `cldr_locale_name` is the best match. Error tuples and
    # tags with a nil `cldr_locale_name` are skipped.
    best =
      Enum.find(languages, fn
        {quality, %LanguageTag{cldr_locale_name: locale_name}}
        when is_float(quality) and not is_nil(locale_name) ->
          true

        _other ->
          false
      end)

    case best do
      {_quality, language_tag} ->
        {:ok, language_tag}

      nil ->
        {
          :error,
          {
            Cldr.NoMatchingLocale,
            "No configured locale could be matched to #{inspect(accept_language)}"
          }
        }
    end
  end
end
@doc """
Filters the result list returned by `parse!/2` (or the list inside
the `{:ok, list}` tuple returned by `parse/2`) to return only the
error tuples.

## Arguments

* `parse_result` is a list of `{quality, language_tag}` and
  `{:error, reason}` tuples

## Returns

* A list of the `{:error, reason}` tuples found in `parse_result`,
  which is empty when there are none

## Example

    iex> Cldr.AcceptLanguage.parse!("da,zh-TW;q=0.3,invalid_tag", TestBackend.Cldr)
    ...> |> Cldr.AcceptLanguage.errors
    [
      error: {Cldr.LanguageTag.ParseError,
        "Expected a BCP47 language tag. Could not parse the remaining \\"g\\" starting at position 11"}
    ]

"""
@spec errors([tuple()]) :: [{:error, {module(), String.t()}}]
def errors(parse_result) when is_list(parse_result) do
  Enum.filter(parse_result, &match?({:error, _}, &1))
end
# Converts the text following `;q=` into a float. Trailing characters after
# a leading number are ignored; text that does not start with a number gets
# the low fallback quality.
defp parse_quality(quality_string) do
  with {quality, _remainder} <- Float.parse(quality_string) do
    quality
  else
    :error -> @low_quality
  end
end
# Resolves each `{quality, language}` token through
# `Locale.canonical_language_tag/2`. Successful tags keep their quality;
# failures become `{:error, reason}` tuples in the same position.
defp parse_language_tags(tokens, backend) do
  Enum.map(tokens, fn {quality, language_tag} ->
    language_tag
    |> Locale.canonical_language_tag(backend)
    |> case do
      {:error, reason} -> {:error, reason}
      {:ok, tag} -> {quality, tag}
    end
  end)
end
# Strips optional whitespace from the header value. HTTP allows both space
# and horizontal tab around list separators and parameters, so both are
# removed before the value is split.
defp remove_whitespace(accept_language) do
  String.replace(accept_language, [" ", "\t"], "")
end
@doc """
Sorts parsed `Accept-Language` entries by descending quality.

## Arguments

* `tokens` is a list of `{quality, term}` and `{:error, reason}` tuples

## Returns

* The list with the highest quality first and all `{:error, reason}`
  tuples last. Entries of equal quality, and error tuples, keep
  their original relative order.

## Example

    iex> Cldr.AcceptLanguage.sort_by_quality([{0.3, "zh"}, {:error, :a}, {1.0, "da"}, {:error, :b}])
    [{1.0, "da"}, {0.3, "zh"}, {:error, :a}, {:error, :b}]

"""
@spec sort_by_quality([tuple()]) :: [tuple()]
def sort_by_quality(tokens) do
  Enum.sort(tokens, fn
    # Two errors compare as equal. Returning `true` for equal elements is
    # what keeps `Enum.sort/2` stable, so errors stay in input order.
    {:error, _}, {:error, _} -> true
    # Errors always sort after valid entries.
    {:error, _}, {_quality, _} -> false
    {_quality, _}, {:error, _} -> true
    # Higher quality first; `>=` keeps ties in input order.
    {quality_1, _}, {quality_2, _} -> quality_1 >= quality_2
  end)
end
end