defmodule QRNBU.Encoders.Charset do
  @moduledoc """
  Character set handling for NBU QR codes.

  Two encoding modes are recognised (NBU Resolution No. 97):

    - **UTF-8** (code 1): Unicode encoding
    - **CP1251** (code 2): Windows-1251 Cyrillic encoding

  ## Character Restrictions

  For both modes the input must be a well-formed UTF-8 binary and must not
  contain Unicode control characters, i.e. codepoints in `U+0000..U+001F`
  or `U+007F..U+009F`. The first offending codepoint is reported in the
  error message.

  ## CP1251 status

  Transcoding to Windows-1251 is **not implemented yet**. `encode/2` with
  `:cp1251` validates the input and returns it unchanged (still UTF-8 bytes)
  together with encoding code `2`. Byte-level CP1251 restrictions (such as
  bytes 152 and 160) are therefore not checked here.

  ## Examples

      iex> QRNBU.Encoders.Charset.encode("Оплата", :utf8)
      {:ok, "Оплата", 1}

      iex> QRNBU.Encoders.Charset.encode("Оплата", :cp1251)
      {:ok, "Оплата", 2}
  """

  @type encoding :: :utf8 | :cp1251
  @type encoding_code :: 1 | 2

  # Control characters to exclude (by codepoint): C0 controls, then DEL + C1 controls.
  @control_chars_range_1 0x0000..0x001F
  @control_chars_range_2 0x007F..0x009F

  @doc """
  Validates `string` and tags it with the numeric code of `encoding`.

  ## Parameters

    - `string` - The string to encode
    - `encoding` - Either `:utf8` or `:cp1251`

  ## Returns

    - `{:ok, binary, 1}` for `:utf8`
    - `{:ok, binary, 2}` for `:cp1251` (the binary is currently the unchanged
      UTF-8 input; see the module documentation)
    - `{:error, reason}` when the input is not a binary, is not well-formed
      UTF-8, contains a control character, or `encoding` is not supported

  ## Examples

      iex> QRNBU.Encoders.Charset.encode("Payment", :utf8)
      {:ok, "Payment", 1}

      iex> QRNBU.Encoders.Charset.encode("Оплата", :cp1251)
      {:ok, "Оплата", 2}

      iex> QRNBU.Encoders.Charset.encode("Payment", :latin1)
      {:error, "Unsupported encoding: :latin1"}
  """
  @spec encode(String.t(), encoding()) :: {:ok, binary(), encoding_code()} | {:error, String.t()}
  def encode(string, :utf8) when is_binary(string) do
    case validate_utf8(string) do
      :ok -> {:ok, string, 1}
      {:error, _} = error -> error
    end
  end

  def encode(string, :cp1251) when is_binary(string) do
    # CP1251 encoding support requires specific codepagex configuration.
    # For now the input is only validated and returned as-is (UTF-8 bytes)
    # with the CP1251 code; proper transcoding is still to be implemented.
    case validate_utf8(string) do
      :ok -> {:ok, string, 2}
      {:error, _} = error -> error
    end
  end

  # A binary paired with an unknown encoding is an encoding problem, not an
  # input-type problem, so it gets its own message.
  def encode(string, encoding) when is_binary(string) do
    {:error, "Unsupported encoding: #{inspect(encoding)}"}
  end

  def encode(_, _), do: {:error, "Input must be a binary string"}

  @doc """
  Gets the encoding code for a given encoding type.

  ## Examples

      iex> QRNBU.Encoders.Charset.encoding_code(:utf8)
      1

      iex> QRNBU.Encoders.Charset.encoding_code(:cp1251)
      2
  """
  @spec encoding_code(encoding()) :: encoding_code()
  def encoding_code(:utf8), do: 1
  def encoding_code(:cp1251), do: 2

  @doc """
  Gets the encoding type for a given encoding code.

  Returns `nil` for any value other than `1` or `2`.

  ## Examples

      iex> QRNBU.Encoders.Charset.encoding_type(1)
      :utf8

      iex> QRNBU.Encoders.Charset.encoding_type(2)
      :cp1251

      iex> QRNBU.Encoders.Charset.encoding_type(3)
      nil
  """
  @spec encoding_type(encoding_code()) :: encoding() | nil
  def encoding_type(1), do: :utf8
  def encoding_type(2), do: :cp1251
  def encoding_type(_), do: nil

  # Validates a binary for NBU QR code compliance.
  # Rejects ill-formed UTF-8 and control characters (U+0000-U+001F, U+007F-U+009F).
  @spec validate_utf8(binary()) :: :ok | {:error, String.t()}
  defp validate_utf8(string) do
    # String.to_charlist/1 raises on ill-formed UTF-8, so well-formedness is
    # checked first to keep the {:error, reason} contract.
    if String.valid?(string) do
      case Enum.find(String.to_charlist(string), &control_char?/1) do
        nil -> :ok
        invalid -> {:error, "Invalid UTF-8 character: codepoint #{invalid}"}
      end
    else
      {:error, "Input is not valid UTF-8"}
    end
  end

  # True when the codepoint falls in one of the excluded control ranges.
  defp control_char?(codepoint) do
    codepoint in @control_chars_range_1 or codepoint in @control_chars_range_2
  end
end