# MIT License
#
# Copyright (c) 2019-2023 Knoxen
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
defmodule Puid.Chars do
@moduledoc """
Pre-defined character sets for use when creating `Puid` modules.
## Example
iex> defmodule(AlphanumId, do: use(Puid, chars: :alphanum))
## Pre-defined Chars
### :alpha
Upper/lower case alphabet
```none
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
```
bits per character: `5.7`
### :alpha_lower
Lower case alphabet
```none
abcdefghijklmnopqrstuvwxyz
```
bits per character: `4.7`
### :alpha_upper
Upper case alphabet
```none
ABCDEFGHIJKLMNOPQRSTUVWXYZ
```
bits per character: `4.7`
### :alphanum
Upper/lower case alphabet and numbers
```none
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789
```
bits per character: `5.95`
### :alphanum_lower
Lower case alphabet and numbers
```none
abcdefghijklmnopqrstuvwxyz0123456789
```
bits per character: `5.17`
### :alphanum_upper
Upper case alphabet and numbers
```none
ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789
```
bits per character: `5.17`
### :base16
[RFC 4648](https://tools.ietf.org/html/rfc4648#section-8) base16 character set
```none
0123456789ABCDEF
```
bits per character: `4`
### :base32
[RFC 4648](https://tools.ietf.org/html/rfc4648#section-6) base32 character set
```none
ABCDEFGHIJKLMNOPQRSTUVWXYZ234567
```
bits per character: `5`
### :base32_hex
[RFC 4648](https://tools.ietf.org/html/rfc4648#section-7) base32 extended hex character set
with lowercase letters
```none
0123456789abcdefghijklmnopqrstuv
```
bits per character: `5`
### :base32_hex_upper
[RFC 4648](https://tools.ietf.org/html/rfc4648#section-7) base32 extended hex character set
```none
0123456789ABCDEFGHIJKLMNOPQRSTUV
```
bits per character: `5`
### :crockford32
[Crockford 32](https://www.crockford.com/base32.html)
```none
0123456789ABCDEFGHJKMNPQRSTVWXYZ
```
bits per character: `5`
### :decimal
Decimal digits
```none
0123456789
```
bits per character: `3.32`
### :hex
Lowercase hexadecimal
```none
0123456789abcdef
```
bits per character: `4`
### :hex_upper
Uppercase hexadecimal
```none
0123456789ABCDEF
```
bits per character: `4`
### :safe_ascii
ASCII characters from `?!` to `?~`, minus backslash, backtick, single-quote and double-quote
```none
`!#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_abcdefghijklmnopqrstuvwxyz{|}~`
```
bits per character: `6.49`
### :safe32
Strings that don't look like English words and are easier to parse visually
```none
2346789bdfghjmnpqrtBDFGHJLMNPQRT
```
- remove all upper and lower case vowels (including y)
- remove all numbers that look like letters
- remove all letters that look like numbers
- remove all letters that have poor distinction between upper and lower case values
bits per character: `5`
### :safe64
[RFC 4648](https://tools.ietf.org/html/rfc4648#section-5) file system and URL safe character set
```none
ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_
```
bits per character: `6`
### :symbol
:safe_ascii characters not in :alphanum
```none
`!#$%&()*+,-./:;<=>?@[]^_{|}~`
```
bits per character: `4.81`
### :wordSafe32
Strings that don't look like English words
```none
23456789CFGHJMPQRVWXcfghjmpqrvwx
```
Origin unknown
bits per character: `5`
"""
@typedoc """
Chars can be designated by a pre-defined atom (e.g. `:safe32`), a binary or a charlist.

See `charlist!/1` for the pre-defined atoms and the validation applied to binaries and
charlists.
"""
@type puid_chars() :: atom() | String.t() | charlist()
@typedoc """
Character encoding scheme. `:ascii` encoding uses cross-product character pairs.

`encoding/1` yields `:ascii` when every code point is a safe ascii character and `:utf8` as
soon as any other safe code point is present.
"""
@type puid_encoding() :: :ascii | :utf8
##
## Chars count max is 256 due to optimized bit slicing scheme
##
## Enforced by `validate_charlist/1`, which raises a `Puid.Error` for longer charlists.
##
@chars_count_max 256
## -----------------------------------------------------------------------------------------------
## `charlist` of characters
## -----------------------------------------------------------------------------------------------
@doc """
Returns the `charlist` for a pre-defined `Puid.Chars` atom, a `String.t()` or a charlist.

The characters of a `String.t()` or a charlist must be unique, number at least 2 and at most
256, and contain no invalid code points.

Returns `{:ok, charlist}` on success. When `charlist!/1` raises a `Puid.Error`, its message is
returned as `{:error, message}`. Any other exception is not rescued.

## Example

    iex> Puid.Chars.charlist(:safe32)
    {:ok, ~c"2346789bdfghjmnpqrtBDFGHJLMNPQRT"}

    iex> Puid.Chars.charlist("dingosky")
    {:ok, ~c"dingosky"}

    iex> Puid.Chars.charlist("unique")
    {:error, "Characters not unique"}

"""
@spec charlist(puid_chars()) :: {:ok, charlist()} | {:error, String.t()}
def charlist(chars) do
  {:ok, charlist!(chars)}
rescue
  # Only `Puid.Error` is converted into an error tuple; the field is read without
  # parentheses because `error.message()` is deprecated map-access syntax.
  error in Puid.Error ->
    {:error, error.message}
end
@doc """
Same as `charlist/1` but either returns the __charlist__ or raises a `Puid.Error`.

Pre-defined atoms are expanded to their character lists; an unknown atom raises. A
`String.t()` is converted to a charlist and, like a charlist argument, validated with
`validate_charlist/1`.

## Example

    iex> Puid.Chars.charlist!(:safe32)
    ~c"2346789bdfghjmnpqrtBDFGHJLMNPQRT"

    iex> Puid.Chars.charlist!("dingosky")
    ~c"dingosky"

    iex> Puid.Chars.charlist!("unique")
    ** (Puid.Error) Characters not unique

"""
@spec charlist!(puid_chars()) :: charlist()
def charlist!(chars)

def charlist!(:alpha), do: charlist!(:alpha_upper) ++ charlist!(:alpha_lower)
def charlist!(:alpha_lower), do: Enum.to_list(?a..?z)
def charlist!(:alpha_upper), do: Enum.to_list(?A..?Z)
def charlist!(:alphanum), do: charlist!(:alpha) ++ charlist!(:decimal)
def charlist!(:alphanum_lower), do: charlist!(:alpha_lower) ++ charlist!(:decimal)
def charlist!(:alphanum_upper), do: charlist!(:alpha_upper) ++ charlist!(:decimal)
def charlist!(:base32), do: charlist!(:alpha_upper) ++ ~c"234567"
def charlist!(:base32_hex), do: charlist!(:decimal) ++ Enum.to_list(?a..?v)
def charlist!(:base32_hex_upper), do: charlist!(:decimal) ++ Enum.to_list(?A..?V)
def charlist!(:crockford32), do: charlist!(:decimal) ++ (charlist!(:alpha_upper) -- ~c"ILOU")
def charlist!(:decimal), do: Enum.to_list(?0..?9)
def charlist!(:hex), do: charlist!(:decimal) ++ Enum.to_list(?a..?f)
def charlist!(:hex_upper), do: charlist!(:decimal) ++ Enum.to_list(?A..?F)
def charlist!(:safe_ascii), do: Enum.filter(?!..?~, &safe_ascii?/1)
def charlist!(:safe32), do: ~c"2346789bdfghjmnpqrtBDFGHJLMNPQRT"
def charlist!(:safe64), do: charlist!(:alphanum) ++ ~c"-_"

# Every :safe_ascii character is unique, so list subtraction keeps exactly the
# non-alphanumeric characters in their original order.
def charlist!(:symbol), do: charlist!(:safe_ascii) -- charlist!(:alphanum)

def charlist!(:wordSafe32), do: ~c"23456789CFGHJMPQRVWXcfghjmpqrvwx"

# Any atom not matched above is not a pre-defined character set.
def charlist!(charlist) when is_atom(charlist),
  do: raise(Puid.Error, "Invalid pre-defined charlist: :#{charlist}")

def charlist!(chars) when is_binary(chars),
  do: chars |> to_charlist() |> validate_charlist()

def charlist!(charlist) when is_list(charlist), do: validate_charlist(charlist)
@doc false
@spec encoding(charlist() | String.t()) :: puid_encoding()
def encoding(charlist_or_chars)

# A binary is converted to a charlist and classified by the list clause.
def encoding(chars) when is_binary(chars), do: encoding(to_charlist(chars))

# Fold over the code points starting from `:ascii`; raises `Puid.Error` at the first
# code point that is neither safe ascii nor a safe code point.
def encoding(charlist) when is_list(charlist) do
  Enum.reduce(charlist, :ascii, &fold_encoding/2)
end

# One fold step: a safe ascii code point keeps the encoding seen so far, any other safe
# code point switches it to `:utf8`, and anything else is rejected.
defp fold_encoding(cp, current) do
  ascii? = cp < 0x007F and safe_ascii?(cp)

  cond do
    ascii? -> current
    safe_code_point?(cp) -> :utf8
    true -> raise(Puid.Error, "Invalid char")
  end
end
@doc false
# Returns `charlist` unchanged when it passes every check below; otherwise raises a
# `Puid.Error` for the first check that fails, in this order:
#   1. at least 2 code points
#   2. no more than @chars_count_max code points
#   3. unique code points
#   4. valid code points
def validate_charlist(charlist) when is_list(charlist) do
  count = length(charlist)

  cond do
    count < 2 ->
      raise(Puid.Error, "Need at least 2 characters")

    count > @chars_count_max ->
      raise(Puid.Error, "Character count cannot be greater than #{@chars_count_max}")

    !unique?(charlist, %{}) ->
      raise(Puid.Error, "Characters not unique")

    !Enum.all?(charlist, &safe_code_point?/1) ->
      raise(Puid.Error, "Invalid code point")

    true ->
      charlist
  end
end
# Prevent "unsafe" code points: values below 0x007F are checked against the safe ascii
# rules, everything else against the safe utf8 rules.
defp safe_code_point?(code_point) do
  if code_point < 0x007F do
    safe_ascii?(code_point)
  else
    safe_utf8?(code_point)
  end
end
# Safe ascii code points are the integers from ?! (0x21) to ?~ (0x7E), omitting
# double-quote (0x22), single-quote (0x27), backslash (0x5C) and backtick (0x60).
#
# The range check in the guard only matches integers, so floats such as 34.0 (which would
# slip past integer literal patterns) and non-numeric terms are rejected.
defp safe_ascii?(cp)
     when cp in 0x0021..0x007E and cp not in [0x0022, 0x0027, 0x005C, 0x0060],
     do: true

defp safe_ascii?(_), do: false
# Accept only integer code points that can be encoded as UTF-8:
# - reject code points between tilde and inverse bang (below 0x00A1)
# - reject UTF-16 surrogates (0xD800..0xDFFF) and values above 0x10FFFF
# - reject non-integer terms, which compare greater than any number and would otherwise
#   be treated as valid
# CxNote There may be other utf8 code points that should be invalid.
defp safe_utf8?(cp) when is_integer(cp) and cp >= 0x00A1 and cp < 0xD800, do: true
defp safe_utf8?(cp) when is_integer(cp) and cp > 0xDFFF and cp <= 0x10FFFF, do: true
defp safe_utf8?(_), do: false
# Are charlist characters unique?
#
# `seen` is a map of the characters visited so far; callers start with an empty map.
# Returns a strict boolean: `false` at the first repeated character, `true` once the
# whole list has been walked without a repeat.
defp unique?([], _seen), do: true
defp unique?([char | _rest], seen) when is_map_key(seen, char), do: false
defp unique?([char | rest], seen), do: unique?(rest, Map.put(seen, char, true))
end