lib/unicode/script.ex

defmodule Unicode.Script do
  @moduledoc """
  Functions to introspect Unicode
  scripts for binaries
  (Strings) and codepoints.

  """

  @behaviour Unicode.Property.Behaviour

  alias Unicode.Utils

  @scripts Utils.scripts()
           |> Utils.remove_annotations()

  @doc """
  Returns the map of Unicode
  scripts.

  The script name is the map
  key and a list of codepoint
  ranges as tuples as the value.

  """

  def scripts do
    @scripts
  end

  @doc """
  Returns a list of known Unicode
  script names.

  This function does not return the
  names of any script aliases.

  """
  @known_scripts Map.keys(@scripts)
  def known_scripts do
    @known_scripts
  end

  @script_alias Utils.property_value_alias()
                |> Map.get("sc")
                |> Utils.invert_map()
                |> Utils.atomize_values()
                |> Utils.downcase_keys_and_remove_whitespace()
                |> Utils.add_canonical_alias()

  @doc """
  Returns a map of aliases for
  Unicode scripts.

  An alias is an alternative name
  for referring to a script. Aliases
  are resolved by the `fetch/1` and
  `get/1` functions.

  """
  @impl Unicode.Property.Behaviour
  def aliases do
    @script_alias
  end

  @doc """
  Returns the Unicode ranges for
  a given script as a list of
  ranges as 2-tuples.

  Aliases are resolved by this function.

  Returns either `{:ok, range_list}` or
  `:error`.

  """
  @impl Unicode.Property.Behaviour
  def fetch(script) when is_atom(script) do
    Map.fetch(scripts(), script)
  end

  def fetch(script) do
    script = Utils.downcase_and_remove_whitespace(script)
    script = Map.get(aliases(), script, script) |> Utils.maybe_atomize()
    Map.fetch(scripts(), script)
  end

  @doc """
  Returns the Unicode ranges for
  a given script as a list of
  ranges as 2-tuples.

  Aliases are resolved by this function.

  Returns either `range_list` or
  `nil`.

  """
  @impl Unicode.Property.Behaviour
  def get(script) do
    case fetch(script) do
      {:ok, script} -> script
      _ -> nil
    end
  end

  @doc """
  Returns the count of the number of characters
  for a given script.

  ## Example

      iex> Unicode.Script.count("mongolian")
      168

  """
  @impl Unicode.Property.Behaviour
  def count(script) do
    with {:ok, script} <- fetch(script) do
      Enum.reduce(script, 0, fn {from, to}, acc -> acc + to - from + 1 end)
    end
  end

  @doc """
  Returns the script name(s) for the
  given binary or codepoint.

  In the case of a codepoint, a single
  script name is returned.

  For a binary a list of distinct script
  names represented by the graphemes in
  the binary is returned.

  """
  def script(string) when is_binary(string) do
    string
    |> String.to_charlist()
    |> Enum.map(&script/1)
    |> Enum.uniq()
  end

  for {script, ranges} <- @scripts do
    def script(codepoint) when unquote(Utils.ranges_to_guard_clause(ranges)) do
      unquote(script)
    end
  end

  def script(codepoint) when is_integer(codepoint) and codepoint in 0..0x10FFFF do
    :unknown
  end
end