lib/string_naming.ex

# Helper that turns a nested map of names into a tree of generated modules.
#
# Binary values in the map are hexadecimal code point strings and become
# functions; every other value is treated as a nested namespace and becomes
# a module of its own.
defmodule StringNaming.H do
  # Creates the module named by the `mod` path out of `children`.
  #
  # Binary entries of `children` become zero-arity functions returning the
  # UTF-8 encoded code point. The remaining entries are handed to `nesteds/2`
  # so that they end up as modules nested under `mod`. The created module also
  # gets `__all__/0`, listing `{function_name, value}` for every other
  # zero-arity function it exports.
  def nested_module(mod, children) do
    {leaves, branches} = Enum.split_with(children, fn {_key, value} -> is_binary(value) end)
    funs = Map.new(leaves)
    mods = Map.new(branches)

    accessors =
      Enum.map(funs, fn {name, value} ->
        # A leading digit is prefixed with "N_" before the name is normalized.
        fun_name =
          name
          |> String.replace(~r/\A(\d)/, "N_\\1")
          |> Macro.underscore()
          |> String.to_atom()

        quote do
          def unquote(fun_name)() do
            <<String.to_integer(unquote(value), 16)::utf8>>
          end
        end
      end)

    listing =
      quote do
        def __all__ do
          for {fun, 0} <- __MODULE__.__info__(:functions),
              fun != :__all__,
              do: {fun, apply(__MODULE__, fun, [])}
        end
      end

    Module.create(Module.concat(mod), accessors ++ [listing], Macro.Env.location(__ENV__))
    nesteds(mod, mods)
  end

  # Creates one module per non-binary entry of the map, named by appending the
  # entry key to the `nested` path. Binary entries are skipped. Returns `:ok`.
  def nesteds(nested \\ [], map_or_code)

  def nesteds(nested, %{} = map) do
    map
    |> Enum.reject(fn {_key, value} -> is_binary(value) end)
    |> Enum.each(fn {key, children} -> nested_module(nested ++ [key], children) end)
  end
end

defmodule StringNaming do
  @moduledoc ~S"""
  The sibling of [`String.Casing`](https://github.com/elixir-lang/elixir/blob/9873e4239f063e044e5d6602e173ebee4f32391d/lib/elixir/unicode/properties.ex#L57),
    `String.Break` and `String.Normalizer` from Elixir core.

  It parses the [`NamesList.txt`](http://www.unicode.org/Public/UCD/latest/ucd/NamesList.txt) file provided by the Unicode Consortium, building
    a set of nested modules under `StringNaming`. Each nested module provides an `__all__/0` function that returns all the
    available symbols in that particular namespace, as well as functions returning a symbol by its name.

  ## Examples

      iex> StringNaming.AnimalSymbols.monkey
      "🐒"
      iex> StringNaming.FrakturSymbols.Mathematical.Fraktur.Capital.__all__
      [a: "𝔄", b: "𝔅", d: "𝔇", e: "𝔈", f: "𝔉", g: "𝔊", j: "𝔍",
       k: "𝔎", l: "𝔏", m: "𝔐", n: "𝔑", o: "𝔒", p: "𝔓", q: "𝔔",
       s: "𝔖", t: "𝔗", u: "𝔘", v: "𝔙", w: "𝔚", x: "𝔛", y: "𝔜"]

  """

  # Category names to generate modules for: whatever
  # `StringNaming.Defaults.categories/0` returns, followed by any `:categories`
  # configured for `:string_naming` at compile time, with duplicates removed.
  @categories Enum.uniq(
                StringNaming.Defaults.categories() ++
                  Application.compile_env(:string_naming, :categories, [])
              )

  # Path of the names list, resolved relative to this source file.
  data_path = Path.join([__DIR__, "string_naming", "unicode", "NamesList.txt"])

  # The file is read while this module is being compiled; registering it as an
  # external resource lets the build tool recompile the module when it changes.
  @external_resource data_path

  # Sample of the input format (`<TAB>` stands for a tab character):
  #
  #     0021<TAB>EXCLAMATION MARK
  #     <TAB>= factorial
  #     <TAB>= bang
  #     <TAB>x (inverted exclamation mark - 00A1)
  #     <TAB>x (latin letter retroflex click - 01C3)

  # Handler for the tab-indented lines of the names list.
  # TODO make properties available as well; for now the line is ignored and
  # the three-element accumulator is handed back unchanged.
  extract_prop = fn _property, [_, _, _] = state -> state end

  # Normalizes a raw name into its underscored form: surrounding whitespace is
  # trimmed, a leading digit gets an "N_" prefix, every character other than
  # ASCII letters, digits, "_" and " " is turned into a space, and the
  # remaining words are joined with "_" before going through
  # `Macro.underscore/1`.
  underscore = fn name ->
    cleaned =
      name
      |> String.trim()
      |> String.replace(~r/\A(\d)/, "N_\\1")
      |> String.replace(~r/[^A-Za-z\d_ ]/, " ")

    cleaned
    |> String.split(" ", trim: true)
    |> Enum.join("_")
    |> Macro.underscore()
  end

  # Normalized names of the categories to keep. Entries of `@categories` that
  # start with "#", "=" or "+" are left out.
  @selected @categories
            |> Enum.reject(fn
              <<marker, _::binary>> when marker in [?#, ?=, ?+] -> true
              _ -> false
            end)
            |> Enum.map(underscore)
  # Adds a `{code, normalized_name, category}` entry to the accumulator for a
  # named code point. Names starting with "<" are skipped.
  extract_name = fn code, name, [category, names, props] = state ->
    case name do
      "<" <> _ -> state
      _ -> [category, [{code, underscore.(name), category} | names], props]
    end
  end

  # Single pass over the names list. The accumulator is
  # `[current_category, names, props]`; only `names` is used afterwards.
  [_category, names, _props] =
    data_path
    |> File.stream!()
    |> Enum.reduce(["Unknown", [], %{}], fn
      # Lines starting with ";" are skipped.
      <<";"::binary, _::binary>>, acc ->
        acc

      # "@<TAB>" lines switch the current category. Categories that were not
      # selected are recorded as "" so that their entries are ignored below.
      <<"@\t"::binary, category::binary>>, [_, names, props] ->
        category = underscore.(category)
        category = if Enum.member?(@selected, category), do: category, else: ""
        [category, names, props]

      # Any other "@" line leaves the accumulator untouched.
      <<"@"::binary, _::binary>>, acc ->
        acc

      # Tab-indented lines belong to the preceding entry.
      <<"\t"::binary, rest::binary>>, acc ->
        extract_prop.(rest, acc)

      # Entry inside a selected category. A line without a tab is ignored
      # instead of replacing the accumulator with the failed split result.
      code_name, [category, _, _] = acc when category != "" ->
        case :binary.split(code_name, "\t") do
          [code, name] -> extract_name.(code, name, acc)
          _ -> acc
        end

      # Four-digit codes starting with "00" are always kept, under "ascii".
      # NOTE(review): the accumulator returned here carries "ascii" as the
      # current category until the next "@<TAB>" line changes it.
      <<"00", _::binary-size(2), "\t", _::binary>> = code_name, [_, names, props] ->
        case :binary.split(code_name, "\t") do
          [code, name] -> extract_name.(code, name, ["ascii", names, props])
          _ -> ["ascii", names, props]
        end

      _, acc ->
        acc
    end)

  # Folds the collected names into a nested map keyed by camelized name parts:
  # the category comes first, then each "_"-separated word of the name, with
  # the code stored at the end of the path.
  names_tree =
    Enum.reduce(names, %{}, fn {code, name, category}, tree ->
      path = Enum.map([category | String.split(name, "_")], &Macro.camelize/1)

      # Walk the prefixes of the path, shortest first, making sure each of them
      # points at a map; a missing or non-map value is replaced by an empty map.
      prepared =
        1..length(path)
        |> Enum.map(&Enum.take(path, &1))
        |> Enum.reduce(tree, fn prefix, subtree ->
          update_in(subtree, prefix, fn
            existing when is_map(existing) -> existing
            _missing_or_leaf -> %{}
          end)
        end)

      put_in(prepared, path, code)
    end)

  # Generate the module tree under `StringNaming` from the collected names.
  StringNaming.H.nesteds(["StringNaming"], names_tree)

  @doc ~S"""
  Returns graphemes for modules that have names matching the regular expression given as a parameter.
  The response is a plain keyword list with names taken from concatenated nested module names.

  ## Examples

      iex> StringNaming.graphemes ~r/AnimalFace/
      [
        animalfaces_bear_face: "🐻",
        animalfaces_cat_face: "🐱",
        animalfaces_cow_face: "🐮",
        animalfaces_dog_face: "🐶",
        animalfaces_dragon_face: "🐲",
        animalfaces_frog_face: "🐸",
        animalfaces_hamster_face: "🐹",
        animalfaces_horse_face: "🐴",
        animalfaces_monkey_face: "🐵",
        animalfaces_mouse_face: "🐭",
        animalfaces_panda_face: "🐼",
        animalfaces_pig_face: "🐷",
        animalfaces_pig_nose: "🐽",
        animalfaces_rabbit_face: "🐰",
        animalfaces_spouting_whale: "🐳",
        animalfaces_tiger_face: "🐯",
        animalfaces_wolf_face: "🐺"
      ]

      iex> StringNaming.graphemes ~r/fraktur.small/i
      [
        fraktursymbols_mathematical_fraktur_small_a: "𝔞",
        fraktursymbols_mathematical_fraktur_small_b: "𝔟",
        fraktursymbols_mathematical_fraktur_small_c: "𝔠",
        fraktursymbols_mathematical_fraktur_small_d: "𝔡",
        fraktursymbols_mathematical_fraktur_small_e: "𝔢",
        fraktursymbols_mathematical_fraktur_small_f: "𝔣",
        fraktursymbols_mathematical_fraktur_small_g: "𝔤",
        fraktursymbols_mathematical_fraktur_small_h: "𝔥",
        fraktursymbols_mathematical_fraktur_small_i: "𝔦",
        fraktursymbols_mathematical_fraktur_small_j: "𝔧",
        fraktursymbols_mathematical_fraktur_small_k: "𝔨",
        fraktursymbols_mathematical_fraktur_small_l: "𝔩",
        fraktursymbols_mathematical_fraktur_small_m: "𝔪",
        fraktursymbols_mathematical_fraktur_small_n: "𝔫",
        fraktursymbols_mathematical_fraktur_small_o: "𝔬",
        fraktursymbols_mathematical_fraktur_small_p: "𝔭",
        fraktursymbols_mathematical_fraktur_small_q: "𝔮",
        fraktursymbols_mathematical_fraktur_small_r: "𝔯",
        fraktursymbols_mathematical_fraktur_small_s: "𝔰",
        fraktursymbols_mathematical_fraktur_small_t: "𝔱",
        fraktursymbols_mathematical_fraktur_small_u: "𝔲",
        fraktursymbols_mathematical_fraktur_small_v: "𝔳",
        fraktursymbols_mathematical_fraktur_small_w: "𝔴",
        fraktursymbols_mathematical_fraktur_small_x: "𝔵",
        fraktursymbols_mathematical_fraktur_small_y: "𝔶",
        fraktursymbols_mathematical_fraktur_small_z: "𝔷"
      ]

      iex> StringNaming.graphemes ~r/\Aspace/i, false
      [
        space_medium_mathematical_space: "โŸ",
        space_narrow_no_break_space: "โ€ฏ",
        space_ogham_space_mark: "แš€",
        spaces_em_quad: "โ€",
        spaces_em_space: "โ€ƒ",
        spaces_en_quad: "โ€€",
        spaces_en_space: "โ€‚",
        spaces_figure_space: "โ€‡",
        spaces_four_per_em_space: "โ€…",
        spaces_hair_space: "โ€Š",
        spaces_punctuation_space: "โ€ˆ",
        spaces_six_per_em_space: "โ€†",
        spaces_thin_space: "โ€‰",
        spaces_three_per_em_space: "โ€„"
      ]


  """
  @spec graphemes(Regex.t(), boolean()) :: keyword(String.t()) | :undefined
  def graphemes(%Regex{} = filter, modules_only? \\ true) do
    # Anything other than `{:ok, modules}` from `:application.get_key/2` is
    # returned to the caller as is.
    with {:ok, modules} <- :application.get_key(:string_naming, :modules) do
      modules
      |> Enum.filter(&grapheme_module?(&1, filter, modules_only?))
      |> Enum.flat_map(&matching_graphemes(&1, filter))
    end
  end

  # Tells whether `module` takes part in the lookup: it must live under
  # `StringNaming.`, export `__all__/0` and, unless `modules_only?` is `false`,
  # have a name (without the `StringNaming.` prefix) matching `filter`.
  defp grapheme_module?(module, filter, modules_only?) do
    case to_string(module) do
      "Elixir.StringNaming." <> name ->
        (modules_only? == false or Regex.match?(filter, name)) and exports_all?(module)

      _ ->
        false
    end
  end

  # `function_exported?/3` does not load the module, hence the explicit
  # `Code.ensure_loaded/1` in front of it.
  defp exports_all?(module) do
    match?({:module, ^module}, Code.ensure_loaded(module)) and
      function_exported?(module, :__all__, 0)
  end

  # Returns the `{name, grapheme}` pairs of `module` whose full name matches
  # `filter`. The full name is the module path (without `StringNaming.`)
  # followed by the function name, joined with "_"; the returned key is its
  # lowercased form.
  defp matching_graphemes(module, filter) do
    "Elixir.StringNaming." <> name = to_string(module)
    prefix = String.split(name, ~r/\W/)

    for {fun, grapheme} <- module.__all__(),
        full_name = Enum.join(prefix ++ [fun], "_"),
        Regex.match?(filter, full_name) do
      # Keys are built only from names of existing modules and functions, so
      # the set of atoms created here is bounded.
      {full_name |> String.downcase() |> String.to_atom(), grapheme}
    end
  end
end

# Drop the helper module from the code server: purge any old version of it,
# then delete its current code. Presumably this is done because
# `StringNaming.H` is only used while the `StringNaming` module body above is
# evaluated — confirm no runtime caller relies on it.
:code.purge(StringNaming.H)
:code.delete(StringNaming.H)