lib/unicode/transform/transform.ex

defmodule Unicode.Transform do
  @moduledoc """
  Implements the Unicode transform rules.

  The rules are implemented by the macros
  `filter/1`, `transform/1` and `replace/3`.

  Typically transform modules are generated
  from the CLDR transform specifications using
  `Unicode.Transform.Generator.generate/1`.

  These macros are then transformed to Elixir
  code by the functions in this module.

  """

  @doc """
  Transforms a string according to the rules
  of the implementing module.
  """
  @callback transform(String.t()) :: String.t()

  @doc """
  Transforms a string with a filter
  module provided.

  The pipeline generated by this module calls this
  callback with the calling transform module as the
  second argument. The implementation generated by
  this module ignores that argument.
  """
  @callback transform(String.t(), module()) :: String.t()

  # Injected by `use Unicode.Transform`.
  #
  # Declares the behaviour, imports the rule macros of this module,
  # registers the module attributes those macros write to and hooks
  # `__before_compile__/1` so the transform functions are generated
  # once all rules have been collected.
  defmacro __using__(_) do
    quote do
      @behaviour Unicode.Transform

      import unquote(__MODULE__)
      import Unicode.Regex, only: [compile!: 1]
      require Unicode.Set

      # A single filter per module; rules and variables accumulate.
      Module.register_attribute(__MODULE__, :filter, accumulate: false)
      Module.register_attribute(__MODULE__, :rules, accumulate: true)
      Module.register_attribute(__MODULE__, :variables, accumulate: true)

      @before_compile unquote(__MODULE__)
    end
  end

  @doc """
  Sets the filter of the transform module being defined.

  The value is stored in the `:filter` attribute of the calling
  module. That attribute does not accumulate, so a later call
  replaces the value stored by an earlier one.
  """
  defmacro filter(filter) do
    quote bind_quoted: [filter: filter] do
      Module.put_attribute(__MODULE__, :filter, filter)
    end
  end

  @doc """
  Adds a transform rule to the transform module being defined.

  A `{:transform, transform}` tuple is added to the accumulating
  `:rules` attribute of the calling module.
  """
  defmacro transform(transform) do
    quote bind_quoted: [transform: transform] do
      Module.put_attribute(__MODULE__, :rules, {:transform, transform})
    end
  end

  @doc """
  Adds a replacement rule to the transform module being defined.

  A `{:replace, from, to, options}` tuple is added to the
  accumulating `:rules` attribute of the calling module. The
  arguments are stored in their quoted form. The code generator
  reads the `:preceeded_by` and `:followed_by` keys of `options`.
  """
  defmacro replace(from, to, options \\ []) do
    # Escape the tuple so the quoted arguments are stored as-is
    # rather than being evaluated in the calling module.
    escaped_rule = Macro.escape({:replace, from, to, options})

    quote do: Module.put_attribute(__MODULE__, :rules, unquote(escaped_rule))
  end

  @doc """
  Defines a variable for the transform module being defined.

  A `{:define, var, value}` tuple is added to the accumulating
  `:variables` attribute of the calling module.
  """
  defmacro define(var, value) do
    quote bind_quoted: [var: var, value: value] do
      Module.put_attribute(__MODULE__, :variables, {:define, var, value})
    end
  end

  # Runs just before the using module is compiled. Reads the filter,
  # variables and rules collected by the rule macros and returns the
  # quoted guard, `filter?/1`, `transform/1,2` and `replace_N/1`
  # definitions for that module.
  defmacro __before_compile__(_env) do
    caller = __CALLER__.module
    filter = Module.get_attribute(caller, :filter)

    # The variable map is built here but is not consumed by any of
    # the generators below.
    _variables =
      Unicode.Transform.Utils.make_variable_map(Module.get_attribute(caller, :variables))

    # Accumulated attributes are reversed here before the rules
    # are grouped.
    collected_rules = Module.get_attribute(caller, :rules)
    rules = group_rules(Enum.reverse(collected_rules))

    [
      generate_guard(filter),
      filter_function(filter),
      generate_transform(rules, caller),
      generate_conversions(rules, filter)
    ]
  end

  # Generates the `iff/1` guard which represents the filter rule.
  # The generated replace functions use it to decide whether the
  # conversion rules apply to a codepoint.
  #
  # NOTE(review): without a filter the guard is defined with `defguard`
  # (public), with a filter it is defined with `defguardp` (private).
  # Confirm whether that difference is intended.

  # No filter: every integer codepoint passes.
  defp generate_guard(nil) do
    quote do
      defguard(iff(cp) when is_integer(cp))
    end
  end

  # With a filter: the codepoint must match the filter set.
  defp generate_guard(filter) do
    quote do
      defguardp(iff(cp) when Unicode.Set.match?(cp, unquote(filter)))
    end
  end

  # Generates the public `filter?/1` function for the filter rule.
  # It exists so that other transforms can call it: the filter rule
  # is considered global, so a transform invoked from another
  # transform needs to conform to this filter too.

  # No filter: everything passes.
  def filter_function(nil) do
    quote do
      def filter?(_), do: true
    end
  end

  # With a filter: the filter is evaluated first and the resulting
  # value is embedded in the generated function.
  def filter_function(filter) do
    {evaluated_filter, _bindings} = Code.eval_quoted(filter)

    quote do
      def filter?(char), do: Unicode.Set.match?(char, unquote(evaluated_filter))
    end
  end

  # Generates the functions which implement the conversion rules.
  #
  # Only the list entries of `rules` (groups of replace rules) are
  # considered. The groups are numbered from 1 in order of appearance
  # and the quoted function definitions are returned in that order.

  defp generate_conversions(rules, filter) do
    {_count, functions} =
      rules
      |> Enum.filter(&is_list/1)
      |> Enum.reduce({0, []}, fn group, state -> generate_conversion(group, state, filter) end)

    Enum.reverse(functions)
  end

  # Generates the `transform/1` function that is the single public
  # API, together with an undocumented `transform/2` which runs the
  # same pipeline and ignores its second argument.
  #
  # The names used in the generated documentation come from
  # `extract_from_to/1`, which must return exactly two parts.

  defp generate_transform(rules, caller) do
    body = generate_pipeline(rules, caller)
    [from, to] = extract_from_to(caller)

    quote do
      @doc """
      Transforms a string from #{inspect(unquote(from))} to #{inspect(unquote(to))}
      """
      @spec transform(String.t()) :: String.t()
      def transform(string), do: unquote(body)

      @doc false
      def transform(string, _filter), do: unquote(body)
    end
  end

  # Derives capitalized name parts from the last segment of the
  # caller's module name: the segment is underscored, split on "_"
  # and each part is capitalized.
  defp extract_from_to(caller) do
    last_segment = caller |> Module.split() |> List.last()

    for word <- String.split(Macro.underscore(last_segment), "_") do
      String.capitalize(word)
    end
  end

  # Generates the pipeline that executes the transform rules and
  # the conversion rules in the correct order.
  #
  # One call is generated per entry of `rules`. Starting from the
  # `string` variable, each call receives the result of the previous
  # step as its first argument.

  defp generate_pipeline(rules, caller) do
    {_count, reversed_calls} =
      Enum.reduce(rules, {0, []}, fn rule, state ->
        generate_function_call(rule, state, caller)
      end)

    input = quote(do: string)

    reversed_calls
    |> Enum.reverse()
    |> Enum.reduce(input, fn call, piped -> Macro.pipe(piped, call, 0) end)
  end

  # Generates the function calls used in the pipeline. The accumulator
  # is `{counter, calls}` where `counter` is the number of replace
  # groups seen so far and `calls` holds the quoted calls, most
  # recent first.

  # A transform rule becomes a remote `transform` call on the module
  # derived from the rule name, passing the caller as argument. The
  # counter is left unchanged.
  def generate_function_call({:transform, name}, {counter, acc}, caller) do
    transform_module = filter_module(name)
    call = quote(do: unquote(transform_module).transform(unquote(caller)))

    {counter, [call | acc]}
  end

  # Any other entry is treated as a group of replace rules and becomes
  # a call to the next numbered local `replace_N` function.
  def generate_function_call(_rule_group, {counter, acc}, _caller) do
    index = counter + 1
    function_name = :"replace_#{index}"
    call = quote(do: unquote(function_name)())

    {index, [call | acc]}
  end

  # Generates the functions that do the conversions for one group of
  # replace rules.
  #
  # Three clauses of a private `replace_N/1` function are produced:
  #
  # 1. the first codepoint passes the `iff/1` guard: try each rule in
  #    order, falling back to copying the codepoint and recursing
  # 2. the first codepoint does not pass: copy it and recurse
  # 3. the empty string: return the empty string
  #
  # They are pushed onto the accumulator so that, once the accumulator
  # is reversed by the caller, they appear in the order listed above.

  defp generate_conversion(conversions, {counter, acc}, _filter) when is_list(conversions) do
    index = counter + 1
    function_name = :"replace_#{index}"

    # Catch-all case clause used when no rule matches.
    fallthrough_clause =
      quote do
        <<char::utf8>> <> rest -> <<char::utf8>> <> unquote(function_name)(rest)
      end

    # Each rule yields a one-element list of case clauses.
    case_clauses =
      Enum.flat_map(conversions, &generate_replace_clause(&1, function_name)) ++
        fallthrough_clause

    filtered_clause =
      quote do
        defp unquote(function_name)(<<char::utf8, rest::binary>>) when iff(char) do
          case <<char::utf8, rest::binary>> do
            unquote(case_clauses)
          end
        end
      end

    unfiltered_clause =
      quote do
        defp unquote(function_name)(<<char::utf8, rest::binary>>) do
          <<char::utf8>> <> unquote(function_name)(rest)
        end
      end

    empty_clause =
      quote do
        defp unquote(function_name)("") do
          ""
        end
      end

    {index, [empty_clause, unfiltered_clause, filtered_clause | acc]}
  end

  # Generates the case clauses, one for each conversion rule.
  #
  # Reads the `:preceeded_by` and `:followed_by` options of the rule
  # (both `nil` when absent) and dispatches on them.

  defp generate_replace_clause({:replace, from, to, options}, function_name) do
    generate_replace_clause(
      from,
      to,
      Keyword.get(options, :preceeded_by),
      Keyword.get(options, :followed_by),
      function_name
    )
  end

  # Builds the quoted case clause for one replace rule. The clause
  # shape depends on which of `preceeded_by` and `followed_by` is set.
  # In every shape the generated code continues by calling
  # `function_name` on the remainder of the string.

  # No context: the string starts with `from`; emit `to` and recurse
  # on what follows.
  defp generate_replace_clause(from, to, nil, nil, function_name) do
    quote do
      unquote(from) <> rest -> unquote(to) <> unquote(function_name)(rest)
    end
  end

  # Only `preceeded_by`: the first codepoint must match the
  # `preceeded_by` set. It is kept and `from` is replaced by `to`
  # in the rest of the string.
  # NOTE(review): `String.replace/3` replaces every match in `rest`, not
  # only a match directly after `before` - confirm this is intended.
  defp generate_replace_clause(from, to, preceeded_by, nil, function_name) do
    quote do
      <<before::utf8, rest::binary>> when Unicode.Set.match?(before, unquote(preceeded_by)) ->
        replaced = String.replace(rest, compile!(unquote(from)), unquote(to))
        <<before::utf8>> <> unquote(function_name)(replaced)
    end
  end

  # Only `followed_by`: the string starts with `from` and the next
  # codepoint must match the `followed_by` set. `to` is emitted and
  # processing resumes at that next codepoint.
  defp generate_replace_clause(from, to, nil, followed_by, function_name) do
    quote do
      unquote(from) <> <<next::utf8, rest::binary>>
      when Unicode.Set.match?(next, unquote(followed_by)) ->
        unquote(to) <> unquote(function_name)(<<next::utf8, rest::binary>>)
    end
  end

  # Both: `from` must sit between a codepoint matching `preceeded_by`
  # and a codepoint matching `followed_by`. The preceding codepoint is
  # kept and processing resumes at `to`.
  defp generate_replace_clause(from, to, preceeded_by, followed_by, function_name) do
    quote do
      <<before::utf8, unquote(from), next::utf8, rest::binary>>
      when Unicode.Set.match?(before, unquote(preceeded_by)) and
             Unicode.Set.match?(next, unquote(followed_by)) ->
        <<before::utf8>> <> unquote(function_name)(<<unquote(to), next::utf8, rest::binary>>)
    end
  end

  # Groups clusters of conversion rules together so
  # that we can identify the breaks between transform
  # rules and conversion rules.

  # The return is a list of two entry types:
  # 1. A transform tuple {:transform, transform_name}
  # 2. Or a list of conversion tuples {:replace, ....}

  # The grouping reflects the final pipeline that will
  # be generated in which we execute transforms (which process
  # the whole string) and replacements (which process
  # the string iteratively).

  # For example:
  # [
  #   {:transform, "NFD"},
  #   [
  #     {:replace, "[:Mn:]+", "", [preceeded_by: "[[:Latin:][0-9]]"]}
  #   ],
  #   {:transform, "NFC"},
  #   [
  #     {:replace, "Æ", "AE", []},
  #     {:replace, "Ð", "D", []},
  #     ....
  #   ]
  # ]

  # A run of replace rules always becomes a list, even when the run
  # holds a single rule: the conversion functions are only generated
  # for list entries, so a bare replace tuple in the result would leave
  # the pipeline calling a `replace_N` function that is never defined.
  #
  # Rule order is preserved. Raises `FunctionClauseError` when an entry
  # is neither a transform rule nor a replace rule.

  defp group_rules(rules) when is_list(rules) do
    rules
    |> Enum.chunk_by(&rule_kind/1)
    |> Enum.flat_map(fn
      # Consecutive transforms stay individual entries
      [{:transform, _} | _] = transforms -> transforms
      # Consecutive replacements become one group
      [{:replace, _, _, _} | _] = replacements -> [replacements]
    end)
  end

  # Classifies a rule so that runs of the same kind can be chunked.
  defp rule_kind({:transform, _}), do: :transform
  defp rule_kind({:replace, _, _, _}), do: :replace

  # Derives the module that implements the named transform by nesting
  # the name built by `filter_module_name/1` under this module.
  # Doesn't yet handle complex names

  defp filter_module(name) do
    Module.concat([__MODULE__, filter_module_name(name)])
  end

  # Builds the module name for a transform name. The name is
  # downcased and split on "-": a single part is capitalized and
  # prefixed with "Any"; two parts are capitalized and joined.
  # Any other number of parts raises `CaseClauseError`.
  defp filter_module_name(name) do
    segments = String.split(String.downcase(name), "-")

    case segments do
      [target] -> "Any" <> String.capitalize(target)
      [source, target] -> Enum.map_join([source, target], &String.capitalize/1)
    end
  end
end