# lib/encoder.ex

defmodule MfmParser.Encoder do
  @moduledoc """
  An encoder that turns a String with MFM functions, or an MFM tree from `MfmParser.Parser.parse/1`, into FEP-c16b compliant HTML.

  It only works for the MFM specific tags of the form `$[name.attributes content]`. Other parts of MFM (e.g. HTML, Markdown and [KaTeX](https://katex.org/)) are out of scope for this project.

  ## Examples

      iex> [
      ...>   %MfmParser.Node.MFM{
      ...>     name: "twitch",
      ...>     content: [%MfmParser.Node.Text{content: "🍮"}],
      ...>     attributes: [{"speed", "5s"}]
      ...>   }
      ...> ]
      ...> |> MfmParser.Encoder.to_html()
      ~S[<span class="mfm-twitch" data-mfm-speed="5s">🍮</span>]

      iex> "$[twitch.speed=5s 🍮]" |> MfmParser.Encoder.to_html()
      ~S[<span class="mfm-twitch" data-mfm-speed="5s">🍮</span>]
  """

  alias MfmParser.Node

  @doc """
  Converts MFM into FEP-c16b compliant HTML.

  `input` is either a string containing MFM, which is first parsed with
  `MfmParser.Parser.parse/1`, or a list of nodes such as the tree returned by
  that function.

  An MFM node with a valid name is rendered as a `span` element with class
  `mfm-NAME` and one `data-mfm-KEY` attribute per valid attribute; attribute
  values are HTML-escaped. A name or attribute key is valid when it consists
  solely of ASCII letters, digits, `_` and `-`. A node with an invalid name is
  not wrapped (only its content is emitted), and attributes with an invalid key
  are dropped.

  ## Options

    * `:escape_text` - when truthy, the content of text nodes is HTML-escaped.
      By default text content is emitted unchanged.
  """
  @spec to_html(String.t() | [struct()], keyword()) :: String.t()
  def to_html(input, opts \\ [])

  def to_html(mfm_string, opts) when is_binary(mfm_string) do
    mfm_string
    |> MfmParser.Parser.parse()
    |> to_html(opts)
  end

  def to_html(nodes, opts) when is_list(nodes) do
    Enum.map_join(nodes, &node_to_html(&1, opts))
  end

  # Renders a text node; its content is escaped only when `:escape_text` is set.
  defp node_to_html(%Node.Text{content: content}, opts) do
    if opts[:escape_text], do: escape_html(content), else: content
  end

  # Renders an MFM node as a span, or as its bare content when the name is not
  # a valid identifier and therefore cannot be placed safely in the markup.
  defp node_to_html(%Node.MFM{name: name, attributes: attributes, content: content}, opts) do
    if valid_identifier?(name) do
      attributes_html = Enum.map_join(attributes, &attribute_to_html/1)

      "<span class=\"mfm-#{name}\"#{attributes_html}>#{to_html(content, opts)}</span>"
    else
      to_html(content, opts)
    end
  end

  # Renders one attribute with a leading space: `{key}` is a value-less
  # attribute, `{key, value}` carries an escaped value. Attributes whose key is
  # not a valid identifier render as the empty string.
  defp attribute_to_html({key}) do
    if valid_identifier?(key), do: " data-mfm-#{key}", else: ""
  end

  defp attribute_to_html({key, value}) do
    if valid_identifier?(key) do
      " data-mfm-#{key}=\"#{escape_html(value)}\""
    else
      ""
    end
  end

  # Identifiers are interpolated verbatim into class and attribute names, so
  # only ASCII letters, digits, `_` and `-` are accepted. The pattern is
  # anchored with `\A`/`\z` because `$` would also match before a trailing
  # newline and let e.g. "twitch\n" through.
  defp valid_identifier?(value) when is_binary(value) do
    Regex.match?(~r/\A[A-Za-z0-9_-]+\z/, value)
  end

  defp valid_identifier?(_), do: false

  # Escapes the characters that are significant in HTML text and quoted
  # attribute values. `&` is replaced first so that the entities produced by
  # the later replacements are not escaped a second time.
  defp escape_html(value) do
    value
    |> to_string()
    |> String.replace("&", "&amp;")
    |> String.replace("<", "&lt;")
    |> String.replace(">", "&gt;")
    |> String.replace("\"", "&quot;")
    |> String.replace("'", "&#39;")
  end
end