lib/parser.ex

defmodule MfmParser.Parser do
  alias MfmParser.Token
  alias MfmParser.Node
  alias MfmParser.Lexer

  @moduledoc """
  `MfmParser` is a [FEP-c16b](https://codeberg.org/fediverse/fep/src/branch/main/fep/c16b/fep-c16b.md) compatible parser for Misskey's [Markup language For Misskey](https://misskey-hub.net/en/docs/for-users/features/mfm/) MFM functions.

  It can parse a string containing MFM functions and return a tree. There is also an encoder that can turn a tree into HTML.

  It only parses the MFM-specific tags of the form `$[name.opts content]`.

  Other parts of MFM (HTML, Markdown and [KaTeX](https://katex.org/)) are out of scope for this project.

  ## Examples

      iex> MfmParser.Parser.parse("$[twitch.speed=0.5s 🍮]")
      [
        %MfmParser.Node.MFM{
          name: "twitch",
          attributes: [{"speed", "0.5s"}],
          content: [%MfmParser.Node.Text{content: "🍮"}]
        }
      ]
  """

  @doc """
  Parses `input` into a list of `MfmParser.Node.Text` and `MfmParser.Node.MFM` nodes.

  Tokens are pulled one at a time with `MfmParser.Lexer.next/1`:

    * a text token becomes a text node;
    * an open token starts a nested parse. When a matching close token is found,
      the nested nodes become the `content` of an MFM node. When the input ends
      first, the open token and everything after it are kept as one text node;
    * a close token that does not close anything becomes a text node.

  `tree` holds the nodes parsed so far (new nodes are added after them) and
  `is_open` says whether a close token is currently expected. Both are used by
  the recursion; top-level callers normally rely on their defaults.

  Returns the list of nodes once the end of the input is reached. While
  `is_open` is set, reaching a close token returns `{nodes, rest}` instead,
  where `rest` is the input left after that close token.
  """
  def parse(input, tree \\ [], is_open \\ false) do
    # Nodes are collected in reverse so each addition is an O(1) prepend;
    # do_parse/3 restores the order once, right before returning.
    do_parse(input, Enum.reverse(tree), is_open)
  end

  # Recursive worker behind parse/3. `reversed_tree` is the list of nodes parsed
  # so far on the current nesting level, newest first.
  defp do_parse(input, reversed_tree, is_open) do
    case Lexer.next(input) do
      {%Token.Text{content: content}, rest} ->
        do_parse(rest, [%Node.Text{content: content} | reversed_tree], is_open)

      {%Token.MFM.Open{} = token, rest} ->
        # Go one level deeper: everything up to the matching Close token
        # belongs to this tag.
        case do_parse(rest, [], true) do
          {children, child_rest} ->
            # The tag was closed properly; carry on with the remaining input
            # on the current level.
            node = %{get_mfm_node(token) | content: children}
            do_parse(child_rest, [node | reversed_tree], is_open)

          # Edge case: an unclosed tag makes the nested parse hit :eof, so it
          # returns the bare list of nodes instead of a tuple. The opening
          # token and its would-be children are then kept as plain text.
          children ->
            text = %Node.Text{content: token.content <> nodes_to_mfm(children)}
            do_parse("", [text | reversed_tree], is_open)
        end

      # A Close token either closes the Open token we are currently inside of,
      # in which case this level is finished and we hand back what we have,
      # or it is a stray Close token, which is kept as text.
      {%Token.MFM.Close{content: content}, rest} ->
        if is_open do
          {Enum.reverse(reversed_tree), rest}
        else
          do_parse(rest, [%Node.Text{content: content} | reversed_tree], false)
        end

      :eof ->
        Enum.reverse(reversed_tree)
    end
  end

  # Turns an Open token into an MFM node whose content is still empty.
  #
  # The token text is trimmed and stripped of "$[". Whatever precedes the first
  # "." is the function name; the remainder, when present, is the attribute
  # string handed to build_attributes_list/1.
  defp get_mfm_node(token) do
    tag =
      token.content
      |> String.trim()
      |> String.replace("$[", "")

    [name | attribute_part] = String.split(tag, ".", parts: 2)

    attributes =
      case attribute_part do
        [] -> []
        [attributes_string] -> build_attributes_list(attributes_string)
      end

    %Node.MFM{name: name, attributes: attributes, content: []}
  end

  # Converts a comma-separated attribute string such as "x,speed=0.5s" into a
  # list of tuples, keeping the original order.
  #
  # Each entry is split on its first "=" only: an entry without "=" becomes a
  # one-element tuple `{name}`, an entry with one becomes `{name, value}` (any
  # further "=" stays part of the value).
  defp build_attributes_list(attributes_string) do
    attributes_string
    |> String.split(",")
    |> Enum.map(fn attribute_string ->
      case String.split(attribute_string, "=", parts: 2) do
        [name] -> {name}
        [name, value] -> {name, value}
      end
    end)
  end

  # Serializes a list of nodes back to MFM text by concatenating the text of
  # each node in order, without any separator.
  defp nodes_to_mfm(nodes) do
    for node <- nodes, into: "" do
      node_to_mfm(node)
    end
  end

  # Serializes a single node back to MFM text.
  # A text node is simply its content.
  defp node_to_mfm(%Node.Text{} = text_node), do: text_node.content

  # An MFM node is rebuilt as "$[name.attributes content]"; the ".attributes"
  # part is left out when the node has no attributes.
  defp node_to_mfm(%Node.MFM{} = mfm_node) do
    opening = "$[" <> mfm_node.name <> attributes_to_mfm(mfm_node.attributes)
    body = nodes_to_mfm(mfm_node.content)

    opening <> " " <> body <> "]"
  end

  # Serializes an attribute list to the ".a,b=c" suffix that follows a function
  # name. An empty list yields an empty string, so no "." is emitted.
  defp attributes_to_mfm(attributes) do
    case attributes do
      [] ->
        ""

      _ ->
        "." <> Enum.map_join(attributes, ",", &attribute_to_mfm/1)
    end
  end

  # Serializes one attribute tuple: `{name}` is written as the bare name,
  # `{name, value}` as "name=value".
  defp attribute_to_mfm({flag}) do
    flag
  end

  defp attribute_to_mfm({key, val}) do
    <<key::binary, "=", val::binary>>
  end
end