defmodule MfmParser.Parser do
alias MfmParser.Token
alias MfmParser.Node
alias MfmParser.Lexer
@moduledoc """
`MfmParser` is a [FEP-c16b](https://codeberg.org/fediverse/fep/src/branch/main/fep/c16b/fep-c16b.md) compatible parser for Misskey's [Markup language For Misskey](https://misskey-hub.net/en/docs/for-users/features/mfm/) MFM functions.
It can parse a string representing text containing MFM functions and return a tree. There is also an encoder that can turn a tree into HTML.
It only parses the MFM specific tags of the form $[name.opts content].
Other parts of MFM (html, Markdown and [KaTeX](https://katex.org/)) are out of scope for this project.
## Examples
iex> MfmParser.Parser.parse("$[twitch.speed=0.5s 🍮]")
[
%MfmParser.Node.MFM{
name: "twitch",
attributes: [{"speed", "0.5s"}],
content: [%MfmParser.Node.Text{content: "🍮"}]
}
]
"""
@doc """
Parses `input` into a tree of `MfmParser.Node.Text` and `MfmParser.Node.MFM` nodes.

Tokens are pulled from `MfmParser.Lexer.next/1` one at a time:

  * a text token becomes a text node;
  * an open token starts a nested parse; when a matching close token is found,
    the nested nodes become the `content` of a new MFM node;
  * an open token that is never closed is turned back into a single text node
    holding the tag's source text followed by its re-serialised children;
  * a close token with no matching open token is kept as a text node.

## Parameters

  * `input` - the text handed to the lexer.
  * `tree` - nodes parsed so far; new nodes are added after them. Defaults to `[]`.
  * `is_open` - truthy while the content of an open tag is being parsed.
    Defaults to `false`.

## Return value

The list of nodes once the lexer reports `:eof`. When `is_open` is truthy and a
close token is reached, `{nodes, rest}` is returned instead, where `rest` is the
input remaining after that close token.
"""
def parse(input, tree \\ [], is_open \\ false) do
  # Nodes are collected newest-first so adding one is an O(1) prepend instead of
  # an O(n) `tree ++ [node]`; do_parse/3 restores the order before returning.
  do_parse(input, Enum.reverse(tree), is_open)
end

# Recursive worker behind parse/3. `acc` holds the nodes of the current level in
# reverse order; every return path reverses it back into document order.
defp do_parse(input, acc, is_open) do
  case Lexer.next(input) do
    {%Token.Text{} = token, rest} ->
      do_parse(rest, [%Node.Text{content: token.content} | acc], is_open)

    {%Token.MFM.Open{} = token, rest} ->
      # Go one level deeper: everything up to the matching close token is content.
      case do_parse(rest, [], true) do
        {children, child_rest} ->
          # The tag was closed; continue with the siblings on the current level.
          mfm_node = %{get_mfm_node(token) | content: children}
          do_parse(child_rest, [mfm_node | acc], is_open)

        children ->
          # The nested parse reached :eof before a close token, so it returned a
          # bare list rather than a tuple. The unclosed tag is kept as plain text.
          text = %Node.Text{content: token.content <> nodes_to_mfm(children)}

          # NOTE(review): presumably Lexer.next/1 answers :eof for "", so this only
          # finalises the accumulated nodes - confirm against the lexer.
          do_parse("", [text | acc], is_open)
      end

    {%Token.MFM.Close{} = token, rest} ->
      if is_open do
        # Properly closes the tag being parsed: hand the content back to the caller.
        {Enum.reverse(acc), rest}
      else
        # Stray close token outside any open tag: keep it as text.
        do_parse(rest, [%Node.Text{content: token.content} | acc], false)
      end

    :eof ->
      Enum.reverse(acc)
  end
end
# Builds an MFM node with empty content from an open token. The token's content
# is trimmed, the "$[" marker is removed, and the remainder is split on the first
# "." into the tag name and its optional attribute section.
defp get_mfm_node(token) do
  tag =
    token.content
    |> String.trim()
    |> String.replace("$[", "")

  {tag_name, tag_attributes} =
    case String.split(tag, ".", parts: 2) do
      [only_name] -> {only_name, []}
      [first, raw_attributes] -> {first, build_attributes_list(raw_attributes)}
    end

  %Node.MFM{name: tag_name, attributes: tag_attributes, content: []}
end
# Turns the attribute section of a tag (e.g. "speed=0.5s,left") into a list of
# tuples, one per comma-separated entry and in the same order: `{name, value}`
# when the entry contains "=", `{name}` otherwise. Only the first "=" separates
# name from value, so the value itself may contain "=".
defp build_attributes_list(attributes_string) do
  attributes_string
  |> String.split(",")
  |> Enum.map(fn attribute_string ->
    case String.split(attribute_string, "=", parts: 2) do
      [name] -> {name}
      [name, value] -> {name, value}
    end
  end)
end
# Serialises a list of nodes back to MFM source text by rendering each node and
# concatenating the results without a separator.
defp nodes_to_mfm(nodes), do: Enum.map_join(nodes, "", &node_to_mfm/1)
# Renders a single node as MFM source text. A text node is just its content.
defp node_to_mfm(%Node.Text{} = text), do: text.content

# An MFM node is rendered as "$[name.attributes content]", with its children
# serialised recursively.
defp node_to_mfm(%Node.MFM{} = mfm) do
  opening = "$[" <> mfm.name <> attributes_to_mfm(mfm.attributes)
  body = nodes_to_mfm(mfm.content)

  opening <> " " <> body <> "]"
end
# Renders an attribute list as the ".a=1,b" suffix of a tag name. An empty list
# yields an empty string, so no stray "." is emitted.
defp attributes_to_mfm(attributes) do
  case attributes do
    [] -> ""
    _ -> "." <> Enum.map_join(attributes, ",", &attribute_to_mfm/1)
  end
end
# Renders one attribute tuple: `{key, value}` becomes "key=value", while a
# value-less `{flag}` becomes just "flag".
defp attribute_to_mfm({key, value}), do: key <> "=" <> value
defp attribute_to_mfm({flag}), do: flag
end