lib/hologram/template/tokenizer.ex

defmodule Hologram.Template.Tokenizer do
  @moduledoc false

  @type token :: {:string | :symbol | :whitespace, String.t()}

  @doc """
  Splits template markup into a list of tokens.

  ## Examples

      iex> tokenize("<span>aaa bbb</span>")
      [
        symbol: "<",
        string: "span",
        symbol: ">",
        string: "aaa",
        whitespace: " ",
        string: "bbb",
        symbol: "</",
        string: "span",
        symbol: ">"
      ]
  """
  @spec tokenize(String.t()) :: list(token)
  def tokenize(markup)

  def tokenize(""), do: []

  def tokenize(" " <> rest) do
    [{:whitespace, " "} | tokenize(rest)]
  end

  def tokenize("\n" <> rest) do
    [{:whitespace, "\n"} | tokenize(rest)]
  end

  def tokenize("\r" <> rest) do
    [{:whitespace, "\r"} | tokenize(rest)]
  end

  def tokenize("\t" <> rest) do
    [{:whitespace, "\t"} | tokenize(rest)]
  end

  def tokenize("\\#" <> rest) do
    [{:symbol, "\\#"} | tokenize(rest)]
  end

  def tokenize("\#{/for}" <> rest) do
    [{:symbol, "#"} | [{:symbol, "{/for}"} | tokenize(rest)]]
  end

  def tokenize("\#{/if}" <> rest) do
    [{:symbol, "#"} | [{:symbol, "{/if}"} | tokenize(rest)]]
  end

  def tokenize("\#{/raw}" <> rest) do
    [{:symbol, "#"} | [{:symbol, "{/raw}"} | tokenize(rest)]]
  end

  def tokenize("\#{" <> rest) do
    [{:symbol, "\#{"} | tokenize(rest)]
  end

  def tokenize("#" <> rest) do
    [{:symbol, "#"} | tokenize(rest)]
  end

  def tokenize("\\$" <> rest) do
    [{:symbol, "\\$"} | tokenize(rest)]
  end

  def tokenize("${/for}" <> rest) do
    [{:symbol, "$"} | [{:symbol, "{/for}"} | tokenize(rest)]]
  end

  def tokenize("${/if}" <> rest) do
    [{:symbol, "$"} | [{:symbol, "{/if}"} | tokenize(rest)]]
  end

  def tokenize("${/raw}" <> rest) do
    [{:symbol, "$"} | [{:symbol, "{/raw}"} | tokenize(rest)]]
  end

  def tokenize("${" <> rest) do
    [{:symbol, "${"} | tokenize(rest)]
  end

  def tokenize("$" <> rest) do
    [{:symbol, "$"} | tokenize(rest)]
  end

  def tokenize("=" <> rest) do
    [{:symbol, "="} | tokenize(rest)]
  end

  def tokenize("\\\"" <> rest) do
    [{:symbol, "\\\""} | tokenize(rest)]
  end

  def tokenize("\"" <> rest) do
    [{:symbol, "\""} | tokenize(rest)]
  end

  def tokenize("\\'" <> rest) do
    [{:symbol, "\\'"} | tokenize(rest)]
  end

  def tokenize("'" <> rest) do
    [{:symbol, "'"} | tokenize(rest)]
  end

  def tokenize("\\`" <> rest) do
    [{:symbol, "\\`"} | tokenize(rest)]
  end

  def tokenize("`" <> rest) do
    [{:symbol, "`"} | tokenize(rest)]
  end

  def tokenize("\\{" <> rest) do
    [{:symbol, "\\{"} | tokenize(rest)]
  end

  def tokenize("{%else}" <> rest) do
    [{:symbol, "{%else}"} | tokenize(rest)]
  end

  def tokenize("{%for" <> rest) do
    [{:symbol, "{%for"} | tokenize(rest)]
  end

  def tokenize("{/for}" <> rest) do
    [{:symbol, "{/for}"} | tokenize(rest)]
  end

  def tokenize("{%if" <> rest) do
    [{:symbol, "{%if"} | tokenize(rest)]
  end

  def tokenize("{/if}" <> rest) do
    [{:symbol, "{/if}"} | tokenize(rest)]
  end

  def tokenize("{%raw}" <> rest) do
    [{:symbol, "{%raw}"} | tokenize(rest)]
  end

  def tokenize("{/raw}" <> rest) do
    [{:symbol, "{/raw}"} | tokenize(rest)]
  end

  def tokenize("{" <> rest) do
    [{:symbol, "{"} | tokenize(rest)]
  end

  def tokenize("%" <> rest) do
    [{:symbol, "%"} | tokenize(rest)]
  end

  def tokenize("\\}" <> rest) do
    [{:symbol, "\\}"} | tokenize(rest)]
  end

  def tokenize("}" <> rest) do
    [{:symbol, "}"} | tokenize(rest)]
  end

  def tokenize("</" <> rest) do
    [{:symbol, "</"} | tokenize(rest)]
  end

  def tokenize("<!--" <> rest) do
    [{:symbol, "<!--"} | tokenize(rest)]
  end

  def tokenize("<!" <> rest) do
    [{:symbol, "<!"} | tokenize(rest)]
  end

  def tokenize("<" <> rest) do
    [{:symbol, "<"} | tokenize(rest)]
  end

  def tokenize("/>" <> rest) do
    [{:symbol, "/>"} | tokenize(rest)]
  end

  def tokenize("-->" <> rest) do
    [{:symbol, "-->"} | tokenize(rest)]
  end

  def tokenize("-" <> rest) do
    [{:symbol, "-"} | tokenize(rest)]
  end

  def tokenize(">" <> rest) do
    [{:symbol, ">"} | tokenize(rest)]
  end

  def tokenize("/" <> rest) do
    [{:symbol, "/"} | tokenize(rest)]
  end

  def tokenize("\\" <> rest) do
    [{:symbol, "\\"} | tokenize(rest)]
  end

  def tokenize(rest) do
    excluded_chars = Regex.escape(" \n\r\t#$%-=\"'`{}<>/\\")
    regex = ~r/\A([^#{excluded_chars}]+)(.*)\z/s
    [_full_capture, value, rest] = Regex.run(regex, rest)
    [{:string, value} | tokenize(rest)]
  end
end