lib/phoenix_live_view/html_formatter.ex

defmodule Phoenix.LiveView.HTMLFormatter do
  @moduledoc """
  Format HEEx templates from `.heex` files or `~H` sigils.

  This is a `mix format` [plugin](https://hexdocs.pm/mix/main/Mix.Tasks.Format.html#module-plugins).

  > Note: The HEEx HTML Formatter requires Elixir v1.13+.

  ## Setup

  Add it as plugin to your `.formatter.exs` file and make sure to put the`heex` extension in
  the `inputs` option.

  ```elixir
  [
    plugins: [Phoenix.LiveView.HTMLFormatter],
    inputs: ["*.{heex,ex,exs}", "priv/*/seeds.exs", "{config,lib,test}/**/*.{heex,ex,exs}"],
    # ...
  ]
  ```

  ## Options

  * `:line_length` - The Elixir formatter defaults to a maximum line length
    of 98 characters, which can be overwritten with the `:line_length` option
    in your `.formatter.exs` file.

  * `:heex_line_length` - change the line length only for the HEEx formatter.

    ```elixir
    [
      # ...omitted
      heex_line_length: 300
    ]
    ```

  ## Formatting

  This formatter tries to be as consistent as possible with the Elixir formatter.

  Given HTML like this:

  ```eex
    <section><h1>   <b><%= @user.name %></b></h1></section>
  ```

  It will be formatted as:

  ```eex
  <section>
    <h1><b><%= @user.name %></b></h1>
  </section>
  ```

  A block element will go to the next line, while inline elements will be kept in the current line
  as long as they fit within the configured line length.

  The following links list all block and inline elements.

  * https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#elements
  * https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements#list_of_inline_elements

  It will also keep inline elements in their own lines if you intentionally write them this way:

  ```eex
  <section>
    <h1>
      <b><%= @user.name %></b>
    </h1>
  </section>
  ```

  This formatter will place all attributes on their own lines when they do not all fit in the
  current line. Therefore this:

  ```eex
  <section id="user-section-id" class="sm:focus:block flex w-full p-3" phx-click="send-event">
    <p>Hi</p>
  </section>
  ```

  Will be formatted to:

  ```eex
  <section
    id="user-section-id"
    class="sm:focus:block flex w-full p-3"
    phx-click="send-event"
  >
    <p>Hi</p>
  </section>
  ```

  This formatter **does not** format Elixir expressions with `do...end`.
  The content within it will be formatted accordingly though. Therefore, the given
  input:

  ```eex
  <%= live_redirect(
         to: "/my/path",
    class: "my class"
  ) do %>
          My Link
  <% end %>
  ```

  Will be formatted to

  ```eex
  <%= live_redirect(
         to: "/my/path",
    class: "my class"
  ) do %>
    My Link
  <% end %>
  ```

  Note that only the text `My Link` has been formatted.

  ### Intentional new lines

  The formatter will keep intentional new lines. However, the formatter will
  always keep a maximum of one line break in case you have multiple ones:

  ```eex
  <p>
    text


    text
  </p>
  ```

  Will be formatted to:

  ```eex
  <p>
    text

    text
  </p>
  ```
  """

  alias Phoenix.LiveView.HTMLAlgebra
  alias Phoenix.LiveView.HTMLTokenizer

  # Default line length to be used in case nothing is specified in the `.formatter.exs` options.
  @default_line_length 98

  if Version.match?(System.version(), ">= 1.13.0") do
    @behaviour Mix.Tasks.Format
  end

  # TODO: Add it back after versions before Elixir 1.13 are no longer supported.
  # @impl Mix.Tasks.Format
  @doc false
  def features(_opts) do
    [sigils: [:H], extensions: [".heex"]]
  end

  # TODO: Add it back after versions before Elixir 1.13 are no longer supported.
  # @impl Mix.Tasks.Format
  @doc false
  def format(contents, opts) do
    line_length = opts[:heex_line_length] || opts[:line_length] || @default_line_length

    formatted =
      contents
      |> tokenize()
      |> to_tree([], [])
      |> HTMLAlgebra.build(opts)
      |> Inspect.Algebra.format(line_length)

    # If the opening delimiter is a single character, such as ~H"...",
    # do not add trailing newline.
    newline = if match?(<<_>>, opts[:opening_delimiter]), do: [], else: ?\n

    # TODO: Remove IO.iodata_to_binary/1 call on Elixir v1.14+
    IO.iodata_to_binary([formatted, newline])
  end

  # Tokenize contents using EEx.tokenize and Phoenix.Live.HTMLTokenizer respectively.
  #
  # The following content:
  #
  # "<section>\n  <p><%= user.name ></p>\n  <%= if true do %> <p>this</p><% else %><p>that</p><% end %>\n</section>\n"
  #
  # Will be tokenized as:
  #
  # [
  #   {:tag_open, "section", [], %{column: 1, line: 1}},
  #   {:text, "\n  ", %{column_end: 3, line_end: 2}},
  #   {:tag_open, "p", [], %{column: 3, line: 2}},
  #   {:eex_tag_render, "<%= user.name ></p>\n  <%= if true do %>", %{block?: true, column: 6, line: 1}},
  #   {:text, " ", %{column_end: 2, line_end: 1}},
  #   {:tag_open, "p", [], %{column: 2, line: 1}},
  #   {:text, "this", %{column_end: 12, line_end: 1}},
  #   {:tag_close, "p", %{column: 12, line: 1}},
  #   {:eex_tag, "<% else %>", %{block?: false, column: 35, line: 2}},
  #   {:tag_open, "p", [], %{column: 1, line: 1}},
  #   {:text, "that", %{column_end: 14, line_end: 1}},
  #   {:tag_close, "p", %{column: 14, line: 1}},
  #   {:eex_tag, "<% end %>", %{block?: false, column: 62, line: 2}},
  #   {:text, "\n", %{column_end: 1, line_end: 2}},
  #   {:tag_close, "section", %{column: 1, line: 2}}
  # ]
  #
  # EEx.tokenize/2 was introduced in Elixir 1.14.
  # TODO: Remove this when we no longer support earlier versions.
  @eex_expr [:start_expr, :expr, :end_expr, :middle_expr]
  if Code.ensure_loaded?(EEx) && function_exported?(EEx, :tokenize, 2) do
    defp tokenize(contents) do
      {:ok, eex_nodes} = EEx.tokenize(contents)
      {tokens, cont} = Enum.reduce(eex_nodes, {[], :text}, &do_tokenize/2)
      HTMLTokenizer.finalize(tokens, "nofile", cont)
    end

    defp do_tokenize({:text, text, _meta}, {tokens, cont}) do
      text
      |> List.to_string()
      |> HTMLTokenizer.tokenize("nofile", 0, [], tokens, cont)
    end

    defp do_tokenize({:comment, text, meta}, {tokens, cont}) do
      {[{:eex_comment, List.to_string(text), meta} | tokens], cont}
    end

    defp do_tokenize({type, opt, expr, %{column: column, line: line}}, {tokens, cont})
         when type in @eex_expr do
      meta = %{opt: opt, line: line, column: column}
      {[{:eex, type, expr |> List.to_string() |> String.trim(), meta} | tokens], cont}
    end
  else
    defp tokenize(contents) do
      {:ok, eex_nodes} = EEx.Tokenizer.tokenize(contents, 1, 0, %{indentation: 0, trim: false})
      {tokens, cont} = Enum.reduce(eex_nodes, {[], :text}, &do_tokenize/2)
      HTMLTokenizer.finalize(tokens, "nofile", cont)
    end

    defp do_tokenize({:text, _line, _column, text}, {tokens, cont}) do
      text
      |> List.to_string()
      |> HTMLTokenizer.tokenize("nofile", 0, [], tokens, cont)
    end

    defp do_tokenize({type, line, column, opt, expr}, {tokens, cont}) when type in @eex_expr do
      meta = %{opt: opt, line: line, column: column}
      {[{:eex, type, expr |> List.to_string() |> String.trim(), meta} | tokens], cont}
    end
  end

  defp do_tokenize(_node, acc) do
    acc
  end

  # Build an HTML Tree according to the tokens from the EEx and HTML tokenizers.
  #
  # This is a recursive algorithm that will build an HTML tree from a flat list of
  # tokens. For instance, given this input:
  #
  # [
  #   {:tag_open, "div", [], %{column: 1, line: 1}},
  #   {:tag_open, "h1", [], %{column: 6, line: 1}},
  #   {:text, "Hello", %{column_end: 15, line_end: 1}},
  #   {:tag_close, "h1", %{column: 15, line: 1}},
  #   {:tag_close, "div", %{column: 20, line: 1}},
  #   {:tag_open, "div", [], %{column: 1, line: 2}},
  #   {:tag_open, "h1", [], %{column: 6, line: 2}},
  #   {:text, "World", %{column_end: 15, line_end: 2}},
  #   {:tag_close, "h1", %{column: 15, line: 2}},
  #   {:tag_close, "div", %{column: 20, line: 2}}
  # ]
  #
  # The output will be:
  #
  # [
  #   {:tag_block, "div", [], [{:tag_block, "h1", [], [text: "Hello"]}]},
  #   {:tag_block, "div", [], [{:tag_block, "h1", [], [text: "World"]}]}
  # ]
  #
  # Note that a `tag_block` has been created so that its fourth argument is a list of
  # its nested content.
  #
  # ### How does this algorithm work?
  #
  # As this is a recursive algorithm, it starts with an empty buffer and an empty
  # stack. The buffer will be accumulated until it finds a `{:tag_open, ..., ...}`.
  #
  # As soon as the `tag_open` arrives, a new buffer will be started and we move
  # the previous buffer to the stack along with the `tag_open`:
  #
  #   ```
  #   defp build([{:tag_open, name, attrs, _meta} | tokens], buffer, stack) do
  #     build(tokens, [], [{name, attrs, buffer} | stack])
  #   end
  #   ```
  #
  # Then, we start to populate the buffer again until a `{:tag_close, ...} arrives:
  #
  #   ```
  #   defp build([{:tag_close, name, _meta} | tokens], buffer, [{name, attrs, upper_buffer} | stack]) do
  #     build(tokens, [{:tag_block, name, attrs, Enum.reverse(buffer)} | upper_buffer], stack)
  #   end
  #   ```
  #
  # In the snippet above, we build the `tag_block` with the accumulated buffer,
  # putting the buffer accumulated before the tag open (upper_buffer) on top.
  #
  # We apply the same logic for `eex` expressions but, instead of `tag_open` and
  # `tag_close`, eex expressions use `start_expr`, `middle_expr` and `end_expr`.
  # The only real difference is that also need to handle `middle_buffer`.
  #
  # So given this eex input:
  #
  # ```elixir
  # [
  #   {:eex, :start_expr, "if true do", %{column: 0, line: 0, opt: '='}},
  #   {:text, "\n  ", %{column_end: 3, line_end: 2}},
  #   {:eex, :expr, "\"Hello\"", %{column: 3, line: 1, opt: '='}},
  #   {:text, "\n", %{column_end: 1, line_end: 2}},
  #   {:eex, :middle_expr, "else", %{column: 1, line: 2, opt: []}},
  #   {:text, "\n  ", %{column_end: 3, line_end: 2}},
  #   {:eex, :expr, "\"World\"", %{column: 3, line: 3, opt: '='}},
  #   {:text, "\n", %{column_end: 1, line_end: 2}},
  #   {:eex, :end_expr, "end", %{column: 1, line: 4, opt: []}}
  # ]
  # ```
  #
  # The output will be:
  #
  # ```elixir
  # [
  #   {:eex_block, "if true do",
  #    [
  #      {[{:eex, "\"Hello\"", %{column: 3, line: 1, opt: '='}}], "else"},
  #      {[{:eex, "\"World\"", %{column: 3, line: 3, opt: '='}}], "end"}
  #    ]}
  # ]
  # ```
  defp to_tree([], buffer, []) do
    Enum.reverse(buffer)
  end

  defp to_tree([{:text, text, %{context: [:comment_start]}} | tokens], buffer, stack) do
    to_tree(tokens, [], [{:comment, text, buffer} | stack])
  end

  defp to_tree([{:text, text, %{context: [:comment_end]}} | tokens], buffer, [
         {:comment, start_text, upper_buffer} | stack
       ]) do
    buffer = Enum.reverse([{:text, String.trim_trailing(text), %{}} | buffer])

    text = {:text, String.trim_leading(start_text), %{}}
    to_tree(tokens, [{:html_comment, [text | buffer]} | upper_buffer], stack)
  end

  defp to_tree(
         [{:text, text, %{context: [:comment_start, :comment_end]}} | tokens],
         buffer,
         stack
       ) do
    to_tree(tokens, [{:comment, text} | buffer], stack)
  end

  defp to_tree([{:text, text, _meta} | tokens], buffer, stack) do
    if line_html_comment?(text) do
      to_tree(tokens, [{:comment, text} | buffer], stack)
    else
      meta = %{newlines: count_newlines_until_text(text, 0)}
      to_tree(tokens, [{:text, text, meta} | buffer], stack)
    end
  end

  defp to_tree([{:eex_comment, text, _meta} | tokens], buffer, stack) do
    to_tree(tokens, [{:eex_comment, text} | buffer], stack)
  end

  defp to_tree([{:tag_open, name, attrs, %{self_close: true}} | tokens], buffer, stack) do
    to_tree(tokens, [{:tag_self_close, name, attrs} | buffer], stack)
  end

  @void_tags ~w(area base br col hr img input link meta param command keygen source)
  defp to_tree([{:tag_open, name, attrs, _meta} | tokens], buffer, stack)
       when name in @void_tags do
    to_tree(tokens, [{:tag_self_close, name, attrs} | buffer], stack)
  end

  defp to_tree([{:tag_open, name, attrs, _meta} | tokens], buffer, stack) do
    to_tree(tokens, [], [{name, attrs, buffer} | stack])
  end

  defp to_tree([{:tag_close, name, _meta} | tokens], buffer, [{name, attrs, upper_buffer} | stack]) do
    tag_block = {:tag_block, name, attrs, Enum.reverse(buffer)}
    to_tree(tokens, [tag_block | upper_buffer], stack)
  end

  # handle eex

  defp to_tree([{:eex, :start_expr, expr, _meta} | tokens], buffer, stack) do
    to_tree(tokens, [], [{:eex_block, expr, buffer} | stack])
  end

  defp to_tree([{:eex, :middle_expr, middle_expr, _meta} | tokens], buffer, [
         {:eex_block, expr, upper_buffer, middle_buffer} | stack
       ]) do
    middle_buffer = [{Enum.reverse(buffer), middle_expr} | middle_buffer]
    to_tree(tokens, [], [{:eex_block, expr, upper_buffer, middle_buffer} | stack])
  end

  defp to_tree([{:eex, :middle_expr, middle_expr, _meta} | tokens], buffer, [
         {:eex_block, expr, upper_buffer} | stack
       ]) do
    middle_buffer = [{Enum.reverse(buffer), middle_expr}]
    to_tree(tokens, [], [{:eex_block, expr, upper_buffer, middle_buffer} | stack])
  end

  defp to_tree([{:eex, :end_expr, end_expr, _meta} | tokens], buffer, [
         {:eex_block, expr, upper_buffer, middle_buffer} | stack
       ]) do
    block = Enum.reverse([{Enum.reverse(buffer), end_expr} | middle_buffer])
    to_tree(tokens, [{:eex_block, expr, block} | upper_buffer], stack)
  end

  defp to_tree([{:eex, :end_expr, end_expr, _meta} | tokens], buffer, [
         {:eex_block, expr, upper_buffer} | stack
       ]) do
    block = [{Enum.reverse(buffer), end_expr}]
    to_tree(tokens, [{:eex_block, expr, block} | upper_buffer], stack)
  end

  defp to_tree([{:eex, _type, expr, meta} | tokens], buffer, stack) do
    to_tree(tokens, [{:eex, expr, meta} | buffer], stack)
  end

  defp count_newlines_until_text(<<char, rest::binary>>, counter) when char in '\s\t\r',
    do: count_newlines_until_text(rest, counter)

  defp count_newlines_until_text(<<?\n, rest::binary>>, counter),
    do: count_newlines_until_text(rest, counter + 1)

  defp count_newlines_until_text(_, counter),
    do: counter

  # We just want to handle as :comment when the whole line is a HTML comment.
  #
  #   <!-- Modal content -->
  #   <%= render_slot(@inner_block) %>
  #
  # Thefore the case above will stay as is. Otherwise it would put them in the
  # same line.
  defp line_html_comment?(text) do
    trimmed_text = String.trim(text)
    String.starts_with?(trimmed_text, "<!--") and String.ends_with?(trimmed_text, "-->")
  end
end