lib/pages/html.ex

defmodule Pages.Html do
  # @related [test](/test/pages/html_test.exs)

  @moduledoc """
  Some simple HTML query functions, originally intended for unit tests.
  Delegates the hard work to [Floki](https://hex.pm/packages/floki).

  The main query functions are:

  * `all/2`: returns all elements matching the selector
  * `find/2`: returns the first element that matches the selector
  * `find!/2`: like `find/2` but raises unless exactly one element matches the selector

  Selectors can be a valid CSS selector string, or can be a keyword list. See `Pages.Css` for keyword list syntax.

  The `attr/2` function can be used to extract attr values, and the `text/1` function can be used to extract
  the text of an HTML fragment.

  ## Examples

  Get the value of a selected option:

  ```elixir
  iex> html = ~s|<select> <option value="a" selected>apples</option> <option value="b">bananas</option> </select>|
  iex> Pages.Html.find(html, "select option[selected]") |> Pages.Html.attr("value")
  "a"
  ```

  Get the text of a selected option, raising unless exactly one option matches:

  ```elixir
  iex> html = ~s|<select> <option value="a" selected>apples</option> <option value="b">bananas</option> </select>|
  iex> Pages.Html.find!(html, "select option[selected]") |> Pages.Html.text()
  "apples"
  ```

  Get the text of all the options:

  ```elixir
  iex> html = ~s|<select> <option value="a" selected>apples</option> <option value="b">bananas</option> </select>|
  iex> Pages.Html.all(html, "select option") |> Enum.map(&Pages.Html.text/1)
  ["apples", "bananas"]
  ```

  Use a keyword list as the selector:

  ```elixir
  iex> html = ~s|<div> <a href="/logout" test-role="logout-link">logout</a> </div>|
  iex> Pages.Html.find!(html, test_role: "logout-link") |> Pages.Html.attr("href")
  "/logout"
  ```
  """

  # This module's name as a string; interpolated into the hints of error messages below.
  @module_name inspect(__MODULE__)

  @typedoc "An attribute name, given either as an atom or as a string."
  @type attr :: atom() | binary()

  @typedoc "Anything `parse/1` accepts: an HTML string, a page driver, a Floki HTML tree, or a Floki HTML node."
  @type html :: binary() | Pages.Driver.t() | Floki.html_tree() | Floki.html_node()

  @typedoc "A CSS selector string, or a keyword list or atom that is converted by `Pages.Css`."
  @type selector :: binary() | keyword() | atom()

  # # #

  @doc """
  Returns every element in `html` that matches `selector`, as a
  [Floki HTML tree](https://hexdocs.pm/floki/Floki.html#t:html_tree/0) (that is, a list of
  [Floki HTML nodes](https://hexdocs.pm/floki/Floki.html#t:html_node/0)).

  `html` is passed through `parse/1` and `selector` is converted with `Pages.Css.selector/1`
  before the query is handed to Floki.

  ```elixir
  iex> html = ~s|<select> <option value="a" selected>apples</option> <option value="b">bananas</option> </select>|
  iex> Pages.Html.all(html, "option")
  [
    {"option", [{"value", "a"}, {"selected", "selected"}], ["apples"]},
    {"option", [{"value", "b"}], ["bananas"]}
  ]
  ```
  """
  @spec all(html(), selector()) :: Floki.html_tree()
  def all(html, selector) do
    tree = parse(html)
    css = Pages.Css.selector(selector)
    Floki.find(tree, css)
  end

  @doc """
  Finds the first element in `html` that matches `selector`. Returns a
  [Floki HTML node](https://hexdocs.pm/floki/Floki.html#t:html_node/0), or `nil` when no element matches.

  Use `find!/2` instead when exactly one match is expected and anything else should raise.

  ```elixir
  iex> html = ~s|<select> <option value="a" selected>apples</option> <option value="b">bananas</option> </select>|
  iex> Pages.Html.find(html, "select option[selected]")
  {"option", [{"value", "a"}, {"selected", "selected"}], ["apples"]}
  iex> Pages.Html.find(html, "textarea")
  nil
  ```
  """
  @spec find(html(), selector()) :: Floki.html_node() | nil
  def find(html, selector), do: html |> all(selector) |> List.first()

  @doc """
  Like `find/2`, but raises unless `selector` matches exactly one element in `html`.
  It raises both when nothing matches and when more than one element matches.
  """
  @spec find!(html(), selector()) :: Floki.html_node()
  def find!(html, selector) do
    matches = all(html, selector)
    first!(matches)
  end

  # # #

  @doc """
  Returns the value of `attr` from the outermost element of `html`.

  Returns `nil` when `html` is `nil` or when the element does not have the attribute.
  Raises unless `html` contains exactly one top-level node; to read an attribute from
  several nodes, map over them and call this function on each one.

  ```elixir
  iex> html = ~s|<div> <a href="/logout" test-role="logout-link">logout</a> </div>|
  iex> Pages.Html.find!(html, test_role: "logout-link") |> Pages.Html.attr("href")
  "/logout"
  iex> Pages.Html.attr(nil, "href")
  nil
  ```
  """
  @spec attr(html() | nil, attr()) :: binary() | nil
  # Lets the result of `find/2` be piped in directly even when nothing was found.
  def attr(nil, _attr), do: nil

  def attr(html, attr) do
    html
    |> parse()
    |> first!("Consider using Enum.map(html, &#{@module_name}.attr(&1, #{inspect(attr)}))")
    |> Floki.attribute(Moar.Atom.to_string(attr))
    # `Floki.attribute/2` returns a list of values; an empty list becomes `nil` here.
    |> List.first()
  end

  @doc """
  Returns the text content of `html`, with text nodes separated by a single space and
  leading/trailing whitespace trimmed.

  Raises unless `html` contains exactly one top-level node.

  ```elixir
  iex> html = ~s|<select> <option value="a" selected>apples</option> <option value="b">bananas</option> </select>|
  iex> Pages.Html.find!(html, "select option[selected]") |> Pages.Html.text()
  "apples"
  ```
  """
  @spec text(html()) :: binary()
  def text(html) do
    node =
      html
      |> parse()
      |> first!("Consider using Enum.map(html, &#{@module_name}.text/1)")

    String.trim(Floki.text(node, sep: " "))
  end

  # # #

  @doc """
  Returns a map of the fields found inside the form matched by `selector` in `html`.

  Only `input[type=text]` and `textarea` elements are collected. Keys are the fields' names
  converted to atoms; a bracketed name such as `user[email]` is reduced to the part inside the
  last pair of brackets (`email`). When a text input and a textarea end up with the same key,
  their values are combined into a list.

  ```elixir
  iex> html = ~s|<form> <input type="text" name="color" value="green"> <textarea name="desc">A tree</textarea> </form>|
  iex> Pages.Html.form_fields(html, "form")
  %{color: "green", desc: "A tree"}
  ```
  """
  @spec form_fields(html(), selector()) :: map()
  def form_fields(html, selector) do
    with_inputs = input_values(%{}, html, selector)
    with_textareas = textarea_values(with_inputs, html, selector)
    Moar.Map.atomize_keys(with_textareas)
  end

  @doc """
  Writes a prettified rendering of `html` to standard output, preceded by `label`, and then
  returns `html` unchanged so the call can be dropped into the middle of a pipeline.
  """
  @spec inspect_html(html(), binary()) :: html()
  def inspect_html(html, label \\ "INSPECTED HTML") do
    banner = """
    === #{label}:

    #{pretty(html)}
    """

    IO.puts(banner)
    html
  end

  @doc """
  Returns one map per `<meta>` tag found in `html`, each mapping attribute names to attribute values.

  ```elixir
  iex> html = ~s|<head> <meta charset="utf-8"/> <meta http-equiv="X-UA-Compatible" content="IE=edge"/> </head>|
  iex> Pages.Html.meta_tags(html)
  [%{"charset" => "utf-8"}, %{"content" => "IE=edge", "http-equiv" => "X-UA-Compatible"}]
  ```
  """
  @spec meta_tags(html()) :: [map()]
  def meta_tags(html) do
    tree = parse(html)
    extract_meta_tags(tree)
  end

  @doc """
  Parses `html` and renders it back to a string, increasing the likelihood that two equivalent
  HTML strings compare as equal.

  ```elixir
  iex> a = ~s|<p id="color">green</p>|
  iex> b = ~s|<p  id = "color" >green</p>|
  iex> a == b
  false
  iex> Pages.Html.normalize(a) == Pages.Html.normalize(b)
  true
  ```
  """
  @spec normalize(html()) :: binary()
  def normalize(html) do
    Floki.raw_html(parse(html))
  end

  @doc """
  Converts `html` into a [Floki HTML tree](https://hexdocs.pm/floki/Floki.html#t:html_tree/0).

  * an HTML string is parsed as a fragment with `Floki.parse_fragment!/1`
  * a list (an already-parsed Floki HTML tree) is returned unchanged
  * a single three-element Floki HTML node is wrapped in a list
  * a struct must implement `String.Chars`; it is converted to a string, which is then parsed
  """
  @spec parse(html()) :: Floki.html_tree()
  def parse(html_string) when is_binary(html_string), do: Floki.parse_fragment!(html_string)
  def parse(html_tree) when is_list(html_tree), do: html_tree
  def parse({_tag, _attrs, _children} = html_node), do: [html_node]

  def parse(%_{} = struct) do
    struct
    |> Moar.Protocol.implements!(String.Chars)
    |> to_string()
    |> parse()
  end

  @doc """
  Parses a complete HTML document (as opposed to a fragment; see `parse/1`) with
  `Floki.parse_document!/1`, returning a
  [Floki HTML tree](https://hexdocs.pm/floki/Floki.html#t:html_tree/0).
  """
  @spec parse_doc(binary()) :: Floki.html_tree()
  def parse_doc(html_string) do
    Floki.parse_document!(html_string)
  end

  @doc """
  Renders `html` as an indented, human-readable string by parsing it and passing the result to
  `Floki.raw_html/2` with the `encode: false` and `pretty: true` options.
  """
  @spec pretty(html()) :: binary()
  def pretty(html) do
    tree = parse(html)
    Floki.raw_html(tree, encode: false, pretty: true)
  end

  # # #

  # Builds a map with one entry per element in `list`.
  #
  # * The key is the element's `key_attr` attribute, passed through `key_transformer`.
  # * The value is the element's `value_attr` attribute, or the element's text when
  #   `value_attr` is `:text`.
  #
  # Elements that lack `key_attr` are skipped: they would otherwise hand `nil` to
  # `key_transformer` (which crashes the regex-based transformer used for form fields), and a
  # form control without a name is not part of a form submission anyway.
  # When several elements produce the same key, the last one wins.
  defp attrs_to_map(list, key_attr, value_attr, key_transformer) do
    for element <- list,
        # A `nil` result is falsy, so this line doubles as the "has a key attribute" filter.
        raw_key = element |> Floki.attribute(key_attr) |> List.first(),
        into: %{} do
      key = key_transformer.(raw_key)

      value =
        case value_attr do
          :text -> Pages.HtmlTransformer.to_text(element)
          _ -> element |> Floki.attribute(value_attr) |> List.first()
        end

      {key, value}
    end
  end

  # Finds every `<meta>` element in `html` and turns each one's attribute list into a map.
  @spec extract_meta_tags(html()) :: [map()]
  defp extract_meta_tags(html) do
    html
    |> all("meta")
    |> Enum.map(fn {"meta", attributes, _children} -> Map.new(attributes) end)
  end

  # # #

  # Collects name => value pairs from the text inputs under `selector` and merges them into `acc`.
  # When a name is already present in `acc`, both values are kept in a flat list (new value first).
  defp input_values(acc, html, selector) do
    text_inputs = all(html, Pages.Css.selector(selector) <> " input[type=text]")
    found = attrs_to_map(text_inputs, "name", "value", &unwrap_input_name/1)

    Map.merge(found, acc, fn _name, new_value, existing_value ->
      List.flatten([new_value, existing_value])
    end)
  end

  # Collects name => text pairs from the textareas under `selector` and merges them into `acc`.
  # When a name is already present in `acc`, both values are kept in a flat list (new value first).
  defp textarea_values(acc, html, selector) do
    textareas = all(html, Pages.Css.selector(selector) <> " textarea")
    found = attrs_to_map(textareas, "name", :text, &unwrap_input_name/1)

    Map.merge(found, acc, fn _name, new_value, existing_value ->
      List.flatten([new_value, existing_value])
    end)
  end

  # Reduces a bracketed field name to the text inside its last pair of brackets
  # (e.g. "user[email]" becomes "email"); names without brackets are returned unchanged.
  defp unwrap_input_name(input_name) do
    case Regex.run(~r|.*\[(.*)\]|, input_name, capture: :all_but_first) do
      [inner] -> inner
      _no_brackets -> input_name
    end
  end

  # # #

  # Returns the only node of the list `html`, raising when the list is empty or holds more than
  # one node. `hint` is appended to the "more than one" error message to suggest a fix.
  defp first!(html, hint \\ nil) do
    case html do
      [] ->
        raise("Expected a single HTML node but found none")

      [only] ->
        only

      _several ->
        raise """
        Expected a single HTML node but got:

        #{pretty(html)}
        #{hint}
        """
    end
  end
end