lib/html5ever.ex

defmodule Html5ever do
  @moduledoc """
  `html5ever` is an HTML parser written in Rust.

  This module exposes it to Elixir through the functions in `Html5ever.Native`.
  Both `parse/1` and `flat_parse/1` return `{:ok, result}` on success and
  `{:error, reason}` when the native layer reports a failure.
  """

  alias Html5ever.Native

  # Inputs whose size in bytes exceeds this value go through the `*_async`
  # native entry points (result delivered as a message); everything else goes
  # through the `*_sync` entry points (result returned directly).
  @async_threshold_bytes 500

  @doc """
  Parses an HTML document from a string.

  This returns a list of tuples representing the HTML tree.

  Returns `{:ok, tree}` on success or `{:error, reason}` if the native parser
  reports an error. For larger inputs the result is delivered as a message to
  the calling process, and this function blocks until that message arrives.

  ## Example

      iex> Html5ever.parse("<!doctype html><html><body><h1>Hello world</h1></body></html>")
      {:ok,
       [
         {:doctype, "html", "", ""},
         {"html", [], [{"head", [], []}, {"body", [], [{"h1", [], ["Hello world"]}]}]}
       ]}

  """
  @spec parse(String.t()) :: {:ok, list()} | {:error, term()}
  def parse(html) when byte_size(html) > @async_threshold_bytes do
    # Assert the hand-off succeeded before waiting, so a failed call cannot
    # leave us blocked on a message that will never be sent.
    :ok = Native.parse_async(html)
    await_native_result()
  end

  def parse(html) do
    html
    |> Native.parse_sync()
    |> unwrap_native_result()
  end

  @doc """
  Parses an HTML document from a string and returns a map.

  The map contains the document structure: a `:nodes` map keyed by node id and
  a `:root` entry holding the id of the root node.

  Returns `{:ok, document}` on success or `{:error, reason}` if the native
  parser reports an error. For larger inputs the result is delivered as a
  message to the calling process, and this function blocks until that message
  arrives.

  ## Example

      iex> Html5ever.flat_parse("<!doctype html><html><body><h1>Hello world</h1></body></html>")
      {:ok,
       %{
         nodes: %{
           0 => %{id: 0, parent: nil, type: :document},
           1 => %{id: 1, parent: 0, type: :doctype},
           2 => %{
             attrs: [],
             children: [3, 4],
             id: 2,
             name: "html",
             parent: 0,
             type: :element
           },
           3 => %{
             attrs: [],
             children: [],
             id: 3,
             name: "head",
             parent: 2,
             type: :element
           },
           4 => %{
             attrs: [],
             children: [5],
             id: 4,
             name: "body",
             parent: 2,
             type: :element
           },
           5 => %{
             attrs: [],
             children: [6],
             id: 5,
             name: "h1",
             parent: 4,
             type: :element
           },
           6 => %{contents: "Hello world", id: 6, parent: 5, type: :text}
         },
         root: 0
       }}

  """
  @spec flat_parse(String.t()) :: {:ok, map()} | {:error, term()}
  def flat_parse(html) when byte_size(html) > @async_threshold_bytes do
    # Same hand-off assertion as in `parse/1`.
    :ok = Native.flat_parse_async(html)
    await_native_result()
  end

  def flat_parse(html) do
    html
    |> Native.flat_parse_sync()
    |> unwrap_native_result()
  end

  # Converts the tagged tuple returned by a synchronous native call into the
  # public `{:ok, _}` / `{:error, _}` shape. Any other reply raises a
  # `CaseClauseError`, since it indicates a bug in the native layer.
  defp unwrap_native_result(reply) do
    case reply do
      {:html5ever_nif_result, status, payload} when status in [:ok, :error] ->
        {status, payload}
    end
  end

  # Blocks until a `{:html5ever_nif_result, :ok | :error, payload}` message
  # arrives in the calling process's mailbox and converts it to the public
  # result shape. Messages of any other shape are left in the mailbox.
  #
  # NOTE(review): there is no `after` clause, so this waits forever if the
  # message is never sent. It also cannot tell apart results belonging to
  # different requests; this presumably relies on one outstanding request per
  # calling process. Confirm against `Html5ever.Native`.
  defp await_native_result do
    receive do
      {:html5ever_nif_result, status, payload} when status in [:ok, :error] ->
        {status, payload}
    end
  end
end