Skip to main content

lib/crawlberg.ex

# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:06831f8166c6d860691af36ee02b72ae3246568eb2e5c67ed5d11da71d02afeb
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
defmodule Crawlberg do
  @moduledoc """
  High-level API for crawlberg.

  Every public function delegates to the matching `Crawlberg.Native` function.
  This module only adds argument encoding (`interact/3`) and lazy stream
  construction (`crawl_stream/2`, `batch_crawl_stream/2`).
  """

  @typedoc """
  A browser action accepted by `interact/3`.

  * `:scrape` or `{:scrape, anything}` - encoded as `%{"type" => "scrape"}`.
  * `{tag, payload}` where `payload` is a map - the payload keys are turned
    into strings and a `"type"` entry derived from `tag` is added.
  * a plain map - sent as-is, without any key conversion.
  """
  @type page_action ::
          :scrape
          | {:scrape, term()}
          | {:click | :type_text | :press | :scroll, map()}
          | {:wait | :screenshot | :execute_js, map()}
          | map()

  # Tuple tag => value of the "type" field in the encoded action.
  @page_action_types %{
    click: "click",
    type_text: "type",
    press: "press",
    scroll: "scroll",
    wait: "wait",
    screenshot: "screenshot",
    execute_js: "executeJs"
  }
  @page_action_tags Map.keys(@page_action_types)

  @doc "Convert markdown links to numbered citations."
  @spec generate_citations(String.t()) :: map()
  def generate_citations(markdown) do
    Crawlberg.Native.generate_citations(markdown)
  end

  @doc """
  Create a new crawl engine with the given configuration.

  `config` is handed to the native layer unchanged. Calling `create_engine/0`
  is the same as passing `nil`.
  """
  @spec create_engine() :: {:ok, reference()} | {:error, atom, String.t()}
  @spec create_engine(String.t() | nil) :: {:ok, reference()} | {:error, atom, String.t()}
  def create_engine(config \\ nil) do
    Crawlberg.Native.create_engine(config)
  end

  @doc "Scrape a single URL, returning extracted page data."
  @spec scrape(reference(), String.t()) :: {:ok, map()} | {:error, atom, String.t()}
  def scrape(engine, url) do
    Crawlberg.Native.scrape_async(engine, url)
  end

  @doc "Crawl a website starting from `url`, following links up to the configured depth."
  @spec crawl(reference(), String.t()) :: {:ok, map()} | {:error, atom, String.t()}
  def crawl(engine, url) do
    Crawlberg.Native.crawl_async(engine, url)
  end

  @doc "Discover all pages on a website by following links and sitemaps."
  @spec map_urls(reference(), String.t()) :: {:ok, map()} | {:error, atom, String.t()}
  def map_urls(engine, url) do
    Crawlberg.Native.map_urls_async(engine, url)
  end

  @doc """
  Execute browser actions on a single page.

  Each element of `actions` is a `t:page_action/0`. The list is encoded to a
  JSON string before being passed to the native layer.

  Raises `ArgumentError` if an action has an unsupported shape, or if a tagged
  action's payload contains a key that is neither an atom nor a string.
  """
  @spec interact(reference(), String.t(), [page_action()]) ::
          {:ok, map()} | {:error, atom, String.t()}
  def interact(engine, url, actions) do
    actions_json =
      actions
      |> Enum.map(&encode_page_action/1)
      |> Jason.encode!()

    Crawlberg.Native.interact_async(engine, url, actions_json)
  end

  @doc "Scrape multiple URLs concurrently."
  @spec batch_scrape(reference(), [String.t()]) :: {:ok, map()} | {:error, atom, String.t()}
  def batch_scrape(engine, urls) do
    Crawlberg.Native.batch_scrape_async(engine, urls)
  end

  @doc "Crawl multiple seed URLs concurrently, each following links to configured depth."
  @spec batch_crawl(reference(), [String.t()]) :: {:ok, map()} | {:error, atom, String.t()}
  def batch_crawl(engine, urls) do
    Crawlberg.Native.batch_crawl_async(engine, urls)
  end

  @doc false
  def crawlenginehandle_crawl_stream_start(client, req) do
    Crawlberg.Native.crawlenginehandle_crawl_stream_start(client, req)
  end

  @doc false
  def crawlenginehandle_crawl_stream_next(handle) do
    Crawlberg.Native.crawlenginehandle_crawl_stream_next(handle)
  end

  @doc """
  Streaming `crawl_stream` — returns an `Enumerable` of decoded chunk maps.

  `req` may be `nil`, an already-encoded binary (passed through untouched), or
  any other term, which is encoded with `Jason.encode!/1`.

  Returns `{:ok, stream}` when the native start call succeeds and
  `{:error, reason}` when it fails. The stream is lazy: a chunk is requested
  from the native layer each time an element is consumed, and the stream ends
  when the native layer answers `{:ok, nil}`.

  Raises `Crawlberg.StreamError` while the stream is being enumerated if
  fetching a chunk returns `{:error, reason}`.
  """
  @spec crawl_stream(term(), term()) :: {:ok, Enumerable.t()} | {:error, term()}
  def crawl_stream(client, req) do
    req_json = encode_stream_request(req)

    client
    |> Crawlberg.Native.crawlenginehandle_crawl_stream_start(req_json)
    |> open_stream(&Crawlberg.Native.crawlenginehandle_crawl_stream_next/1, :crawl_stream)
  end

  @doc false
  def crawlenginehandle_batch_crawl_stream_start(client, req) do
    Crawlberg.Native.crawlenginehandle_batch_crawl_stream_start(client, req)
  end

  @doc false
  def crawlenginehandle_batch_crawl_stream_next(handle) do
    Crawlberg.Native.crawlenginehandle_batch_crawl_stream_next(handle)
  end

  @doc """
  Streaming `batch_crawl_stream` — returns an `Enumerable` of decoded chunk maps.

  `req` may be `nil`, an already-encoded binary (passed through untouched), or
  any other term, which is encoded with `Jason.encode!/1`.

  Returns `{:ok, stream}` when the native start call succeeds and
  `{:error, reason}` when it fails. The stream is lazy: a chunk is requested
  from the native layer each time an element is consumed, and the stream ends
  when the native layer answers `{:ok, nil}`.

  Raises `Crawlberg.StreamError` while the stream is being enumerated if
  fetching a chunk returns `{:error, reason}`.
  """
  @spec batch_crawl_stream(term(), term()) :: {:ok, Enumerable.t()} | {:error, term()}
  def batch_crawl_stream(client, req) do
    req_json = encode_stream_request(req)

    client
    |> Crawlberg.Native.crawlenginehandle_batch_crawl_stream_start(req_json)
    |> open_stream(
      &Crawlberg.Native.crawlenginehandle_batch_crawl_stream_next/1,
      :batch_crawl_stream
    )
  end

  # Normalizes a stream request into what the native start functions receive:
  # nil and binaries pass through, anything else is JSON-encoded.
  defp encode_stream_request(nil), do: nil
  defp encode_stream_request(req) when is_binary(req), do: req
  defp encode_stream_request(req), do: Jason.encode!(req)

  # Turns the result of a native `*_stream_start` call into the public return
  # value, wrapping the handle in a lazy stream on success.
  defp open_stream(start_result, next_chunk, adapter) do
    case start_result do
      {:ok, handle} ->
        {:ok, Stream.unfold(handle, &next_stream_element(&1, next_chunk, adapter))}

      {:error, reason} ->
        {:error, reason}
    end
  end

  # One `Stream.unfold/2` step: asks the native layer for the next chunk.
  # The handle is threaded through unchanged as the accumulator.
  defp next_stream_element(handle, next_chunk, adapter) do
    case next_chunk.(handle) do
      {:ok, nil} ->
        nil

      {:ok, chunk_json} ->
        # NOTE(review): `keys: :atoms` creates atoms from whatever keys appear
        # in the chunk, and atoms are never garbage-collected. Kept because
        # callers rely on atom keys; confirm the native layer only emits a
        # fixed key set.
        {Jason.decode!(chunk_json, keys: :atoms), handle}

      {:error, reason} ->
        raise Crawlberg.StreamError,
          message: "#{adapter} stream failed: #{inspect(reason)}",
          reason: reason,
          adapter: adapter
    end
  end

  # Converts one page action into the map that gets JSON-encoded for the
  # native layer. See `t:page_action/0` for the accepted shapes.
  defp encode_page_action(:scrape), do: %{"type" => "scrape"}

  defp encode_page_action({:scrape, _payload}), do: %{"type" => "scrape"}

  defp encode_page_action({tag, %{} = data}) when tag in @page_action_tags do
    data
    |> Map.new(fn {key, value} -> {encode_action_key(tag, key), value} end)
    # Added last so the tag always wins over a "type" entry in the payload.
    |> Map.put("type", Map.fetch!(@page_action_types, tag))
  end

  defp encode_page_action(%{} = action), do: action

  defp encode_page_action(other),
    do: raise(ArgumentError, "expected PageAction (atom, {atom, map}, or map), got: " <> inspect(other))

  # Payload keys become strings. `:full_page` on a screenshot action is the
  # only key that gets renamed.
  defp encode_action_key(:screenshot, :full_page), do: "fullPage"
  defp encode_action_key(_tag, key) when is_atom(key), do: Atom.to_string(key)
  defp encode_action_key(_tag, key) when is_binary(key), do: key

  defp encode_action_key(_tag, key),
    do: raise(ArgumentError, "expected PageAction field key to be an atom or string, got: " <> inspect(key))
end

defmodule Crawlberg.StreamError do
  @moduledoc false

  # Raised by the streams built in `Crawlberg.crawl_stream/2` and
  # `Crawlberg.batch_crawl_stream/2` when fetching the next chunk fails.
  #
  #   * `message` - human-readable description
  #   * `reason`  - the error reason returned by the native call
  #   * `adapter` - atom naming the stream that failed
  defexception message: nil, reason: nil, adapter: nil

  # Returns the message stored on the exception as-is.
  @impl true
  def message(%__MODULE__{} = error) do
    error.message
  end
end