# This file is auto-generated by alef — DO NOT EDIT.
# alef:hash:06831f8166c6d860691af36ee02b72ae3246568eb2e5c67ed5d11da71d02afeb
# To regenerate: alef generate
# To verify freshness: alef verify --exit-code
defmodule Crawlberg do
@moduledoc "High-level API for crawlberg"
@doc """
Convert markdown links to numbered citations.

Forwards `markdown` unchanged to `Crawlberg.Native.generate_citations/1` and returns
the native result (a map, per the spec).
"""
@spec generate_citations(String.t()) :: map()
defdelegate generate_citations(markdown), to: Crawlberg.Native
@doc """
Create a new crawl engine without an explicit configuration.

Equivalent to `create_engine(nil)`: the native layer receives `nil` as its
configuration argument.
"""
@spec create_engine() :: {:ok, reference()} | {:error, atom, String.t()}
def create_engine, do: create_engine(nil)
@doc """
Create a new crawl engine with the given configuration.

`config` is forwarded unchanged to `Crawlberg.Native.create_engine/1`. Per the spec it
is either `nil` or a string (presumably a serialized configuration; the exact format is
defined by the native layer).
"""
@spec create_engine(String.t() | nil) :: {:ok, reference()} | {:error, atom, String.t()}
defdelegate create_engine(config), to: Crawlberg.Native
@doc """
Scrape a single URL, returning extracted page data.

Forwards `engine` and `url` unchanged to `Crawlberg.Native.scrape_async/2` and returns
the native result as-is.
"""
@spec scrape(reference(), String.t()) :: {:ok, map()} | {:error, atom, String.t()}
defdelegate scrape(engine, url), to: Crawlberg.Native, as: :scrape_async
@doc """
Crawl a website starting from `url`, following links up to the configured depth.

Forwards `engine` and `url` unchanged to `Crawlberg.Native.crawl_async/2` and returns
the native result as-is.
"""
@spec crawl(reference(), String.t()) :: {:ok, map()} | {:error, atom, String.t()}
defdelegate crawl(engine, url), to: Crawlberg.Native, as: :crawl_async
@doc """
Discover all pages on a website by following links and sitemaps.

Forwards `engine` and `url` unchanged to `Crawlberg.Native.map_urls_async/2` and
returns the native result as-is.
"""
@spec map_urls(reference(), String.t()) :: {:ok, map()} | {:error, atom, String.t()}
defdelegate map_urls(engine, url), to: Crawlberg.Native, as: :map_urls_async
@doc """
Execute browser actions on a single page.

Every element of `actions` is converted to a map, the resulting list is JSON-encoded
with `Jason.encode!/1`, and the JSON is passed to
`Crawlberg.Native.interact_async/3` together with `engine` and `url`.

Accepted action shapes:

  * `:scrape` or `{:scrape, term}`
  * `{tag, map}` where `tag` is one of `:click`, `:type_text`, `:press`, `:scroll`,
    `:wait`, `:screenshot` or `:execute_js`
  * a plain map, which is passed through unchanged

Raises `ArgumentError` if an element of `actions` has any other shape.
"""
@spec interact(reference(), String.t(), [:scrape | {atom(), term()} | map()]) ::
        {:ok, map()} | {:error, atom, String.t()}
def interact(engine, url, actions) do
  # Encode first so a malformed action raises before the native layer is touched.
  actions_json =
    actions
    |> Enum.map(&encode_page_action/1)
    |> Jason.encode!()

  Crawlberg.Native.interact_async(engine, url, actions_json)
end
@doc """
Scrape multiple URLs concurrently.

Forwards `engine` and the `urls` list unchanged to
`Crawlberg.Native.batch_scrape_async/2` and returns the native result as-is.
"""
@spec batch_scrape(reference(), [String.t()]) :: {:ok, map()} | {:error, atom, String.t()}
defdelegate batch_scrape(engine, urls), to: Crawlberg.Native, as: :batch_scrape_async
@doc """
Crawl multiple seed URLs concurrently, each following links to configured depth.

Forwards `engine` and the `urls` list unchanged to
`Crawlberg.Native.batch_crawl_async/2` and returns the native result as-is.
"""
@spec batch_crawl(reference(), [String.t()]) :: {:ok, map()} | {:error, atom, String.t()}
defdelegate batch_crawl(engine, urls), to: Crawlberg.Native, as: :batch_crawl_async
# Pass-through to the native call of the same name; hidden from generated docs.
@doc false
defdelegate crawlenginehandle_crawl_stream_start(client, req), to: Crawlberg.Native
# Pass-through to the native call of the same name; hidden from generated docs.
@doc false
defdelegate crawlenginehandle_crawl_stream_next(handle), to: Crawlberg.Native
@doc """
Streaming `crawl_stream` — returns an `Enumerable` of decoded chunk maps.

`req` is normalised before it reaches the native layer: `nil` and binaries (assumed to
be already-encoded JSON) are passed through untouched, and any other term is encoded
with `Jason.encode!/1`.

Returns `{:ok, stream}` when the native start call succeeds, or `{:error, reason}` when
it fails. The stream is built with `Stream.unfold/2`, so chunks are only requested from
the native handle while the stream is being enumerated. Each chunk is decoded from JSON
with atom keys, and the stream ends when the native side answers `{:ok, nil}`.

Raises `Crawlberg.StreamError` during enumeration if requesting the next chunk returns
`{:error, reason}`.
"""
@spec crawl_stream(term(), term()) :: {:ok, Enumerable.t()} | {:error, term()}
def crawl_stream(client, req) do
  req_json =
    case req do
      nil -> nil
      json when is_binary(json) -> json
      term -> Jason.encode!(term)
    end

  case Crawlberg.Native.crawlenginehandle_crawl_stream_start(client, req_json) do
    {:ok, handle} ->
      chunks =
        Stream.unfold(handle, fn h ->
          case Crawlberg.Native.crawlenginehandle_crawl_stream_next(h) do
            {:ok, nil} ->
              # The native side signals end-of-stream with a nil chunk.
              nil

            {:ok, chunk_json} ->
              # NOTE(review): `keys: :atoms` creates a new atom for every previously
              # unseen key and atoms are never garbage-collected. This is only safe if
              # the native layer emits a bounded set of keys — confirm. Kept as-is
              # because callers may rely on atom keys.
              {Jason.decode!(chunk_json, keys: :atoms), h}

            {:error, reason} ->
              raise Crawlberg.StreamError,
                message: "crawl_stream stream failed: #{inspect(reason)}",
                reason: reason,
                adapter: :crawl_stream
          end
        end)

      {:ok, chunks}

    {:error, reason} ->
      {:error, reason}
  end
end
# Pass-through to the native call of the same name; hidden from generated docs.
@doc false
defdelegate crawlenginehandle_batch_crawl_stream_start(client, req), to: Crawlberg.Native
# Pass-through to the native call of the same name; hidden from generated docs.
@doc false
defdelegate crawlenginehandle_batch_crawl_stream_next(handle), to: Crawlberg.Native
@doc """
Streaming `batch_crawl_stream` — returns an `Enumerable` of decoded chunk maps.

`req` is normalised before it reaches the native layer: `nil` and binaries (assumed to
be already-encoded JSON) are passed through untouched, and any other term is encoded
with `Jason.encode!/1`.

Returns `{:ok, stream}` when the native start call succeeds, or `{:error, reason}` when
it fails. The stream is built with `Stream.unfold/2`, so chunks are only requested from
the native handle while the stream is being enumerated. Each chunk is decoded from JSON
with atom keys, and the stream ends when the native side answers `{:ok, nil}`.

Raises `Crawlberg.StreamError` during enumeration if requesting the next chunk returns
`{:error, reason}`.
"""
@spec batch_crawl_stream(term(), term()) :: {:ok, Enumerable.t()} | {:error, term()}
def batch_crawl_stream(client, req) do
  req_json =
    case req do
      nil -> nil
      json when is_binary(json) -> json
      term -> Jason.encode!(term)
    end

  case Crawlberg.Native.crawlenginehandle_batch_crawl_stream_start(client, req_json) do
    {:ok, handle} ->
      chunks =
        Stream.unfold(handle, fn h ->
          case Crawlberg.Native.crawlenginehandle_batch_crawl_stream_next(h) do
            {:ok, nil} ->
              # The native side signals end-of-stream with a nil chunk.
              nil

            {:ok, chunk_json} ->
              # NOTE(review): `keys: :atoms` creates a new atom for every previously
              # unseen key and atoms are never garbage-collected. This is only safe if
              # the native layer emits a bounded set of keys — confirm. Kept as-is
              # because callers may rely on atom keys.
              {Jason.decode!(chunk_json, keys: :atoms), h}

            {:error, reason} ->
              raise Crawlberg.StreamError,
                message: "batch_crawl_stream stream failed: #{inspect(reason)}",
                reason: reason,
                adapter: :batch_crawl_stream
          end
        end)

      {:ok, chunks}

    {:error, reason} ->
      {:error, reason}
  end
end
# Converts one page action into the map that is later JSON-encoded for the native layer.
#
# Accepted shapes:
#   * `{tag, map}` for the tags listed below — keys are stringified and a "type" entry
#     is added (overwriting any "type" key already present in the map)
#   * `:scrape` / `{:scrape, _}` — encoded as a bare "scrape" action
#   * any other map — returned unchanged
# Anything else raises `ArgumentError`.
defp encode_page_action({:click, %{} = data}), do: tag_page_action(data, "click")
# Note the tag/type mismatch: `:type_text` is emitted as "type".
defp encode_page_action({:type_text, %{} = data}), do: tag_page_action(data, "type")
defp encode_page_action({:press, %{} = data}), do: tag_page_action(data, "press")
defp encode_page_action({:scroll, %{} = data}), do: tag_page_action(data, "scroll")
defp encode_page_action({:wait, %{} = data}), do: tag_page_action(data, "wait")

defp encode_page_action({:screenshot, %{} = data}) do
  # Screenshot is the only action with a renamed key: `:full_page` becomes "fullPage".
  tag_page_action(data, "screenshot", fn
    :full_page -> "fullPage"
    key -> page_action_key(key)
  end)
end

defp encode_page_action({:execute_js, %{} = data}), do: tag_page_action(data, "executeJs")
defp encode_page_action(:scrape), do: %{"type" => "scrape"}
defp encode_page_action({:scrape, _}), do: %{"type" => "scrape"}
defp encode_page_action(%{} = m), do: m

defp encode_page_action(other),
  do: raise(ArgumentError, "expected PageAction (atom, {atom, map}, or map), got: " <> inspect(other))

# Rebuilds `data` with every key passed through `key_fun`, then sets "type" to `type`.
defp tag_page_action(data, type, key_fun \\ &page_action_key/1) do
  data
  |> Map.new(fn {key, value} -> {key_fun.(key), value} end)
  |> Map.put("type", type)
end

# Stringifies an atom key and leaves a binary key untouched; any other key type raises
# `CaseClauseError`.
defp page_action_key(key) do
  case key do
    k when is_atom(k) -> Atom.to_string(k)
    k when is_binary(k) -> k
  end
end
end
defmodule Crawlberg.StreamError do
  @moduledoc false

  # Exception raised by the streaming adapters when requesting the next chunk fails.
  #
  # Fields:
  #   * `:message` - human-readable description of the failure
  #   * `:reason`  - the error term reported for the failed request
  #   * `:adapter` - atom naming the streaming function that raised
  defexception message: nil, reason: nil, adapter: nil

  # Returns the stored `:message` field unchanged.
  @impl true
  def message(%__MODULE__{} = error), do: error.message
end