# lib/crawler/worker.ex

defmodule Crawler.Worker do
  @moduledoc """
  Handles the crawl tasks.

  Every call to `run/1` starts a dedicated `GenServer` process linked to the caller. The
  process keeps the given options as its state, runs the crawl workflow once in response to
  a cast it receives from `run/1`, and then hibernates.

  When the options carry a non-negative integer under `:timeout`, the process sends itself a
  `:stop` message after that many milliseconds and terminates with reason `:normal` upon
  receiving it.
  """

  use GenServer

  alias Crawler.Fetcher
  alias Crawler.Store
  alias Crawler.Store.Page

  require Logger

  @impl true
  def init(args) do
    schedule_stop(args[:timeout])

    {:ok, args}
  end

  @doc """
  Runs the worker that casts data to itself to kick off the crawl workflow.

  `opts` becomes the state of the newly started worker process and is also sent as the cast
  payload. The worker reads `opts[:timeout]` (milliseconds until it stops itself) and
  `opts[:parser]` (a module exposing `parse/1`) from it.

  Returns `:ok`, the result of `GenServer.cast/2`. Raises a `MatchError` when the worker
  process cannot be started.
  """
  def run(opts) do
    Logger.debug("Running worker with opts: #{inspect(opts)}")

    # `hibernate_after: 0` lets the process hibernate as soon as it becomes idle.
    {:ok, pid} = GenServer.start_link(__MODULE__, opts, hibernate_after: 0)

    GenServer.cast(pid, opts)
  end

  @doc """
  A crawl workflow that delegates responsibilities to:

  - `Crawler.Fetcher.fetch/1`
  - `Crawler.Parser.parse/1` (or a custom parser)

  The cast payload is ignored; the workflow is driven by the worker's state. The fetch
  result is piped into the `parse/1` function of the module stored under `state[:parser]`.
  When the parser returns `{:ok, %Crawler.Store.Page{}}`, the page is reported to
  `Crawler.Store.processed/1`; any other result is ignored.
  """
  @impl true
  def handle_cast(_req, state) do
    Logger.debug("Running worker with opts: #{inspect(state)}")

    state
    |> Fetcher.fetch()
    |> state[:parser].parse()
    |> mark_processed()

    {:noreply, state, :hibernate}
  end

  @impl true
  # Self-addressed message scheduled in `init/1`: shut the worker down cleanly.
  def handle_info(:stop, state) do
    {:stop, :normal, state}
  end

  # Any other message is dropped so it does not crash the worker.
  def handle_info(_msg, state) do
    {:noreply, state}
  end

  # Schedules the `:stop` message for the calling process after `timeout` milliseconds.
  defp schedule_stop(timeout) when is_integer(timeout) and timeout >= 0 do
    Process.send_after(self(), :stop, timeout)
  end

  # A missing or non-integer timeout schedules nothing: the worker then only stops when it
  # is terminated from the outside. This keeps the previous behaviour, where the error
  # returned by the timer call for such values was silently discarded.
  defp schedule_stop(_timeout), do: :ok

  # Reports a successfully parsed page to the store, keyed by its URL and the `:scope`
  # entry of its options.
  defp mark_processed({:ok, %Page{url: url, opts: opts}}) do
    Store.processed({url, opts[:scope]})
  end

  # Anything else (e.g. a failed fetch or parse) is not recorded.
  defp mark_processed(_), do: nil
end