lib/crawler/store.ex

defmodule Crawler.Store do
  @moduledoc """
  An internal data store, backed by a unique-key `Registry`, for information
  related to each crawl.
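
  A typical lifecycle, as a sketch (the URL, body, and opts values are
  illustrative; `init/0` must have been called first):

      Crawler.Store.init()
      Crawler.Store.add("http://example.com/")
      Crawler.Store.add_page_data("http://example.com/", "<html></html>", [])
      Crawler.Store.processed("http://example.com/")
      Crawler.Store.find_processed("http://example.com/")
      # => %Crawler.Store.Page{url: "http://example.com/", processed: true, ...}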
  """

  alias __MODULE__.DB

  defmodule Page do
    @moduledoc """
    An internal struct for keeping the URL and content of a crawled page:
    `:url` is the page's URL (also the registry key), `:body` the fetched
    page body, `:opts` the options used for the crawl, and `:processed`
    whether the page has been through post-crawl processing.
    """

    defstruct [:url, :body, :opts, :processed]
  end

  @doc """
  Initialises a new `Registry` named `Crawler.Store.DB`.
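
  Returns `{:ok, pid}` on success, or `{:error, {:already_started, pid}}` if
  the registry is already running:

      {:ok, _pid} = Crawler.Store.init()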
  """
  def init do
    Registry.start_link(keys: :unique, name: DB)
  end

  @doc """
  Finds a stored URL and returns its page data.
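
  Returns `nil` when the URL has not been added. A minimal sketch, assuming
  the registry is running and using an illustrative URL:

      Crawler.Store.add("http://example.com/")
      Crawler.Store.find("http://example.com/")
      # => %Crawler.Store.Page{url: "http://example.com/", body: nil, ...}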
  """
  def find(url) do
    case Registry.lookup(DB, url) do
      [{_, page}] -> page
      _ -> nil
    end
  end

  @doc """
  Finds a stored URL and returns its page data only if the page has been
  marked as processed.
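
  The `%{processed: true}` match pattern matches any stored value containing
  at least that key/value pair, which includes `%Page{}` structs. A sketch of
  the difference from `find/1` (illustrative URL):

      Crawler.Store.add("http://example.com/")
      Crawler.Store.find_processed("http://example.com/")
      # => nil, as the page is not yet marked as processed

      Crawler.Store.processed("http://example.com/")
      Crawler.Store.find_processed("http://example.com/")
      # => %Crawler.Store.Page{url: "http://example.com/", processed: true, ...}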
  """
  def find_processed(url) do
    case Registry.match(DB, url, %{processed: true}) do
      [{_, page}] -> page
      _ -> nil
    end
  end

  @doc """
  Adds a URL to the registry.
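
  The URL is registered under the calling process. Per `Registry.register/3`,
  this returns `{:ok, pid}` on success, or `{:error, {:already_registered, pid}}`
  if the URL is already in the registry:

      Crawler.Store.add("http://example.com/")
      # => {:ok, #PID<0.223.0>}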
  """
  def add(url) do
    Registry.register(DB, url, %Page{url: url})
  end

  @doc """
  Adds the page data for a URL to the registry.
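
  Note that `Registry.update_value/3` only updates a value owned by the
  calling process, and the `{_new, _old}` match means this call raises a
  `MatchError` if the URL was never added. A sketch with illustrative values:

      Crawler.Store.add("http://example.com/")
      Crawler.Store.add_page_data("http://example.com/", "<html></html>", [])
      Crawler.Store.find("http://example.com/").body
      # => "<html></html>"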
  """
  def add_page_data(url, body, opts) do
    {_new, _old} = Registry.update_value(DB, url, &%{&1 | body: body, opts: opts})
  end

  @doc """
  Marks a URL as processed in the registry.
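
  As with `add_page_data/3`, only the process that added the URL can update
  it. Returns the `{new_value, old_value}` pair from `Registry.update_value/3`:

      Crawler.Store.processed("http://example.com/")
      # => {%Crawler.Store.Page{processed: true, ...}, %Crawler.Store.Page{processed: nil, ...}}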
  """
  def processed(url) do
    {_new, _old} = Registry.update_value(DB, url, &%{&1 | processed: true})
  end
end