lib/ex_wikipedia/page/page.ex

defmodule ExWikipedia.Page do
  @moduledoc """
  `ExWikipedia.page/2` delegates here. This module represents the current
  implementation for requesting and parsing a Wikipedia page.
  """

  @behaviour ExWikipedia

  @allow_redirect true
  @allowed_id_keys [:page, :pageid]
  @default_body_key :body
  @default_http_client HTTPoison
  @default_lang "en"
  @default_json_parser Jason
  @default_status_key :status_code

  alias ExWikipedia.PageParser

  @typedoc """
  - `:external_links` - List of fully qualified URLs linked from the Wikipedia page.
  - `:categories` - List of categories to which the Wikipedia page belongs
  - `:content` - Main content of the Wikipedia page, as a string
  - `:images` - List of relative URLs pointing to images found on the Wikipedia page
  - `:page_id` - Wikipedia page id represented as an integer
  - `:revision_id` - Wikipedia page revision id represented as an integer
  - `:summary` - Summary of page, as a string
  - `:title` - String title of the Wikipedia page
  - `:url` - Fully qualified URL of the Wikipedia page
  - `:is_redirect?` - Boolean. Indicates whether the content is from a page
    redirected from the one requested.
  """
  @type t :: %__MODULE__{
          external_links: [String.t()],
          categories: [String.t()],
          content: String.t(),
          images: [String.t()],
          page_id: non_neg_integer(),
          revision_id: non_neg_integer(),
          summary: String.t(),
          title: String.t(),
          url: String.t(),
          is_redirect?: boolean(),
          links: [String.t()]
        }

  @enforce_keys [:content, :page_id, :summary, :title, :url]

  defstruct external_links: [],
            categories: [],
            content: "",
            images: [],
            page_id: nil,
            revision_id: nil,
            summary: "",
            title: "",
            url: "",
            is_redirect?: false,
            links: []

  @doc """
  Fetches a Wikipedia page by an identifier (see `:by` option).

  ## Options

    - `:http_client`: HTTP Client used to fetch Wikipedia page via Wikipedia's integer ID. Default: `#{@default_http_client}`
    - `:decoder`: Decoder used to decode JSON returned from Wikipedia API. Default: `#{@default_json_parser}`
    - `:http_headers`: HTTP headers that are passed into the client. Default: []
    - `:http_opts`: HTTP options passed to the client. Default: []
    - `:body_key`: key inside the HTTP client's response which contains the response body.
      This may change depending on the client used. Default: `#{@default_body_key}`
    - `:status_key`: key inside the HTTP client's response which returns the HTTP status code.
      This may change depending on the client used. Default: `#{@default_status_key}`
    - `:parser`: Parser used to parse response returned from client. Default: `ExWikipedia.PageParser`
    - `:parser_opts`: Parser options passed the the parser. Default: `[]`.
      See `ExWikipedia.PageParser` for supported options.
    - `:allow_redirect`: indicates whether or not the content from a redirected
       page constitutes a valid response. Default: `#{inspect(@allow_redirect)}`
    - `:language`: Identifies a specific Wikipedia instance to search. You can use the
      `:default_language` config option to set this value. Default: `#{@default_lang}`
    - `:by`: The field used to identify the page. `#{inspect(@allowed_id_keys)}`.

  """
  @impl ExWikipedia
  def fetch(id, opts \\ [])

  def fetch(id, opts) do
    http_client = Keyword.get(opts, :http_client, @default_http_client)

    decoder = Keyword.get(opts, :decoder, @default_json_parser)

    http_headers = Keyword.get(opts, :http_headers, [])

    http_opts = Keyword.get(opts, :http_opts, [])

    body_key = Keyword.get(opts, :body_key, @default_body_key)

    status_key = Keyword.get(opts, :status_key, @default_status_key)

    parser = Keyword.get(opts, :parser, PageParser)

    allow_redirect = Keyword.get(opts, :allow_redirect, @allow_redirect)

    language =
      opts
      |> Keyword.get(
        :language,
        Application.get_env(:ex_wikipedia, :default_language, @default_lang)
      )

    id_key = Keyword.get(opts, :by, detect_id_type(id))

    parser_opts =
      opts
      |> Keyword.get(:parser_opts, [])
      |> Keyword.put(:allow_redirect, allow_redirect)

    with {:ok, url} <- build_url(id_key, id, language),
         {:ok, raw_response} <- http_client.get(url, http_headers, http_opts),
         :ok <- ok_http_status_code(raw_response, status_key),
         {:ok, body} <- get_body(raw_response, body_key),
         {:ok, response} <- decoder.decode(body, keys: :atoms),
         {:ok, parsed_response} <- parser.parse(response, parser_opts) do
      {:ok, struct(__MODULE__, parsed_response)}
    end
  end

  defp get_body(raw_response, body_key) do
    case Map.fetch(raw_response, body_key) do
      {:ok, body} ->
        {:ok, body}

      :error ->
        {:error, "#{inspect(body_key)} not found as key in response: #{inspect(raw_response)}"}
    end
  end

  defp ok_http_status_code(raw_response, status_key) do
    case Map.fetch(raw_response, status_key) do
      {:ok, 200} -> :ok
      _ -> {:error, "#{inspect(status_key)} not 200 in response: #{inspect(raw_response)}"}
    end
  end

  defp build_url(id_key, id_value, lang)
       when (id_key in @allowed_id_keys and is_binary(lang)) or is_atom(lang) do
    {:ok,
     "https://#{lang}.wikipedia.org/w/api.php?action=parse&#{id_key}=#{id_value}&format=json&redirects=true&prop=text|langlinks|categories|links|templates|images|externallinks|sections|revid|displaytitle|iwlinks|properties|parsewarnings|headhtml"
     |> URI.encode()}
  end

  defp build_url(id_key, _, _) when id_key not in @allowed_id_keys do
    {:error, "Unsupported :by field #{inspect(id_key)}"}
  end

  defp build_url(_id_key, id_value, _lang)
       when not is_binary(id_value) and not is_integer(id_value) do
    {:error, "#{inspect(id_value)} is not supported type for lookup."}
  end

  defp build_url(_id_key, _, lang) do
    {:error, "Unsupported language identifier #{inspect(lang)}; language codes must be a string."}
  end

  defp detect_id_type(id) when is_integer(id), do: :pageid

  defp detect_id_type(id) when is_binary(id) or is_atom(id), do: :page
end