lib/ores.ex

defmodule Wiki.Ores do
  @moduledoc """
  This module provides an adapter for the [ORES](https://www.mediawiki.org/wiki/ORES) scoring service.

  ## Examples

  ```elixir
  "enwiki"
  |> Wiki.Ores.new()
  |> Wiki.Ores.request!(
    models: ["damaging"],
    revids: 456789
  )
  # %{
  #   "enwiki" => %{
  #     "models" => %{
  #       "damaging" => %{"version" => "0.5.1"}
  #     },
  #     "scores" => %{
  #       "456789" => %{
  #         "damaging" => %{
  #           "score" => %{
  #             "prediction" => false,
  #             "probability" => %{
  #               "false" => 0.9784615344695441,
  #               "true" => 0.021538465530455946
  #             }
  #           }
  #         }
  #       }
  #     }
  #   }
  # }
  ```
  """

  alias Wiki.{Error, Util}

  # HTTP adapter handed to Tesla unless the caller passes `:adapter` in the client options.
  @default_adapter Tesla.Adapter.Hackney
  # Base URL for score queries, used unless `:endpoint` is given; `new/2` appends the
  # project code to it.
  @default_endpoint "https://ores.wikimedia.org/v3/scores/"

  # TODO:
  #  * Wrap models?
  #  * Chunk at 50 revisions per request.
  #  * Offer parallelism up to 4.

  @doc """
  Create a new ORES client.

  ## Arguments

  - `project` - Short code for the wiki where your articles appear.  For example, "enwiki" for English Wikipedia.
  - `opts` - Keyword options
    - `:endpoint` - Override the base URL to query.  A trailing slash is added if it is missing.
    - `:adapter` - Override the Tesla HTTP adapter.
    - `:user_agent` - Override the `user-agent` header sent with each request.

  ## Return value

  Returns an opaque client object, which should be passed to `request/2`.
  """
  @spec new(String.t(), keyword()) :: Tesla.Client.t()
  def new(project, opts \\ []) do
    endpoint = opts[:endpoint] || @default_endpoint

    # The project code is appended as its own path segment, so the base has to end
    # with a separator; otherwise "https://host/scores" <> "enwiki" fuses into one
    # bogus segment.
    base = if String.ends_with?(endpoint, "/"), do: endpoint, else: endpoint <> "/"

    client(base <> project <> "/", opts)
  end

  @doc """
  Fetch scores from ORES.

  Keep each call to at most 50 revisions.

  ## Arguments

  - `client` - Client object, as built by `new/2`.
  - `params` - Query parameters, given as a keyword list or map.
    - `:models` - Learning models to query.  Availability differs from wiki to wiki; the
    [support matrix](https://tools.wmflabs.org/ores-support-checklist/) lists them and explains what
    each one scores.  Pass either a list such as `[:damaging, :wp10]` or a single atom such as `:damaging`.
    - `:revids` - Revision IDs to score, either one integer or a list of them.

  ## Return value

  `{:ok, body}` holding the response body when the request succeeds and passes validation,
  otherwise `{:error, reason}`.
  """
  @spec request(Tesla.Client.t(), keyword | map) :: {:ok, map} | {:error, any()}
  def request(client, params) do
    query = normalize(params)

    # Any non-matching step (transport failure or validation error) is returned untouched.
    with {:ok, env} <- Tesla.get(client, "/", query: query),
         {:ok, %{body: body}} <- validate(env) do
      {:ok, body}
    end
  end

  @doc """
  Same as `request/2`, but returns the bare result and raises on failure.

  Errors that are not already a `Wiki.Error` are wrapped in one, with the inspected
  reason as its message.
  """
  @spec request!(Tesla.Client.t(), keyword | map) :: map
  def request!(client, params) do
    outcome = request(client, params)

    case outcome do
      {:ok, body} -> body
      {:error, %Error{} = exception} -> raise exception
      {:error, reason} -> raise %Error{message: inspect(reason)}
    end
  end

  # Prepares caller-supplied parameters for use as the request's query.
  #
  # Takes the same keyword-or-map input that `request/2` accepts. Enumerating a map
  # yields `{key, value}` pairs, so the result is a list of pairs in both cases
  # (not necessarily a keyword list, since map keys need not be atoms).
  @spec normalize(keyword | map) :: [{term, term}]
  defp normalize(params), do: pipe_lists(params)

  # Collapses every list-valued parameter into a single "|"-separated string,
  # leaving all other entries as they are.
  defp pipe_lists(params), do: Enum.map(params, &pipe_entry/1)

  defp pipe_entry({key, values}) when is_list(values), do: {key, Enum.join(values, "|")}
  defp pipe_entry(other), do: other

  # Checks a response for failures, most specific first, and returns
  # `{:ok, result}` when none is found or the first `{:error, _}` otherwise.
  #
  # Order of checks:
  #   1. an error reported inside the body (carries the most useful message),
  #   2. a non-2xx HTTP status,
  #   3. a body that is missing, empty or not a map.
  defp validate(result) do
    # Only a map body can carry an API error. Any other body is skipped here rather
    # than rejected up front, so that a failing HTTP status is still reported
    # instead of being masked as an "Empty response".
    api_error = if is_map(result.body), do: validate_api_errors(result.body)

    with nil <- api_error,
         nil <- validate_http_status(result.status),
         nil <- validate_body_type(result.body) do
      {:ok, result}
    end
  end

  # Returns nil for a 2xx status and an error tuple naming the status otherwise.
  defp validate_http_status(status) when status >= 200 and status < 300, do: nil

  defp validate_http_status(status) do
    {:error, %Error{message: "Error received with HTTP status #{status}"}}
  end

  # Returns nil when the body is a map with at least one entry; anything else
  # (non-map or empty map) is reported as an empty response.
  defp validate_body_type(body) when is_map(body) and map_size(body) > 0, do: nil
  defp validate_body_type(_body), do: {:error, %Error{message: "Empty response"}}

  # Turns an "error" entry in the body into an error tuple; returns nil when the
  # body has no such entry.
  defp validate_api_errors(body) do
    # A nil lookup fails the match and is returned as-is by `with`.
    with details when not is_nil(details) <- body["error"] do
      {:error, %Error{message: summarize_error(details)}}
    end
  end

  # Reduces the body's "error" value to a single message string.
  #
  # For a map, prefers its "message", then its "code", then falls back to "unknown".
  # The value comes from a remote service, so other shapes are tolerated rather than
  # crashing on the key lookup: a plain string is used verbatim and any other term
  # is rendered with `inspect/1`.
  defp summarize_error(error) when is_map(error) do
    error["message"] || error["code"] || "unknown"
  end

  defp summarize_error(error) when is_binary(error), do: error
  defp summarize_error(error), do: inspect(error)

  # Builds the Tesla client for `url`. `:adapter` and `:user_agent` in `opts`
  # override the default adapter and the library's default user agent.
  @spec client(binary(), keyword()) :: Tesla.Client.t()
  defp client(url, opts) do
    user_agent = opts[:user_agent] || Util.default_user_agent()
    adapter = opts[:adapter] || @default_adapter

    middleware = [
      {Tesla.Middleware.BaseUrl, url},
      {Tesla.Middleware.Compression, format: "gzip"},
      {Tesla.Middleware.Headers, [{"user-agent", user_agent}]},
      Tesla.Middleware.FollowRedirects,
      Tesla.Middleware.JSON
      # Debugging only:
      # Tesla.Middleware.Logger
    ]

    Tesla.client(middleware, adapter)
  end
end