lib/action.ex

defmodule Wiki.Action.Session do
  @moduledoc """
  Struct carrying the private connection together with the results gathered so far.

  ## Fields

  - `__client__` - The `Tesla.Client` used to issue requests.  Treat as private.
  - `result` - Map with recursively merged values from all requests made using this session.
  - `state` - Cache for session state and accumulation.
  """

  defstruct __client__: nil, result: %{}, state: []

  @type client :: Tesla.Client.t()
  @type state :: keyword

  @type t :: %__MODULE__{
          __client__: client,
          result: map,
          state: state
        }

  @type result :: {:ok, t()} | {:error, any}
end

defmodule Wiki.Action do
  @moduledoc """
  Adapter to the MediaWiki [Action API](https://www.mediawiki.org/wiki/Special:MyLanguage/API:Main_page)

  Most commands return a pipeable Wiki.Action.Session, which can be reused repeatedly.

  Anonymous requests,

  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("dewiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
    action: :query,
    meta: :siteinfo,
    siprop: :statistics
  )
  # %Wiki.Action.Session{
  #   ...
  #   result: %{
  #     "batchcomplete" => true,
  #     "query" => %{
  #       "statistics" => %{
  #         "activeusers" => 19393,
  #         "admins" => 188,
  #         "articles" => 2583636,
  #         "edits" => 211249646,
  #         "images" => 130213,
  #         "jobs" => 0,
  #         "pages" => 7164514,
  #         "queued-massmessages" => 0,
  #         "users" => 3716049
  #       }
  #     }
  #   },
  #   ...
  # }
  ```

  Commands can be pipelined while accumulating results, and logged-in user permissions
  delegated by supplying a [bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords).

  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("enwiki")
  |> Wiki.Action.new(
    accumulate: true
  )
  |> Wiki.Action.authenticate!(
    Application.get_env(:example_app, :bot_username),
    Application.get_env(:example_app, :bot_password)
  )
  |> Wiki.Action.get!(
    action: :query,
    meta: :tokens,
    type: :csrf
  )
  |> (&Wiki.Action.post!(&1,
    action: :edit,
    title: "Sandbox",
    assert: :user,
    token: &1.result["query"]["tokens"]["csrftoken"],
    appendtext: "~~~~ was here."
  )).()
  |> Map.get(:result)
  ```

  Streaming results from multiple requests using continuation,

  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("dewiki")
  |> Wiki.Action.new()
  |> Wiki.Action.stream(
    action: :query,
    list: :recentchanges,
    rclimit: 5
  )
  |> Stream.take(10)
  |> Enum.flat_map(fn response -> response["query"]["recentchanges"] end)
  |> Enum.map(fn rc -> rc["timestamp"] <> " " <> rc["title"] end)
  ```

  ## Wikibase

  The [Wikidata](https://www.wikidata.org/) project provides structured data for
  other wiki projects, and can be accessed through the Action API.

  Examples:

  Search for entities called "alphabet",
  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("wikidatawiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
      action: :wbsearchentities,
      search: "alphabet",
      language: :en
  )
  ```

  Search for entities with "Frank Zappa" anywhere in the description or contents,
  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("wikidatawiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
      action: :query,
      list: :search,
      srsearch: "Frank Zappa"
  )
  ```

  Retrieve all data about a specific entity with ID "Q42",
  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("wikidatawiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
      action: :wbgetentities,
      ids: "Q42"
  )
  ```

  ## Defaults
  A few parameters are automatically added for convenience, but can be
  overridden if desired:

  * The `:format` parameter defaults to `:json`.
  * `:formatversion` defaults to `2`.

  Overriding to get pretty-printed JSON and the older response structure,
  ```elixir
  Wiki.Action.get!(
    session,
    action: :query,
    meta: :siteinfo,
    siprop: :namespaces,
    format: :jsonfm,
    formatversion: 1
  )
  ```
  """

  alias Wiki.{Action.Session, Error, SiteMatrix, Util}

  @type client_option ::
          {:accumulate, boolean()}
          | {:adapter, module() | {module(), keyword()}}
          | {:debug, boolean()}
          | {:disable_compression, boolean()}
          | {:user_agent, binary()}

  @typedoc """
    - `:accumulate` - Merge results from each step of a pipeline, rather than
      overwriting with the latest response.
    - `:adapter` - Override the HTTP adapter
    - `:debug` - Turn on verbose logging by setting to `true`
    - `:disable_compression` - Disable the transparent gzip codec, rather than
      automatically detecting based on HTTP headers.
    - `:user_agent` - Override the user-agent header string

  Flags given as `false` are ignored, leaving the default behavior in place.
  """
  @type client_options :: [client_option()]

  # Shared timeout value: used for the default adapter's `recv_timeout` below and
  # for `Tesla.Middleware.Timeout` when the client is built.
  # NOTE(review): presumably milliseconds (i.e. one minute) — confirm against Tesla/hackney docs.
  @default_timeout 60_000
  # HTTP adapter used when the `:adapter` client option is not supplied.
  @default_adapter {Tesla.Adapter.Hackney, recv_timeout: @default_timeout}

  @doc """
  Build a fresh client session for a wiki.

  ## Arguments

  - `site` - Either a `SiteMatrix.Spec`, or the raw `api.php` endpoint of the wiki
    to connect to.  For example, "https://en.wikipedia.org/w/api.php".
  - `opts` - Configuration options which modify client behavior.  When `site` is
    a spec, the options stored on the spec are placed ahead of these.
  """
  @spec new(String.t() | SiteMatrix.Spec.t(), client_options()) :: Session.t()
  def new(site, opts \\ [])

  def new(%SiteMatrix.Spec{} = site, opts) do
    endpoint = SiteMatrix.action_api(site)
    new(endpoint, site.opts ++ opts)
  end

  def new(url, opts), do: %Session{__client__: client(url, opts)}

  @doc """
  Log a client session in.  Use only a
  [bot username and password](https://www.mediawiki.org/wiki/Manual:Bot_passwords),
  which can be created for any normal user account.

  Two requests are made: a GET fetching a login token, then a POST performing
  `action=login` with that token.  Errors from either request are returned as-is;
  the contents of the login response itself are not inspected here.

  ## Arguments

  - `session` - Base session pointing to a wiki.
  - `username` - Bot username, may be different than the final logged-in username.
  - `password` - Bot password.  Protect this string, it allows others to take on-wiki actions on your behalf.

  ## Return value

  `{:ok, session}` with the session resulting from the login request, or
  `{:error, error}`.
  """
  @spec authenticate(Session.t(), String.t(), String.t()) :: Session.result()
  def authenticate(session, username, password) do
    token_query = [action: :query, meta: :tokens, type: :login]

    case get(session, token_query) do
      {:ok, token_session} ->
        post(token_session,
          action: :login,
          lgname: username,
          lgpassword: password,
          lgtoken: token_session.result["query"]["tokens"]["logintoken"]
        )

      failure ->
        failure
    end
  end

  @doc """
  Same as `authenticate/3`, but returns the bare session and raises on error.
  """
  @spec authenticate!(Session.t(), String.t(), String.t()) :: Session.t()
  def authenticate!(session, username, password) do
    session
    |> authenticate(username, password)
    |> case do
      {:ok, authenticated} -> authenticated
      {:error, reason} -> raise reason
    end
  end

  @doc """
  Perform a GET request against the API.

  ## Arguments

  - `session` - `Wiki.Action.Session` object.
  - `params` - Keyword list of query parameters as atoms or strings.  These are
    normalized and sent as the URL query.

  ## Return value

  `{:ok, session}` where the session has its `.result` populated, or
  `{:error, error}`.
  """
  @spec get(Session.t(), keyword) :: Session.result()
  def get(session, params) do
    query = normalize_params(params)
    request(session, :get, query: query)
  end

  @doc """
  Same as `get/2`, but returns the bare session and raises on error.
  """
  @spec get!(Session.t(), keyword) :: Session.t()
  def get!(session, params) do
    session
    |> get(params)
    |> case do
      {:ok, fetched} -> fetched
      {:error, reason} -> raise reason
    end
  end

  @doc """
  Perform a POST request against the API.

  ## Arguments

  - `session` - `Wiki.Action.Session` object.  If credentials are required for this
  action, you should have created this object with the `authenticate/3` function.
  - `params` - Keyword list of parameters as atoms or strings.  These are
    normalized and sent as the request body.

  ## Return value

  `{:ok, session}` where the session has a populated `:result` attribute, or
  `{:error, error}`.
  """
  @spec post(Session.t(), keyword) :: Session.result()
  def post(session, params) do
    body = normalize_params(params)
    request(session, :post, body: body)
  end

  @doc """
  Same as `post/2`, but returns the bare session and raises on error.
  """
  @spec post!(Session.t(), keyword) :: Session.t()
  def post!(session, params) do
    session
    |> post(params)
    |> case do
      {:ok, posted} -> posted
      {:error, reason} -> raise reason
    end
  end

  @doc """
  Lazily issue GET requests, following continuations until there are none left
  or the consumer stops reading.

  ## Arguments

  - `session` - `Wiki.Action.Session` object.
  - `params` - Keyword list of query parameters as atoms or strings.

  ## Return value

  Enumerable `Stream`, where each emitted chunk is a raw result map, possibly
  containing multiple records.  This corresponds to `session.result` from the other
  entry points.  Requests are made with `get!/2`, so a failed request raises while
  the stream is being consumed.
  """
  @spec stream(Session.t(), keyword) :: Enumerable.t()
  def stream(session, params) do
    start_fun = fn -> {session, :start} end

    next_fun = fn
      # First pull: issue the request exactly as given.
      {current, :start} ->
        do_stream_get(current, params)

      # Later pulls: repeat the original request plus the continuation
      # parameters found in the previous response, or stop if there are none.
      {current, :cont} ->
        case get_continuation(current.result) do
          nil -> {:halt, nil}
          continuation -> do_stream_get(current, params ++ continuation)
        end
    end

    Stream.resource(start_fun, next_fun, fn _ -> nil end)
  end

  # Fetch one page for `stream/2`.  Returns the `Stream.resource/3` step tuple:
  # a single-element list holding the page's result, and the next accumulator.
  defp do_stream_get(session, params) do
    fetched = get!(session, params)
    chunk = fetched.result
    {[chunk], {fetched, :cont}}
  end

  # Extract continuation parameters from a result map as a list of
  # `{key, value}` pairs, or `nil` when the result carries none.
  #
  # TODO: Test that a cross between a list and query can be continued
  # in both dimensions.
  defp get_continuation(%{"continue" => continue}), do: Map.to_list(continue)

  # "query-continue" nests the parameters one level deeper; flatten every
  # inner map into a single parameter list.
  defp get_continuation(%{"query-continue" => continue}) do
    Enum.flat_map(Map.values(continue), &Map.to_list/1)
  end

  defp get_continuation(_result), do: nil

  # Run a single request through the session's Tesla client and validate the
  # response.  On success, returns a new session holding the response body as
  # `result` and the response's env opts (minus `:opts`) as `state`.  Every
  # failure is returned as `{:error, %Error{}}`.
  @spec request(Session.t(), :get | :post, keyword) :: Session.result()
  defp request(session, method, params) do
    # TODO: This can be extracted into a generic StatefulAdapter now.
    # The session state rides along as the Tesla env `:opts`.
    request_opts = [{:opts, session.state} | params] ++ [method: method]

    with {:ok, response} <- Tesla.request(session.__client__, request_opts),
         {:ok, response} <- validate(response) do
      next_session = %Session{
        __client__: session.__client__,
        result: response.body,
        state: Keyword.delete(response.opts, :opts)
      }

      {:ok, next_session}
    else
      # Already one of our errors: pass through untouched.
      {:error, %Error{} = error} -> {:error, error}
      # Anything else is wrapped, using its inspected form as the message.
      {:error, reason} -> {:error, %Error{message: inspect(reason)}}
    end
  end

  # Prepare request parameters for sending:
  #
  # * drops parameters whose value is `false` or `nil`,
  # * adds the default `format` and `formatversion` unless the caller supplied
  #   their own value for that key (as an atom or string key),
  # * joins list values into a single string,
  # * sorts the result and removes exact duplicates.
  @spec normalize_params(keyword) :: keyword
  defp normalize_params(params) do
    defaults = [
      format: :json,
      formatversion: 2
    ]

    supplied = remove_boolean_false(params)

    # A default is only kept when the caller did not give that key.  Simply
    # concatenating would send both values, leaving the winner up to the server.
    kept_defaults =
      Enum.reject(defaults, fn {default_key, _value} ->
        name = Atom.to_string(default_key)
        Enum.any?(supplied, fn {key, _value} -> key == default_key or key == name end)
      end)

    (kept_defaults ++ supplied)
    |> pipe_lists()
    |> Enum.sort()
    |> Enum.dedup()
  end

  # Drop every parameter whose value is `false` or `nil`.
  defp remove_boolean_false(params) do
    Enum.reject(params, fn {_key, value} -> value in [false, nil] end)
  end

  # Replace each list-valued parameter with its joined string form; all other
  # entries pass through unchanged.
  defp pipe_lists(params), do: Enum.map(params, &pipe_entry/1)

  defp pipe_entry({key, values}) when is_list(values), do: {key, pipe_list(values)}
  defp pipe_entry(entry), do: entry

  # Join a list of values into one multi-value string.  Values are normally
  # separated by "|"; when any value itself contains a pipe, the unit
  # separator is used instead and the string also starts with it.
  defp pipe_list(values) do
    contains_pipe? = fn value -> value |> to_string() |> String.contains?("|") end

    case Enum.any?(values, contains_pipe?) do
      true ->
        # Use a special join character because pipe would conflict with the value.
        separator = "\x1f"
        Enum.join(["" | values], separator)

      false ->
        Enum.join(values, "|")
    end
  end

  # Run the response checks in order: HTTP status, body type, API-level errors.
  # Each check yields `nil` when satisfied, so the first non-nil outcome (an
  # error tuple) short-circuits and is returned; otherwise `{:ok, result}`.
  defp validate(result) do
    validate_http_status(result.status) ||
      validate_body_type(result.body) ||
      validate_api_errors(result.body) ||
      {:ok, result}
  end

  # Accept any 2xx status (returning `nil`); everything else becomes an error tuple.
  defp validate_http_status(status) when status >= 200 and status < 300, do: nil

  defp validate_http_status(status) do
    {:error, %Error{message: "Error received with HTTP status #{status}"}}
  end

  # The body must be a non-empty map.  Returns `nil` when it is, otherwise an
  # "Empty response" error tuple (also used for non-map bodies).
  defp validate_body_type(body) when is_map(body) and body != %{}, do: nil
  defp validate_body_type(_body), do: {:error, %Error{message: "Empty response"}}

  # Look for API-level errors in a decoded response body.
  #
  # Returns `nil` when neither the "error" nor the "errors" key holds a value.
  # A map is summarized as a legacy-format error and a list as a new-format
  # error list.  Any other shape is still reported as an error tuple rather
  # than crashing with an unmatched `with` clause.
  defp validate_api_errors(body) do
    with nil <- body["error"],
         nil <- body["errors"] do
      nil
    else
      error when is_map(error) ->
        {:error, %Error{message: summarize_legacy_error(error)}}

      errors when is_list(errors) ->
        {:error, %Error{message: summarize_new_error(errors)}}

      other ->
        {:error, %Error{message: "Unrecognized error format: #{inspect(other)}"}}
    end
  end

  # Pick a message for a legacy-format error: the "info" value if present,
  # else the "code" value, else a fixed fallback string.
  defp summarize_legacy_error(error) do
    Enum.find_value(["info", "code"], "Unknown error (legacy format)", &error[&1])
  end

  # Pick a message for a new-format error list.  Only the first entry is used.
  # TODO: multiple errors
  defp summarize_new_error(errors) do
    errors
    |> List.first()
    |> summarize_error_entry()
  end

  # Preference order: plain text, HTML, message key joined with its params by
  # "-", bare code, and finally "unknown".
  defp summarize_error_entry(%{"text" => text}), do: text
  defp summarize_error_entry(%{"html" => html}), do: html

  defp summarize_error_entry(%{"key" => key, "params" => params}) do
    [key, params] |> List.flatten() |> Enum.join("-")
  end

  defp summarize_error_entry(%{"code" => code}), do: code
  defp summarize_error_entry(_entry), do: "unknown"

  # Build the Tesla client for the API endpoint `url`.
  #
  # Options read from `opts`:
  #
  # * `:adapter` - HTTP adapter, defaulting to `@default_adapter`.
  # * `:user_agent` - user-agent header, defaulting to `Util.default_user_agent/0`.
  # * `:accumulate` - add the cumulative-result middleware.
  # * `:disable_compression` - remove the compression middleware.
  # * `:debug` - add the logger middleware.
  #
  # Flags set to `false` or `nil` are ignored, and unknown options are skipped.
  @spec client(binary(), keyword()) :: Tesla.Client.t()
  defp client(url, opts) do
    adapter = opts[:adapter] || @default_adapter
    user_agent = opts[:user_agent] || Util.default_user_agent()

    default_middleware = [
      {Tesla.Middleware.BaseUrl, url},
      {Tesla.Middleware.Timeout, timeout: @default_timeout},
      Wiki.StatefulClient.CookieJar,
      Tesla.Middleware.FormUrlencoded,
      {Tesla.Middleware.Headers,
       [
         {"user-agent", user_agent}
       ]},
      Tesla.Middleware.FollowRedirects,
      Tesla.Middleware.JSON,
      Tesla.Middleware.Compression
    ]

    opts
    |> Enum.reduce(default_middleware, fn
      {_option, on}, middleware when on in [false, nil] ->
        middleware

      {:accumulate, _on}, middleware ->
        # Responses travel back through the middleware list from last to first,
        # so the accumulator must sit ahead of the JSON and compression codecs
        # in order to receive the decoded body.  Deleting first keeps a repeated
        # option from stacking the middleware twice.
        accumulator = Wiki.StatefulClient.CumulativeResult
        [accumulator | List.delete(middleware, accumulator)]

      {:disable_compression, _on}, middleware ->
        middleware -- [Tesla.Middleware.Compression]

      {:debug, _on}, middleware ->
        List.delete(middleware, Tesla.Middleware.Logger) ++ [Tesla.Middleware.Logger]

      {_option, _value}, middleware ->
        middleware
    end)
    |> Tesla.client(adapter)
  end
end

defmodule Wiki.StatefulClient.CookieJar do
  @moduledoc false

  # Tesla middleware keeping cookies in the env opts under `:cookies`: stored
  # cookies are sent with the request, and cookies set by the response are
  # merged back into the store.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    outgoing = set_cookie_header(env, env.opts[:cookies])

    case Tesla.run(outgoing, next) do
      {:ok, response} -> {:ok, merge_cookies(response, response.opts[:cookies])}
      other -> other
    end
  end

  # Add a "cookie" request header built from the stored cookies, if any are stored.
  @spec set_cookie_header(Tesla.Env.t(), nil | map) :: Tesla.Env.t()
  defp set_cookie_header(env, cookies_opt)
  defp set_cookie_header(env, nil), do: env

  defp set_cookie_header(env, cookies) do
    cookie_header =
      cookies
      |> Enum.map(fn {name, value} -> name <> "=" <> value end)
      |> Enum.join("; ")

    Tesla.put_headers(env, [{"cookie", cookie_header}])
  end

  # Store the previously known cookies overlaid with those from the response;
  # response cookies win on a name clash.
  @spec merge_cookies(Tesla.Env.t(), nil | map) :: Tesla.Env.t()
  defp merge_cookies(env, old_cookies)
  defp merge_cookies(env, nil), do: merge_cookies(env, %{})

  defp merge_cookies(env, old_cookies) do
    fresh_cookies = extract_cookie_headers(env)
    Tesla.put_opt(env, :cookies, Map.merge(old_cookies, fresh_cookies))
  end

  # Parse every "set-cookie" response header into a name => value map.
  @spec extract_cookie_headers(Tesla.Env.t()) :: map
  defp extract_cookie_headers(env) do
    env
    |> Tesla.get_headers("set-cookie")
    |> Enum.map(&SetCookie.parse/1)
    |> Map.new(fn %{key: name, value: value} -> {name, value} end)
  end
end

defmodule Wiki.StatefulClient.CumulativeResult do
  @moduledoc false

  # Tesla middleware which folds each response body into a running result.
  # The running result is kept in the env opts under `:accumulated_result`,
  # and also replaces the response body.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    with {:ok, env} <- Tesla.run(env, next) do
      {:ok, accumulate(env)}
    end
  end

  # Only map bodies can be merged.  Anything else (for example an undecoded or
  # non-JSON body) is passed along untouched, so later validation can report
  # it instead of this middleware crashing.
  @spec accumulate(Tesla.Env.t()) :: Tesla.Env.t()
  defp accumulate(%{body: body} = env) when is_map(body) do
    accumulated = recursive_merge(env.opts[:accumulated_result] || %{}, body)

    env
    |> Tesla.put_opt(:accumulated_result, accumulated)
    |> Tesla.put_body(accumulated)
  end

  defp accumulate(env), do: env

  @spec recursive_merge(map, map) :: map
  defp recursive_merge(%{} = v1, %{} = v2), do: Map.merge(v1, v2, &recursive_merge/3)

  # Conflict resolver for `Map.merge/3`, which passes the key first; the key is
  # not needed to decide how to merge.
  @spec recursive_merge(term, term, term) :: term
  defp recursive_merge(_key, v1, v2)

  # Nested maps are merged recursively.
  defp recursive_merge(_key, %{} = v1, %{} = v2), do: recursive_merge(v1, v2)

  # Lists are concatenated, earlier values first.
  defp recursive_merge(_key, v1, v2) when is_list(v1) and is_list(v2), do: v1 ++ v2

  # Any other pair: the newer value wins.  This covers equal values as well as
  # scalars which legitimately change between responses, such as continuation
  # tokens.
  defp recursive_merge(_key, _v1, v2), do: v2
end