lib/action.ex

defmodule Wiki.Action.Session do
  @moduledoc """
  Struct carrying the private connection state and the results gathered so far.

  ## Fields

  - `__client__` - Tesla client used to perform the requests.
  - `result` - Map with recursively merged values from all requests made using this session.
  - `state` - Cache for session state and accumulation.
  """

  defstruct [:__client__, result: %{}, state: []]

  @type client :: Tesla.Client.t()
  @type state :: keyword
  @type result :: {:ok, t()} | {:error, any}

  @type t :: %__MODULE__{
          __client__: client,
          result: map,
          state: state
        }
end

defmodule Wiki.Action do
  @moduledoc """
  Adapter to the MediaWiki [Action API](https://www.mediawiki.org/wiki/Special:MyLanguage/API:Main_page)

  Most commands return a pipeable Wiki.Action.Session, which can be reused repeatedly.

  Anonymous requests,

  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("dewiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
    action: :query,
    meta: :siteinfo,
    siprop: :statistics
  )
  # %Wiki.Action.Session{
  #   __client__: %Tesla.Client{
  #     adapter: {Tesla.Adapter.Hackney, :call, [[]]},
  #     fun: nil,
  #     post: [],
  #     pre: [
  #       {Tesla.Middleware.BaseUrl, :call, ["https://de.wikipedia.org/w/api.php"]},
  #       {Tesla.Middleware.Compression, :call, [[format: "gzip"]]},
  #       {Wiki.StatefulClient.CookieJar, :call, [[]]},
  #       {Tesla.Middleware.FormUrlencoded, :call, [[]]},
  #       {Tesla.Middleware.Headers, :call,
  #       [[{"user-agent", "mediawiki_client_ex/0.2.6 (spam@ludd.net)"}]]},
  #       {Tesla.Middleware.JSON, :call, [[]]}
  #     ]
  #   },
  #   result: %{
  #     "batchcomplete" => true,
  #     "query" => %{
  #       "statistics" => %{
  #         "activeusers" => 19393,
  #         "admins" => 188,
  #         "articles" => 2583636,
  #         "edits" => 211249646,
  #         "images" => 130213,
  #         "jobs" => 0,
  #         "pages" => 7164514,
  #         "queued-massmessages" => 0,
  #         "users" => 3716049
  #       }
  #     }
  #   },
  #   state: [
  #     cookies: %{
  #       "GeoIP" => "DE:BE:Berlin:52.57:13.42:v4",
  #       "WMF-Last-Access" => "04-Jun-2021",
  #       "WMF-Last-Access-Global" => "04-Jun-2021"
  #     }
  #   ]
  # }
  ```

  Commands can be pipelined while accumulating results, and logged-in user permissions
  delegated by supplying a [bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords).

  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("enwiki")
  |> Wiki.Action.new(
    accumulate: true
  )
  |> Wiki.Action.authenticate!(
    Application.get_env(:example_app, :bot_username),
    Application.get_env(:example_app, :bot_password)
  )
  |> Wiki.Action.get!(
    action: :query,
    meta: :tokens,
    type: :csrf
  )
  |> (&Wiki.Action.post!(&1,
    action: :edit,
    title: "Sandbox",
    assert: :user,
    token: &1.result["query"]["tokens"]["csrftoken"],
    appendtext: "~~~~ was here."
  )).()
  |> Map.get(:result)
  ```

  Streaming results from multiple requests using continuation,

  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("dewiki")
  |> Wiki.Action.new()
  |> Wiki.Action.stream(
    action: :query,
    list: :recentchanges,
    rclimit: 5
  )
  |> Stream.take(10)
  |> Enum.flat_map(fn response -> response["query"]["recentchanges"] end)
  |> Enum.map(fn rc -> rc["timestamp"] <> " " <> rc["title"] end)
  ```

  ## Wikibase

  The [Wikidata](https://www.wikidata.org/) project provides structured data for
  other wiki projects, and can be accessed through the Action API.

  Examples:

  Search for entities called "alphabet",
  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("wikidatawiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
      action: :wbsearchentities,
      search: "alphabet",
      language: :en
  )
  ```

  Search for entities with "Frank Zappa" anywhere in the description or contents,
  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("wikidatawiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
      action: :query,
      list: :search,
      srsearch: "Frank Zappa"
  )
  ```

  Retrieve all data about a specific entity with ID "Q42",
  ```elixir
  Wiki.SiteMatrix.new()
  |> Wiki.SiteMatrix.get!("wikidatawiki")
  |> Wiki.Action.new()
  |> Wiki.Action.get!(
      action: :wbgetentities,
      ids: "Q42"
  )
  ```

  ## Defaults
  A few parameters are automatically added for convenience, but can be
  overridden if desired:

  * The `:format` parameter defaults to `:json`.
  * `:formatversion` defaults to `2`.

  Overriding to get pretty-printed JSON and the older response structure,
  ```elixir
  Wiki.Action.get!(
    session,
    action: :query,
    meta: :siteinfo,
    siprop: :namespaces,
    format: :jsonfm,
    formatversion: 1
  )
  ```
  """

  alias Wiki.{Action.Session, Error, SiteMatrix, Util}

  @default_adapter Tesla.Adapter.Hackney

  @doc """
  Build a fresh client session for a wiki.

  ## Arguments

  - `site` - Either a SiteMatrix.Spec, or the raw `api.php` endpoint of the wiki
    to connect to.  For example, "https://en.wikipedia.org/w/api.php".
  - `opts`
    - `:accumulate` - Merge results from each step of a pipeline, rather than overwriting with the latest response.
    - `:adapter` - Tesla adapter to use instead of the default, `Tesla.Adapter.Hackney`.
    - `:user_agent` - User-agent header value to send instead of the library default.
  """
  @spec new(String.t() | SiteMatrix.Spec.t(), keyword) :: Session.t()
  def new(site, opts \\ [])

  # A site spec is first resolved to its Action API endpoint.
  def new(%SiteMatrix.Spec{} = spec, opts), do: new(SiteMatrix.action_api(spec), opts)

  def new(endpoint, opts), do: %Session{__client__: client(endpoint, opts)}

  @doc """
  Make requests to authenticate a client session.  This should only be done using
  a [bot username and password](https://www.mediawiki.org/wiki/Manual:Bot_passwords),
  which can be created for any normal user account.

  A login token is fetched first, then posted together with the credentials.

  ## Arguments

  - `session` - Base session pointing to a wiki.
  - `username` - Bot username, may be different than the final logged-in username.
  - `password` - Bot password.  Protect this string, it allows others to take on-wiki actions on your behalf.

  ## Return value

  - `{:ok, session}` - Authenticated session object, returned only when the
    response reports the login result `"Success"`.
  - `{:error, error}` - One of the requests failed, or the wiki answered with
    any other login result.
  """
  @spec authenticate(Session.t(), String.t(), String.t()) :: Session.result()
  def authenticate(session, username, password) do
    with {:ok, token_session} <- get(session, action: :query, meta: :tokens, type: :login),
         {:ok, login_session} <-
           post(token_session,
             action: :login,
             lgname: username,
             lgpassword: password,
             lgtoken: token_session.result["query"]["tokens"]["logintoken"]
           ) do
      check_login_result(login_session)
    end
  end

  # Response validation only looks at the HTTP status and the `error` / `errors`
  # keys, so a response whose `login.result` is not "Success" would otherwise be
  # handed back as `{:ok, session}`.
  defp check_login_result(%Session{result: %{"login" => %{"result" => "Success"}}} = session),
    do: {:ok, session}

  defp check_login_result(%Session{result: result}),
    do: {:error, %Error{message: login_failure_message(result)}}

  # Prefer the reason given in the response; never echo the credentials.
  defp login_failure_message(%{"login" => %{"reason" => reason}}) when is_binary(reason),
    do: reason

  defp login_failure_message(%{"login" => %{"result" => status}}) when is_binary(status),
    do: "Login failed with result #{status}"

  defp login_failure_message(_result), do: "Login failed"

  @doc """
  Assertive variant of `authenticate/3`: returns the bare session, and raises
  the error when authentication fails.
  """
  @spec authenticate!(Session.t(), String.t(), String.t()) :: Session.t()
  def authenticate!(session, username, password) do
    session
    |> authenticate(username, password)
    |> case do
      {:error, reason} -> raise reason
      {:ok, authenticated} -> authenticated
    end
  end

  @doc """
  Perform a GET request against the API.

  ## Arguments

  - `session` - `Wiki.Action.Session` object.
  - `params` - Keyword list of query parameters as atoms or strings.
  - `opts` - Extra request options, placed ahead of the generated `:query` option.

  ## Return value

  `{:ok, session}` with the session's `.result` populated, or `{:error, error}`.
  """
  @spec get(Session.t(), keyword, keyword) :: Session.result()
  def get(session, params, opts \\ []) do
    query = normalize_params(params)
    request(session, :get, opts ++ [query: query])
  end

  @doc """
  Assertive variant of `get/3`: returns the bare session, and raises the error
  when the request fails.
  """
  @spec get!(Session.t(), keyword, keyword) :: Session.t()
  def get!(session, params, opts \\ []) do
    session
    |> get(params, opts)
    |> case do
      {:error, reason} -> raise reason
      {:ok, updated} -> updated
    end
  end

  @doc """
  Perform a POST request against the API.

  ## Arguments

  - `session` - `Wiki.Action.Session` object.  If credentials are required for this
  action, you should have created this object with the `authenticate/3` function.
  - `params` - Keyword list of query parameters as atoms or strings.
  - `opts` - Extra request options, placed ahead of the generated `:body` option.

  ## Return value

  `{:ok, session}` with a populated `:result` attribute, or `{:error, error}`.
  """
  @spec post(Session.t(), keyword, keyword) :: Session.result()
  def post(session, params, opts \\ []) do
    body = normalize_params(params)
    request(session, :post, opts ++ [body: body])
  end

  @doc """
  Assertive variant of `post/3`: returns the bare session, and raises the error
  when the request fails.
  """
  @spec post!(Session.t(), keyword, keyword) :: Session.t()
  def post!(session, params, opts \\ []) do
    session
    |> post(params, opts)
    |> case do
      {:error, reason} -> raise reason
      {:ok, updated} -> updated
    end
  end

  @doc """
  Issue a GET request and keep following continuations until there are none
  left, or until the consumer closes the stream.

  ## Arguments

  - `session` - `Wiki.Action.Session` object.
  - `params` - Keyword list of query parameters as atoms or strings.

  ## Return value

  Enumerable `Stream`, where each returned chunk is a raw result map, possibly
  containing multiple records.  This corresponds to `session.result` from the other
  entry points.  Requests are made with `get!/3`, so a failed request raises
  while the stream is being consumed.
  """
  @spec stream(Session.t(), keyword) :: Enumerable.t()
  def stream(session, params) do
    start = fn -> {session, :start} end

    step = fn
      # First request: the parameters are sent unchanged.
      {current, :start} ->
        do_stream_get(current, params)

      # Later requests: the original parameters plus the continuation pairs
      # taken from the previous result.
      {current, :cont} ->
        case get_continuation(current.result) do
          nil -> {:halt, nil}
          continuation -> do_stream_get(current, params ++ continuation)
        end
    end

    cleanup = fn _ -> nil end

    Stream.resource(start, step, cleanup)
  end

  # Runs one GET and shapes the outcome for `Stream.resource/3`: the result map
  # is emitted as a single chunk, and the updated session becomes the next state.
  defp do_stream_get(session, params) do
    %{result: chunk} = fetched = get!(session, params)
    {[chunk], {fetched, :cont}}
  end

  # Extracts the continuation parameters from a result map as a list of
  # `{key, value}` pairs, or returns `nil` when the result carries none.
  #
  # TODO: Test that a cross between a list and query can be continued
  # in both dimensions.
  defp get_continuation(%{"continue" => continue}), do: Map.to_list(continue)

  # "query-continue" nests the pairs one level deeper; the inner maps are flattened.
  defp get_continuation(%{"query-continue" => continue}) do
    for values <- Map.values(continue), pair <- Map.to_list(values), do: pair
  end

  defp get_continuation(_result), do: nil

  # Sends one request through the session's client and wraps the validated
  # response in a new session.  The session state is passed in as the request's
  # `:opts`, and the response's opts become the state of the returned session.
  # Every failure is returned as `{:error, %Error{}}`.
  @spec request(Session.t(), :get | :post, keyword) :: Session.result()
  defp request(session, method, opts) do
    # TODO: This can be extracted into a generic StatefulAdapter now.
    request_opts = [{:opts, session.state} | opts] ++ [method: method]

    with {:ok, response} <- Tesla.request(session.__client__, request_opts),
         {:ok, response} <- validate(response) do
      next_state = Keyword.delete(response.opts, :opts)

      {:ok,
       %Session{
         __client__: session.__client__,
         result: response.body,
         state: next_state
       }}
    else
      # Already one of our own errors: pass it through untouched.
      {:error, %Error{} = error} -> {:error, error}
      # Anything else is wrapped using its inspected form.
      {:error, reason} -> {:error, %Error{message: inspect(reason)}}
    end
  end

  # Builds the final parameter list for a request.
  #
  # Steps, in order:
  #   1. prepend the defaults (`format: :json`, `formatversion: 2`),
  #   2. drop parameters whose value is `false` or `nil`,
  #   3. keep only the last remaining value given for each parameter name, so
  #      caller-supplied values replace the defaults instead of being sent
  #      next to them,
  #   4. join list values into a single string,
  #   5. sort, so the output is deterministic.
  @spec normalize_params([{atom | String.t(), term}]) :: [{atom | String.t(), term}]
  defp normalize_params(params) do
    defaults = [
      format: :json,
      formatversion: 2
    ]

    (defaults ++ params)
    |> remove_boolean_false()
    |> keep_last_value_per_key()
    |> pipe_lists()
    |> Enum.sort()
  end

  # Drops parameters whose value is `false` or `nil`.
  defp remove_boolean_false(params) do
    Enum.reject(params, fn {_key, value} -> value in [false, nil] end)
  end

  # Keeps the last entry for each parameter name.  Names are compared as
  # strings because continuation pairs arrive with string keys while callers
  # normally use atoms.  `Enum.uniq_by/2` keeps the first occurrence, hence the
  # reversal; the resulting order is irrelevant as the list is sorted afterwards.
  defp keep_last_value_per_key(params) do
    params
    |> Enum.reverse()
    |> Enum.uniq_by(fn {key, _value} -> to_string(key) end)
  end

  # Replaces every list value by its joined string form.
  defp pipe_lists(params) do
    Enum.map(params, fn
      {key, values} when is_list(values) -> {key, pipe_list(values)}
      entry -> entry
    end)
  end

  # Joins the values with "|", unless one of them contains a pipe itself.
  defp pipe_list(values) do
    if Enum.any?(values, &String.contains?(to_string(&1), "|")) do
      # Use a special join character because pipe would conflict with the value.
      # The result starts with the separator and uses it between all values.
      unit_separator = "\x1f"
      Enum.join([""] ++ values, unit_separator)
    else
      Enum.join(values, "|")
    end
  end

  # Runs the response checks in order (HTTP status, body type, API errors).
  # Returns `{:ok, response}` when every check yields `nil`; otherwise returns
  # the value of the first check that yields anything else, and skips the rest.
  defp validate(response) do
    checks = [
      fn -> validate_http_status(response.status) end,
      fn -> validate_body_type(response.body) end,
      fn -> validate_api_errors(response.body) end
    ]

    Enum.reduce_while(checks, {:ok, response}, fn check, success ->
      case check.() do
        nil -> {:cont, success}
        failure -> {:halt, failure}
      end
    end)
  end

  # Returns `nil` for a 2xx status and an error tuple for anything else.
  defp validate_http_status(status) when status >= 200 and status < 300, do: nil

  defp validate_http_status(status) do
    {:error, %Error{message: "Error received with HTTP status #{status}"}}
  end

  # Returns `nil` when the body is a non-empty map; anything else (an empty
  # map, or a body that is not a map at all) is reported as an error.
  defp validate_body_type(body) when is_map(body) and map_size(body) > 0, do: nil

  defp validate_body_type(_body), do: {:error, %Error{message: "Empty response"}}

  # Looks for an error report in the response body, under the `"error"` key and
  # then the `"errors"` key.  Returns `nil` when neither is present, otherwise
  # an error tuple with a summary of what was found.
  defp validate_api_errors(body) do
    with nil <- body["error"],
         nil <- body["errors"] do
      nil
    else
      error when is_map(error) ->
        {:error, %Error{message: summarize_legacy_error(error)}}

      errors when is_list(errors) ->
        {:error, %Error{message: summarize_new_error(errors)}}

      # Any other shape is still reported as an error rather than raising a
      # WithClauseError, using the inspected value as the message like the
      # generic fallback in `request/3` does.
      other ->
        {:error, %Error{message: "Unrecognized API error: #{inspect(other)}"}}
    end
  end

  # Picks the message for a single error map: its "info" text when present,
  # else its "code", else a fixed placeholder.
  defp summarize_legacy_error(error) do
    Enum.find_value(["info", "code"], "Unknown error (legacy format)", &error[&1])
  end

  # Summarizes a list of error entries using only its first entry.
  defp summarize_new_error(errors) do
    # TODO: multiple errors
    errors
    |> List.first()
    |> summarize_error_entry()
  end

  # Clause order is the preference order: text, html, message key joined with
  # its parameters, code, and finally a fixed placeholder.
  defp summarize_error_entry(%{"text" => text}), do: text
  defp summarize_error_entry(%{"html" => html}), do: html

  defp summarize_error_entry(%{"key" => key, "params" => params}) do
    Enum.join(List.flatten([key, params]), "-")
  end

  defp summarize_error_entry(%{"code" => code}), do: code
  defp summarize_error_entry(_entry), do: "unknown"

  # Assembles the Tesla client for an endpoint.  `opts[:adapter]` and
  # `opts[:user_agent]` override the defaults; `opts[:accumulate]` puts the
  # result-accumulating middleware in front of the standard stack.
  @spec client(binary(), keyword()) :: Tesla.Client.t()
  defp client(url, opts) do
    adapter = opts[:adapter] || @default_adapter
    user_agent = opts[:user_agent] || Util.default_user_agent()

    accumulation =
      if opts[:accumulate] do
        [Wiki.StatefulClient.CumulativeResult]
      else
        []
      end

    standard = [
      {Tesla.Middleware.BaseUrl, url},
      {Tesla.Middleware.Compression, format: "gzip"},
      Wiki.StatefulClient.CookieJar,
      Tesla.Middleware.FormUrlencoded,
      {Tesla.Middleware.Headers, [{"user-agent", user_agent}]},
      Tesla.Middleware.FollowRedirects,
      Tesla.Middleware.JSON
      # Debugging only:
      # Tesla.Middleware.Logger
    ]

    Tesla.client(accumulation ++ standard, adapter)
  end
end

defmodule Wiki.StatefulClient.CookieJar do
  @moduledoc false

  # Tesla middleware keeping a cookie jar in the request options.
  #
  # Outgoing: the cookies found under the `:cookies` opt are sent in a "cookie"
  # header.  Incoming: cookies from the "set-cookie" response headers are merged
  # over the stored ones and written back to the `:cookies` opt.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    env = Tesla.put_headers(env, cookie_header(env.opts[:cookies]))

    with {:ok, env} <- Tesla.run(env, next) do
      cookies =
        env
        |> Tesla.get_headers("set-cookie")
        |> extract_cookies()
        |> update_cookies(env.opts[:cookies])

      {:ok, Tesla.put_opt(env, :cookies, cookies)}
    end
  end

  # Builds the header list for the outgoing request.  Nothing is sent when no
  # cookies are stored; an empty jar would otherwise produce an empty header
  # value, which happens after any response that set no cookies.
  @spec cookie_header(map | nil) :: [{String.t(), String.t()}]
  defp cookie_header(nil), do: []
  defp cookie_header(cookies) when map_size(cookies) == 0, do: []
  defp cookie_header(cookies), do: [{"cookie", serialize_cookies(cookies)}]

  # Merges freshly received cookies over the stored ones; new values win.
  @spec update_cookies(map, map | nil) :: map
  defp update_cookies(new_cookies, nil), do: new_cookies
  defp update_cookies(new_cookies, old_cookies), do: Map.merge(old_cookies, new_cookies)

  # Parses each "set-cookie" header value and keeps only its key and value.
  @spec extract_cookies([String.t()]) :: map
  defp extract_cookies(headers) do
    Map.new(headers, fn header ->
      %{key: key, value: value} = SetCookie.parse(header)
      {key, value}
    end)
  end

  # Renders the jar as "key=value" pairs separated by "; ".
  @spec serialize_cookies(map) :: String.t()
  defp serialize_cookies(cookies) do
    Enum.map_join(cookies, "; ", fn {key, value} -> key <> "=" <> value end)
  end
end

defmodule Wiki.StatefulClient.CumulativeResult do
  @moduledoc false

  # Tesla middleware that merges every response body into the result
  # accumulated so far.  The running total is kept under the
  # `:accumulated_result` opt and also replaces the response body.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    with {:ok, env} <- Tesla.run(env, next) do
      {:ok, accumulate(env)}
    end
  end

  # Only map bodies can be merged.  Any other body is passed through untouched
  # instead of raising here, so the caller can handle it.
  defp accumulate(%{body: body} = env) when is_map(body) do
    accumulated = recursive_merge(env.opts[:accumulated_result] || %{}, body)

    env
    |> Tesla.put_opt(:accumulated_result, accumulated)
    |> Tesla.put_body(accumulated)
  end

  defp accumulate(env), do: env

  # Deep-merges two maps; values present under the same key on both sides are
  # combined by `merge_values/2`.
  @spec recursive_merge(map, map) :: map
  defp recursive_merge(%{} = left, %{} = right) do
    Map.merge(left, right, fn _key, old, new -> merge_values(old, new) end)
  end

  # Nested maps are merged recursively and lists are concatenated.
  @spec merge_values(term, term) :: term
  defp merge_values(%{} = old, %{} = new), do: recursive_merge(old, new)
  defp merge_values(old, new) when is_list(old) and is_list(new), do: old ++ new
  defp merge_values(old, new) when old == new, do: old

  # Conflicting values that cannot be combined: the latest response wins,
  # instead of raising a FunctionClauseError.
  defp merge_values(_old, new), do: new
end