defmodule Wiki.Action.Session do
  @moduledoc """
  Struct carrying the private HTTP client together with the state and results
  gathered while making requests.

  ## Fields

  - `result` - Map with recursively merged values from all requests made using this session.
  - `state` - Cache for session state and accumulation.
  """

  defstruct [__client__: nil, result: %{}, state: []]

  @type client :: Tesla.Client.t()
  @type state :: keyword
  @type t :: %__MODULE__{__client__: client, result: map, state: keyword}
  @type result :: {:ok, t()} | {:error, any}
end
defmodule Wiki.Action do
@moduledoc """
Adapter to the MediaWiki [Action API](https://www.mediawiki.org/wiki/Special:MyLanguage/API:Main_page)
Most commands return a pipeable Wiki.Action.Session, which can be reused repeatedly.
Anonymous requests,
```elixir
Wiki.SiteMatrix.new()
|> Wiki.SiteMatrix.get!("dewiki")
|> Wiki.Action.new()
|> Wiki.Action.get!(
action: :query,
meta: :siteinfo,
siprop: :statistics
)
# %Wiki.Action.Session{
# ...
# result: %{
# "batchcomplete" => true,
# "query" => %{
# "statistics" => %{
# "activeusers" => 19393,
# "admins" => 188,
# "articles" => 2583636,
# "edits" => 211249646,
# "images" => 130213,
# "jobs" => 0,
# "pages" => 7164514,
# "queued-massmessages" => 0,
# "users" => 3716049
# }
# }
# },
# ...
# }
```
Commands can be pipelined while accumulating results, and logged-in user permissions
delegated by supplying a [bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords).
```elixir
Wiki.SiteMatrix.new()
|> Wiki.SiteMatrix.get!("enwiki")
|> Wiki.Action.new(
accumulate: true
)
|> Wiki.Action.authenticate!(
Application.get_env(:example_app, :bot_username),
Application.get_env(:example_app, :bot_password)
)
|> Wiki.Action.get!(
action: :query,
meta: :tokens,
type: :csrf
)
|> (&Wiki.Action.post!(&1,
action: :edit,
title: "Sandbox",
assert: :user,
token: &1.result["query"]["tokens"]["csrftoken"],
appendtext: "~~~~ was here."
)).()
|> Map.get(:result)
```
Streaming results from multiple requests using continuation,
```elixir
Wiki.SiteMatrix.new()
|> Wiki.SiteMatrix.get!("dewiki")
|> Wiki.Action.new()
|> Wiki.Action.stream(
action: :query,
list: :recentchanges,
rclimit: 5
)
|> Stream.take(10)
|> Enum.flat_map(fn response -> response["query"]["recentchanges"] end)
|> Enum.map(fn rc -> rc["timestamp"] <> " " <> rc["title"] end)
```
## Wikibase
The [Wikidata](https://www.wikidata.org/) project provides structured data for
other wiki projects, and can be accessed through the Action API.
Examples:
Search for entities called "alphabet",
```elixir
Wiki.SiteMatrix.new()
|> Wiki.SiteMatrix.get!("wikidatawiki")
|> Wiki.Action.new()
|> Wiki.Action.get!(
action: :wbsearchentities,
search: "alphabet",
language: :en
)
```
Search for entities with "Frank Zappa" anywhere in the description or contents,
```elixir
Wiki.SiteMatrix.new()
|> Wiki.SiteMatrix.get!("wikidatawiki")
|> Wiki.Action.new()
|> Wiki.Action.get!(
action: :query,
list: :search,
srsearch: "Frank Zappa"
)
```
Retrieve all data about a specific entity with ID "Q42",
```elixir
Wiki.SiteMatrix.new()
|> Wiki.SiteMatrix.get!("wikidatawiki")
|> Wiki.Action.new()
|> Wiki.Action.get!(
action: :wbgetentities,
ids: "Q42"
)
```
## Defaults
A few parameters are automatically added for convenience, but can be
overridden if desired:
* The `:format` parameter defaults to `:json`.
* `:formatversion` defaults to `2`.
Overriding to get pretty-printed JSON and the older response structure,
```elixir
Wiki.Action.get!(
session,
action: :query,
meta: :siteinfo,
siprop: :namespaces,
format: :jsonfm,
formatversion: 1
)
```
"""
alias Wiki.{Action.Session, Error, SiteMatrix, Util}
@typedoc """
A single option accepted by `new/2`.

- `:accumulate` - Merge results from each step of a pipeline, rather than
  overwriting with the latest response.
- `:adapter` - Override the HTTP adapter, either as a module or as a
  `{module, adapter_options}` tuple.
- `:debug` - Turn on verbose logging by setting to `true`
- `:disable_compression` - Disable the transparent gzip codec, rather than
  automatically detecting based on HTTP headers.
- `:user_agent` - Override the user-agent header string

Flag options only take effect when set to a value other than `false`;
passing `false` keeps the default behavior.
"""
@type client_option ::
        {:accumulate, boolean()}
        | {:adapter, module() | {module(), keyword()}}
        | {:debug, boolean()}
        | {:disable_compression, boolean()}
        | {:user_agent, binary()}
@typedoc """
Keyword list of `t:client_option/0` entries used to configure a client.
"""
@type client_options :: [client_option()]
# Timeout shared by the adapter's `recv_timeout` and the Tesla timeout
# middleware. Presumably milliseconds (i.e. 60 seconds) -- confirm against the
# Tesla and Hackney documentation.
@default_timeout 60_000
# HTTP adapter used when the caller does not supply an `:adapter` option.
@default_adapter {Tesla.Adapter.Hackney, recv_timeout: @default_timeout}
@doc """
Build a fresh client session for a wiki.

## Arguments

- `site` - Either a `SiteMatrix.Spec` or the raw `api.php` endpoint of the wiki
  to talk to, for example "https://en.wikipedia.org/w/api.php".
- `opts` - Configuration options which adjust client behavior.

## Return value

A `Wiki.Action.Session` holding the configured HTTP client, with an empty
result and empty state.
"""
@spec new(String.t() | SiteMatrix.Spec.t(), client_options()) :: Session.t()
def new(site, opts \\ [])

# A site spec is first resolved to its Action API endpoint.
def new(%SiteMatrix.Spec{} = spec, opts), do: new(SiteMatrix.action_api(spec), opts)

def new(endpoint, opts), do: %Session{__client__: client(endpoint, opts)}
@doc """
Log a client session in. Only use this with a
[bot username and password](https://www.mediawiki.org/wiki/Manual:Bot_passwords),
which can be created for any normal user account.

Two requests are made: a GET to fetch a login token, then a POST with
`action=login` carrying the credentials and that token.

## Arguments

- `session` - Base session pointing to a wiki.
- `username` - Bot username, may be different than the final logged-in username.
- `password` - Bot password. Protect this string, it allows others to take on-wiki actions on your behalf.

## Return value

`{:ok, session}` with the session produced by the login request, or the
`{:error, reason}` from whichever request failed.
"""
@spec authenticate(Session.t(), String.t(), String.t()) :: Session.result()
def authenticate(session, username, password) do
  token_response = get(session, action: :query, meta: :tokens, type: :login)

  case token_response do
    {:ok, token_session} ->
      post(token_session,
        action: :login,
        lgname: username,
        lgpassword: password,
        lgtoken: token_session.result["query"]["tokens"]["logintoken"]
      )

    # Anything other than a successful token fetch is handed back untouched.
    failure ->
      failure
  end
end
@doc """
Raising variant of `authenticate/3`: returns the bare session on success and
raises the error otherwise.
"""
@spec authenticate!(Session.t(), String.t(), String.t()) :: Session.t()
def authenticate!(session, username, password) do
  outcome = authenticate(session, username, password)

  case outcome do
    {:error, error} -> raise error
    {:ok, authenticated} -> authenticated
  end
end
@doc """
Issue a GET request against the API.

## Arguments

- `session` - `Wiki.Action.Session` object.
- `params` - Keyword list of query parameters as atoms or strings.

## Return value

`{:ok, session}` where the session has its `.result` populated, or
`{:error, reason}`.
"""
@spec get(Session.t(), keyword) :: Session.result()
def get(session, params) do
  query = normalize_params(params)
  request(session, :get, query: query)
end
@doc """
Raising variant of `get/2`: returns the bare session on success and raises
the error otherwise.
"""
@spec get!(Session.t(), keyword) :: Session.t()
def get!(session, params) do
  outcome = get(session, params)

  case outcome do
    {:error, error} -> raise error
    {:ok, updated} -> updated
  end
end
@doc """
Issue a POST request against the API. The parameters are sent as the request
body.

## Arguments

- `session` - `Wiki.Action.Session` object. If credentials are required for this
  action, you should have created this object with the `authenticate/3` function.
- `params` - Keyword list of query parameters as atoms or strings.

## Return value

`{:ok, session}` where the session has a populated `:result` attribute, or
`{:error, reason}`.
"""
@spec post(Session.t(), keyword) :: Session.result()
def post(session, params) do
  body = normalize_params(params)
  request(session, :post, body: body)
end
@doc """
Raising variant of `post/2`: returns the bare session on success and raises
the error otherwise.
"""
@spec post!(Session.t(), keyword) :: Session.t()
def post!(session, params) do
  outcome = post(session, params)

  case outcome do
    {:error, error} -> raise error
    {:ok, updated} -> updated
  end
end
@doc """
Lazily issue GET requests, following continuations until there are none left
or the consumer stops reading.

## Arguments

- `session` - `Wiki.Action.Session` object.
- `params` - Keyword list of query parameters as atoms or strings.

## Return value

Enumerable `Stream`, where each returned chunk is a raw result map, possibly
containing multiple records. This corresponds to `session.result` from the other
entry points. Each request is made with the raising `get!/2`, so a failed
request raises while the stream is being consumed.
"""
@spec stream(Session.t(), keyword) :: Enumerable.t()
def stream(session, params) do
  start = fn -> {session, :start} end

  step = fn
    # First pull: issue the request exactly as given.
    {current, :start} ->
      do_stream_get(current, params)

    # Later pulls: re-issue the original request plus the continuation
    # parameters from the previous response, or stop when there are none.
    {current, :cont} ->
      case get_continuation(current.result) do
        nil -> {:halt, nil}
        continuation -> do_stream_get(current, params ++ continuation)
      end
  end

  # Nothing to release when the stream ends.
  cleanup = fn _ -> nil end

  Stream.resource(start, step, cleanup)
end
# Performs one streaming step: makes the request (raising on failure) and
# returns the `Stream.resource/3` tuple that emits this response's result and
# marks the next step as a continuation.
defp do_stream_get(session, params) do
  fetched = get!(session, params)
  emitted = [fetched.result]
  {emitted, {fetched, :cont}}
end
# Extracts continuation parameters from a response as a list of
# `{name, value}` pairs, or returns `nil` when the response has none.
#
# TODO: Test that a cross between a list and query can be continued
# in both dimensions.
defp get_continuation(%{"continue" => continue}), do: Map.to_list(continue)

# The "query-continue" form nests the parameters one level deeper, under a
# map whose values are themselves maps; flatten them into one list.
defp get_continuation(%{"query-continue" => continue}) do
  nested = Map.values(continue)
  Enum.flat_map(nested, &Map.to_list/1)
end

defp get_continuation(_result), do: nil
# Sends one request through the session's client and wraps the outcome.
#
# The session state is passed to Tesla as the request's `:opts`, and the
# options found on the response become the state of the returned session.
# Any error that is not already a `Wiki.Error` is wrapped in one, using its
# inspected form as the message.
@spec request(Session.t(), :get | :post, keyword) :: Session.result()
defp request(session, method, params) do
  # TODO: This can be extracted into a generic StatefulAdapter now.
  request_opts = [opts: session.state] ++ params ++ [method: method]

  with {:ok, response} <- Tesla.request(session.__client__, request_opts),
       {:ok, response} <- validate(response) do
    next_state = Keyword.delete(response.opts, :opts)

    updated = %Session{
      __client__: session.__client__,
      result: response.body,
      state: next_state
    }

    {:ok, updated}
  else
    {:error, %Error{} = error} -> {:error, error}
    {:error, other} -> {:error, %Error{message: inspect(other)}}
  end
end
# Builds the final parameter list sent to the API.
#
# Steps:
#   1. Prepend the defaults (`format: :json`, `formatversion: 2`).
#   2. Drop parameters whose value is `false` or `nil`.
#   3. When a key appears more than once, keep only the LAST occurrence, so
#      that caller-supplied values override the defaults and continuation
#      parameters override the originals. Keys are compared by their string
#      form, so `:format` and "format" count as the same parameter.
#   4. Join list values into a single multi-value string.
#   5. Sort, so the output order is deterministic.
#
# Previously duplicates were only removed when key AND value were identical,
# so an override such as `formatversion: 1` was sent alongside the default
# `formatversion: 2` instead of replacing it.
@spec normalize_params(keyword) :: keyword
defp normalize_params(params) do
  defaults = [
    format: :json,
    formatversion: 2
  ]

  (defaults ++ params)
  |> remove_boolean_false()
  |> keep_last_per_key()
  |> pipe_lists()
  |> Enum.sort()
end

# Keeps only the last entry for each key. `Enum.uniq_by/2` keeps the first
# occurrence, so it is applied to the reversed list.
defp keep_last_per_key(params) do
  params
  |> Enum.reverse()
  |> Enum.uniq_by(fn {key, _value} -> to_string(key) end)
end

# Drops parameters whose value is `false` or `nil`.
defp remove_boolean_false(params) do
  Enum.reject(params, fn {_key, value} -> value in [false, nil] end)
end

# Replaces every list value with its joined multi-value string.
defp pipe_lists(params) do
  Enum.map(params, fn
    {key, values} when is_list(values) -> {key, pipe_list(values)}
    entry -> entry
  end)
end

# Joins multiple values with "|". If any value itself contains a pipe, the
# unit separator character is used instead, with a leading separator.
defp pipe_list(values) do
  if Enum.any?(values, fn value -> String.contains?(to_string(value), "|") end) do
    # Use a special join character because pipe would conflict with the value.
    unit_separator = "\x1f"
    Enum.join([""] ++ values, unit_separator)
  else
    Enum.join(values, "|")
  end
end
# Runs the response checks in order (HTTP status, body type, API-level
# errors). Each check returns `nil` when it passes; the first failure is
# returned as-is, otherwise the response is wrapped in `{:ok, result}`.
defp validate(result) do
  validate_http_status(result.status) ||
    validate_body_type(result.body) ||
    validate_api_errors(result.body) ||
    {:ok, result}
end
# Passes (returns `nil`) for 2xx statuses; any other status becomes an error.
defp validate_http_status(status) when status >= 200 and status < 300, do: nil

defp validate_http_status(status) do
  {:error, %Error{message: "Error received with HTTP status #{status}"}}
end
# Passes (returns `nil`) only for a non-empty map; anything else, including
# an empty map or a non-map body, is reported as an empty response.
defp validate_body_type(body) when is_map(body) and body != %{}, do: nil

defp validate_body_type(_body), do: {:error, %Error{message: "Empty response"}}
# Looks for API-level errors in a decoded response body.
#
# Returns `nil` when neither "error" nor "errors" is present. A map is
# summarized as a legacy-format error and a list as a new-format error list.
# Any other value (for example a bare string) is still reported as an error
# rather than raising `WithClauseError`, so callers always receive an
# `{:error, %Error{}}` tuple for a failed request.
defp validate_api_errors(body) do
  with nil <- body["error"],
       nil <- body["errors"] do
    nil
  else
    error when is_map(error) ->
      {:error, %Error{message: summarize_legacy_error(error)}}

    errors when is_list(errors) ->
      {:error, %Error{message: summarize_new_error(errors)}}

    other ->
      {:error, %Error{message: "Unrecognized error format: #{inspect(other)}"}}
  end
end
# Picks a message for a legacy-format error map: its "info" value if present,
# else its "code" value, else a fixed placeholder.
defp summarize_legacy_error(error) do
  cond do
    info = error["info"] -> info
    code = error["code"] -> code
    true -> "Unknown error (legacy format)"
  end
end
# Picks a message for a new-format error list. Only the first entry is used.
# TODO: multiple errors
defp summarize_new_error(errors) do
  errors
  |> List.first()
  |> summarize_error_entry()
end

# Tried in order: plain text, HTML, message key joined with its parameters,
# error code. Anything else (including no entry at all) yields "unknown".
defp summarize_error_entry(%{"text" => text}), do: text
defp summarize_error_entry(%{"html" => html}), do: html

defp summarize_error_entry(%{"key" => key, "params" => params}) do
  Enum.join(List.flatten([key, params]), "-")
end

defp summarize_error_entry(%{"code" => code}), do: code
defp summarize_error_entry(_entry), do: "unknown"
# Builds the Tesla client for an endpoint.
#
# `opts[:adapter]` and `opts[:user_agent]` replace the defaults when given.
# The remaining options adjust the middleware stack, see
# `apply_client_option/2`.
#
# Middleware order matters: Tesla calls the list front to back on the way
# out, so entries later in the list are closer to the adapter and see the
# response first.
@spec client(binary(), keyword()) :: Tesla.Client.t()
defp client(url, opts) do
  adapter = opts[:adapter] || @default_adapter
  user_agent = opts[:user_agent] || Util.default_user_agent()

  default_middleware = [
    {Tesla.Middleware.BaseUrl, url},
    {Tesla.Middleware.Timeout, timeout: @default_timeout},
    Wiki.StatefulClient.CookieJar,
    Tesla.Middleware.FormUrlencoded,
    {Tesla.Middleware.Headers,
     [
       {"user-agent", user_agent}
     ]},
    Tesla.Middleware.FollowRedirects,
    Tesla.Middleware.JSON,
    Tesla.Middleware.Compression
  ]

  opts
  |> Enum.reduce(default_middleware, &apply_client_option/2)
  |> Tesla.client(adapter)
end

# Applies a single client option to the middleware stack.
# An option set to `false` never changes the stack.
defp apply_client_option({_option, false}, middleware), do: middleware

# The accumulator merges decoded response maps, so it has to sit outside the
# JSON and Compression middleware. Appending it to the end of the list would
# place it next to the adapter, where the body is still the raw payload.
defp apply_client_option({:accumulate, _on}, middleware),
  do: [Wiki.StatefulClient.CumulativeResult | middleware]

defp apply_client_option({:disable_compression, _on}, middleware),
  do: middleware -- [Tesla.Middleware.Compression]

# The logger is appended, i.e. placed closest to the adapter.
defp apply_client_option({:debug, _on}, middleware),
  do: middleware ++ [Tesla.Middleware.Logger]

# Options handled elsewhere (`:adapter`, `:user_agent`) or unknown ones.
defp apply_client_option({_option, _on}, middleware), do: middleware
end
defmodule Wiki.StatefulClient.CookieJar do
  @moduledoc false

  # Tesla middleware that carries cookies between requests.
  #
  # Cookies travel in the request's `:cookies` opt as a map of name => value.
  # Before the request they are serialized into a "cookie" header; after the
  # response, any "set-cookie" headers are merged on top of the existing
  # cookies and stored back into the `:cookies` opt of the returned env.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    env = set_cookie_header(env, env.opts[:cookies])

    with {:ok, env} <- Tesla.run(env, next) do
      {:ok, merge_cookies(env, env.opts[:cookies])}
    end
  end

  # Adds the "cookie" request header. Nothing is added when there are no
  # cookies: an empty map would otherwise produce a header with an empty
  # value.
  @spec set_cookie_header(Tesla.Env.t(), nil | map) :: Tesla.Env.t()
  defp set_cookie_header(env, cookies_opt)

  defp set_cookie_header(env, nil), do: env

  defp set_cookie_header(env, cookies) when map_size(cookies) == 0, do: env

  defp set_cookie_header(env, cookies) do
    serialized = Enum.map_join(cookies, "; ", fn {key, value} -> key <> "=" <> value end)
    Tesla.put_headers(env, [{"cookie", serialized}])
  end

  # Stores the previous cookies, overlaid with those set by the response,
  # in the env's `:cookies` opt.
  @spec merge_cookies(Tesla.Env.t(), nil | map) :: Tesla.Env.t()
  defp merge_cookies(env, old_cookies)

  defp merge_cookies(env, nil), do: merge_cookies(env, %{})

  defp merge_cookies(env, old_cookies) do
    merged_cookies = Map.merge(old_cookies, extract_cookie_headers(env))
    Tesla.put_opt(env, :cookies, merged_cookies)
  end

  # Parses every "set-cookie" response header into a name => value map.
  @spec extract_cookie_headers(Tesla.Env.t()) :: map
  defp extract_cookie_headers(env) do
    env
    |> Tesla.get_headers("set-cookie")
    |> Enum.map(&SetCookie.parse/1)
    |> Enum.into(%{}, fn %{key: k, value: v} -> {k, v} end)
  end
end
defmodule Wiki.StatefulClient.CumulativeResult do
  @moduledoc false

  # Tesla middleware that merges each response body into a running result.
  #
  # The running result travels in the `:accumulated_result` opt. After every
  # response the body is merged into it, and both the opt and the response
  # body are replaced with the merged map.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    with {:ok, env} <- Tesla.run(env, next) do
      {:ok, accumulate(env)}
    end
  end

  # Only map bodies can be merged. Any other body (for example an undecoded
  # or non-JSON payload) is passed through untouched, so that later
  # validation can report the real problem instead of this middleware
  # crashing.
  defp accumulate(%{body: body} = env) when is_map(body) do
    accumulated = recursive_merge(env.opts[:accumulated_result] || %{}, body)

    env
    |> Tesla.put_opt(:accumulated_result, accumulated)
    |> Tesla.put_body(accumulated)
  end

  defp accumulate(env), do: env

  # Deep-merges two maps; conflicting keys are resolved by recursive_merge/3.
  @spec recursive_merge(map, map) :: map
  defp recursive_merge(%{} = v1, %{} = v2), do: Map.merge(v1, v2, &recursive_merge/3)

  # Conflict resolution for a single key: maps are merged recursively, lists
  # are concatenated, and for any other pair the newer value wins. Without
  # the final clause, two responses that disagree on a scalar value raised
  # FunctionClauseError.
  # TODO: _key can be dropped
  @spec recursive_merge(term, term, term) :: term
  defp recursive_merge(_key, v1, v2)
  defp recursive_merge(_key, %{} = v1, %{} = v2), do: recursive_merge(v1, v2)
  defp recursive_merge(_key, v1, v2) when is_list(v1) and is_list(v2), do: v1 ++ v2
  defp recursive_merge(_key, v1, v2) when v1 == v2, do: v1
  defp recursive_merge(_key, _v1, v2), do: v2
end