defmodule Wiki.Action.Session do
  @moduledoc """
  Struct carrying private connection state and accumulated results between
  `Wiki.Action` calls.

  ## Fields

  - `__client__` - The `Tesla.Client` used to make requests. Defaults to `nil`.
  - `result` - Map with recursively merged values from all requests made using this session.
  - `state` - Cache for session state and accumulation.
  """

  @type client :: Tesla.Client.t()
  @type result :: map
  @type state :: keyword

  @type t :: %__MODULE__{
          __client__: client,
          result: result,
          state: state
        }

  defstruct [
    {:__client__, nil},
    {:result, %{}},
    {:state, []}
  ]
end
defmodule Wiki.Action do
@moduledoc """
Adapter to the MediaWiki [Action API](https://www.mediawiki.org/wiki/Special:MyLanguage/API:Main_page).
Most commands return a pipeable Wiki.Action.Session, which can be reused repeatedly.
Anonymous requests,
```elixir
"dewiki"
|> Wiki.Site.get()
|> Wiki.Action.new()
|> Wiki.Action.get(
action: :query,
meta: :siteinfo,
siprop: :statistics
)
# %Wiki.Action.Session{
# __client__: %Tesla.Client{
# adapter: {Tesla.Adapter.Hackney, :call, [[]]},
# fun: nil,
# post: [],
# pre: [
# {Tesla.Middleware.BaseUrl, :call, ["https://de.wikipedia.org/w/api.php"]},
# {Tesla.Middleware.Compression, :call, [[format: "gzip"]]},
# {Wiki.StatefulClient.CookieJar, :call, [[]]},
# {Tesla.Middleware.FormUrlencoded, :call, [[]]},
# {Tesla.Middleware.Headers, :call,
# [[{"user-agent", "wiki_elixir/0.2.2 (spam@ludd.net)"}]]},
# {Tesla.Middleware.JSON, :call, [[]]}
# ]
# },
# result: %{
# "batchcomplete" => true,
# "query" => %{
# "statistics" => %{
# "activeusers" => 19393,
# "admins" => 188,
# "articles" => 2583636,
# "edits" => 211249646,
# "images" => 130213,
# "jobs" => 0,
# "pages" => 7164514,
# "queued-massmessages" => 0,
# "users" => 3716049
# }
# }
# },
# state: [
# cookies: %{
# "GeoIP" => "DE:BE:Berlin:52.57:13.42:v4",
# "WMF-Last-Access" => "04-Jun-2021",
# "WMF-Last-Access-Global" => "04-Jun-2021"
# }
# ]
# }
```
Commands can be pipelined while accumulating results, and logged-in user permissions
delegated by supplying a [bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords).
```elixir
"enwiki"
|> Wiki.Site.get()
|> Wiki.Action.new(
accumulate: true
)
|> Wiki.Action.authenticate(
Application.get_env(:example_app, :bot_username),
Application.get_env(:example_app, :bot_password)
)
|> Wiki.Action.get(
action: :query,
meta: :tokens,
type: :csrf
)
|> (&Wiki.Action.post(&1,
action: :edit,
title: "Sandbox",
assert: :user,
token: &1.result["query"]["tokens"]["csrftoken"],
appendtext: "~~~~ was here."
)).()
|> Map.get(:result)
|> IO.inspect()
```
Streaming results from multiple requests using continuation,
```elixir
"https://de.wikipedia.org/w/api.php"
|> Wiki.Action.new()
|> Wiki.Action.stream(
action: :query,
list: :recentchanges,
rclimit: 5
)
|> Stream.take(10)
|> Enum.flat_map(fn response -> response["query"]["recentchanges"] end)
|> Enum.map(fn rc -> rc["timestamp"] <> " " <> rc["title"] end)
|> IO.inspect()
```
"""
alias Wiki.Action.Session
alias Wiki.Site
alias Wiki.Util
@doc """
Create a new client session.

## Arguments

- `site` - `Wiki.Site.Spec` struct, or the raw `api.php` endpoint for the wiki
  you will connect to. For example, "https://en.wikipedia.org/w/api.php".
- `opts`
  - `:accumulate` - Merge results from each step of a pipeline, rather than
    overwriting with the latest response.

## Return value

A `Wiki.Action.Session` holding the configured client, with empty `result` and `state`.
"""
@spec new(String.t() | Site.Spec.t(), keyword) :: Session.t()
def new(site, opts \\ [])

# A site spec is resolved to its Action API endpoint, then handled as a plain URL.
def new(%Site.Spec{} = site, opts), do: new(Site.action_api(site), opts)

def new(url, opts) do
  # TODO: This belongs in client/1, maybe pass options through?
  # The accumulating middleware, when requested, goes ahead of the base URL.
  accumulator = if opts[:accumulate], do: [Wiki.StatefulClient.CumulativeResult], else: []
  base_url = {Tesla.Middleware.BaseUrl, url}

  %Session{__client__: client(accumulator ++ [base_url])}
end
@doc """
Make requests to authenticate a client session. This should only be done using
a [bot username and password](https://www.mediawiki.org/wiki/Manual:Bot_passwords),
which can be created for any normal user account.

A login token is fetched first, then posted together with the credentials to
`action=login`.

## Arguments

- `session` - Base session pointing to a wiki.
- `username` - Bot username, may be different than the final logged-in username.
- `password` - Bot password. Protect this string, it allows others to take on-wiki actions on your behalf.

## Return value

Authenticated session object.

## Errors

Raises a `RuntimeError` when either request fails, or when the login response
does not report `"Success"`.
"""
@spec authenticate(Session.t(), String.t(), String.t()) :: Session.t()
def authenticate(session, username, password) do
  with_token = get(session, action: :query, meta: :tokens, type: :login)

  logged_in =
    post(with_token,
      action: :login,
      lgname: username,
      lgpassword: password,
      lgtoken: with_token.result["query"]["tokens"]["logintoken"]
    )

  # A rejected login comes back as a normal response without a top-level
  # "error" key, so the generic response check does not catch it. Inspect the
  # login result explicitly rather than returning an unauthenticated session.
  case logged_in.result do
    %{"login" => %{"result" => "Success"}} ->
      logged_in

    %{"login" => %{"reason" => reason}} when is_binary(reason) ->
      raise "Login failed: #{reason}"

    %{"login" => %{"result" => result}} ->
      raise "Login failed with result #{inspect(result)}"

    _ ->
      raise "Login failed: unexpected response"
  end
end
@doc """
Make an API GET request.

## Arguments

- `session` - `Wiki.Action.Session` object.
- `params` - Keyword list of query parameters as atoms or strings. They are
  normalized and sent as the request's `:query`.
- `opts` - Extra request options, placed ahead of the `:query` option.

## Return value

Session object with its `.result` populated.
"""
@spec get(Session.t(), keyword, keyword) :: Session.t()
def get(session, params, opts \\ []) do
  query = normalize_params(params)
  request!(session, :get, opts ++ [query: query])
end
@doc """
Make an API POST request.

## Arguments

- `session` - `Wiki.Action.Session` object. If credentials are required for this
  action, you should have created this object with the `authenticate/3` function.
- `params` - Keyword list of parameters as atoms or strings. They are
  normalized and sent as the request's `:body`.
- `opts` - Extra request options, placed ahead of the `:body` option.

## Return value

Session object with a populated `:result` attribute.
"""
@spec post(Session.t(), keyword, keyword) :: Session.t()
def post(session, params, opts \\ []) do
  body = normalize_params(params)
  request!(session, :post, opts ++ [body: body])
end
@doc """
Make a GET request and follow continuations until exhausted or the stream is closed.

## Arguments

- `session` - `Wiki.Action.Session` object.
- `params` - Keyword list of query parameters as atoms or strings.

## Return value

Enumerable `Stream`, where each returned chunk is a raw result map, possibly
containing multiple records. This corresponds to `session.result` from the other
entry points.
"""
@spec stream(Session.t(), keyword) :: Enumerable.t()
def stream(session, params) do
  start_fun = fn -> {session, :start} end

  next_fun = fn
    # First request: use the caller's parameters as given.
    {current, :start} ->
      do_stream_get(current, params)

    # The previous response carried a "continue" map: repeat the request with
    # those continuation parameters appended.
    {%{result: %{"continue" => continue}} = current, :cont} ->
      do_stream_get(current, params ++ Map.to_list(continue))

    # No continuation left, so the stream is exhausted.
    {_current, :cont} ->
      {:halt, nil}
  end

  # Nothing to clean up when the stream ends.
  after_fun = fn _ -> nil end

  Stream.resource(start_fun, next_fun, after_fun)
end
# Perform one GET for `stream/2`, emitting the response's result as a single
# chunk and carrying the updated session forward in the `:cont` state.
defp do_stream_get(session, params) do
  %{result: chunk} = updated = get(session, params)
  {[chunk], {updated, :cont}}
end
# Run a single request through the session's client and build the next session.
#
# The session state is handed to the middleware stack as the request's `:opts`.
# Whatever the stack leaves in the response's opts (minus any `:opts` key)
# becomes the new state. Raises via `assert_success/1` on a failed response.
@spec request!(Session.t(), :get | :post, keyword) :: Session.t()
defp request!(session, method, opts) do
  # TODO: This can be extracted into a generic StatefulAdapter now.
  request_opts = [{:opts, session.state} | opts] ++ [method: method]

  response = Tesla.request!(session.__client__, request_opts)
  assert_success(response)

  next_state = Keyword.delete(response.opts, :opts)

  %Session{
    __client__: session.__client__,
    result: response.body,
    state: next_state
  }
end
# Prepare caller-supplied parameters for the wire.
#
# - drops parameters whose value is `false` or `nil`,
# - adds `format: :json` and `formatversion: 2` unless the caller supplied a
#   value for that parameter (as an atom or a string key),
# - joins list values into a single string,
# - sorts the parameters and removes exact duplicates.
@spec normalize_params(keyword) :: keyword
defp normalize_params(params) do
  params = remove_boolean_false(params)

  # Keys may be atoms or strings, so compare them by their string form.
  supplied = Enum.map(params, fn {key, _value} -> to_string(key) end)

  # A default must give way to an explicit value. Sending both would produce
  # a duplicated parameter whose effective value depends on sort order, e.g.
  # `formatversion: 1` would be followed by the default `formatversion: 2`.
  defaults =
    Enum.reject([format: :json, formatversion: 2], fn {key, _value} ->
      to_string(key) in supplied
    end)

  (defaults ++ params)
  |> pipe_lists()
  |> Enum.sort()
  |> Enum.dedup()
end
# Drop every parameter whose value is `false` or `nil`.
defp remove_boolean_false(params) do
  Enum.reject(params, fn {_key, value} -> value in [false, nil] end)
end
# Replace every list-valued parameter with its joined string form; all other
# entries pass through untouched.
defp pipe_lists(params) do
  for entry <- params do
    case entry do
      {key, values} when is_list(values) -> {key, pipe_list(values)}
      other -> other
    end
  end
end
# Join multiple values into one parameter string.
#
# Values are normally separated by "|". If any value itself contains a pipe,
# the unit separator character is used instead, both as a prefix and between
# values.
defp pipe_list(values) do
  has_pipe? =
    Enum.any?(values, fn value ->
      value |> to_string() |> String.contains?("|")
    end)

  case has_pipe? do
    true ->
      # Use a special join character because pipe would conflict with the value.
      Enum.join(["" | values], "\x1f")

    false ->
      Enum.join(values, "|")
  end
end
# Raise a `RuntimeError` when the response indicates failure; otherwise return `nil`.
#
# Checked in order: HTTP status outside 200..299, an empty body, then, for
# map bodies only, a legacy "error" entry followed by a new-style "errors" entry.
defp assert_success(result) do
  status = result.status
  body = result.body
  success? = status >= 200 and status < 300

  cond do
    not success? ->
      raise "Error received with HTTP status #{status}"

    body in [nil, "", %{}] ->
      raise "Empty response"

    is_map(body) ->
      cond do
        legacy = body["error"] -> raise summarize_legacy_error(legacy)
        modern = body["errors"] -> raise summarize_new_error(modern)
        true -> nil
      end

    true ->
      # TODO: If we test for format=json in the request, we can assert a malformed response.
      nil
  end
end
# Pick a message for a legacy-format error: its "info", else its "code", else
# a fixed fallback string.
defp summarize_legacy_error(error) do
  Enum.find_value(["info", "code"], "Unknown error (legacy format)", fn field ->
    error[field]
  end)
end
# Pick a message for a new-format error list, looking only at its first entry.
defp summarize_new_error(errors) do
  errors
  |> List.first()
  |> summarize_error_entry()
end

# Preference order: "text", "html", "key" joined with its "params", "code".
defp summarize_error_entry(%{"text" => text}), do: text
defp summarize_error_entry(%{"html" => html}), do: html

defp summarize_error_entry(%{"key" => key, "params" => params}) do
  Enum.join(List.flatten([key, params]), "-")
end

defp summarize_error_entry(%{"code" => code}), do: code
defp summarize_error_entry(_other), do: "unknown"
# Build the Tesla client: caller-supplied middleware first, followed by the
# stack shared by every session, on top of the default adapter.
@spec client(list) :: Tesla.Client.t()
defp client(extra) do
  user_agent_header = {"user-agent", Util.user_agent()}

  shared = [
    {Tesla.Middleware.Compression, format: "gzip"},
    Wiki.StatefulClient.CookieJar,
    Tesla.Middleware.FormUrlencoded,
    {Tesla.Middleware.Headers, [user_agent_header]},
    Tesla.Middleware.JSON
    # Debugging only:
    # Tesla.Middleware.Logger
  ]

  Tesla.client(extra ++ shared, Util.default_adapter())
end
end
defmodule Wiki.StatefulClient.CookieJar do
  @moduledoc false

  # Tesla middleware keeping a cookie jar in the request's `:cookies` option.
  #
  # Outgoing: cookies found under `env.opts[:cookies]` are sent as a `cookie`
  # header. Incoming: cookies parsed from `set-cookie` response headers are
  # merged over the existing jar and stored back under `:cookies`.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    env = Tesla.put_headers(env, cookie_headers(env.opts[:cookies]))

    with {:ok, env} <- Tesla.run(env, next) do
      cookies =
        env
        |> Tesla.get_headers("set-cookie")
        |> extract_cookies()
        |> update_cookies(env.opts[:cookies])

      {:ok, Tesla.put_opt(env, :cookies, cookies)}
    end
  end

  # Headers to add for the given jar. Nothing is added for a missing jar or an
  # empty one: the jar becomes `%{}` after any response without `set-cookie`,
  # and serializing that would send a `cookie` header with an empty value.
  @spec cookie_headers(map | nil) :: [{String.t(), String.t()}]
  defp cookie_headers(nil), do: []
  defp cookie_headers(cookies) when map_size(cookies) == 0, do: []
  defp cookie_headers(cookies), do: [{"cookie", serialize_cookies(cookies)}]

  # Merge freshly received cookies over the previous jar; new values win.
  @spec update_cookies(map, map | nil) :: map
  defp update_cookies(new_cookies, nil), do: new_cookies
  defp update_cookies(new_cookies, old_cookies), do: Map.merge(old_cookies, new_cookies)

  # Parse each `set-cookie` header value and keep only its key and value.
  @spec extract_cookies([String.t()]) :: map
  defp extract_cookies(headers) do
    headers
    |> Enum.map(&SetCookie.parse/1)
    |> Enum.into(%{}, fn %{key: k, value: v} -> {k, v} end)
  end

  # Render the jar as a `cookie` header value: `k1=v1; k2=v2`.
  @spec serialize_cookies(map) :: String.t()
  defp serialize_cookies(cookies) do
    Enum.map_join(cookies, "; ", fn {key, value} -> key <> "=" <> value end)
  end
end
defmodule Wiki.StatefulClient.CumulativeResult do
  @moduledoc false

  # Tesla middleware that merges each response body into the running total
  # kept under the `:accumulated_result` option, then replaces the response
  # body with that total.

  @behaviour Tesla.Middleware

  @impl true
  def call(env, next, _opts) do
    with {:ok, env} <- Tesla.run(env, next) do
      {:ok, accumulate(env)}
    end
  end

  # Only map bodies can be merged. Anything else (an undecoded string, `nil`)
  # is passed through untouched, leaving the accumulated result as it was, so
  # that the caller can inspect the raw response instead of this middleware
  # failing with a FunctionClauseError.
  @spec accumulate(Tesla.Env.t()) :: Tesla.Env.t()
  defp accumulate(%{body: body} = env) when is_map(body) do
    accumulated = recursive_merge(env.opts[:accumulated_result] || %{}, body)

    env
    |> Tesla.put_opt(:accumulated_result, accumulated)
    |> Tesla.put_body(accumulated)
  end

  defp accumulate(env), do: env

  # Deep-merge two maps, resolving keys present in both with recursive_merge/3.
  @spec recursive_merge(map, map) :: map
  defp recursive_merge(%{} = v1, %{} = v2), do: Map.merge(v1, v2, &recursive_merge/3)

  # Conflict resolver for a key present on both sides: maps are merged
  # recursively, lists are concatenated, and equal values are kept. There is
  # intentionally no clause for differing values of any other kind, so such a
  # conflict fails with a FunctionClauseError.
  # TODO: _key can be dropped
  @spec recursive_merge(term, term, term) :: term
  defp recursive_merge(_key, v1, v2)
  defp recursive_merge(_key, %{} = v1, %{} = v2), do: recursive_merge(v1, v2)
  defp recursive_merge(_key, v1, v2) when is_list(v1) and is_list(v2), do: v1 ++ v2
  defp recursive_merge(_key, v1, v2) when v1 == v2, do: v1
end