%% src/roadrunner_qs.erl

-module(roadrunner_qs).
-moduledoc """
`application/x-www-form-urlencoded` query string codec.

Pairs are separated by `&`; key and value within each pair are
separated by `=`. `+` decodes to space (legacy form-encoding rule),
followed by RFC 3986 percent-decoding via `roadrunner_uri`. Bare keys
with no `=` are flags — their value is the atom `true`.

Lenient on malformed input (cowboy parity): empty pair entries
(`&&`, leading or trailing `&`) are skipped, and a key or value whose
percent-decoding fails is passed through un-decoded (with `+` already
turned into spaces) instead of raising.
""".

-on_load(init_patterns/0).

-export([parse/1, encode/1]).

%% `persistent_term` keys for the binary patterns pre-compiled at module
%% load (see `init_patterns/0`). The trigger pattern matches the bytes
%% (`+`, `%`) that mean `decode/3` actually has to do work.
-define(QS_TRIGGERS_KEY, {?MODULE, qs_triggers_cp}).
-define(AMP_KEY, {?MODULE, amp_cp}).
-define(EQ_KEY, {?MODULE, eq_cp}).
-define(PLUS_KEY, {?MODULE, plus_cp}).
-define(PCT20_KEY, {?MODULE, pct20_cp}).

-doc """
Split a query string into `{Key, Value}` pairs, preserving input order.

A pair without `=` is a bare flag and gets the value `true`; every other
value is a binary, which may be empty. Empty segments between `&`
separators are dropped.
""".
-spec parse(binary()) -> [{binary(), binary() | true}].
parse(<<>>) ->
    [];
parse(Query) when is_binary(Query) ->
    %% Fetch every precompiled pattern once per call rather than per pair.
    AmpCp = persistent_term:get(?AMP_KEY),
    EqCp = persistent_term:get(?EQ_KEY),
    TriggersCp = persistent_term:get(?QS_TRIGGERS_KEY),
    PlusCp = persistent_term:get(?PLUS_KEY),
    Segments = binary:split(Query, AmpCp, [global]),
    %% `&&`, a leading `&` or a trailing `&` yield empty segments; skip them.
    [
        parse_pair(Segment, EqCp, TriggersCp, PlusCp)
     || Segment <- Segments, Segment =/= <<>>
    ].

%% Turn one `&`-delimited segment into a `{Key, Value}` tuple. Only the
%% first `=` separates key from value; any later `=` stays in the value.
%% A segment with no `=` at all is a bare flag whose value is `true`.
-spec parse_pair(binary(), binary:cp(), binary:cp(), binary:cp()) ->
    {binary(), binary() | true}.
parse_pair(Pair, EqCp, TriggersCp, PlusCp) ->
    [RawKey | MaybeValue] = binary:split(Pair, EqCp),
    Key = decode(RawKey, TriggersCp, PlusCp),
    case MaybeValue of
        [] -> {Key, true};
        [RawValue] -> {Key, decode(RawValue, TriggersCp, PlusCp)}
    end.

%% Form-decode a single key or value: `+` becomes a space, then the
%% result is percent-decoded via `roadrunner_uri:percent_decode/1`.
-spec decode(binary(), binary:cp(), binary:cp()) -> binary().
decode(Bin, TriggersCp, PlusCp) ->
    %% One scan with the precompiled `+`/`%` pattern decides whether any
    %% decoding work is needed at all.
    decode_scanned(binary:match(Bin, TriggersCp), Bin, PlusCp).

%% Dispatch on the trigger scan result.
-spec decode_scanned(nomatch | {non_neg_integer(), non_neg_integer()}, binary(), binary:cp()) ->
    binary().
decode_scanned(nomatch, Bin, _PlusCp) ->
    %% Fast path: no `+` and no `%`, so the input already is the decoded
    %% form. Skips both `binary:replace/4` and the percent decoder.
    Bin;
decode_scanned(_Found, Bin, PlusCp) ->
    Spaced = binary:replace(Bin, PlusCp, ~" ", [global]),
    case roadrunner_uri:percent_decode(Spaced) of
        {ok, Decoded} -> Decoded;
        %% Lenient: on a decode failure keep the `+`-replaced bytes as-is.
        {error, badarg} -> Spaced
    end.

-doc """
Build a query string from a list of `{Key, Value}` pairs.

A `Value` of `true` emits the key alone (bare flag, no `=`); a binary
`Value` emits `Key=Value`. Pairs are joined with `&`. Spaces are written
as `+` (form-encoding convention); other non-unreserved bytes become
`%HH` triples — including a literal `+`, which becomes `%2B` so it
round-trips through `parse/1`.
""".
-spec encode([{binary(), binary() | true}]) -> binary().
encode(Pairs) when is_list(Pairs) ->
    SpaceCp = persistent_term:get(?PCT20_KEY),
    EncodedPairs = lists:map(fun(Pair) -> encode_pair(Pair, SpaceCp) end, Pairs),
    iolist_to_binary(lists:join(~"&", EncodedPairs)).

%% Encode one pair as iodata: a bare flag is just its encoded key, any
%% other pair is `Key=Value` with both sides encoded.
-spec encode_pair({binary(), binary() | true}, binary:cp()) -> iodata().
encode_pair({Flag, true}, Pct20Cp) ->
    encode_component(Flag, Pct20Cp);
encode_pair({Name, Val}, Pct20Cp) when is_binary(Val) ->
    EncodedName = encode_component(Name, Pct20Cp),
    EncodedVal = encode_component(Val, Pct20Cp),
    [EncodedName, $=, EncodedVal].

%% Form-encode a single key or value. The URI percent encoder runs
%% first, then every "%20" it produced is rewritten to '+'. Per the
%% original author's note, only a literal 0x20 byte ever yields "%20",
%% so no other percent triple is touched by the rewrite.
-spec encode_component(binary(), binary:cp()) -> binary().
encode_component(Raw, Pct20Cp) ->
    binary:replace(roadrunner_uri:percent_encode(Raw), Pct20Cp, ~"+", [global]).

%% `-on_load` callback. Compiles every binary pattern this module
%% searches for (the `+`/`%` decode triggers, `&`, `=`, `+` and `%20`)
%% once at module load and stores each under its `persistent_term` key,
%% so `parse/1` and `encode/1` look up a precompiled pattern instead of
%% building one per call. Conventional shape across the codebase (see
%% `roadrunner_compress`, `roadrunner_http1`, `roadrunner_ws`).
-spec init_patterns() -> ok.
init_patterns() ->
    Patterns = [
        {?QS_TRIGGERS_KEY, [~"+", ~"%"]},
        {?AMP_KEY, ~"&"},
        {?EQ_KEY, ~"="},
        {?PLUS_KEY, ~"+"},
        {?PCT20_KEY, ~"%20"}
    ],
    lists:foreach(
        fun({Key, Pattern}) ->
            persistent_term:put(Key, binary:compile_pattern(Pattern))
        end,
        Patterns
    ),
    ok.