-module(roadrunner_static).
-moduledoc """
Built-in static file handler.
Configure via a 3-tuple route with `#{dir => Path}` opts and a
`*path` wildcard segment carrying the relative file path:
```
{~"/static/*path", roadrunner_static, #{dir => ~"/var/www"}}
```
Reads the file from disk, sets `Content-Type` from the extension,
returns 404 on a missing file or any path that contains `..`.
## Gzip-sibling serving
When a request carries `Accept-Encoding: gzip` and the requested
file has a `<file>.gz` sibling on disk, the sibling is served
verbatim with `Content-Encoding: gzip` plus `Vary: Accept-Encoding`.
This matches nginx's `gzip_static on` behaviour and lets operators
pre-compress build assets once instead of paying the deflate cost
per request.
`Accept-Encoding` is matched via plain substring (`gzip`) rather
than full RFC 9110 §12.5.3 qvalue ranking. The static path is
typically hit by browsers and benchmark clients that always
include `gzip` plainly. Brotli (`.br`) siblings are not served —
gzip is the universally supported encoding.
The original file's ETag is reused for the gzip variant, so a
follow-up `If-None-Match` returns 304 regardless of which variant
was first served. A `Range` request disables the gzip path on that
request — byte offsets over a compressed representation have
subtle semantics and the simple "Range wins" rule matches what
nginx does.
## Symlink policy
`#{symlink_policy => Policy}` (default `refuse_escapes`) controls
how symlinks inside the docroot are handled. The policy applies to
the **leaf** of the requested path — symlinks in intermediate
directories are still followed by the kernel.
- `refuse_escapes` (default) — symlinks whose target resolves
inside `dir` are followed; symlinks pointing outside (e.g. an
absolute target like `/etc/passwd`, or a relative target with
`..` segments) return 404. Stricter than nginx/Apache defaults
but matches what an operator typically wants for a public
docroot.
- `follow` — every symlink is followed regardless of where it
points (nginx `disable_symlinks off` equivalent). Use only when
the docroot's filesystem permissions prevent untrusted writes.
- `refuse` — every symlink returns 404, even safe in-docroot ones.
""".
-behaviour(roadrunner_handler).
-include_lib("kernel/include/file.hrl").
-on_load(init_patterns/0).
-define(COMMA_CP_KEY, {?MODULE, comma_cp}).
-define(DASH_CP_KEY, {?MODULE, dash_cp}).
-export([handle/1]).
%% Extension -> `Content-Type` lookup table consulted by
%% `content_type_for/1`. Keys are lowercase and include the leading
%% dot; the lookup site lowercases the file's extension before the
%% lookup and falls back to `application/octet-stream` for anything
%% not listed here.
-define(MIME_TYPES, #{
~".html" => ~"text/html; charset=utf-8",
~".css" => ~"text/css; charset=utf-8",
~".js" => ~"application/javascript",
~".json" => ~"application/json",
~".png" => ~"image/png",
~".jpg" => ~"image/jpeg",
~".jpeg" => ~"image/jpeg",
~".gif" => ~"image/gif",
~".svg" => ~"image/svg+xml",
~".ico" => ~"image/x-icon",
~".txt" => ~"text/plain; charset=utf-8"
}).
-doc """
Handler entry point.

Reads the docroot (`dir`) from the route state and the `path` binding
(defaulting to `[]`) from the request, rejects the request with a 404
when segment validation reports `traversal`, and otherwise serves the
file obtained by joining the docroot with the segments. The request is
returned unchanged alongside the response.
""".
-spec handle(roadrunner_req:request()) -> roadrunner_handler:result().
handle(Req) ->
    #{dir := DocRoot} = roadrunner_req:state(Req),
    PathSegments = maps:get(~"path", roadrunner_req:bindings(Req), []),
    case validate_segments(PathSegments) of
        traversal ->
            {roadrunner_resp:not_found(), Req};
        ok ->
            OnDisk = filename:join([DocRoot | PathSegments]),
            {serve_file(OnDisk, Req), Req}
    end.
%% Stat the leaf of `FilePath` and dispatch on what it is: regular
%% files are served directly, symlinks go through the symlink-policy
%% gate first, and everything else (stat errors, directories, devices,
%% ...) becomes a 404.
-spec serve_file(file:filename_all(), roadrunner_req:request()) -> roadrunner_handler:response().
serve_file(FilePath, Req) ->
    %% The link-aware stat reports a symlink leaf as `symlink` instead
    %% of describing its target, which is what the policy gate needs.
    LeafInfo = file:read_link_info(FilePath, [{time, posix}]),
    serve_leaf(LeafInfo, FilePath, Req).

%% Dispatch on the un-followed stat result of the requested leaf.
serve_leaf({ok, #file_info{type = regular, size = Bytes, mtime = ModTime}}, FilePath, Req) ->
    serve_regular_file(FilePath, Bytes, ModTime, Req);
serve_leaf({ok, #file_info{type = symlink}}, FilePath, Req) ->
    case symlink_allowed(FilePath, Req) of
        false -> roadrunner_resp:not_found();
        true -> serve_followed_file(FilePath, Req)
    end;
serve_leaf(_Other, _FilePath, _Req) ->
    roadrunner_resp:not_found().
%% Second stat, this time following the symlink, used once the policy
%% gate has approved the link. Only a regular-file target is served;
%% any other outcome (dangling link, directory target, stat error) is
%% a 404.
-spec serve_followed_file(file:filename_all(), roadrunner_req:request()) ->
    roadrunner_handler:response().
serve_followed_file(FilePath, Req) ->
    maybe
        {ok, #file_info{type = regular, size = Bytes, mtime = ModTime}} ?=
            file:read_file_info(FilePath, [{time, posix}]),
        serve_regular_file(FilePath, Bytes, ModTime, Req)
    else
        _NotRegular -> roadrunner_resp:not_found()
    end.
%% Build the response for a regular file of `Size` bytes last modified
%% at posix time `Mtime`:
%%   - conditional request that is still fresh -> bodiless 304;
%%   - otherwise the gzip sibling when applicable;
%%   - otherwise the raw file, honouring any `Range` header.
-spec serve_regular_file(
    file:filename_all(), non_neg_integer(), integer(), roadrunner_req:request()
) -> roadrunner_handler:response().
serve_regular_file(FilePath, Size, Mtime, Req) ->
    Validator = etag(Size, Mtime),
    HttpDate = roadrunner_http:format_http_date(Mtime),
    case is_cached(Req, Validator, Mtime) of
        false ->
            case maybe_serve_gzip(FilePath, Validator, HttpDate, Req) of
                none -> serve_with_range(FilePath, Size, Validator, HttpDate, Req);
                {ok, GzipResp} -> GzipResp
            end;
        true ->
            NotModifiedHeaders = [
                {~"etag", Validator},
                {~"last-modified", HttpDate},
                {~"content-length", ~"0"}
            ],
            {304, NotModifiedHeaders, ~""}
    end.
%% When the client opted into gzip and a `<file>.gz` sibling is on
%% disk, serve the sibling with `Content-Encoding: gzip`. `Range`
%% requests skip this path — byte offsets over a compressed
%% representation have subtle semantics, so we let Range win and
%% serve the raw file.
%%
%% Returns `{ok, Response}` when the sibling is served and `none` when
%% the caller should fall back to the uncompressed file.
-spec maybe_serve_gzip(file:filename_all(), binary(), binary(), roadrunner_req:request()) ->
    {ok, roadrunner_handler:response()} | none.
maybe_serve_gzip(FilePath, ETag, LastMod, Req) ->
    Eligible =
        roadrunner_req:header(~"range", Req) =:= undefined andalso accepts_gzip(Req),
    case Eligible of
        true ->
            GzPath = iolist_to_binary([FilePath, ~".gz"]),
            case gz_sibling_size(GzPath, Req) of
                {ok, GzSize} ->
                    {ok, gzip_response(FilePath, GzPath, GzSize, ETag, LastMod)};
                none ->
                    none
            end;
        false ->
            none
    end.

%% Size of the `.gz` sibling if it may be served, `none` otherwise.
%%
%% The sibling is stat'ed WITHOUT following a leaf symlink so that it
%% goes through the same symlink-policy gate as the requested file.
%% A plain `read_file_info/2` here would follow the link silently and
%% let a `<file>.gz` symlink serve content the policy forbids.
-spec gz_sibling_size(file:filename_all(), roadrunner_req:request()) ->
    {ok, non_neg_integer()} | none.
gz_sibling_size(GzPath, Req) ->
    case file:read_link_info(GzPath, [{time, posix}]) of
        {ok, #file_info{type = regular, size = Size}} ->
            {ok, Size};
        {ok, #file_info{type = symlink}} ->
            case symlink_allowed(GzPath, Req) of
                true -> followed_gz_size(GzPath);
                false -> none
            end;
        _ ->
            none
    end.

%% Stat the target of an approved `.gz` symlink; only a regular file
%% counts as a usable sibling.
-spec followed_gz_size(file:filename_all()) -> {ok, non_neg_integer()} | none.
followed_gz_size(GzPath) ->
    case file:read_file_info(GzPath, [{time, posix}]) of
        {ok, #file_info{type = regular, size = Size}} -> {ok, Size};
        _ -> none
    end.
%% True when the request has an `Accept-Encoding` header whose value
%% contains the literal substring `gzip`. No qvalue parsing is done.
-spec accepts_gzip(roadrunner_req:request()) -> boolean().
accepts_gzip(Req) ->
    Encodings = roadrunner_req:header(~"accept-encoding", Req),
    Encodings =/= undefined andalso binary:match(Encodings, ~"gzip") =/= nomatch.
%% 200 sendfile response for the pre-compressed sibling at `GzPath`.
%% The content type is derived from the ORIGINAL path (so `app.js.gz`
%% is still served as JavaScript), the length is the sibling's size,
%% and the validators are the ones computed for the original file.
-spec gzip_response(
    file:filename_all(), file:filename_all(), non_neg_integer(), binary(), binary()
) -> roadrunner_handler:response().
gzip_response(OrigPath, GzPath, GzSize, ETag, LastMod) ->
    Headers = [
        {~"content-type", content_type_for(OrigPath)},
        {~"content-encoding", ~"gzip"},
        {~"content-length", integer_to_binary(GzSize)},
        {~"etag", ETag},
        {~"last-modified", LastMod},
        {~"vary", ~"Accept-Encoding"}
    ],
    WholeFile = {GzPath, 0, GzSize},
    {sendfile, 200, Headers, WholeFile}.
%% Decide whether a conditional request can be answered with 304.
%%
%% Precedence follows RFC 9110 §13.1.3: when `If-None-Match` is
%% present it is the only validator consulted and `If-Modified-Since`
%% is ignored. Falling back to the date after an ETag mismatch would
%% hand out a stale 304 whenever the file was replaced by one with an
%% older mtime. `If-Modified-Since` is used only when no
%% `If-None-Match` header was sent.
%%
%% `If-None-Match` matching (RFC 9110 §13.1.2):
%%   - the field may carry a comma-separated list of entity tags;
%%   - `*` matches any current representation (the file exists here);
%%   - comparison is weak, so a `W/` prefix on the client's tag is
%%     ignored.
-spec is_cached(roadrunner_req:request(), binary(), integer()) -> boolean().
is_cached(Req, ETag, Mtime) ->
    case if_none_match(Req) of
        undefined -> if_modified_since_satisfied(Req, Mtime);
        FieldValue -> etag_listed(FieldValue, ETag)
    end.

%% True when any member of the `If-None-Match` field value matches
%% `ETag`. Tags produced by this module never contain commas, so a
%% plain split on `,` is sufficient to enumerate the candidates.
-spec etag_listed(binary(), binary()) -> boolean().
etag_listed(FieldValue, ETag) ->
    Candidates = binary:split(FieldValue, persistent_term:get(?COMMA_CP_KEY), [global]),
    lists:any(
        fun(Raw) ->
            Candidate = string:trim(Raw),
            Candidate =:= ~"*" orelse strip_weak_prefix(Candidate) =:= ETag
        end,
        Candidates
    ).

%% Drop the weak-validator marker so the opaque tags can be compared.
-spec strip_weak_prefix(binary()) -> binary().
strip_weak_prefix(<<"W/", Tag/binary>>) -> Tag;
strip_weak_prefix(Tag) -> Tag.
%% True when the request carries a parseable `If-Modified-Since` date
%% that is at or after the file's `Mtime`. A missing or unparseable
%% header never satisfies the condition.
-spec if_modified_since_satisfied(roadrunner_req:request(), integer()) -> boolean().
if_modified_since_satisfied(Req, Mtime) ->
    Since = roadrunner_req:header(~"if-modified-since", Req),
    Since =/= undefined andalso unchanged_since(parse_http_date(Since), Mtime).

%% Compare the parsed client date against the file's mtime.
-spec unchanged_since({ok, integer()} | error, integer()) -> boolean().
unchanged_since({ok, ClientPosix}, Mtime) -> ClientPosix >= Mtime;
unchanged_since(error, _Mtime) -> false.
%% Serve the uncompressed file according to the request's `Range`
%% header:
%%   - `none` (absent, malformed or multi-range) -> 200, full body;
%%   - `unsatisfiable`                           -> 416;
%%   - one satisfiable range                     -> 206, that slice.
-spec serve_with_range(
    file:filename_all(),
    non_neg_integer(),
    binary(),
    binary(),
    roadrunner_req:request()
) -> roadrunner_handler:response().
serve_with_range(FilePath, Size, ETag, LastMod, Req) ->
    RangeHeader = roadrunner_req:header(~"range", Req),
    case parse_range(RangeHeader, Size) of
        none ->
            serve_full_file(FilePath, Size, ETag, LastMod);
        unsatisfiable ->
            range_not_satisfiable(Size, ETag, LastMod);
        {range, First, Last} ->
            serve_range(FilePath, Size, ETag, LastMod, First, Last)
    end.
%% 200 response covering the whole file. The body is expressed as a
%% `{sendfile, ...}` instruction (path, offset 0, `Size` bytes) rather
%% than as file contents, so this function never reads the file itself.
-spec serve_full_file(
    file:filename_all(), non_neg_integer(), binary(), binary()
) -> roadrunner_handler:response().
serve_full_file(FilePath, Size, ETag, LastMod) ->
    Headers = [
        {~"content-type", content_type_for(FilePath)},
        {~"content-length", integer_to_binary(Size)},
        {~"etag", ETag},
        {~"last-modified", LastMod}
    ],
    WholeFile = {FilePath, 0, Size},
    {sendfile, 200, Headers, WholeFile}.
%% Map a path to its `Content-Type` by looking up the lowercased file
%% extension in `?MIME_TYPES`; unknown or missing extensions yield
%% `application/octet-stream`.
-spec content_type_for(file:filename_all()) -> binary().
content_type_for(FilePath) ->
    RawExt = iolist_to_binary(filename:extension(FilePath)),
    LookupKey = roadrunner_bin:ascii_lowercase(RawExt),
    case maps:find(LookupKey, ?MIME_TYPES) of
        {ok, MimeType} -> MimeType;
        error -> ~"application/octet-stream"
    end.
%% Build the entity tag for a file: `"<Size>-<Mtime>"`, both rendered
%% in decimal and wrapped in double quotes. Files sharing size and
%% mtime therefore share a tag.
-spec etag(non_neg_integer(), integer()) -> binary().
etag(Size, Mtime) ->
    iolist_to_binary([$", integer_to_binary(Size), $-, integer_to_binary(Mtime), $"]).
%% Raw value of the request's `If-None-Match` header, or `undefined`
%% when the header is absent.
-spec if_none_match(roadrunner_req:request()) -> binary() | undefined.
if_none_match(Req) ->
    HeaderName = ~"if-none-match",
    roadrunner_req:header(HeaderName, Req).
%% Interpret a `Range` header value against a file of `Size` bytes.
%%
%% Only a single `bytes=` range is honoured. The result is:
%%   - `{range, Start, End}` for a satisfiable range (inclusive ends);
%%   - `unsatisfiable` when the caller should answer 416;
%%   - `none` when Range is to be ignored and the full body served:
%%     header absent, a unit other than `bytes=`, or a comma in the
%%     spec (multi-range).
-spec parse_range(binary() | undefined, non_neg_integer()) ->
    {range, non_neg_integer(), non_neg_integer()} | unsatisfiable | none.
parse_range(<<"bytes=", Spec/binary>>, Size) ->
    CommaPattern = persistent_term:get(?COMMA_CP_KEY),
    case binary:match(Spec, CommaPattern) of
        nomatch -> parse_single_range(Spec, Size);
        %% A comma means several ranges were requested; not supported.
        {_Pos, _Len} -> none
    end;
parse_range(_AbsentOrOtherUnit, _Size) ->
    none.
%% Parse one range spec (the text after `bytes=`) by splitting on the
%% first `-` and dispatching on which side is empty:
%%   `-S`  -> suffix range (last S bytes)
%%   `N-`  -> open-ended range starting at N
%%   `N-M` -> closed range
%% A spec without any `-` is ignored (`none`).
-spec parse_single_range(binary(), non_neg_integer()) ->
    {range, non_neg_integer(), non_neg_integer()} | unsatisfiable | none.
parse_single_range(Spec, Size) ->
    DashPattern = persistent_term:get(?DASH_CP_KEY),
    case binary:split(Spec, DashPattern) of
        [<<>>, SuffixLen] -> suffix_range(SuffixLen, Size);
        [StartBin, <<>>] -> open_range(StartBin, Size);
        [StartBin, EndBin] -> closed_range(StartBin, EndBin, Size);
        _NoDash -> none
    end.

%% `bytes=-S`: the last S bytes. A zero-length suffix or an empty file
%% is well-formed but unsatisfiable; a non-numeric length is ignored.
-spec suffix_range(binary(), non_neg_integer()) ->
    {range, non_neg_integer(), non_neg_integer()} | unsatisfiable | none.
suffix_range(LenBin, Size) ->
    case bin_to_pos_int(LenBin) of
        error ->
            none;
        {ok, Len} when Len > 0, Size > 0 ->
            %% A suffix longer than the file clamps to the whole file.
            {range, max(0, Size - Len), Size - 1};
        {ok, _} ->
            unsatisfiable
    end.

%% `bytes=N-`: from N through the end of the file.
-spec open_range(binary(), non_neg_integer()) ->
    {range, non_neg_integer(), non_neg_integer()} | unsatisfiable | none.
open_range(StartBin, Size) ->
    case bin_to_pos_int(StartBin) of
        error -> none;
        {ok, First} when First < Size -> {range, First, Size - 1};
        {ok, _} -> unsatisfiable
    end.

%% `bytes=N-M`: both ends given; the end is clamped to the last byte.
-spec closed_range(binary(), binary(), non_neg_integer()) ->
    {range, non_neg_integer(), non_neg_integer()} | unsatisfiable | none.
closed_range(StartBin, EndBin, Size) ->
    First = bin_to_pos_int(StartBin),
    Last = bin_to_pos_int(EndBin),
    case {First, Last} of
        {{ok, From}, {ok, To}} when From =< To, From < Size ->
            {range, From, min(To, Size - 1)};
        {{ok, _}, {ok, _}} ->
            unsatisfiable;
        _ ->
            none
    end.
%% Parse a non-negative decimal integer made of ASCII digits only.
%%
%% Returns `{ok, N}` on success and `error` for anything else: empty
%% input, a sign prefix, or any non-digit byte. `binary_to_integer/1`
%% on its own also accepts a leading `+` or `-`, but the positions in
%% a byte-range spec are plain digit strings (RFC 9110 §14.1.1), so
%% the first byte is required to be a digit before converting. Once
%% that holds, a successful conversion implies the whole input was
%% digits and the result is >= 0.
-spec bin_to_pos_int(binary()) -> {ok, non_neg_integer()} | error.
bin_to_pos_int(<<First, _/binary>> = Bin) when First >= $0, First =< $9 ->
    try
        {ok, binary_to_integer(Bin)}
    catch
        %% Trailing garbage such as `5x` — malformed, not a crash.
        error:badarg -> error
    end;
bin_to_pos_int(_EmptyOrSigned) ->
    error.
%% 206 response for the inclusive byte range `Start..End` of a file
%% that is `Size` bytes long. `Content-Range` is rendered as
%% `bytes Start-End/Size` and the sendfile instruction covers exactly
%% `End - Start + 1` bytes beginning at offset `Start`.
-spec serve_range(
    file:filename_all(),
    non_neg_integer(),
    binary(),
    binary(),
    non_neg_integer(),
    non_neg_integer()
) -> roadrunner_handler:response().
serve_range(FilePath, Size, ETag, LastMod, Start, End) ->
    SliceLen = End - Start + 1,
    ContentRange =
        <<"bytes ", (integer_to_binary(Start))/binary, $-, (integer_to_binary(End))/binary, $/,
            (integer_to_binary(Size))/binary>>,
    Headers = [
        {~"content-type", content_type_for(FilePath)},
        {~"content-length", integer_to_binary(SliceLen)},
        {~"content-range", ContentRange},
        {~"etag", ETag},
        {~"last-modified", LastMod}
    ],
    {sendfile, 206, Headers, {FilePath, Start, SliceLen}}.
%% Bodiless 416 response. `Content-Range: bytes */<Size>` advertises
%% the file's total length, and the validators are echoed alongside.
-spec range_not_satisfiable(non_neg_integer(), binary(), binary()) -> roadrunner_handler:response().
range_not_satisfiable(Size, ETag, LastMod) ->
    TotalOnly = <<"bytes */", (integer_to_binary(Size))/binary>>,
    Headers = [
        {~"content-length", ~"0"},
        {~"content-range", TotalOnly},
        {~"etag", ETag},
        {~"last-modified", LastMod}
    ],
    {416, Headers, ~""}.
%% Defense against path traversal. Returns `traversal` when any
%% segment could move the joined path outside the docroot, `ok`
%% otherwise. A segment is rejected when it:
%%   - is exactly `..`; or
%%   - contains `/`, `\` or a NUL byte.
%%
%% The second rule matters because the segments are later passed to
%% `filename:join/1`, which discards everything before an absolute
%% component and re-splits embedded separators. A segment such as
%% `/etc/passwd` or `../x` would therefore escape the docroot even
%% though it is not literally `..`. Such segments can only arise if
%% separators are percent-decoded after the path is split.
%% NOTE(review): whether the router decodes segments is not visible
%% from this module, so this check is deliberately conservative.
%%
%% Empty segments are already stripped by `roadrunner_router:path_segments/1`.
-spec validate_segments([binary()]) -> ok | traversal.
validate_segments(Segments) ->
    case lists:any(fun is_unsafe_segment/1, Segments) of
        true -> traversal;
        false -> ok
    end.

%% True for a segment that must not be joined onto the docroot.
-spec is_unsafe_segment(binary()) -> boolean().
is_unsafe_segment(<<"..">>) -> true;
is_unsafe_segment(Segment) -> has_separator_byte(Segment).

%% Byte scan for a path separator (`/`, `\`) or NUL anywhere in the
%% segment.
-spec has_separator_byte(binary()) -> boolean().
has_separator_byte(<<>>) ->
    false;
has_separator_byte(<<Byte, _/binary>>) when Byte =:= $/; Byte =:= $\\; Byte =:= 0 ->
    true;
has_separator_byte(<<_, Rest/binary>>) ->
    has_separator_byte(Rest).
%% Apply the route's symlink policy to a symlink leaf:
%%   `follow`         -> always allowed;
%%   `refuse`         -> never allowed;
%%   `refuse_escapes` -> allowed only when the link target passes the
%%                       docroot containment check.
-spec symlink_allowed(file:filename_all(), roadrunner_req:request()) -> boolean().
symlink_allowed(FilePath, Req) ->
    Policy = symlink_policy(Req),
    Policy =:= follow orelse
        (Policy =:= refuse_escapes andalso target_inside_docroot(FilePath, Req)).
%% Effective symlink policy for this route. Only the explicit values
%% `follow` and `refuse` are recognised; a missing key or any other
%% value falls back to `refuse_escapes`.
-spec symlink_policy(roadrunner_req:request()) -> follow | refuse | refuse_escapes.
symlink_policy(Req) ->
    policy_from_state(roadrunner_req:state(Req)).

%% Pick the policy out of the route state term.
-spec policy_from_state(term()) -> follow | refuse | refuse_escapes.
policy_from_state(#{symlink_policy := follow}) -> follow;
policy_from_state(#{symlink_policy := refuse}) -> refuse;
policy_from_state(_State) -> refuse_escapes.
%% Resolve the symlink one level and check the result lives under
%% `dir`. Symlinks in intermediate path components are not inspected —
%% the kernel follows those when we eventually open the file. The
%% threat model is "an attacker plants a leaf symlink to escape", which
%% is the common case for upload-able directories.
%%
%% A target is accepted only when BOTH hold:
%%   1. it contains no `..` segment at all, and
%%   2. it is relative, or it is absolute and textually under `dir`.
%%
%% Rule 1 is applied to absolute targets too: `<dir>/../../etc/passwd`
%% starts with the docroot prefix but still resolves outside it.
-spec target_inside_docroot(file:filename_all(), roadrunner_req:request()) -> boolean().
target_inside_docroot(FilePath, Req) ->
    %% `serve_file/2` only calls us after `read_link_info` reported
    %% `type = symlink`, so `read_link` is expected to succeed —
    %% we let a TOCTOU race (symlink removed between the two stats)
    %% crash and bubble up as a 500 instead of silently 404'ing.
    {ok, Target} = file:read_link(FilePath),
    #{dir := Dir} = roadrunner_req:state(Req),
    not has_parent_segment(Target) andalso target_under_dir(Target, Dir).

%% True when any path component of `Target` is `..`.
%%
%% `file:read_link/1` is documented to return the target as a string
%% (character list) even when the link path was given as a binary, so
%% `filename:split/1` yields string components here. Both spellings
%% are compared so the check holds whichever representation arrives;
%% comparing against the binary alone would never match a list target
%% and would let every relative `../` escape through.
-spec has_parent_segment(file:filename_all()) -> boolean().
has_parent_segment(Target) ->
    lists:any(
        fun(Component) -> Component =:= ".." orelse Component =:= ~".." end,
        filename:split(Target)
    ).

%% Containment check for a target already known to be free of `..`.
-spec target_under_dir(file:filename_all(), file:filename_all()) -> boolean().
target_under_dir(Target, Dir) ->
    case filename:pathtype(Target) of
        relative ->
            %% A relative target without any `..` segments must land
            %% inside the directory containing the symlink, which by
            %% construction is inside `dir`.
            true;
        _ ->
            %% `filename:absname/1` strips trailing slashes (except for
            %% the root `/` itself, which we don't reasonably support
            %% as a docroot anyway), so a single appended `/` is enough
            %% to make `string:prefix/2` an exact directory check
            %% rather than a sibling-prefix false positive. `string:prefix/2`
            %% accepts chardata, so neither argument needs flattening.
            string:prefix(Target, [filename:absname(Dir), $/]) =/= nomatch
    end.
%% Convert an IMF-fixdate header value, e.g.
%% `Sun, 06 Nov 1994 08:49:37 GMT`, into posix seconds. Any other
%% layout, a non-numeric field, an unknown month abbreviation, or a
%% date the `calendar` module rejects yields `error`. The day-name
%% field is skipped without being checked.
-spec parse_http_date(binary()) -> {ok, integer()} | error.
parse_http_date(
    <<_DayName:3/binary, ", ", DayBin:2/binary, " ", MonBin:3/binary, " ", YearBin:4/binary, " ",
        HourBin:2/binary, ":", MinBin:2/binary, ":", SecBin:2/binary, " GMT">>
) ->
    %% Gregorian seconds at 1970-01-01T00:00:00, i.e.
    %% calendar:datetime_to_gregorian_seconds({{1970, 1, 1}, {0, 0, 0}}).
    UnixEpoch = 62167219200,
    try
        Date = {binary_to_integer(YearBin), month_number(MonBin), binary_to_integer(DayBin)},
        Time = {binary_to_integer(HourBin), binary_to_integer(MinBin), binary_to_integer(SecBin)},
        {ok, calendar:datetime_to_gregorian_seconds({Date, Time}) - UnixEpoch}
    catch
        %% Every failure above means "not a usable date".
        _:_ -> error
    end;
parse_http_date(_OtherFormat) ->
    error.
%% Month number (1..12) for a three-letter English abbreviation with a
%% capitalised first letter. Any other input raises
%% `error:{badkey, Mon}`; `parse_http_date/1` relies on its
%% surrounding try/catch to turn that into its `error` return.
month_number(<<"Jan">>) -> 1;
month_number(<<"Feb">>) -> 2;
month_number(<<"Mar">>) -> 3;
month_number(<<"Apr">>) -> 4;
month_number(<<"May">>) -> 5;
month_number(<<"Jun">>) -> 6;
month_number(<<"Jul">>) -> 7;
month_number(<<"Aug">>) -> 8;
month_number(<<"Sep">>) -> 9;
month_number(<<"Oct">>) -> 10;
month_number(<<"Nov">>) -> 11;
month_number(<<"Dec">>) -> 12;
month_number(Mon) -> error({badkey, Mon}).
%% `-on_load` callback: compiles the `,` and `-` search patterns once
%% and stores them in `persistent_term` under `?COMMA_CP_KEY` and
%% `?DASH_CP_KEY`, where the Range parsing code fetches them on each
%% request instead of recompiling.
-spec init_patterns() -> ok.
init_patterns() ->
    Patterns = [
        {?COMMA_CP_KEY, ~","},
        {?DASH_CP_KEY, ~"-"}
    ],
    lists:foreach(
        fun({Key, Literal}) ->
            persistent_term:put(Key, binary:compile_pattern(Literal))
        end,
        Patterns
    ).