-module(packkit@detect).
-compile([no_auto_import, nowarn_unused_vars, nowarn_unused_function, nowarn_nomatch, inline]).
-define(FILEPATH, "src/packkit/detect.gleam").
-export([codec/1, archive/1, recipe/1, extension/1, from_bytes/1, from_filename/1, from_path_or_bytes/2]).
-export_type([detected/0]).
-if(?OTP_RELEASE >= 27).
-define(MODULEDOC(Str), -moduledoc(Str)).
-define(DOC(Str), -doc(Str)).
-else.
-define(MODULEDOC(Str), -compile([])).
-define(DOC(Str), -compile([])).
-endif.
%% Result of a detection attempt. The slots after the `detected` tag are read
%% by the accessors exported from this module: codec/1 (element 2),
%% archive/1 (element 3), recipe/1 (element 4) and extension/1 (element 5).
%% Every slot is an option, so any of them may be `none`.
-opaque detected() :: {detected,
gleam@option:option(packkit@codec:codec()),
gleam@option:option(packkit@archive:archive_format()),
gleam@option:option(packkit@recipe:recipe()),
gleam@option:option(binary())}.
-file("src/packkit/detect.gleam", 216).
%% Heuristic check of a two-byte zlib header (CMF, FLG).
%%
%% Returns `true` only when all of the following hold:
%%   * the low nibble of CMF (compression method) is 8,
%%   * the high nibble of CMF (window-size info) is at most 7,
%%   * the 16-bit big-endian value CMF:FLG is a multiple of 31.
%% Anything shorter than two whole bytes yields `false`.
-spec looks_like_zlib(bitstring()) -> boolean().
looks_like_zlib(<<Cmf, Flg, _/binary>>) ->
    Method = Cmf band 16#0F,
    WindowInfo = Cmf bsr 4,
    %% For byte values, (Cmf bsl 8) bor Flg is the same number as Cmf * 256 + Flg.
    HeaderCheck = ((Cmf bsl 8) bor Flg) rem 31,
    Method =:= 8 andalso WindowInfo =< 7 andalso HeaderCheck =:= 0;
looks_like_zlib(_) ->
    false.
-file("src/packkit/detect.gleam", 227).
%% True when the five bytes starting at offset 257 are exactly "ustar".
%% Any other slice result (including a failed slice) yields `false`.
-spec has_ustar_magic(bitstring()) -> boolean().
has_ustar_magic(Bytes) ->
    gleam_stdlib:bit_array_slice(Bytes, 257, 5) =:= {ok, <<"ustar"/utf8>>}.
-file("src/packkit/detect.gleam", 235).
?DOC(" Read the detected codec if one was found.\n").
%% Accessor for the codec slot (tuple position 2) of a detection result.
-spec codec(detected()) -> gleam@option:option(packkit@codec:codec()).
codec({detected, Codec, _Archive, _Recipe, _Extension}) ->
    Codec.
-file("src/packkit/detect.gleam", 240).
?DOC(" Read the detected archive family if one was found.\n").
%% Accessor for the archive slot (tuple position 3) of a detection result.
-spec archive(detected()) -> gleam@option:option(packkit@archive:archive_format()).
archive({detected, _Codec, Archive, _Recipe, _Extension}) ->
    Archive.
-file("src/packkit/detect.gleam", 245).
?DOC(" Read the detected recipe if one was found.\n").
%% Accessor for the recipe slot (tuple position 4) of a detection result.
-spec recipe(detected()) -> gleam@option:option(packkit@recipe:recipe()).
recipe({detected, _Codec, _Archive, Recipe, _Extension}) ->
    Recipe.
-file("src/packkit/detect.gleam", 250).
?DOC(" Read the matched extension label, if any.\n").
%% Accessor for the extension-label slot (tuple position 5) of a detection result.
-spec extension(detected()) -> gleam@option:option(binary()).
extension({detected, _Codec, _Archive, _Recipe, Extension}) ->
    Extension.
-file("src/packkit/detect.gleam", 254).
%% Build a detection result for a recipe.
%%
%% The codec slot is whatever `packkit@recipe:outermost_codec/1` returns for
%% the recipe; the archive slot wraps `packkit@recipe:archive_format/1` in
%% `some`. The recipe itself and the extension label are stored as `some`.
-spec detected_recipe(packkit@recipe:recipe(), binary()) -> detected().
detected_recipe(Recipe, Label) ->
    OuterCodec = packkit@recipe:outermost_codec(Recipe),
    Format = packkit@recipe:archive_format(Recipe),
    {detected, OuterCodec, {some, Format}, {some, Recipe}, {some, Label}}.
-file("src/packkit/detect.gleam", 266).
%% Build a detection result that carries only an archive format and the
%% extension label; the codec and recipe slots are `none`.
-spec detected_archive(packkit@archive:archive_format(), binary()) -> detected().
detected_archive(Format, Label) ->
    Archive = {some, Format},
    Extension = {some, Label},
    {detected, none, Archive, none, Extension}.
-file("src/packkit/detect.gleam", 278).
%% Build a detection result that carries only a codec and the extension
%% label; the archive and recipe slots are `none`.
-spec detected_codec(packkit@codec:codec(), binary()) -> detected().
detected_codec(Codec, Label) ->
    CodecSlot = {some, Codec},
    Extension = {some, Label},
    {detected, CodecSlot, none, none, Extension}.
-file("src/packkit/detect.gleam", 60).
?DOC(
    " Filename rules, ordered most-specific first so the first match wins.\n"
    " Compound extensions (`.tar.gz`, `.tar.bz2`, …) must precede the\n"
    " single extensions (`.gz`, `.bz2`, …) for the obvious reason.\n"
).
%% Each rule is `{Suffixes, Build}`: a list of lowercase suffixes and a
%% zero-argument fun that constructs the detection result on demand.
%% The table has three groups, in this order: recipes (compound extensions),
%% bare archives, bare codecs. Order is significant because the first
%% matching rule wins.
-spec filename_rules() -> list({list(binary()), fun(() -> detected())}).
filename_rules() ->
    %% Rule builders: each defers the constructor call until the rule fires.
    Recipe = fun(Suffixes, Make, Label) ->
        {Suffixes, fun() -> detected_recipe(Make(), Label) end}
    end,
    Archive = fun(Suffixes, Make, Label) ->
        {Suffixes, fun() -> detected_archive(Make(), Label) end}
    end,
    Codec = fun(Suffixes, Make, Label) ->
        {Suffixes, fun() -> detected_codec(Make(), Label) end}
    end,
    [
        %% Recipes: archive wrapped in a codec.
        Recipe(
            [<<".tar.gz"/utf8>>, <<".tgz"/utf8>>],
            fun packkit@recipe:tar_gzip/0,
            <<"tar.gz"/utf8>>
        ),
        Recipe([<<".tar.zlib"/utf8>>], fun packkit@recipe:tar_zlib/0, <<"tar.zlib"/utf8>>),
        Recipe([<<".tar.lz4"/utf8>>], fun packkit@recipe:tar_lz4/0, <<"tar.lz4"/utf8>>),
        Recipe(
            [<<".tar.sz"/utf8>>, <<".tar.snappy"/utf8>>],
            fun packkit@recipe:tar_snappy/0,
            <<"tar.snappy"/utf8>>
        ),
        Recipe([<<".tar.bz2"/utf8>>], fun packkit@recipe:tar_bzip2/0, <<"tar.bz2"/utf8>>),
        Recipe([<<".tar.xz"/utf8>>], fun packkit@recipe:tar_xz/0, <<"tar.xz"/utf8>>),
        Recipe([<<".tar.zst"/utf8>>], fun packkit@recipe:tar_zstd/0, <<"tar.zst"/utf8>>),
        Recipe([<<".tar.br"/utf8>>], fun packkit@recipe:tar_brotli/0, <<"tar.br"/utf8>>),
        Recipe(
            [<<".tar.z"/utf8>>, <<".taz"/utf8>>],
            fun packkit@recipe:tar_lzw/0,
            <<"tar.Z"/utf8>>
        ),
        Recipe([<<".cpio.gz"/utf8>>], fun packkit@recipe:cpio_gzip/0, <<"cpio.gz"/utf8>>),
        Recipe([<<".cpio.bz2"/utf8>>], fun packkit@recipe:cpio_bzip2/0, <<"cpio.bz2"/utf8>>),
        Recipe([<<".cpio.xz"/utf8>>], fun packkit@recipe:cpio_xz/0, <<"cpio.xz"/utf8>>),
        Recipe([<<".cpio.zst"/utf8>>], fun packkit@recipe:cpio_zstd/0, <<"cpio.zst"/utf8>>),
        %% Bare archives.
        Archive([<<".tar"/utf8>>], fun packkit@archive:tar/0, <<"tar"/utf8>>),
        Archive([<<".zip"/utf8>>], fun packkit@archive:zip/0, <<"zip"/utf8>>),
        Archive([<<".7z"/utf8>>], fun packkit@archive:seven_z/0, <<"7z"/utf8>>),
        Archive([<<".cpio"/utf8>>], fun packkit@archive:cpio_newc/0, <<"cpio"/utf8>>),
        Archive([<<".ar"/utf8>>, <<".a"/utf8>>], fun packkit@archive:ar/0, <<"ar"/utf8>>),
        %% Bare codecs.
        Codec([<<".gz"/utf8>>], fun packkit@codec:gzip/0, <<"gz"/utf8>>),
        Codec([<<".zlib"/utf8>>], fun packkit@codec:zlib/0, <<"zlib"/utf8>>),
        Codec(
            [<<".deflate"/utf8>>, <<".dfl"/utf8>>],
            fun packkit@codec:deflate/0,
            <<"deflate"/utf8>>
        ),
        Codec([<<".lz4"/utf8>>], fun packkit@codec:lz4/0, <<"lz4"/utf8>>),
        Codec(
            [<<".sz"/utf8>>, <<".snappy"/utf8>>],
            fun packkit@codec:snappy/0,
            <<"snappy"/utf8>>
        ),
        Codec([<<".bz2"/utf8>>], fun packkit@codec:bzip2/0, <<"bz2"/utf8>>),
        Codec([<<".xz"/utf8>>], fun packkit@codec:xz/0, <<"xz"/utf8>>),
        Codec([<<".br"/utf8>>], fun packkit@codec:brotli/0, <<"br"/utf8>>),
        Codec([<<".zst"/utf8>>], fun packkit@codec:zstd/0, <<"zst"/utf8>>),
        Codec([<<".z"/utf8>>], fun packkit@codec:lzw/0, <<"Z"/utf8>>)
    ].
-file("src/packkit/detect.gleam", 164).
?DOC(
    " Detect a format from the leading bytes of an input stream.\n"
    "\n"
    " Signatures are matched as strictly as practical:\n"
    "\n"
    " * gzip (`1F 8B`) also requires the compression-method byte to be\n"
    " `08` (DEFLATE), since RFC 1952 reserves the other values and\n"
    " no production gzip stream uses them.\n"
    " * zlib (`78 _`) requires CMF.CM == 8 (DEFLATE), CMF.CINFO ≤ 7\n"
    " (15-bit window), and `(CMF*256 + FLG) % 31 == 0` per RFC 1950.\n"
    " * bzip2 (`BZh`) additionally requires the block-size byte to be\n"
    " an ASCII digit `1`..`9`.\n"
    " * lz4 (`04 22 4D 18`) and `.Z` (`1F 9D`) keep their fixed magic.\n"
    " * zstd skippable frames (magic `184D2A50`..`184D2A5F`) are\n"
    " recognised as zstd so wrappers that embed user metadata in\n"
    " skippable frames at the start of the stream do not fail to\n"
    " detect.\n"
    " * snappy framed format starts with a stream identifier chunk\n"
    " (`FF 06 00 00 sNaPpY`); the raw snappy block format has no\n"
    " magic so it can only be detected from filename.\n"
    "\n"
    " Looser signatures like a bare `0x78 _` would false-positive on\n"
    " any byte stream whose first byte happens to be `0x78`.\n"
).
%% Clause order is the match priority: fixed leading magics first, then the
%% two position-independent-of-prefix checks in the final clause.
%%
%% Returns `{ok, Detected}` on a match, otherwise
%% `{error, {detect_unknown_format, <<"byte-signature scan">>}}`.
%%
%% In the final clause the ustar magic (five bytes at offset 257) is tested
%% BEFORE the zlib heuristic. The zlib check inspects only the first two
%% bytes and accepts ordinary ASCII pairs such as "hb" (0x6862 = 31 * 862) or
%% "80" (0x3830 = 31 * 464). A tar header begins with the member name, so a
%% tar archive whose first entry is named e.g. "hbase/..." would otherwise be
%% reported as zlib.
-spec from_bytes(bitstring()) -> {ok, detected()} |
    {error, packkit@error:detect_error()}.
%% gzip: magic plus compression method 8.
from_bytes(<<16#1F, 16#8B, 16#08, _/binary>>) ->
    {ok, detected_codec(packkit@codec:gzip(), <<"gz"/utf8>>)};
%% zip: "PK" followed by 03 04 or 05 06.
from_bytes(<<16#50, 16#4B, 16#03, 16#04, _/binary>>) ->
    {ok, detected_archive(packkit@archive:zip(), <<"zip"/utf8>>)};
from_bytes(<<16#50, 16#4B, 16#05, 16#06, _/binary>>) ->
    {ok, detected_archive(packkit@archive:zip(), <<"zip"/utf8>>)};
%% 7z.
from_bytes(<<16#37, 16#7A, 16#BC, 16#AF, 16#27, 16#1C, _/binary>>) ->
    {ok, detected_archive(packkit@archive:seven_z(), <<"7z"/utf8>>)};
%% xz.
from_bytes(<<16#FD, 16#37, 16#7A, 16#58, 16#5A, 16#00, _/binary>>) ->
    {ok, detected_codec(packkit@codec:xz(), <<"xz"/utf8>>)};
%% zstd regular frame.
from_bytes(<<16#28, 16#B5, 16#2F, 16#FD, _/binary>>) ->
    {ok, detected_codec(packkit@codec:zstd(), <<"zst"/utf8>>)};
%% zstd skippable frame: first byte in 16#50..16#5F.
from_bytes(<<Low, 16#2A, 16#4D, 16#18, _/binary>>) when Low >= 16#50, Low =< 16#5F ->
    {ok, detected_codec(packkit@codec:zstd(), <<"zst"/utf8>>)};
%% lz4: two accepted four-byte magics.
from_bytes(<<16#04, 16#22, 16#4D, 16#18, _/binary>>) ->
    {ok, detected_codec(packkit@codec:lz4(), <<"lz4"/utf8>>)};
from_bytes(<<16#02, 16#21, 16#4C, 16#18, _/binary>>) ->
    {ok, detected_codec(packkit@codec:lz4(), <<"lz4"/utf8>>)};
%% snappy framed stream identifier.
from_bytes(<<16#FF, 16#06, 16#00, 16#00, "sNaPpY"/utf8, _/binary>>) ->
    {ok, detected_codec(packkit@codec:snappy(), <<"snappy"/utf8>>)};
%% bzip2: "BZh" followed by an ASCII digit 1..9.
from_bytes(<<16#42, 16#5A, 16#68, Lvl, _/binary>>) when Lvl >= 16#31, Lvl =< 16#39 ->
    {ok, detected_codec(packkit@codec:bzip2(), <<"bz2"/utf8>>)};
%% .Z (LZW).
from_bytes(<<16#1F, 16#9D, _/binary>>) ->
    {ok, detected_codec(packkit@codec:lzw(), <<"Z"/utf8>>)};
%% ar.
from_bytes(<<"!<arch>\n"/utf8, _/binary>>) ->
    {ok, detected_archive(packkit@archive:ar(), <<"ar"/utf8>>)};
%% cpio newc.
from_bytes(<<"070701"/utf8, _/binary>>) ->
    {ok, detected_archive(packkit@archive:cpio_newc(), <<"cpio"/utf8>>)};
%% No fixed leading magic matched: fall back to the offset-257 ustar magic,
%% then to the two-byte zlib heuristic (see the ordering note above).
from_bytes(Bytes) ->
    case {has_ustar_magic(Bytes), looks_like_zlib(Bytes)} of
        {true, _} ->
            {ok, detected_archive(packkit@archive:tar(), <<"tar"/utf8>>)};
        {false, true} ->
            {ok, detected_codec(packkit@codec:zlib(), <<"zlib"/utf8>>)};
        {false, false} ->
            {error, {detect_unknown_format, <<"byte-signature scan"/utf8>>}}
    end.
-file("src/packkit/detect.gleam", 287).
%% True when `Path` ends with at least one of `Suffixes`.
%% An empty suffix list yields `false`; scanning stops at the first hit.
-spec matches_any(binary(), list(binary())) -> boolean().
matches_any(Path, Suffixes) ->
    lists:any(
        fun(Suffix) -> gleam_stdlib:string_ends_with(Path, Suffix) end,
        Suffixes
    ).
-file("src/packkit/detect.gleam", 128).
%% Walk the rule table in order and return `{some, Detected}` for the first
%% rule whose suffix list matches `Path`, or `none` when no rule matches.
%% Only the winning rule's builder fun is invoked.
-spec find_filename_match(
    binary(),
    list({list(binary()), fun(() -> detected())})
) -> gleam@option:option(detected()).
find_filename_match(_Path, []) ->
    none;
find_filename_match(Path, [{Suffixes, Build} | Remaining]) ->
    case matches_any(Path, Suffixes) of
        true -> {some, Build()};
        false -> find_filename_match(Path, Remaining)
    end.
-file("src/packkit/detect.gleam", 23).
?DOC(" Detect a format from a filename or path suffix.\n").
%% Matching is done on a lowercased copy of `Path`, so suffix comparison is
%% case-insensitive. On failure the error carries the original,
%% un-lowercased `Path`.
-spec from_filename(binary()) -> {ok, detected()} |
    {error, packkit@error:detect_error()}.
from_filename(Path) ->
    Normalised = string:lowercase(Path),
    Rules = filename_rules(),
    case find_filename_match(Normalised, Rules) of
        none ->
            {error, {detect_unknown_format, Path}};
        {some, Match} ->
            {ok, Match}
    end.
-file("src/packkit/detect.gleam", 42).
?DOC(
    " Try filename detection first, then fall back to magic-byte\n"
    " detection on the supplied content. Mirrors the resolution order\n"
    " most CLI tools use: a meaningful extension is a strong signal, but\n"
    " when the path is uninformative (`-`, `/dev/stdin`, an arbitrary\n"
    " upload, etc.) the file's first bytes still pin the format.\n"
    "\n"
    " On total failure (neither the filename nor the magic bytes\n"
    " classified the input) the returned `DetectUnknownFormat` carries\n"
    " the original caller-supplied `path` in its `input` field — never\n"
    " the internal `\"byte-signature scan\"` sentinel — so the message\n"
    " stays specific to the user's input.\n"
).
%% Both detectors are evaluated up front (filename first, then bytes); the
%% results are combined with `gleam@result:'or'/2`, and a final error built
%% from `Path` is supplied as the last alternative.
-spec from_path_or_bytes(binary(), bitstring()) -> {ok, detected()} |
    {error, packkit@error:detect_error()}.
from_path_or_bytes(Path, Bytes) ->
    ByName = from_filename(Path),
    ByMagic = from_bytes(Bytes),
    Unknown = {error, {detect_unknown_format, Path}},
    gleam@result:'or'(gleam@result:'or'(ByName, ByMagic), Unknown).