//// LZ4 frame format codec.
////
//// Decodes LZ4 frames (magic `0x184D2204`) as specified in the LZ4
//// Frame Format Description. The encoder runs a greedy 4-byte hash-
//// chain match-finder over each block and emits the LZ77 sequences
//// in the canonical block layout (token byte + optional length
//// extensions + literals + 16-bit little-endian offset + optional
//// match-length extensions). Blocks that don't shrink are emitted
//// in the uncompressed form to guarantee the frame never grows
//// beyond `1 + ceil(input_size / block_max) * (4 + block_max)`.
import gleam/bit_array
import gleam/bool
import gleam/dict
import gleam/int
import gleam/list
import gleam/result
import packkit/codec as codecs
import packkit/error
import packkit/internal/xxh32
import packkit/limit
/// Magic number that opens a modern LZ4 frame. The codec reads and
/// writes it as a little-endian 32-bit field at the start of the frame.
const magic: Int = 0x184D2204
/// LZ4 legacy frame format magic (`02 21 4C 18` little-endian).
/// The legacy format predates RFC-style LZ4 frames and is still
/// emitted by older `lz4 -l` / `lz4c` tools and embedded systems.
/// It has no frame descriptor, no checksums, no content size — just
/// a magic followed by a sequence of `<size_LE32, body>` blocks
/// terminated at EOF.
const legacy_magic: Int = 0x184C2102
/// LZ4 legacy blocks have an 8 MiB block-size cap per the spec
/// (`LZ4_LEGACY_BLOCKSIZE`). Going past this would either be an
/// adversarial header or a new concatenated frame, so we use it
/// as an early-exit check during decoding.
const legacy_max_block_size: Int = 8_388_608
/// Bits of the FLG byte that hold the frame version.
const flg_version_mask: Int = 0xC0
/// The only version value (after masking) that the decoder accepts.
const flg_version_v1: Int = 0x40
/// FLG bit: every block is followed by a 4-byte block checksum
/// (the decoder skips it without verifying).
const flg_block_checksum: Int = 0x10
/// FLG bit: an 8-byte content size follows the BD byte.
const flg_content_size: Int = 0x08
/// FLG bit: a 4-byte content checksum follows the end mark
/// (the decoder only checks that it is present).
const flg_content_checksum: Int = 0x04
/// FLG bit: a 4-byte dictionary id is present in the descriptor.
const flg_dict_id: Int = 0x01
/// High bit of a 32-bit block header; set when the block body is stored
/// uncompressed.
const uncompressed_block_bit: Int = 0x80000000
/// All-ones 32-bit value, used to derive the block-size mask.
const u32_mask: Int = 0xFFFFFFFF
/// Largest chunk of input the encoder places in a single block (4 MiB).
const default_block_max: Int = 4_194_304
/// LZ4 frame codec smart constructor.
///
/// Returns whatever `codecs.lz4()` produces, unchanged.
pub fn codec() -> codecs.Codec {
codecs.lz4()
}
/// Encode `bytes` as an LZ4 frame. The frame descriptor uses
/// independent blocks, the v1 frame version, and a 4 MiB block
/// maximum; no content size, block checksum, content checksum, or
/// dictionary id is written.
///
/// Delegates to `encode_internal` with the content-size field disabled.
/// That helper always returns `Ok`; empty input produces a frame that
/// holds only the header and the end mark.
pub fn encode(bytes bytes: BitArray) -> Result(BitArray, error.CodecError) {
encode_internal(bytes, content_size_present: False)
}
/// Encode `bytes` as an LZ4 frame and store the uncompressed
/// content size in the frame descriptor. Strict LZ4 decoders use
/// the value to pre-allocate the output buffer and reject any
/// frame whose payload disagrees with the declared length; our own
/// decoder simply skips the field today, so encoding it does not
/// change `encode -> decode` round trips.
///
/// The size is written as a 64-bit little-endian field directly after
/// the BD byte. Everything else matches `encode`.
pub fn encode_with_content_size(
bytes bytes: BitArray,
) -> Result(BitArray, error.CodecError) {
encode_internal(bytes, content_size_present: True)
}
// Shared implementation behind `encode` and `encode_with_content_size`.
//
// Frame layout: magic (LE32) | FLG | BD | [content size, LE64] | HC |
// blocks... | end mark (LE32 zero). HC is the second byte of the xxh32
// digest (seed 0) taken over the descriptor bytes (FLG, BD and the
// optional content size). Always returns `Ok`.
fn encode_internal(
  bytes: BitArray,
  content_size_present content_size_present: Bool,
) -> Result(BitArray, error.CodecError) {
  let bd = 0x70
  let descriptor = case content_size_present {
    False -> <<0x60, bd>>
    True -> {
      let flg = 0x60 + flg_content_size
      <<flg, bd, bit_array.byte_size(bytes):size(64)-little>>
    }
  }
  let hc =
    xxh32.digest(bytes: descriptor, seed: 0)
    |> int.bitwise_shift_right(8)
    |> int.bitwise_and(0xFF)
  let blocks = encode_blocks(bytes, [])
  Ok(<<
    magic:size(32)-little,
    descriptor:bits,
    hc,
    blocks:bits,
    0:size(32)-little,
  >>)
}
/// Decode an LZ4 frame using the default resource limits.
///
/// Equivalent to `decode_with_limits` called with `limit.default()`.
pub fn decode(bytes bytes: BitArray) -> Result(BitArray, error.CodecError) {
decode_with_limits(bytes: bytes, limits: limit.default())
}
/// Decode an LZ4 frame using explicit resource limits.
///
/// Input larger than `max_input_bytes` is rejected up front with
/// `CodecLimitExceeded`. A leading legacy magic routes the rest of the
/// input to the legacy block decoder. Otherwise the modern frame header
/// is parsed: the magic and version are validated, the optional content
/// size (8 bytes) and dictionary id (4 bytes) and the header checksum
/// (1 byte) are skipped without being verified, and the remaining bytes
/// are handed to the block decoder.
pub fn decode_with_limits(
  bytes bytes: BitArray,
  limits limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  let input_size = bit_array.byte_size(bytes)
  case input_size > limit.max_input_bytes(limits), bytes {
    True, _ ->
      Error(error.CodecLimitExceeded(
        limit: "max_input_bytes",
        actual: input_size,
      ))
    False, <<m:size(32)-little, payload:bytes>> if m == legacy_magic ->
      decode_legacy_blocks(payload, <<>>, limits)
    False, <<m:size(32)-little, flg, bd, rest:bytes>> -> {
      let version = int.bitwise_and(flg, flg_version_mask)
      case m == magic, version == flg_version_v1 {
        False, _ ->
          Error(error.CodecInvalidData(message: "lz4: bad frame magic"))
        True, False ->
          Error(error.CodecInvalidData(
            message: "lz4: unsupported frame version",
          ))
        True, True -> {
          // Tests a single FLG bit.
          let has = fn(bit: Int) { int.bitwise_and(flg, bit) != 0 }
          let block_max =
            bd
            |> int.bitwise_shift_right(4)
            |> int.bitwise_and(7)
            |> block_max_bytes
          use rest <- result.try(maybe_skip(
            rest,
            has(flg_content_size),
            8,
            "lz4: content size truncated",
          ))
          use rest <- result.try(maybe_skip(
            rest,
            has(flg_dict_id),
            4,
            "lz4: dictionary id truncated",
          ))
          use rest <- result.try(maybe_skip(
            rest,
            True,
            1,
            "lz4: header checksum missing",
          ))
          decode_blocks(
            rest,
            <<>>,
            has(flg_block_checksum),
            has(flg_content_checksum),
            block_max,
            limits,
          )
        }
      }
    }
    False, _ ->
      Error(error.CodecInvalidData(message: "lz4: frame header truncated"))
  }
}
// Splits `remaining` into chunks of at most `default_block_max` bytes
// and frames each one as an LZ4 block. `acc` collects the framed blocks
// in reverse order.
//
// A chunk is stored compressed only when the compressed form is
// strictly smaller than the chunk. Otherwise it is stored raw, with the
// high bit of the block header set, so the frame never grows beyond the
// payload plus framing overhead.
fn encode_blocks(remaining: BitArray, acc: List(BitArray)) -> BitArray {
  let total = bit_array.byte_size(remaining)
  case total == 0 {
    True -> acc |> list.reverse |> bit_array.concat
    False -> {
      let chunk_size = int.min(total, default_block_max)
      let assert Ok(chunk) = bit_array.slice(remaining, 0, chunk_size)
      let assert Ok(after) =
        bit_array.slice(remaining, chunk_size, total - chunk_size)
      let packed = compress_block(chunk, chunk_size)
      let packed_size = bit_array.byte_size(packed)
      let framed = case packed_size >= chunk_size {
        True -> <<
          int.bitwise_or(uncompressed_block_bit, chunk_size):size(32)-little,
          chunk:bits,
        >>
        False -> <<packed_size:size(32)-little, packed:bits>>
      }
      encode_blocks(after, [framed, ..acc])
    }
  }
}
// -- LZ77 block compressor ---------------------------------------------
//
// The LZ4 block format (Yann Collet's spec) encodes a sequence of
// `(token | optional literal-length extensions | literals | 16-bit
// little-endian offset | optional match-length extensions)` records,
// with the final record carrying only literals. Two parsing rules
// the reference decoder enforces:
//
// 1. The last 5 bytes of input are always literals.
// 2. The last match must start at least 12 bytes before the end of
// block.
//
// The encoder runs a greedy 4-byte hash-chain match-finder honouring
// both rules. Min match length is 4; the stored match length is the
// actual length minus 4 (so the low nibble of the token covers
// matches of length 4..18 inline).
/// Shortest match the encoder emits; match lengths are stored minus this.
const lz4_min_match: Int = 4
/// Largest back-reference distance that fits the 16-bit offset field.
const lz4_max_distance: Int = 65_535
/// Number of trailing bytes of a block that must stay literals.
const lz4_last_literals: Int = 5
/// A match may not start closer than this many bytes to the end of the
/// block; blocks no longer than this are emitted as literals only.
const lz4_safety_margin: Int = 12
// Compresses one block of `size` bytes into LZ4 block format.
//
// Blocks no longer than `lz4_safety_margin` cannot contain a match and
// are emitted as a single literal-only sequence. Anything longer goes
// through the greedy match-finder.
fn compress_block(bytes: BitArray, size: Int) -> BitArray {
  let table = build_byte_table(bytes, 0, dict.new())
  case size > lz4_safety_margin {
    True -> compress_loop(table, size, 0, 0, dict.new(), [])
    False -> emit_literal_only_chunk(table, 0, size)
  }
}
// Main driver of the match-finder. While `pos` is still a legal match
// start (at least `lz4_safety_margin` bytes before the end) it delegates
// to `step_compress`. Once past that point, everything from
// `last_lit_start` onward is flushed as the final literal-only sequence
// and the accumulated sequences are concatenated.
fn compress_loop(
  table: dict.Dict(Int, Int),
  size: Int,
  pos: Int,
  last_lit_start: Int,
  hashes: dict.Dict(Int, Int),
  acc: List(BitArray),
) -> BitArray {
  case pos <= size - lz4_safety_margin {
    True -> step_compress(table, size, pos, last_lit_start, hashes, acc)
    False -> {
      let tail =
        emit_literal_only_chunk(table, last_lit_start, size - last_lit_start)
      [tail, ..acc] |> list.reverse |> bit_array.concat
    }
  }
}
// One step of the greedy match-finder at position `pos`.
//
// Looks up the most recent earlier position that shared this 4-byte
// hash. If that position is within `lz4_max_distance`, really matches on
// all four bytes, and the match is at least `lz4_min_match` long, a
// sequence is emitted (pending literals + match) and the cursor jumps
// past the match. In every other case the cursor advances by one byte.
// The hash table always records `pos` for the current key.
fn step_compress(
  table: dict.Dict(Int, Int),
  size: Int,
  pos: Int,
  last_lit_start: Int,
  hashes: dict.Dict(Int, Int),
  acc: List(BitArray),
) -> BitArray {
  let key =
    hash4(
      byte_at(table, pos),
      byte_at(table, pos + 1),
      byte_at(table, pos + 2),
      byte_at(table, pos + 3),
    )
  // `Ok(#(distance, match_len))` when a usable match exists at `pos`.
  let candidate = case dict.get(hashes, key) {
    Error(_) -> Error(Nil)
    Ok(prev) -> {
      let distance = pos - prev
      let usable =
        distance >= 1
        && distance <= lz4_max_distance
        && bytes4_equal(table, prev, pos)
      case usable {
        False -> Error(Nil)
        True -> {
          // The last `lz4_last_literals` bytes of the block can't be
          // consumed by a match, so the extension stops before them.
          let search_end = size - lz4_last_literals
          let match_len = lz4_match_length(table, prev, pos, search_end, 0)
          case match_len >= lz4_min_match {
            True -> Ok(#(distance, match_len))
            False -> Error(Nil)
          }
        }
      }
    }
  }
  let seen = dict.insert(hashes, key, pos)
  case candidate {
    Error(_) -> compress_loop(table, size, pos + 1, last_lit_start, seen, acc)
    Ok(#(distance, match_len)) -> {
      let seq =
        emit_sequence(
          table,
          last_lit_start,
          pos - last_lit_start,
          distance,
          match_len,
        )
      let next_pos = pos + match_len
      // Index the positions covered by the match so later data can
      // reference them.
      let new_hashes =
        lz4_insert_hashes(table, seen, pos + 1, next_pos - 1, size)
      compress_loop(table, size, next_pos, next_pos, new_hashes, [seq, ..acc])
    }
  }
}
// Builds an index -> byte-value map for `bytes`, numbering the first
// byte `index`. Used by the compressor for random access into the block.
// Stops at the first point where a whole byte can no longer be read.
fn build_byte_table(
  bytes: BitArray,
  index: Int,
  acc: dict.Dict(Int, Int),
) -> dict.Dict(Int, Int) {
  case bytes {
    <<value, remainder:bytes>> -> {
      let grown = dict.insert(acc, index, value)
      build_byte_table(remainder, index + 1, grown)
    }
    _ -> acc
  }
}
// Returns the byte stored at `index`, or 0 when the index is not in the
// table (for example past the end of the block).
fn byte_at(table: dict.Dict(Int, Int), index: Int) -> Int {
  table
  |> dict.get(index)
  |> result.unwrap(0)
}
// Hashes four consecutive bytes to a 16-bit key for the match-finder.
//
// The bytes are packed little-endian into one integer, multiplied by
// the Fibonacci constant 2_654_435_761, and bits 16..31 of the product
// are kept. The low 16 bits must NOT be used: `(a * k) mod 2^16` depends
// only on `a mod 2^16`, i.e. only on `b0` and `b1`, so every 4-gram
// sharing a 2-byte prefix would collide and matches would be missed.
//
// The exact hash isn't part of the LZ4 wire format — candidates are
// always verified byte-for-byte — so changing it only affects which
// matches are found, never whether the output decodes correctly.
fn hash4(b0: Int, b1: Int, b2: Int, b3: Int) -> Int {
  let combined =
    b0
    |> int.bitwise_or(int.bitwise_shift_left(b1, 8))
    |> int.bitwise_or(int.bitwise_shift_left(b2, 16))
    |> int.bitwise_or(int.bitwise_shift_left(b3, 24))
  let product = combined * 2_654_435_761
  product
  |> int.bitwise_shift_right(16)
  |> int.bitwise_and(0xFFFF)
}
// True when the four bytes starting at `p1` equal the four bytes
// starting at `p2`. Guards against hash collisions before a match is
// extended.
fn bytes4_equal(table: dict.Dict(Int, Int), p1: Int, p2: Int) -> Bool {
  list.all([0, 1, 2, 3], fn(step) {
    byte_at(table, p1 + step) == byte_at(table, p2 + step)
  })
}
// Counts how many bytes starting at `cursor` repeat the bytes starting
// at `base`. Extension stops at the first mismatch or when
// `cursor + acc` reaches `limit_pos` (exclusive). `acc` is the length
// found so far; callers pass 0.
fn lz4_match_length(
  table: dict.Dict(Int, Int),
  base: Int,
  cursor: Int,
  limit_pos: Int,
  acc: Int,
) -> Int {
  let can_extend =
    cursor + acc < limit_pos
    && byte_at(table, base + acc) == byte_at(table, cursor + acc)
  case can_extend {
    True -> lz4_match_length(table, base, cursor, limit_pos, acc + 1)
    False -> acc
  }
}
// Records the 4-byte hash of every position in `from..to` (inclusive)
// into `hashes`, later positions overwriting earlier ones for the same
// key. Positions with fewer than `lz4_min_match` bytes left before
// `size` are not indexed.
fn lz4_insert_hashes(
  table: dict.Dict(Int, Int),
  hashes: dict.Dict(Int, Int),
  from: Int,
  to: Int,
  size: Int,
) -> dict.Dict(Int, Int) {
  case from <= to && from + lz4_min_match <= size {
    False -> hashes
    True -> {
      let key =
        hash4(
          byte_at(table, from),
          byte_at(table, from + 1),
          byte_at(table, from + 2),
          byte_at(table, from + 3),
        )
      let updated = dict.insert(hashes, key, from)
      lz4_insert_hashes(table, updated, from + 1, to, size)
    }
  }
}
// Serialises one LZ4 sequence: token, optional literal-length
// extension, the literals themselves, the 16-bit little-endian match
// offset, and the optional match-length extension.
//
// The token's high nibble is the literal count and its low nibble is
// the match length minus `lz4_min_match`. Either nibble saturates at 15,
// in which case the remainder goes into extension bytes.
fn emit_sequence(
  table: dict.Dict(Int, Int),
  lit_start: Int,
  lit_len: Int,
  distance: Int,
  match_len: Int,
) -> BitArray {
  let ml_stored = match_len - lz4_min_match
  // Extension bytes for a length whose nibble saturated, else nothing.
  let extension = fn(value: Int) {
    case value >= 15 {
      True -> encode_length_extension(value - 15)
      False -> <<>>
    }
  }
  let token =
    int.min(lit_len, 15)
    |> int.bitwise_shift_left(4)
    |> int.bitwise_or(int.min(ml_stored, 15))
  let lit_ext = extension(lit_len)
  let literals = collect_bytes(table, lit_start, lit_len, <<>>)
  let ml_ext = extension(ml_stored)
  <<
    token,
    lit_ext:bits,
    literals:bits,
    distance:size(16)-little,
    ml_ext:bits,
  >>
}
// Emits `size` bytes of `bytes`, starting at `start`, as a single
// literal-only sequence. Builds the byte table itself.
fn emit_literal_only_block(bytes: BitArray, start: Int, size: Int) -> BitArray {
  bytes
  |> build_byte_table(0, dict.new())
  |> emit_literal_only_chunk(start, size)
}
// Serialises a literal-only sequence: a token whose match nibble is
// zero, the optional literal-length extension, then `lit_len` literal
// bytes read from `table` starting at `start`. This is the form of the
// final sequence of a block.
fn emit_literal_only_chunk(
  table: dict.Dict(Int, Int),
  start: Int,
  lit_len: Int,
) -> BitArray {
  let token = int.bitwise_shift_left(int.min(lit_len, 15), 4)
  let lit_ext = case lit_len < 15 {
    True -> <<>>
    False -> encode_length_extension(lit_len - 15)
  }
  let literals = collect_bytes(table, start, lit_len, <<>>)
  <<token, lit_ext:bits, literals:bits>>
}
// Encodes the part of a length that did not fit in the token nibble.
// The value is written as a run of 0xFF bytes (each worth 255) ended by
// one byte below 255, so a multiple of 255 ends with an explicit 0.
fn encode_length_extension(n: Int) -> BitArray {
  case n < 255 {
    True -> <<n>>
    False -> <<0xFF, encode_length_extension(n - 255):bits>>
  }
}
// Appends `count` bytes read from `table`, starting at index `start`,
// onto `acc` and returns the result.
fn collect_bytes(
  table: dict.Dict(Int, Int),
  start: Int,
  count: Int,
  acc: BitArray,
) -> BitArray {
  case count == 0 {
    True -> acc
    False -> {
      let byte = byte_at(table, start)
      collect_bytes(table, start + 1, count - 1, <<acc:bits, byte>>)
    }
  }
}
// Driver for legacy-format streams: repeatedly runs
// `decode_legacy_blocks_step` until it reports completion or an error.
// It is a Step/Continue trampoline, like the modern `decode_blocks`, so
// that decoding a multi-MB legacy `.lz4` stream on the JS target does
// not exhaust the call stack.
fn decode_legacy_blocks(
  bytes: BitArray,
  output: BitArray,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  case decode_legacy_blocks_step(bytes, output, limits) {
    Ok(LegacyBlocksContinue(bytes: remaining, output: grown)) ->
      decode_legacy_blocks(remaining, grown, limits)
    Ok(LegacyBlocksDone(output: finished)) -> Ok(finished)
    Error(err) -> Error(err)
  }
}
// Result of one `decode_legacy_blocks_step` iteration.
type LegacyBlocksStep {
// The stream is finished; `output` is the full decoded payload.
LegacyBlocksDone(output: BitArray)
// One block was consumed; `bytes` is the unread input and `output` the
// payload decoded so far.
LegacyBlocksContinue(bytes: BitArray, output: BitArray)
}
// Consumes at most one `<size_LE32, body>` legacy block from `bytes`.
//
// Decoding finishes cleanly when the input is empty (the legacy format
// has no terminator), when the size field is 0 (an explicit terminator
// some writers emit), or when the size field exceeds
// `legacy_max_block_size` (a following frame's magic or junk).
// Otherwise the block body is decompressed onto `output`.
fn decode_legacy_blocks_step(
  bytes: BitArray,
  output: BitArray,
  limits: limit.Limits,
) -> Result(LegacyBlocksStep, error.CodecError) {
  case bytes {
    <<>> -> Ok(LegacyBlocksDone(output))
    <<block_size:size(32)-little, _rest:bytes>>
      if block_size == 0 || block_size > legacy_max_block_size
    -> Ok(LegacyBlocksDone(output))
    <<block_size:size(32)-little, rest:bytes>> -> {
      let available = bit_array.byte_size(rest)
      use <- bool.guard(
        when: available < block_size,
        return: Error(error.CodecInvalidData(
          message: "lz4 legacy: block payload truncated",
        )),
      )
      let assert Ok(block) = bit_array.slice(rest, 0, block_size)
      let assert Ok(after_block) =
        bit_array.slice(rest, block_size, available - block_size)
      decode_block(block, output, limits)
      |> result.map(LegacyBlocksContinue(after_block, _))
    }
    _ ->
      Error(error.CodecInvalidData(
        message: "lz4 legacy: block header truncated",
      ))
  }
}
// Frame-level driver: each iteration has `decode_blocks_step` consume a
// single block, until the end mark or an error is reached. It has the
// same Step/Continue trampoline shape as `decode_block_loop`, so the JS
// target stays in constant stack for frames that carry many blocks
// (which would otherwise exceed the JS recursion budget).
fn decode_blocks(
  bytes: BitArray,
  output: BitArray,
  block_checksum: Bool,
  content_checksum: Bool,
  block_max: Int,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  let step =
    decode_blocks_step(
      bytes,
      output,
      block_checksum,
      content_checksum,
      block_max,
      limits,
    )
  case step {
    Ok(BlocksContinue(bytes: remaining, output: grown)) ->
      decode_blocks(
        remaining,
        grown,
        block_checksum,
        content_checksum,
        block_max,
        limits,
      )
    Ok(BlocksDone(output: finished)) -> Ok(finished)
    Error(err) -> Error(err)
  }
}
// Result of one `decode_blocks_step` iteration.
type BlocksStep {
// The end mark was reached; `output` is the full decoded payload.
BlocksDone(output: BitArray)
// One block was consumed; `bytes` is the unread input and `output` the
// payload decoded so far.
BlocksContinue(bytes: BitArray, output: BitArray)
}
// Consumes one block (or the end mark) of a modern LZ4 frame.
//
// A zero 32-bit header is the end mark and hands over to `finalize`.
// Otherwise the header's high bit says whether the body is stored raw
// and the low 31 bits give the body size, which must not exceed
// `block_max` nor the bytes available. When `block_checksum` is set, the
// 4 bytes after the body are skipped without being verified. The body
// is then appended raw or decompressed onto `output`.
fn decode_blocks_step(
  bytes: BitArray,
  output: BitArray,
  block_checksum: Bool,
  content_checksum: Bool,
  block_max: Int,
  limits: limit.Limits,
) -> Result(BlocksStep, error.CodecError) {
  case bytes {
    <<0:size(32)-little, rest:bytes>> ->
      finalize(rest, output, content_checksum)
      |> result.map(BlocksDone)
    <<header:size(32)-little, rest:bytes>> -> {
      let size_mask =
        int.bitwise_exclusive_or(u32_mask, uncompressed_block_bit)
      let block_size = int.bitwise_and(header, size_mask)
      let stored_raw = int.bitwise_and(header, uncompressed_block_bit) != 0
      let available = bit_array.byte_size(rest)
      use <- bool.guard(
        when: block_size > block_max,
        return: Error(error.CodecInvalidData(
          message: "lz4: block size exceeds frame max block size",
        )),
      )
      use <- bool.guard(
        when: available < block_size,
        return: Error(error.CodecInvalidData(
          message: "lz4: block payload truncated",
        )),
      )
      let assert Ok(block) = bit_array.slice(rest, 0, block_size)
      let assert Ok(after_block) =
        bit_array.slice(rest, block_size, available - block_size)
      use after_block <- result.try(maybe_skip(
        after_block,
        block_checksum,
        4,
        "lz4: block checksum missing",
      ))
      let appended = case stored_raw {
        True -> append_with_limit(output, block, limits)
        False -> decode_block(block, output, limits)
      }
      result.map(appended, BlocksContinue(after_block, _))
    }
    _ -> Error(error.CodecInvalidData(message: "lz4: block header truncated"))
  }
}
// Called once the end mark has been read. When the frame declares a
// content checksum, at least 4 bytes must follow the end mark; the
// checksum value itself is not verified. Returns `output` unchanged on
// success.
fn finalize(
  after_end: BitArray,
  output: BitArray,
  content_checksum: Bool,
) -> Result(BitArray, error.CodecError) {
  let checksum_missing =
    content_checksum && bit_array.byte_size(after_end) < 4
  case checksum_missing {
    True ->
      Error(error.CodecInvalidData(message: "lz4: content checksum missing"))
    False -> Ok(output)
  }
}
// Decompresses one LZ4 block onto `output`. An empty block contributes
// nothing; anything else is handed to the token-level loop.
fn decode_block(
  block: BitArray,
  output: BitArray,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  case bit_array.byte_size(block) == 0 {
    True -> Ok(output)
    False -> decode_block_loop(block, output, limits)
  }
}
// Token-level driver for a single LZ4 block: runs `decode_block_step`
// once per sequence until the block is exhausted or an error occurs.
//
// The work is split into a Step/Continue trampoline (`decode_block_step`
// / `BlockStep`) because every `use ... <- result.try(...)` desugars to
// a closure on the JS target, and Gleam's JS backend only rewrites a
// self-tail-call into a `while` when the recursive call sits at the
// function body's true tail position. Without the trampoline, a block
// carrying more than ~1500 LZ77 sequences overflows the JS stack
// (`RangeError: Maximum call stack size exceeded`), mirroring the
// deflate/bzip2 fix in commit 2adc249.
fn decode_block_loop(
  block: BitArray,
  output: BitArray,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  case decode_block_step(block, output, limits) {
    Ok(BlockContinue(block: remaining, output: grown)) ->
      decode_block_loop(remaining, grown, limits)
    Ok(BlockDone(output: finished)) -> Ok(finished)
    Error(err) -> Error(err)
  }
}
// Result of one `decode_block_step` iteration.
type BlockStep {
// The block is exhausted; `output` holds everything decoded so far.
BlockDone(output: BitArray)
// One sequence was decoded; `block` is the unread remainder of the
// block and `output` the payload decoded so far.
BlockContinue(block: BitArray, output: BitArray)
}
// Decodes a single LZ4 sequence from the front of `block`.
//
// Steps, in order: read the token; resolve the literal length (the high
// nibble, extended when it is 15); copy that many literal bytes to
// `output`; stop if the block ends there (the final sequence has no
// match part). Otherwise read the 16-bit little-endian offset, reject a
// zero offset, resolve the match length (the low nibble, extended when
// it is 15, plus 4), reject an offset that reaches before the start of
// `output`, and copy the match.
fn decode_block_step(
  block: BitArray,
  output: BitArray,
  limits: limit.Limits,
) -> Result(BlockStep, error.CodecError) {
  // A nibble of 15 means the length continues in extension bytes.
  let read_length = fn(nibble: Int, data: BitArray) {
    case nibble == 15 {
      True -> read_extension(data, 15)
      False -> Ok(#(nibble, data))
    }
  }
  case block {
    <<>> -> Ok(BlockDone(output))
    <<token, rest:bytes>> -> {
      let lit_nibble = int.bitwise_shift_right(token, 4)
      let match_nibble = int.bitwise_and(token, 0x0F)
      use #(lit_len, rest) <- result.try(read_length(lit_nibble, rest))
      let available = bit_array.byte_size(rest)
      use <- bool.guard(
        when: available < lit_len,
        return: Error(error.CodecInvalidData(
          message: "lz4: literal length exceeds block payload",
        )),
      )
      let assert Ok(literals) = bit_array.slice(rest, 0, lit_len)
      let assert Ok(after_literals) =
        bit_array.slice(rest, lit_len, available - lit_len)
      use output <- result.try(append_with_limit(output, literals, limits))
      case after_literals {
        <<>> -> Ok(BlockDone(output))
        <<offset:size(16)-little, after_offset:bytes>> -> {
          use <- bool.guard(
            when: offset == 0,
            return: Error(error.CodecInvalidData(
              message: "lz4: zero match offset",
            )),
          )
          use #(stored_len, after_offset) <- result.try(read_length(
            match_nibble,
            after_offset,
          ))
          use <- bool.guard(
            when: offset > bit_array.byte_size(output),
            return: Error(error.CodecInvalidData(
              message: "lz4: match offset exceeds output",
            )),
          )
          copy_match(output, offset, stored_len + 4, limits)
          |> result.map(BlockContinue(after_offset, _))
        }
        _ ->
          Error(error.CodecInvalidData(message: "lz4: match offset truncated"))
      }
    }
    _ -> Error(error.CodecInvalidData(message: "lz4: malformed block"))
  }
}
// Reads LZ4 length-extension bytes from the front of `bytes`, adding
// each to `acc`. A 0xFF byte means "more follows"; any other byte ends
// the run. Returns the total together with the unread input, or an
// error if the input ends before a terminating byte is seen.
fn read_extension(
  bytes: BitArray,
  acc: Int,
) -> Result(#(Int, BitArray), error.CodecError) {
  case bytes {
    <<0xFF, rest:bytes>> -> read_extension(rest, acc + 0xFF)
    <<last, rest:bytes>> -> Ok(#(acc + last, rest))
    _ ->
      Error(error.CodecInvalidData(message: "lz4: length extension truncated"))
  }
}
// Appends `length` bytes to `output`, copied from `offset` bytes before
// its current end. When the source range does not overlap the bytes
// being written (`offset >= length`) it is copied as one slice. An
// overlapping match has to be replayed one byte at a time.
fn copy_match(
  output: BitArray,
  offset: Int,
  length: Int,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  case length == 0, offset >= length {
    True, _ -> Ok(output)
    False, True -> {
      let source_start = bit_array.byte_size(output) - offset
      let assert Ok(chunk) = bit_array.slice(output, source_start, length)
      append_with_limit(output, chunk, limits)
    }
    False, False -> copy_match_byte_by_byte(output, offset, length, limits)
  }
}
// Overlapping-match copy, one byte per iteration. This is the hot path
// for run-length-style payloads (`a × N` compresses to a single token
// with `offset = 1`), and it iterates once per output byte — 15 KiB of
// repeated input is ~15 000 iterations. The Step/Continue trampoline
// keeps that from becoming 15 000 JS stack frames.
fn copy_match_byte_by_byte(
  output: BitArray,
  offset: Int,
  length: Int,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  case copy_match_byte_step(output, offset, length, limits) {
    Ok(CopyContinue(output: grown, length: remaining)) ->
      copy_match_byte_by_byte(grown, offset, remaining, limits)
    Ok(CopyDone(output: finished)) -> Ok(finished)
    Error(err) -> Error(err)
  }
}
// Result of one `copy_match_byte_step` iteration.
type CopyStep {
// Nothing left to copy; `output` is the final buffer.
CopyDone(output: BitArray)
// One byte was copied; `length` is how many bytes remain.
CopyContinue(output: BitArray, length: Int)
}
// Copies a single byte of an overlapping match: the byte `offset`
// positions before the end of `output` is appended to it, subject to
// the output limit. Reports completion once `length` reaches 0.
fn copy_match_byte_step(
  output: BitArray,
  offset: Int,
  length: Int,
  limits: limit.Limits,
) -> Result(CopyStep, error.CodecError) {
  use <- bool.guard(when: length == 0, return: Ok(CopyDone(output)))
  let source_index = bit_array.byte_size(output) - offset
  let assert Ok(byte_slice) = bit_array.slice(output, source_index, 1)
  append_with_limit(output, byte_slice, limits)
  |> result.map(CopyContinue(_, length - 1))
}
// Appends `chunk` to `output` unless the combined size would exceed
// `max_output_bytes`, in which case it fails with `CodecLimitExceeded`
// carrying the size that would have been reached.
fn append_with_limit(
  output: BitArray,
  chunk: BitArray,
  limits: limit.Limits,
) -> Result(BitArray, error.CodecError) {
  let projected = bit_array.byte_size(output) + bit_array.byte_size(chunk)
  use <- bool.guard(
    when: projected > limit.max_output_bytes(limits),
    return: Error(error.CodecLimitExceeded(
      limit: "max_output_bytes",
      actual: projected,
    )),
  )
  Ok(bit_array.concat([output, chunk]))
}
// Drops the first `count` bytes of `bytes` when `active` is true, and
// returns `bytes` untouched otherwise. Fails with `CodecInvalidData`
// carrying `message` if fewer than `count` bytes are available.
fn maybe_skip(
  bytes: BitArray,
  active: Bool,
  count: Int,
  message: String,
) -> Result(BitArray, error.CodecError) {
  let available = bit_array.byte_size(bytes)
  case active, available < count {
    False, _ -> Ok(bytes)
    True, True -> Error(error.CodecInvalidData(message: message))
    True, False -> {
      let assert Ok(after) = bit_array.slice(bytes, count, available - count)
      Ok(after)
    }
  }
}
// Maps the 3-bit "block maximum size" index from the BD byte to the
// largest block size, in bytes, that the frame may contain.
//
// The LZ4 frame format defines these as binary sizes: 4 -> 64 KiB,
// 5 -> 256 KiB, 6 -> 1 MiB, 7 -> 4 MiB. Decimal approximations would
// make the decoder reject valid full-size blocks, including the 4 MiB
// (4_194_304-byte) blocks this module's own encoder writes under
// index 7. Indices outside 4..7 fall back to the 4 MiB cap rather than
// failing.
fn block_max_bytes(index: Int) -> Int {
  case index {
    4 -> 65_536
    5 -> 262_144
    6 -> 1_048_576
    7 -> 4_194_304
    _ -> 4_194_304
  }
}