defmodule Z64 do
@moduledoc """
Z64 is a high-performance Elixir library for decoding ZIP files,
specifically supporting PKWARE's Deflate64 (Compression Method 9) decompression,
which is not natively supported by OTP's `:zip` module.
"""
@doc """
Returns the entries recorded in the central directory of a ZIP archive.

`zip_binary_or_path` may be a path to an archive on disk or the archive
bytes themselves.

On success the result is `{:ok, entries}`, where each element is a
`%Z64.Entry{}` (filename, sizes, CRC-32, compression method, local header
offset, ...). Failures are reported as `{:error, reason}`; in particular
`{:error, :corrupted_central_directory}` is returned when the offset and size
announced by the end-of-central-directory record do not fit inside the data.
"""
@spec list_entries(String.t() | binary()) :: {:ok, [Z64.Entry.t()]} | {:error, term()}
def list_entries(zip_binary_or_path) do
  with {:ok, archive} <- get_zip_binary(zip_binary_or_path),
       {:ok, eocd} <- parse_eocd(archive) do
    start = eocd.cd_offset
    len = eocd.cd_size

    # The directory must lie completely inside the archive bytes.
    if byte_size(archive) >= start + len do
      directory = binary_part(archive, start, len)
      {:ok, parse_central_directory(directory, [])}
    else
      {:error, :corrupted_central_directory}
    end
  end
end
@doc """
Reads one file out of a ZIP archive and returns its decompressed contents.

`zip_binary_or_path` may be a path to an archive on disk or the archive
bytes themselves; `filename` is compared for equality against the names
stored in the central directory, and the first match is used.

Returns `{:ok, decompressed_binary}` on success. Possible failures include
`{:error, :file_not_found}` when no entry has that name and
`{:error, :crc_mismatch}` when the decompressed bytes do not match the CRC-32
stored for the entry; errors from locating or decompressing the data are
passed through unchanged.
"""
@spec extract_entry(String.t() | binary(), String.t()) :: {:ok, binary()} | {:error, term()}
def extract_entry(zip_binary_or_path, filename) do
  with {:ok, archive} <- get_zip_binary(zip_binary_or_path),
       {:ok, entries} <- list_entries(archive),
       {:ok, entry} <- locate_entry(entries, filename),
       {:ok, compressed_data} <- extract_data(archive, entry),
       {:ok, decompressed} <-
         decompress_entry(compressed_data, entry.compression_method, entry.uncompressed_size) do
    check_crc(decompressed, entry.crc32)
  end
end

# Picks the first entry whose stored name equals `filename`.
defp locate_entry(entries, filename) do
  case Enum.find(entries, fn candidate -> candidate.filename == filename end) do
    nil -> {:error, :file_not_found}
    entry -> {:ok, entry}
  end
end

# Accepts the payload only when its CRC-32 equals the value stored in the archive.
defp check_crc(payload, expected_crc) do
  case :erlang.crc32(payload) do
    ^expected_crc -> {:ok, payload}
    _other -> {:error, :crc_mismatch}
  end
end
@doc """
Unzips all file entries from the ZIP archive and writes them into `output_dir`.

Accepts either a file path or a binary containing ZIP data. Entries whose
name ends with `/` are created as directories; every other entry is
extracted, CRC-checked and written, creating parent directories as needed.

Entry names come from the archive and are therefore untrusted: a name that is
absolute or that would resolve outside `output_dir` (for example
`"../../etc/passwd"`) is not written and yields
`{:error, {:unsafe_path, filename}}`.

Every entry is attempted even if an earlier one failed. Returns `:ok` when
all entries succeeded, otherwise the first `{:error, reason}` encountered
(in archive order). File-system failures are returned as `{:error, posix}`
rather than raised.
"""
@spec unzip(String.t() | binary(), String.t()) :: :ok | {:error, term()}
def unzip(zip_binary_or_path, output_dir) do
  with {:ok, binary} <- get_zip_binary(zip_binary_or_path),
       {:ok, entries} <- list_entries(binary),
       :ok <- File.mkdir_p(output_dir) do
    entries
    |> Enum.map(&unzip_entry(binary, &1, output_dir))
    |> Enum.find(:ok, &match?({:error, _}, &1))
  end
end

# Materialises a single entry below `output_dir`, returning `:ok` or `{:error, reason}`.
defp unzip_entry(binary, entry, output_dir) do
  # Reject names that would land outside `output_dir` (Zip Slip).
  case Path.safe_relative(entry.filename, output_dir) do
    {:ok, relative} ->
      target = Path.join(output_dir, relative)

      # Directory entries conventionally end with "/" and carry no data.
      if String.ends_with?(entry.filename, "/") do
        File.mkdir_p(target)
      else
        with {:ok, decompressed} <- extract_entry(binary, entry.filename),
             :ok <- File.mkdir_p(Path.dirname(target)) do
          File.write(target, decompressed)
        end
      end

    :error ->
      {:error, {:unsafe_path, entry.filename}}
  end
end
# Resolves the argument to raw ZIP bytes.
#
# Input that begins with a complete 4-byte ZIP record signature (local file
# header `PK\x03\x04`, end of central directory `PK\x05\x06`, or spanning
# marker `PK\x07\x08`) is returned as-is. Checking only the two-byte "PK"
# prefix would misclassify file paths such as "PKG.zip" as archive data, so
# the full signature is required.
#
# Anything else is read from disk when it names an existing path. If no such
# path exists the input is assumed to be ZIP data and returned unchanged, so
# that later parsing decides whether it is valid.
#
# Returns `{:ok, binary}` or the `{:error, posix}` produced by `File.read/1`.
defp get_zip_binary(<<0x50, 0x4B, 0x03, 0x04, _::binary>> = binary), do: {:ok, binary}
defp get_zip_binary(<<0x50, 0x4B, 0x05, 0x06, _::binary>> = binary), do: {:ok, binary}
defp get_zip_binary(<<0x50, 0x4B, 0x07, 0x08, _::binary>> = binary), do: {:ok, binary}

defp get_zip_binary(binary) when is_binary(binary) do
  if File.exists?(binary) do
    File.read(binary)
  else
    {:ok, binary}
  end
end
# Finds and decodes the End of Central Directory (EOCD) record.
#
# The fixed part of the record is 22 bytes and may be followed by a comment of
# up to 65,535 bytes, so only that many trailing bytes are searched for the
# signature. Returns `{:ok, map}` with `:num_entries`, `:cd_size`,
# `:cd_offset` and `:comment`, `{:error, :invalid_zip_archive}` when no
# signature is found, or `{:error, :invalid_eocd}` when the bytes after the
# signature are too short to hold the record and its comment.
defp parse_eocd(binary) do
  total = byte_size(binary)
  window = min(total, 65535 + 22)

  binary
  |> scan_backward(total - 22, total - window)
  |> decode_eocd_fields()
end

# Decodes the bytes that follow the EOCD signature (or `nil` if none was found).
defp decode_eocd_fields(nil), do: {:error, :invalid_zip_archive}

defp decode_eocd_fields(<<
       _disk_no::little-16,
       _cd_disk::little-16,
       _num_entries_this_disk::little-16,
       num_entries::little-16,
       cd_size::little-32,
       cd_offset::little-32,
       comment_len::little-16,
       comment::binary-size(comment_len),
       _::binary
     >>) do
  {:ok, %{num_entries: num_entries, cd_size: cd_size, cd_offset: cd_offset, comment: comment}}
end

defp decode_eocd_fields(_truncated), do: {:error, :invalid_eocd}
# Searches for the EOCD signature (`PK\x05\x06`) starting at byte offset
# `current` and stepping one byte towards the start until `limit` has been
# tried. Returns the bytes following the first signature found, or `nil`
# when none of the candidate offsets holds one.
defp scan_backward(binary, current, limit) when current >= limit do
  case binary do
    <<_skipped::binary-size(^current), "PK", 5, 6, after_signature::binary>> ->
      after_signature

    _no_signature_here ->
      scan_backward(binary, current - 1, limit)
  end
end

defp scan_backward(_binary, _current, _limit), do: nil
# Walks the central directory, converting each file-header record
# (signature `PK\x01\x02`) into a `%Z64.Entry{}`.
#
# Parsing stops at the first position that is not a complete record — the end
# of the input, a different signature, or a truncated header — and the
# entries collected up to that point are returned in archive order.
defp parse_central_directory(
       <<"PK", 1, 2, made_by::little-16, needed::little-16, flags::little-16,
         method::little-16, mod_time::little-16, mod_date::little-16, crc::little-32,
         packed_size::little-32, unpacked_size::little-32, name_len::little-16,
         extra_len::little-16, comment_len::little-16, _disk_start::little-16,
         _internal_attr::little-16, _external_attr::little-32, header_offset::little-32,
         name::binary-size(name_len), _extra::binary-size(extra_len),
         _comment::binary-size(comment_len), remaining::binary>>,
       acc
     ) do
  entry = %Z64.Entry{
    filename: name,
    version_made: made_by,
    version_needed: needed,
    flags: flags,
    compression_method: method,
    last_mod_time: mod_time,
    last_mod_date: mod_date,
    crc32: crc,
    compressed_size: packed_size,
    uncompressed_size: unpacked_size,
    local_header_offset: header_offset
  }

  parse_central_directory(remaining, [entry | acc])
end

defp parse_central_directory(_end_or_malformed, acc), do: Enum.reverse(acc)
# Returns the still-compressed payload of an entry.
#
# The local file header is expected at `local_header_offset`; its filename and
# extra-field lengths tell how many bytes to skip before the payload, and
# `compressed_size` (taken from the entry, not from the local header) tells
# how many bytes to take.
#
# Errors: `:local_file_header_not_found` when the signature is not at the
# offset, `:invalid_local_file_header` when the fixed header is truncated, and
# `:invalid_local_file_data` when fewer bytes remain than the header and
# payload require.
defp extract_data(binary, %{local_header_offset: offset, compressed_size: comp_size}) do
  with {:ok, header_fields} <- skip_local_signature(binary, offset),
       {:ok, variable_len, trailing} <- read_local_lengths(header_fields) do
    slice_payload(trailing, variable_len, comp_size)
  end
end

# Checks for the local file header signature (`PK\x03\x04`) at `offset`.
defp skip_local_signature(binary, offset) do
  case binary do
    <<_::binary-size(^offset), "PK", 3, 4, after_signature::binary>> ->
      {:ok, after_signature}

    _ ->
      {:error, :local_file_header_not_found}
  end
end

# Skips the 22 bytes of fixed fields (version, flags, method, time, date,
# CRC-32, both sizes) and reads the two variable-part lengths.
defp read_local_lengths(
       <<_fixed::binary-size(22), filename_len::little-16, extra_len::little-16,
         trailing::binary>>
     ) do
  {:ok, filename_len + extra_len, trailing}
end

defp read_local_lengths(_truncated), do: {:error, :invalid_local_file_header}

# Drops the filename/extra bytes and takes exactly `comp_size` payload bytes.
defp slice_payload(trailing, variable_len, comp_size) do
  case trailing do
    <<_::binary-size(^variable_len), payload::binary-size(^comp_size), _::binary>> ->
      {:ok, payload}

    _ ->
      {:error, :invalid_local_file_data}
  end
end
# Turns an entry's stored bytes back into its original contents, dispatching
# on the ZIP compression method:
#
#   * 0 - stored; the data is returned untouched
#   * 8 - Deflate, inflated with `:zlib`
#   * 9 - Deflate64, delegated to `Z64.NIF.decompress/2`
#
# Any other method yields `{:error, {:unsupported_compression_method, method}}`.
defp decompress_entry(data, method, uncompressed_size) do
  case method do
    0 -> {:ok, data}
    8 -> inflate_raw(data)
    9 -> inflate64(data, uncompressed_size)
    other -> {:error, {:unsupported_compression_method, other}}
  end
end

# Inflates a Deflate stream; any failure is reported as `:inflate_failed`.
defp inflate_raw(data) do
  stream = :zlib.open()

  try do
    # Negative window bits: the data has no zlib header or checksum.
    :zlib.inflateInit(stream, -15)
    chunks = :zlib.inflate(stream, data)
    :zlib.inflateEnd(stream)
    {:ok, IO.iodata_to_binary(chunks)}
  catch
    _kind, _reason -> {:error, :inflate_failed}
  after
    # The zlib handle is released whether or not inflation succeeded.
    :zlib.close(stream)
  end
end

# Runs the Deflate64 NIF, converting raised errors into an error tuple.
defp inflate64(data, uncompressed_size) do
  {:ok, Z64.NIF.decompress(data, uncompressed_size)}
catch
  :error, reason -> {:error, {:deflate64_failed, reason}}
end
end