Skip to main content

lib/z64.ex

defmodule Z64 do
  @moduledoc """
  Z64 is a high-performance Elixir library for decoding ZIP files,
  specifically supporting PKWARE's Deflate64 (Compression Method 9) decompression,
  which is not natively supported by OTP's `:zip` module.
  """

  @doc """
  Returns the entries recorded in the central directory of a ZIP archive.

  `zip_binary_or_path` may be the raw archive bytes or a path to an archive on disk.

  The End of Central Directory record supplies the offset and size of the central
  directory; that slice of the archive is then decoded into `Z64.Entry` structs,
  kept in the order in which they appear in the central directory.

  Returns `{:ok, entries}` on success. Returns `{:error, :corrupted_central_directory}`
  when the archive is shorter than the central directory it advertises, or the
  `{:error, reason}` produced while loading the archive or locating the EOCD record.
  """
  @spec list_entries(String.t() | binary()) :: {:ok, [Z64.Entry.t()]} | {:error, term()}
  def list_entries(zip_binary_or_path) do
    with {:ok, archive} <- get_zip_binary(zip_binary_or_path),
         {:ok, %{cd_offset: offset, cd_size: size}} <- parse_eocd(archive) do
      # The central directory must lie completely inside the archive bytes.
      if byte_size(archive) >= offset + size do
        {:ok, archive |> binary_part(offset, size) |> parse_central_directory([])}
      else
        {:error, :corrupted_central_directory}
      end
    end
  end

  @doc """
  Reads a single file out of a ZIP archive and returns its decompressed contents.

  `zip_binary_or_path` may be the raw archive bytes or a path to an archive on disk.
  `filename` is compared for exact equality against each entry's stored filename;
  the first matching entry is used.

  The decompressed data is checked against the CRC-32 stored in the entry.

  Returns `{:ok, decompressed_binary}` on success. Returns `{:error, :file_not_found}`
  when no entry carries that name, `{:error, :crc_mismatch}` when the checksum does
  not agree, or the `{:error, reason}` of whichever earlier step failed.
  """
  @spec extract_entry(String.t() | binary(), String.t()) :: {:ok, binary()} | {:error, term()}
  def extract_entry(zip_binary_or_path, filename) do
    with {:ok, archive} <- get_zip_binary(zip_binary_or_path),
         {:ok, entries} <- list_entries(archive),
         {:ok, entry} <- fetch_entry_by_name(entries, filename),
         {:ok, payload} <- extract_data(archive, entry),
         {:ok, contents} <-
           decompress_entry(payload, entry.compression_method, entry.uncompressed_size) do
      check_entry_crc32(contents, entry.crc32)
    end
  end

  # Looks up the first entry whose stored filename equals `filename`.
  defp fetch_entry_by_name(entries, filename) do
    case Enum.find(entries, fn candidate -> candidate.filename == filename end) do
      nil -> {:error, :file_not_found}
      entry -> {:ok, entry}
    end
  end

  # Accepts `contents` only when its CRC-32 equals the checksum stored for the entry.
  defp check_entry_crc32(contents, expected_crc32) do
    if :erlang.crc32(contents) == expected_crc32 do
      {:ok, contents}
    else
      {:error, :crc_mismatch}
    end
  end

  @doc """
  Unzips all file entries from the ZIP archive and writes them into `output_dir`.
  Accepts either a file path or a binary containing ZIP data.

  Entries whose name ends with `/` are created as directories. Every other entry is
  decompressed, checked against its CRC-32 and written to disk, with missing parent
  directories created first. Each entry is read through its own central-directory
  record, so an archive that lists the same filename more than once has every
  occurrence extracted (a later one overwrites an earlier one on disk).

  An entry whose name would resolve to a location outside `output_dir` (for example
  through `..` segments) is not written and is reported as
  `{:error, {:unsafe_path, filename}}`.

  Extraction is best-effort: a failing entry does not prevent the remaining entries
  from being processed, and the first failure encountered is the one returned.
  File-system failures are returned as `{:error, posix_reason}` rather than raised.

  Returns `:ok` or `{:error, reason}`.
  """
  @spec unzip(String.t() | binary(), String.t()) :: :ok | {:error, term()}
  def unzip(zip_binary_or_path, output_dir) do
    with {:ok, binary} <- get_zip_binary(zip_binary_or_path),
         {:ok, entries} <- list_entries(binary),
         :ok <- File.mkdir_p(output_dir) do
      # Absolute, normalised root that every target path is compared against.
      root = Path.expand(output_dir)

      entries
      |> Enum.map(&unzip_entry(binary, &1, root))
      |> Enum.find(:ok, &match?({:error, _}, &1))
    end
  end

  # Materialises one entry beneath `root`; returns `:ok` or `{:error, reason}`.
  defp unzip_entry(binary, entry, root) do
    with {:ok, target} <- unzip_target_path(root, entry.filename) do
      if String.ends_with?(entry.filename, "/") do
        # Directory entries carry no data; only the directory itself is created.
        File.mkdir_p(target)
      else
        with {:ok, contents} <- unzip_entry_contents(binary, entry),
             :ok <- File.mkdir_p(Path.dirname(target)) do
          File.write(target, contents)
        end
      end
    end
  end

  # Resolves `filename` beneath `root` and rejects names that escape it.
  # `Path.join/2` re-roots names with a leading slash under `root`, and
  # `Path.expand/1` collapses `.` and `..` segments, so a plain prefix check on
  # the expanded path is sufficient.
  defp unzip_target_path(root, filename) do
    target = root |> Path.join(filename) |> Path.expand()
    # Trimming first keeps the prefix correct when `root` is the filesystem root.
    prefix = String.trim_trailing(root, "/") <> "/"

    if target == root or String.starts_with?(target, prefix) do
      {:ok, target}
    else
      {:error, {:unsafe_path, filename}}
    end
  end

  # Reads, decompresses and CRC-checks the data belonging to exactly this entry.
  defp unzip_entry_contents(binary, entry) do
    with {:ok, compressed} <- extract_data(binary, entry),
         {:ok, contents} <-
           decompress_entry(compressed, entry.compression_method, entry.uncompressed_size) do
      if :erlang.crc32(contents) == entry.crc32 do
        {:ok, contents}
      else
        {:error, :crc_mismatch}
      end
    end
  end

  # Resolves the argument to the raw bytes of a ZIP archive.
  #
  # A binary that begins with a complete 4-byte ZIP signature is taken to be archive
  # data and returned unchanged. The recognised signatures are:
  #
  #   * 50 4B 03 04 - local file header
  #   * 50 4B 05 06 - End of Central Directory (first record of an empty archive)
  #   * 50 4B 07 08 - spanned/split archive marker
  #
  # All four bytes are checked rather than only "PK", so that a file path which
  # merely starts with those two letters (e.g. "PKG/archive.zip") is still read
  # from disk instead of being mistaken for archive data.
  #
  # Any other binary is read from disk when something exists at that path, and is
  # otherwise returned unchanged so that later parsing decides whether it is valid.
  #
  # Returns `{:ok, binary}` or the `{:error, reason}` reported by `File.read/1`.
  defp get_zip_binary(<<0x50, 0x4B, 0x03, 0x04, _::binary>> = binary), do: {:ok, binary}
  defp get_zip_binary(<<0x50, 0x4B, 0x05, 0x06, _::binary>> = binary), do: {:ok, binary}
  defp get_zip_binary(<<0x50, 0x4B, 0x07, 0x08, _::binary>> = binary), do: {:ok, binary}

  defp get_zip_binary(binary) when is_binary(binary) do
    if File.exists?(binary) do
      File.read(binary)
    else
      {:ok, binary}
    end
  end

  # Finds and decodes the End of Central Directory (EOCD) record.
  #
  # The signature is searched for backwards from the end of the archive. A bare
  # EOCD record is 22 bytes and may be followed by an archive comment of at most
  # 65,535 bytes, so only the last 65,535 + 22 bytes (or the whole archive when it
  # is shorter) are searched.
  #
  # Returns `{:ok, %{num_entries: _, cd_size: _, cd_offset: _, comment: _}}`,
  # `{:error, :invalid_zip_archive}` when no signature is found, or
  # `{:error, :invalid_eocd}` when the bytes after the signature cannot be decoded.
  defp parse_eocd(binary) do
    total = byte_size(binary)
    window = min(total, 65535 + 22)

    case scan_backward(binary, total - 22, total - window) do
      nil -> {:error, :invalid_zip_archive}
      fields -> decode_eocd_fields(fields)
    end
  end

  # Decodes the EOCD fields that follow the 4-byte signature.
  defp decode_eocd_fields(
         <<_disk_no::little-16, _cd_disk::little-16, _entries_on_this_disk::little-16,
           num_entries::little-16, cd_size::little-32, cd_offset::little-32,
           comment_len::little-16, comment::binary-size(comment_len), _::binary>>
       ) do
    {:ok,
     %{
       num_entries: num_entries,
       cd_size: cd_size,
       cd_offset: cd_offset,
       comment: comment
     }}
  end

  defp decode_eocd_fields(_other), do: {:error, :invalid_eocd}

  # Searches for the EOCD signature (50 4B 05 06), trying byte offset `current`
  # first and then each lower offset down to and including `limit`.
  #
  # Returns everything that follows the first signature found, or `nil` when no
  # offset in that range holds the signature (including when `current < limit`,
  # where the range is empty).
  defp scan_backward(binary, current, limit) do
    Enum.find_value(current..limit//-1, fn offset ->
      case binary do
        <<_::binary-size(^offset), 0x50, 0x4B, 0x05, 0x06, tail::binary>> -> tail
        _ -> nil
      end
    end)
  end

  # Decodes consecutive central directory file headers into `Z64.Entry` structs.
  #
  # Every record starts with the signature 50 4B 01 02, followed by 42 bytes of
  # fixed-width little-endian fields and then the filename, extra field and file
  # comment, whose lengths are given by the fixed fields.
  #
  # `collected` holds the entries decoded so far, newest first. Decoding stops at
  # the first position that does not hold a complete record (which includes the end
  # of the input), and the entries decoded up to that point are returned in the
  # order they were read.
  defp parse_central_directory(
         <<0x50, 0x4B, 0x01, 0x02, version_made::little-16, version_needed::little-16,
           flags::little-16, compression_method::little-16, last_mod_time::little-16,
           last_mod_date::little-16, crc32::little-32, compressed_size::little-32,
           uncompressed_size::little-32, filename_len::little-16, extra_len::little-16,
           comment_len::little-16, _disk_start::little-16, _internal_attr::little-16,
           _external_attr::little-32, local_header_offset::little-32,
           filename::binary-size(filename_len), _extra::binary-size(extra_len),
           _comment::binary-size(comment_len), remaining::binary>>,
         collected
       ) do
    record = %Z64.Entry{
      filename: filename,
      version_made: version_made,
      version_needed: version_needed,
      flags: flags,
      compression_method: compression_method,
      last_mod_time: last_mod_time,
      last_mod_date: last_mod_date,
      crc32: crc32,
      compressed_size: compressed_size,
      uncompressed_size: uncompressed_size,
      local_header_offset: local_header_offset
    }

    parse_central_directory(remaining, [record | collected])
  end

  defp parse_central_directory(_rest, collected), do: Enum.reverse(collected)

  # Returns the still-compressed bytes of an entry.
  #
  # The local file header is expected at `local_header_offset`; the number of data
  # bytes to take is the `compressed_size` passed in with the entry, not the size
  # fields of the local header, which are skipped.
  #
  # Returns `{:ok, compressed_data}`, or:
  #   * `{:error, :local_file_header_not_found}` - no local header signature at the offset
  #   * `{:error, :invalid_local_file_header}`   - the fixed header fields are incomplete
  #   * `{:error, :invalid_local_file_data}`     - name, extra field or data run past the input
  defp extract_data(binary, %{local_header_offset: offset, compressed_size: comp_size}) do
    case binary do
      <<_::binary-size(^offset), 0x50, 0x4B, 0x03, 0x04, header::binary>> ->
        read_local_payload(header, comp_size)

      _ ->
        {:error, :local_file_header_not_found}
    end
  end

  # Skips the 22 bytes of fixed fields that precede the two length fields
  # (version, flags, method, time, date, CRC-32 and both sizes), then steps over
  # the filename and extra field to reach the entry's data.
  defp read_local_payload(
         <<_fixed::binary-size(22), filename_len::little-16, extra_len::little-16,
           body::binary>>,
         comp_size
       ) do
    skip = filename_len + extra_len

    case body do
      <<_::binary-size(^skip), payload::binary-size(^comp_size), _::binary>> ->
        {:ok, payload}

      _ ->
        {:error, :invalid_local_file_data}
    end
  end

  defp read_local_payload(_header, _comp_size), do: {:error, :invalid_local_file_header}

  # Turns an entry's stored bytes back into its contents, based on the ZIP
  # compression method:
  #
  #   * 0 - stored; the data is returned unchanged
  #   * 8 - Deflate, inflated with `:zlib`
  #   * 9 - Deflate64, handed to `Z64.NIF.decompress/2` together with the expected size
  #
  # Returns `{:ok, binary}`, `{:error, :inflate_failed}`,
  # `{:error, {:deflate64_failed, reason}}`, or
  # `{:error, {:unsupported_compression_method, method}}` for any other method.
  defp decompress_entry(data, method, uncompressed_size) do
    case method do
      0 -> {:ok, data}
      8 -> inflate_raw_deflate(data)
      9 -> inflate_deflate64(data, uncompressed_size)
      other -> {:error, {:unsupported_compression_method, other}}
    end
  end

  # Inflates a Deflate stream. Anything raised, thrown or exited while inflating
  # is reported as `:inflate_failed`; the zlib handle is closed in every case.
  defp inflate_raw_deflate(data) do
    stream = :zlib.open()

    try do
      # Negative window bits: the data is a raw deflate stream without a zlib header.
      :zlib.inflateInit(stream, -15)
      chunks = :zlib.inflate(stream, data)
      :zlib.inflateEnd(stream)
      {:ok, IO.iodata_to_binary(chunks)}
    catch
      _kind, _reason -> {:error, :inflate_failed}
    after
      :zlib.close(stream)
    end
  end

  # Delegates to the Deflate64 NIF, converting an error raised by it into a tagged tuple.
  defp inflate_deflate64(data, uncompressed_size) do
    {:ok, Z64.NIF.decompress(data, uncompressed_size)}
  catch
    :error, reason -> {:error, {:deflate64_failed, reason}}
  end
end