Skip to main content

lib/pdf_ex/xref/stream_builder.ex

defmodule PdfEx.XRef.StreamBuilder do
  @moduledoc false

  alias PdfEx.COS.{Stream, Name}

  @doc """
  Builds a Flate-compressed cross-reference stream for the given entries.

  `offsets` maps `{object_id, generation}` to either the byte offset of an
  in-use object or `:free`. Entries are emitted in ascending object-id order,
  one fixed-width record each: a 1-byte type, an offset field of `ow` bytes
  (4 to 8, chosen from the largest offset) and a 2-byte generation.

  `trailer` supplies the base dictionary. The keys `:Type`, `:W`, `:Index`,
  `:Size`, `:Filter` and `:Length` are overwritten, and any `:DecodeParms`
  is removed because the payload is written without a predictor. `:Size` is
  set to `new_obj_id + 1`.

  Returns a `Stream` struct whose `dictionary` is the resulting map and whose
  `raw_bytes` is the zlib-compressed payload.
  """
  @spec build_xref_stream(
          %{{pos_integer(), non_neg_integer()} => non_neg_integer() | :free},
          pos_integer(),
          map()
        ) :: Stream.t()
  def build_xref_stream(offsets, new_obj_id, trailer) do
    sorted = Enum.sort_by(offsets, fn {{id, _gen}, _v} -> id end)

    # The offset field width is sized to the largest offset so that incremental
    # updates to documents past 4 GiB don't silently truncate (a 4-byte field
    # wraps mod 2^32). Minimum 4 bytes preserves the layout for ordinary files.
    ow = offset_width(sorted)

    # Collect the fixed-width records as iodata and flatten once.
    payload =
      sorted
      |> Enum.map(&pack(&1, ow))
      |> IO.iodata_to_binary()

    # One single-entry subsection ([first_id, count = 1]) per object.
    index = Enum.flat_map(sorted, fn {{id, _gen}, _v} -> [id, 1] end)
    compressed = :zlib.compress(payload)

    dict =
      trailer
      # A trailer inherited from an earlier cross-reference stream may carry
      # /DecodeParms (typically a PNG predictor). The payload here is plain
      # deflate, so a stale predictor entry would make readers mis-decode
      # every record; drop it.
      |> Map.delete(:DecodeParms)
      |> Map.put(:Type, %Name{value: "XRef"})
      |> Map.put(:W, [1, ow, 2])
      |> Map.put(:Index, index)
      |> Map.put(:Size, new_obj_id + 1)
      |> Map.put(:Filter, %Name{value: "FlateDecode"})
      |> Map.put(:Length, byte_size(compressed))

    %Stream{dictionary: dict, raw_bytes: compressed}
  end

  # Returns the number of bytes (4..8) needed to hold the largest integer
  # offset among the entries. `:free` entries count as offset 0, and an empty
  # entry list yields the minimum width of 4.
  defp offset_width(sorted) do
    max_offset =
      sorted
      |> Enum.map(fn {_k, v} -> if is_integer(v), do: v, else: 0 end)
      |> Enum.max(fn -> 0 end)

    cond do
      max_offset <= 0xFFFFFFFF -> 4
      max_offset <= 0xFFFFFFFFFF -> 5
      max_offset <= 0xFFFFFFFFFFFF -> 6
      max_offset <= 0xFFFFFFFFFFFFFF -> 7
      true -> 8
    end
  end

  # Free entry: type 0, a zeroed offset field, and the generation bumped by
  # one, capped at 65535 so it still fits the 2-byte field.
  defp pack({{_id, gen}, :free}, ow), do: <<0, 0::size(ow)-unit(8), min(gen + 1, 65535)::16>>

  # In-use entry: type 1, the byte offset in `ow` big-endian bytes, then the
  # generation in 2 bytes.
  defp pack({{_id, gen}, offset}, ow) when is_integer(offset),
    do: <<1, offset::size(ow)-unit(8), gen::16>>
end