defmodule Pdf.Reader.AcroForm do
@moduledoc """
AcroForm field walker for `Pdf.Reader`.
Extracts interactive form fields from a PDF's AcroForm field tree, returning
a flat list of leaf `%Pdf.Reader.FormField{}` structs with decoded names, types,
values, flags, and rectangles.
## Spec references
- PDF 1.7 (ISO 32000-1) § 12.7 — Interactive Forms:
https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf
- § 12.7.3 — Field Dictionaries
- § 12.7.3.1 — Field Flags
- § 12.7.4 — Field Types
"""
alias Pdf.Reader.{Document, FormField, ObjectResolver, Utils}
@max_field_depth 8
@doc """
Reads all AcroForm leaf fields from a document.

Returns `{:ok, [FormField.t()], Document.t()}` with a flat list of leaf fields,
in the order they are encountered while walking the field tree.

When no `/AcroForm` is present, or `/Fields` is missing, empty, or not an array,
returns `{:ok, [], doc}`. `/Fields` may be stored inline or as an indirect
reference to an array; an indirect reference that cannot be resolved is treated
as an empty field list.

Absent or empty AcroForms never produce `{:error, _}`. An error is returned only
when the document catalog itself cannot be resolved (missing `/Root`, a `/Root`
that is not a dictionary, or a resolver failure).
"""
@spec read(Document.t()) ::
        {:ok, [FormField.t()], Document.t()} | {:error, term()}
def read(doc) do
  with {:ok, catalog, doc2} <- resolve_catalog(doc),
       {:ok, acroform, doc3} <- resolve_acroform_dict(doc2, catalog) do
    {fields, doc4} = resolve_fields_array(acroform, doc3)

    # walk_fields/7 accumulates by prepending, so restore document order here.
    {leaf_fields, doc5} = walk_fields(fields, doc4, "", nil, MapSet.new(), 0, [])
    {:ok, Enum.reverse(leaf_fields), doc5}
  end
end

# Returns {fields_list, doc} for an AcroForm dictionary (or nil when absent).
# /Fields may be an inline array or an indirect reference to one; anything else
# (missing, :null, wrong type, unresolvable reference) yields an empty list.
defp resolve_fields_array(nil, doc), do: {[], doc}

defp resolve_fields_array(acroform, doc) when is_map(acroform) do
  case Map.get(acroform, "Fields") do
    fields when is_list(fields) ->
      {fields, doc}

    {:ref, _n, _g} = ref ->
      case ObjectResolver.resolve(doc, ref) do
        {:ok, fields, doc2} when is_list(fields) -> {fields, doc2}
        {:ok, _other, doc2} -> {[], doc2}
        {:error, _} -> {[], doc}
      end

    _other ->
      {[], doc}
  end
end
# ---------------------------------------------------------------------------
# Catalog + AcroForm resolution
# ---------------------------------------------------------------------------
# Resolves the document catalog referenced by the trailer's /Root entry.
# Returns {:ok, catalog_dict, doc} on success, {:error, :no_root} when the
# trailer has no /Root, a :malformed error when /Root does not resolve to a
# dictionary, or the resolver's own error tuple unchanged.
defp resolve_catalog(%Document{trailer: trailer} = doc) do
  root_ref = Map.get(trailer, "Root")

  if is_nil(root_ref) do
    {:error, :no_root}
  else
    case ObjectResolver.resolve(doc, root_ref) do
      {:error, _} = failure ->
        failure

      {:ok, catalog, resolved_doc} when is_map(catalog) ->
        {:ok, catalog, resolved_doc}

      {:ok, _not_a_dict, _resolved_doc} ->
        {:error, {:malformed, :catalog, %{not_a_dict: true}}}
    end
  end
end
# Looks up /AcroForm in the catalog and returns {:ok, dict_or_nil, doc}.
# The entry may be an inline dictionary or an indirect reference. Every
# failure mode (missing, :null, wrong type, unresolvable reference) is
# reported as `nil` rather than as an error, so callers treat the document
# as having no form.
defp resolve_acroform_dict(doc, catalog) do
  case Map.get(catalog, "AcroForm") do
    inline when is_map(inline) ->
      {:ok, inline, doc}

    {:ref, _n, _g} = ref ->
      case ObjectResolver.resolve(doc, ref) do
        {:ok, resolved, doc2} ->
          acroform = if is_map(resolved), do: resolved, else: nil
          {:ok, acroform, doc2}

        {:error, _} ->
          {:ok, nil, doc}
      end

    _absent_or_invalid ->
      {:ok, nil, doc}
  end
end
# ---------------------------------------------------------------------------
# Field tree walker
# walk_fields(refs_or_dicts, doc, name_prefix, inherited_ft, visited, depth, acc)
# Returns {accumulated_leaf_fields, doc}
# ---------------------------------------------------------------------------
# End of the sibling list: hand back what has been collected so far.
defp walk_fields([], doc, _prefix, _inherited_ft, _visited, _depth, acc), do: {acc, doc}

# Depth cap reached: every remaining entry at this level is skipped.
defp walk_fields([_entry | rest], doc, prefix, inherited_ft, visited, depth, acc)
     when depth >= @max_field_depth do
  walk_fields(rest, doc, prefix, inherited_ft, visited, depth, acc)
end

# Indirect reference: guard against cycles, resolve, then process.
defp walk_fields([{:ref, n, g} = ref | rest], doc, prefix, inherited_ft, visited, depth, acc) do
  key = {n, g}

  {next_doc, next_visited, next_acc} =
    if MapSet.member?(visited, key) do
      # Already seen this object — skip it to break the cycle.
      {doc, visited, acc}
    else
      seen = MapSet.put(visited, key)

      case ObjectResolver.resolve(doc, ref) do
        {:ok, field_dict, doc2} when is_map(field_dict) ->
          {new_acc, doc3} =
            process_field(field_dict, doc2, prefix, inherited_ft, seen, depth, acc)

          {doc3, seen, new_acc}

        {:ok, _not_a_dict, doc2} ->
          {doc2, seen, acc}

        {:error, _} ->
          {doc, seen, acc}
      end
    end

  walk_fields(rest, next_doc, prefix, inherited_ft, next_visited, depth, next_acc)
end

# Inline field dictionary: no object number, so nothing to record as visited.
defp walk_fields([field_dict | rest], doc, prefix, inherited_ft, visited, depth, acc)
     when is_map(field_dict) do
  {new_acc, doc2} = process_field(field_dict, doc, prefix, inherited_ft, visited, depth, acc)
  walk_fields(rest, doc2, prefix, inherited_ft, visited, depth, new_acc)
end

# Anything else in the array is ignored.
defp walk_fields([_other | rest], doc, prefix, inherited_ft, visited, depth, acc) do
  walk_fields(rest, doc, prefix, inherited_ft, visited, depth, acc)
end
# Handles one resolved field dictionary and returns {acc, doc}.
#   * pure widget annotations contribute nothing;
#   * leaf nodes are turned into a %FormField{} and prepended to `acc`;
#   * intermediate nodes are descended into one level deeper, passing down the
#     fully qualified name and the effective /FT.
defp process_field(field_dict, doc, prefix, inherited_ft, visited, depth, acc) do
  if widget_only?(field_dict) do
    {acc, doc}
  else
    partial = extract_partial_name(field_dict)
    qualified = join_name(prefix, partial)

    # The node's own /FT wins over the value inherited from its ancestors.
    field_type = Map.get(field_dict, "FT") || inherited_ft
    children = resolve_kids(field_dict)

    case leaf_node?(children, doc) do
      true ->
        {leaf, doc2} = emit_leaf(field_dict, qualified, partial, field_type, doc)
        {[leaf | acc], doc2}

      false ->
        walk_fields(children, doc, qualified, field_type, visited, depth + 1, acc)
    end
  end
end
# Returns the /Kids entry when it is a list; a missing or non-list entry
# is normalised to [].
defp resolve_kids(field_dict) do
  case Map.get(field_dict, "Kids") do
    children when is_list(children) -> children
    _missing_or_invalid -> []
  end
end
# A node counts as a leaf when it has no kids at all, or when every kid is a
# widget-only annotation. only_widgets?/2 already answers `true` for an empty
# list, so a single clause covers both cases.
defp leaf_node?(kids, doc) when is_list(kids), do: only_widgets?(kids, doc)
# True when no kid is a logical field node. Kids that cannot be turned into a
# dictionary (unresolvable reference, non-dictionary object, unexpected term)
# do not count against the result.
defp only_widgets?(kids, doc) when is_list(kids) do
  Enum.all?(kids, fn kid ->
    case kid_dictionary(kid, doc) do
      nil -> true
      dict -> widget_only?(dict)
    end
  end)
end

# Returns the kid's dictionary, resolving an indirect reference if needed,
# or nil when no dictionary can be obtained. The resolver's updated document
# is intentionally not kept: this is a read-only peek.
defp kid_dictionary({:ref, _n, _g} = ref, doc) do
  case ObjectResolver.resolve(doc, ref) do
    {:ok, dict, _doc2} when is_map(dict) -> dict
    _ -> nil
  end
end

defp kid_dictionary(dict, _doc) when is_map(dict), do: dict
defp kid_dictionary(_other, _doc), do: nil
# A dictionary is a widget-only annotation (as opposed to a logical field) when
# its /Subtype is Widget — either as {:name, "Widget"} or as the bare string —
# and it carries neither /T nor /FT. Non-map input is never a widget.
defp widget_only?(%{} = dict) do
  widget_subtype? = Map.get(dict, "Subtype") in [{:name, "Widget"}, "Widget"]

  widget_subtype? and not Map.has_key?(dict, "T") and not Map.has_key?(dict, "FT")
end

defp widget_only?(_not_a_dict), do: false
# Extracts the partial field name from /T.
# Returns the decoded string, or nil when /T is absent or is not a string
# object. Decoding goes through decode_value_as_string/1 so that literal and
# hex strings are treated alike; previously a hex-string /T was returned as
# raw bytes, without the decoding step that field values receive.
defp extract_partial_name(field_dict) do
  field_dict
  |> Map.get("T")
  |> decode_value_as_string()
end
# Builds the fully qualified field name from the parent prefix and this node's
# partial name, separated by ".".
#   * no prefix (nil)      -> the partial name as-is (possibly nil)
#   * no partial name      -> the prefix, or nil when the prefix is ""
#   * empty prefix         -> the partial name alone
#   * both present         -> "prefix.partial"
defp join_name(nil, partial), do: partial

defp join_name(prefix, nil) when is_binary(prefix) do
  if prefix == "", do: nil, else: prefix
end

defp join_name(prefix, partial) when is_binary(prefix) and is_binary(partial) do
  if prefix == "", do: partial, else: prefix <> "." <> partial
end
# ---------------------------------------------------------------------------
# Leaf emission
# ---------------------------------------------------------------------------
# Builds the %FormField{} for a terminal field dictionary.
# Returns {field, doc}, where `doc` is the document after any indirect /V or
# /DV values have been resolved.
#
# /Ff, /V, /DV, /TU and /Rect are read from this dictionary only.
# /TU is decoded with decode_value_as_string/1 so hex-string tooltips get the
# same decoding as other text; previously they were passed through as raw
# bytes.
defp emit_leaf(field_dict, full_name, partial_name, effective_ft, doc) do
  type = ft_to_atom(effective_ft)
  ff_int = Map.get(field_dict, "Ff")

  # /V and /DV share the same decoding rules, which depend on type and flags.
  {value, doc2} = resolve_and_decode_value(Map.get(field_dict, "V"), type, ff_int, doc)
  {default_val, doc3} = resolve_and_decode_value(Map.get(field_dict, "DV"), type, ff_int, doc2)

  field = %FormField{
    name: full_name,
    partial_name: partial_name,
    type: type,
    value: value,
    default: default_val,
    tooltip: decode_value_as_string(Map.get(field_dict, "TU")),
    flags: decode_flags(ff_int),
    rect: Utils.parse_rect(Map.get(field_dict, "Rect"))
  }

  {field, doc3}
end
# ---------------------------------------------------------------------------
# ft_to_atom/1 — map /FT name to type atom (R-AF10)
# ---------------------------------------------------------------------------
# Maps a /FT value — either a {:name, _} tuple or a bare string — to the field
# type atom. Missing or unrecognised values map to :unknown.
defp ft_to_atom(ft) do
  case ft do
    tx when tx in [{:name, "Tx"}, "Tx"] -> :text
    btn when btn in [{:name, "Btn"}, "Btn"] -> :button
    ch when ch in [{:name, "Ch"}, "Ch"] -> :choice
    sig when sig in [{:name, "Sig"}, "Sig"] -> :signature
    _missing_or_unrecognised -> :unknown
  end
end
# ---------------------------------------------------------------------------
# button_subtype/1 — disambiguate button subtypes from /Ff bits (R-AF13)
# Pushbutton: mask 0x10000 (1 shifted left by 16; spec bit position 17).
# Radio:      mask 0x8000  (1 shifted left by 15; spec bit position 16).
# Neither set = checkbox. Pushbutton takes precedence when both are set.
# A non-integer /Ff (for example an absent entry) is treated as a checkbox.
# Note: the positions 17 and 16 above use the PDF spec's 1-based numbering,
# where position 1 is the low-order bit.
# ---------------------------------------------------------------------------
defp button_subtype(ff_int) when is_integer(ff_int) do
  pushbutton? = Bitwise.band(ff_int, 0x10000) != 0
  radio? = Bitwise.band(ff_int, 0x8000) != 0

  case {pushbutton?, radio?} do
    {true, _} -> :pushbutton
    {false, true} -> :radio
    {false, false} -> :checkbox
  end
end

defp button_subtype(_not_an_integer), do: :checkbox
# ---------------------------------------------------------------------------
# resolve_and_decode_value/4 (R-AF11, R-AF12)
# ---------------------------------------------------------------------------
# Decodes a /V or /DV entry for a field of the given type.
# Returns {decoded_value, doc}. Absent and :null values decode to nil; an
# indirect reference is resolved first (falling back to nil when resolution
# fails) and the resolved object is then decoded by the same rules.
defp resolve_and_decode_value(value, _type, _ff_int, doc) when value in [nil, :null] do
  {nil, doc}
end

defp resolve_and_decode_value({:ref, _n, _g} = ref, type, ff_int, doc) do
  case ObjectResolver.resolve(doc, ref) do
    {:ok, resolved, doc2} -> resolve_and_decode_value(resolved, type, ff_int, doc2)
    {:error, _} -> {nil, doc}
  end
end

defp resolve_and_decode_value(value, type, ff_int, doc) do
  {decode_direct_value(type, value, ff_int), doc}
end

# Type-specific decoding of a direct (already resolved, non-null) value.

# Buttons: pushbuttons carry no value; radios report the selected option name;
# checkboxes report false for "Off" and true for any other name/string.
defp decode_direct_value(:button, value, ff_int) do
  case {button_subtype(ff_int), value} do
    {:pushbutton, _} -> nil
    {:radio, {:name, option}} -> {:selected, option}
    {:radio, option} when is_binary(option) -> {:selected, option}
    {:radio, _} -> nil
    {:checkbox, {:name, "Off"}} -> false
    {:checkbox, {:name, _on_state}} -> true
    {:checkbox, "Off"} -> false
    {:checkbox, on_state} when is_binary(on_state) -> true
    {:checkbox, _} -> nil
  end
end

# Choices: when the multi-select mask 0x200000 is set in /Ff the result is
# always a list of strings; otherwise it is a single string (the first entry
# when the stored value happens to be an array).
defp decode_direct_value(:choice, value, ff_int) do
  multi_select? = is_integer(ff_int) and Bitwise.band(ff_int, 0x200000) != 0

  case {multi_select?, value} do
    {true, options} when is_list(options) ->
      Enum.map(options, &decode_value_as_string/1)

    {true, single} ->
      [decode_value_as_string(single)]

    {false, options} when is_list(options) ->
      options |> List.first() |> decode_value_as_string()

    {false, single} ->
      decode_value_as_string(single)
  end
end

# Signatures: only report whether a signature dictionary is present.
defp decode_direct_value(:signature, value, _ff_int) do
  if is_map(value), do: :present, else: nil
end

# Fields without a recognised type never expose a value.
defp decode_direct_value(:unknown, _value, _ff_int), do: nil

# Text fields (and any other type atom) decode as a plain string.
defp decode_direct_value(_text_or_other, value, _ff_int), do: decode_value_as_string(value)
# Decodes a raw PDF string object into text (used for text values, tooltips,
# and similar). Accepts {:string, bin}, {:hex_string, bin}, or a bare binary;
# anything else — including a tagged tuple whose payload is not a binary —
# decodes to nil.
defp decode_value_as_string(value) do
  case value do
    {:string, bin} when is_binary(bin) -> Utils.decode_pdf_string(bin)
    {:hex_string, bin} when is_binary(bin) -> decode_hex_string(bin)
    bin when is_binary(bin) -> Utils.decode_pdf_string(bin)
    _not_a_string -> nil
  end
end

# Hex-string payloads may carry a UTF-16BE BOM, so they go through the same
# PDF string decoder as literal strings.
defp decode_hex_string(bin) when is_binary(bin), do: Utils.decode_pdf_string(bin)
# ---------------------------------------------------------------------------
# decode_flags/1 — /Ff bitmask → %{atom => boolean} (R-AF14)
# 17 flag atoms drawn from the field-flag tables of PDF 1.7 § 12.7.3.1 and
# § 12.7.4. The number paired with each atom is the left-shift amount for its
# mask (0 = low-order bit), i.e. the spec's 1-based bit position minus one.
# :rich_text and :radios_in_unison share a bit on purpose: the same position
# means RichText on text fields and RadiosInUnison on button fields.
# NOTE(review): NoToggleToOff (spec position 15) and CommitOnSelChange (spec
# position 27) are not in this list — confirm whether that is intentional.
# ---------------------------------------------------------------------------
@flag_bits [
  {:read_only, 0},
  {:required, 1},
  {:no_export, 2},
  {:multiline, 12},
  {:password, 13},
  {:radio, 15},
  {:pushbutton, 16},
  {:combo, 17},
  {:edit, 18},
  {:sort, 19},
  {:file_select, 20},
  {:multi_select, 21},
  {:do_not_spell_check, 22},
  {:do_not_scroll, 23},
  {:comb, 24},
  {:rich_text, 25},
  {:radios_in_unison, 25}
]

# Every flag atom is always present in the result. A missing or non-integer
# /Ff behaves like 0, so all flags come back false.
defp decode_flags(ff) do
  mask = if is_integer(ff), do: ff, else: 0

  for {flag, shift} <- @flag_bits, into: %{} do
    {flag, Bitwise.band(mask, Bitwise.bsl(1, shift)) != 0}
  end
end
end