# lib/pdf/reader/annotations.ex

defmodule Pdf.Reader.Annotations do
  @moduledoc """
  Walker for per-page `/Annots` arrays.

  Iterates each page; resolves each annotation ref; dispatches by `/Subtype`
  to type-specific extraction to build `%Pdf.Reader.Annotation{}` structs.

  ## Spec references

  - PDF 1.7 (ISO 32000-1) § 12.5 — Annotations:
    https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf
  - PDF 1.7 § 12.5.6.x — Annotation types (Link, Text, Highlight, Underline,
    StrikeOut, Squiggly, Square, Circle, FreeText, FileAttachment)
  - PDF 1.7 § 12.6 — Actions
  """

  alias Pdf.Reader.{Document, ObjectResolver, Page, Destination, Annotation, Utils}

  # ---------------------------------------------------------------------------
  # Public API
  # ---------------------------------------------------------------------------

  @doc """
  Collects every annotation in the document, walking pages in order.

  Lists the page references via `Pdf.Reader.Page.list_refs/1`, obtains the
  page index via `Pdf.Reader.Destination.ensure_page_index/1`, then visits
  each page's `/Annots` array.

  ## Returns

    * `{:ok, annotations, doc}` — annotations ordered page-ascending; the list
      is empty when no page carries an `/Annots` array. The returned `doc` may
      have a warmer cache than the input.
    * any non-matching result from the two setup calls above is returned
      unchanged (expected to be `{:error, reason}` per the spec below).
  """
  @spec read(Document.t()) :: {:ok, [Annotation.t()], Document.t()} | {:error, term()}
  def read(doc) do
    with {:ok, refs, listed_doc} <- Page.list_refs(doc),
         {:ok, index, indexed_doc} <- Destination.ensure_page_index(listed_doc) do
      # Pages are numbered from 1; the accumulator starts empty.
      walk_pages(refs, indexed_doc, index, 1, [])
    end
  end

  # ---------------------------------------------------------------------------
  # Internal — page walker
  # ---------------------------------------------------------------------------

  # Walks the page refs in document order, threading the (cache-warming) `doc`
  # and collecting annotations into `acc`.
  #
  # `acc` is kept newest-first so adding annotations is a cheap prepend; it is
  # reversed exactly once, when the page list is exhausted.
  #
  # Returns `{:ok, annotations, doc}`. A page that fails to resolve, does not
  # resolve to a dictionary, or has no usable `/Annots` value contributes no
  # annotations (best-effort) but still consumes a page number.
  defp walk_pages([], doc, _page_index, _page_num, acc) do
    {:ok, Enum.reverse(acc), doc}
  end

  defp walk_pages([{n, g} | rest], doc, page_index, page_num, acc) do
    {acc1, doc1} =
      case ObjectResolver.resolve(doc, {:ref, n, g}) do
        {:ok, page_dict, resolved_doc} when is_map(page_dict) ->
          raw_annots = Map.get(page_dict, "Annots")
          {annots, annots_doc} = resolve_annots_array(raw_annots, resolved_doc)
          {anns, walked_doc} = walk_annots(annots, annots_doc, page_index, page_num, [])

          # `anns` is in array order while `acc` is newest-first, so it must be
          # prepended reversed. Plain `anns ++ acc` would leave each page's
          # annotations in reverse array order after the final reverse.
          {Enum.reverse(anns, acc), walked_doc}

        _ ->
          {acc, doc}
      end

    walk_pages(rest, doc1, page_index, page_num + 1, acc1)
  end

  # Normalises a page's `/Annots` value to a list of annotation entries.
  # A direct array is used as-is; an indirect reference is resolved and
  # accepted only when it yields an array; anything else (including `nil`)
  # means "no annotations on this page".
  defp resolve_annots_array(annots, doc) when is_list(annots), do: {annots, doc}

  defp resolve_annots_array({:ref, _, _} = ref, doc) do
    case ObjectResolver.resolve(doc, ref) do
      {:ok, annots, doc1} when is_list(annots) -> {annots, doc1}
      _ -> {[], doc}
    end
  end

  defp resolve_annots_array(_other, doc), do: {[], doc}

  # ---------------------------------------------------------------------------
  # Internal — annotation walker
  # ---------------------------------------------------------------------------

  # Resolves each entry of an `/Annots` array in turn and builds an annotation
  # for every entry that resolves to a dictionary; entries that fail to resolve
  # or resolve to something else are skipped.
  #
  # `acc` holds previously collected annotations, newest first. Returns
  # `{annotations, doc}` with the accumulated list reversed into visit order
  # and `doc` threaded through every resolve/build step.
  defp walk_annots(refs, doc, page_index, page_num, acc) when is_list(refs) do
    {collected, final_doc} =
      Enum.reduce(refs, {acc, doc}, fn ref, {found, current_doc} ->
        case ObjectResolver.resolve(current_doc, ref) do
          {:ok, dict, resolved_doc} when is_map(dict) ->
            {annotation, next_doc} =
              build_annotation(dict, page_num, resolved_doc, page_index)

            {[annotation | found], next_doc}

          _ ->
            # Skipped entries leave the document state untouched.
            {found, current_doc}
        end
      end)

    {Enum.reverse(collected), final_doc}
  end

  # ---------------------------------------------------------------------------
  # Internal — annotation builder
  # ---------------------------------------------------------------------------

  # Builds an `%Annotation{}` from a resolved annotation dictionary.
  #
  # `page_num` is recorded as the annotation's page. `doc` and `page_index`
  # are only used to resolve the destination page. Returns
  # `{annotation, doc}`, where `doc` is whatever `resolve_dest/3` handed back.
  defp build_annotation(dict, page_num, doc, page_index) do
    # Every text field goes through the same lookup-then-decode step.
    decoded = fn key -> Utils.decode_pdf_string(Map.get(dict, key)) end

    kind = subtype_to_atom(Map.get(dict, "Subtype"))
    bounds = Utils.parse_rect(Map.get(dict, "Rect"))
    contents_text = decoded.("Contents")
    title_text = decoded.("T")
    subject_text = decoded.("Subj")
    created_text = decoded.("CreationDate")
    modified_text = decoded.("M")

    {target_page, next_doc} = resolve_dest(dict, doc, page_index)
    link_url = extract_url(dict)
    file_ref = extract_embedded_file(dict)
    extras = extract_kind_specific(kind, dict)

    {
      %Annotation{
        type: kind,
        page: page_num,
        rect: bounds,
        contents: contents_text,
        title: title_text,
        subject: subject_text,
        created: created_text,
        modified: modified_text,
        dest_page: target_page,
        url: link_url,
        embedded_file_ref: file_ref,
        kind_specific: extras
      },
      next_doc
    }
  end

  # ---------------------------------------------------------------------------
  # Internal — subtype dispatch
  # ---------------------------------------------------------------------------

  # Closed lookup table from `/Subtype` name to the atom used in
  # `%Annotation{type: ...}`. Atoms are never created from input.
  @subtype_atoms %{
    "Link" => :link,
    "Text" => :text,
    "Highlight" => :highlight,
    "Underline" => :underline,
    "StrikeOut" => :strikeout,
    "Squiggly" => :squiggly,
    "Square" => :square,
    "Circle" => :circle,
    "FreeText" => :freetext,
    "FileAttachment" => :file_attachment
  }

  # Maps a `{:name, subtype}` value to its atom; any unlisted name, and any
  # value that is not a `{:name, _}` tuple (including `nil`), is `:unknown`.
  defp subtype_to_atom({:name, subtype}), do: Map.get(@subtype_atoms, subtype, :unknown)
  defp subtype_to_atom(_not_a_name), do: :unknown

  # ---------------------------------------------------------------------------
  # Internal — destination resolution (page number)
  # ---------------------------------------------------------------------------

  # Resolves the page an annotation points at.
  #
  # Uses the dictionary's "Dest" entry when it is present (non-nil), otherwise
  # its "A" entry, and hands that value to `Destination.resolve/3`. Returns
  # `{page_num, doc}` on success, or `{nil, doc}` (original `doc`) when
  # neither entry exists or the resolver returns anything unexpected.
  defp resolve_dest(dict, doc, page_index) do
    # Only a nil "Dest" falls back to "A"; any other value is used as-is.
    target = with nil <- Map.get(dict, "Dest"), do: Map.get(dict, "A")

    with false <- is_nil(target),
         {:ok, page_num, next_doc} <- Destination.resolve(target, doc, page_index) do
      {page_num, next_doc}
    else
      # Destination.resolve/3 never errors per spec (R-AO13); safety net kept
      # intentionally so any future return-type widening doesn't crash here.
      _ -> {nil, doc}
    end
  end

  # ---------------------------------------------------------------------------
  # Internal — URL extraction (URI actions only, never a page dest)
  # ---------------------------------------------------------------------------

  # Returns the decoded "URI" value when the annotation's "A" entry is a map
  # whose "S" is `{:name, "URI"}`; returns `nil` for every other shape.
  defp extract_url(dict) do
    with %{"S" => {:name, "URI"}, "URI" => raw_uri} <- Map.get(dict, "A") do
      Utils.decode_pdf_string(raw_uri)
    else
      _ -> nil
    end
  end

  # ---------------------------------------------------------------------------
  # Internal — embedded file ref extraction
  # ---------------------------------------------------------------------------

  # Returns the `{:ref, _, _}` found at "FS" -> "EF" -> "F" of the annotation
  # dictionary, or `nil` when that path is absent or does not end in a ref.
  defp extract_embedded_file(dict) do
    dict
    |> Map.get("FS")
    |> embedded_file_ref()
  end

  # Matches only an inline "FS" map whose "EF"/"F" entry is a reference.
  defp embedded_file_ref(%{"EF" => %{"F" => {:ref, _, _} = ref}}), do: ref
  defp embedded_file_ref(_no_ref), do: nil

  # ---------------------------------------------------------------------------
  # Internal — kind-specific data by subtype
  # ---------------------------------------------------------------------------

  # Builds the `kind_specific` payload for an annotation.
  #
  #   * highlight / underline / strikeout / squiggly — `%{quad_points: [...]}`
  #     when "QuadPoints" is a list, otherwise `%{}`
  #   * text — `%{open: ..., name: ...}` from the "Open" (default `false`) and
  #     "Name" entries, taken as stored in the dictionary
  #   * unknown — the raw annotation dictionary itself
  #   * anything else — `%{}`
  defp extract_kind_specific(type, dict)
       when type in [:highlight, :underline, :strikeout, :squiggly] do
    dict
    |> Map.get("QuadPoints")
    |> quad_points_map()
  end

  defp extract_kind_specific(:text, dict) do
    open_value = Map.get(dict, "Open", false)
    name_value = Map.get(dict, "Name")

    %{open: open_value, name: name_value}
  end

  defp extract_kind_specific(:unknown, dict), do: dict

  defp extract_kind_specific(_other_type, _dict), do: %{}

  # Groups the flat "QuadPoints" list into 8-element tuples, dropping any
  # trailing partial group and widening integers to floats.
  defp quad_points_map(points) when is_list(points) do
    quads =
      for group <- Enum.chunk_every(points, 8, 8, :discard) do
        group
        |> Enum.map(&to_float/1)
        |> List.to_tuple()
      end

    %{quad_points: quads}
  end

  defp quad_points_map(_not_a_list), do: %{}

  # ---------------------------------------------------------------------------
  # Internal — helpers
  # ---------------------------------------------------------------------------

  # Widens integers to floats; floats and any non-numeric value pass through
  # unchanged.
  defp to_float(n) when is_integer(n), do: n * 1.0
  defp to_float(other), do: other
end