# lib/foundry/context/llm_formatter.ex

defmodule Foundry.Context.LLMFormatter do
  @moduledoc """
  Formats project context for LLM consumption using compact text notation.

  Achieves ~60-70% token reduction vs raw JSON by:
  - Module dictionary encoding (short aliases for module names)
  - Compact attribute notation (`name:type[:flag:flag...]`, flags separated by colons)
  - Compact edge notation (`A1 --rel--> B1`, optionally followed by `[seq=..,step=..,act=..]`)
  - Type abbreviation legend

  The only public function is `format/1`. It accepts a context map with either
  atom or string keys and returns the rendered text as a single string.
  """

  # Node type => short code. Looked up by format_node/2 for each node's "type"
  # and printed (as code=type) in the legend by format_legend/0. A type that is
  # not listed here is emitted unabbreviated.
  @type_abbr %{
    "resource" => "res",
    "reactor" => "rxr",
    "transfer" => "txr",
    "oban_job" => "job",
    "ash_authentication" => "auth",
    "ash_policy" => "pol",
    "blueprint" => "bp",
    "adapter" => "adp"
  }

  # Attribute type => short code. abbreviate_type/1 looks up the downcased last
  # dot-separated segment of an attribute's type here; a type that is not listed
  # is emitted in its downcased form. These codes are not part of the legend.
  @attr_type_abbr %{
    "string" => "str",
    "integer" => "int",
    "boolean" => "bool",
    "decimal" => "dec",
    "float" => "float",
    "datetime" => "dt",
    "date" => "date",
    "map" => "map",
    "uuid" => "uuid"
  }

  # Edge relation => short code. Looked up by format_edge/2 for each edge's
  # "relation" and printed (as code=relation) in the legend by format_legend/0.
  # A relation that is not listed here is emitted unabbreviated.
  @edge_abbr %{
    "references" => "ref",
    "referenced_by" => "rb",
    "writes" => "w",
    "reads" => "r",
    "async" => "async",
    "guards" => "grd",
    "sequence" => "seq",
    "compensation" => "comp",
    "configures" => "cfg",
    "authenticates" => "ath",
    "persists_to" => "pers",
    "queues_via" => "que",
    "calls_adapter" => "ca"
  }

  # Maximum length (as counted by String.length/1, including the "..." suffix
  # added by truncate/2) of a spec-kit document title in format_doc_entry/1.
  @doc_title_limit 90
  # Same, for a spec-kit document summary in summarize_doc/1.
  @doc_summary_limit 170

  @doc """
  Renders `context` as compact, markdown-flavoured text.

  `context` may use atom or string keys; it is normalised to string keys first.
  The keys read are `"project"`, `"project_type"`, `"domain_type"`, `"nodes"`,
  `"edges"` and `"spec_kit"`. Missing `"nodes"`/`"edges"` are treated as empty
  lists.

  The output is the header, legend, alias table, nodes, edges and spec-kit
  sections joined by blank lines; any section that renders as `""` is omitted.
  """
  @spec format(map()) :: String.t()
  def format(context) do
    # Context can come from internal assembly (atom keys) or from decoded JSON
    # (string keys); everything downstream only reads string keys.
    normalized = stringify_keys(context)
    node_list = normalized["nodes"] || []
    edge_list = normalized["edges"] || []

    # build_aliases/1 yields alias => module; the formatters need module => alias.
    alias_to_module = build_aliases(node_list)
    module_to_alias = for {short, module} <- alias_to_module, into: %{}, do: {module, short}

    sections = [
      format_header(normalized),
      format_legend(),
      format_aliases(alias_to_module),
      format_nodes(node_list, module_to_alias),
      format_edges(edge_list, module_to_alias),
      format_spec_kit(normalized["spec_kit"])
    ]

    sections
    |> Enum.reject(fn section -> section == "" end)
    |> Enum.join("\n\n")
  end

  # Recursively normalises a term so that the formatters can rely on string keys:
  #   * structs are returned untouched - `is_map/1` is true for them, but most
  #     structs (DateTime, Date, Decimal, ...) are not Enumerable, so feeding
  #     them to Map.new/2 would raise Protocol.UndefinedError;
  #   * plain maps get every key passed through to_string/1 and every value
  #     normalised recursively;
  #   * lists are normalised element by element;
  #   * atoms other than true/false/nil become strings via Atom.to_string/1
  #     (so a module atom keeps its "Elixir." prefix);
  #   * anything else is returned unchanged.
  defp stringify_keys(%_{} = struct), do: struct

  defp stringify_keys(map) when is_map(map) do
    Map.new(map, fn {key, value} -> {to_string(key), stringify_keys(value)} end)
  end

  defp stringify_keys(list) when is_list(list), do: Enum.map(list, &stringify_keys/1)
  defp stringify_keys(v) when is_atom(v) and v not in [true, false, nil], do: Atom.to_string(v)
  defp stringify_keys(v), do: v

  # Builds the two-paragraph document header: a title line naming the project,
  # its type (default "standard") and, when present, its domain type, followed
  # by a fixed one-line description of the format.
  defp format_header(context) do
    name = context["project"] || ""
    kind = context["project_type"] || "standard"

    domain_suffix =
      case context["domain_type"] do
        absent when absent in [nil, false] -> ""
        domain_type -> " · #{domain_type}"
      end

    title = "# System Map: #{name} (#{kind}#{domain_suffix})"
    title <> "\n\nCompact text format for LLM consumption."
  end

  # Renders the "## Legend" section listing every node-type and edge-relation
  # abbreviation as code=meaning, plus the fixed attribute-flag legend.
  #
  # The heredoc ends with a newline; it is trimmed so that format/1's "\n\n"
  # joiner leaves exactly one blank line after the legend, matching what
  # format_spec_kit_overview/1 does for its own heredoc.
  #
  # NOTE(review): the Attrs line advertises `u=unique` and `req=required`, but
  # format_attr/1 only ever emits pk, pii, s and m - confirm whether those two
  # flags should be emitted there or removed here.
  defp format_legend do
    types = Enum.map_join(@type_abbr, "  ", fn {full, abbr} -> "#{abbr}=#{full}" end)
    edges = Enum.map_join(@edge_abbr, "  ", fn {full, abbr} -> "#{abbr}=#{full}" end)

    """
    ## Legend

    **Types:** #{types}
    **Attrs:** pk=primary_key  pii=pii  s=sensitive  m=money  u=unique  req=required
    **Edges:** #{edges}
    """
    |> String.trim_trailing()
  end

  # Renders the "## Module Aliases" table from an alias => module map.
  #
  # Entries are ordered by module name and laid out five per line as
  # alias=module, separated by two spaces. An empty alias map yields "" so that
  # format/1 drops the section instead of emitting a heading with no body.
  defp format_aliases(aliases) when aliases == %{}, do: ""

  defp format_aliases(aliases) do
    rows =
      aliases
      |> Enum.sort_by(fn {_short, module} -> module end)
      |> Enum.chunk_every(5)
      |> Enum.map_join("\n", fn row ->
        Enum.map_join(row, "  ", fn {short, module} -> "#{short}=#{module}" end)
      end)

    "## Module Aliases\n\n" <> rows
  end

  # Renders the "## Nodes" section: one format_node/2 entry per node, separated
  # by blank lines. `reverse` maps module name => alias.
  #
  # An empty node list yields "" so that format/1 drops the section instead of
  # emitting a heading with no body.
  defp format_nodes([], _reverse), do: ""

  defp format_nodes(nodes, reverse) do
    "## Nodes\n\n" <> Enum.map_join(nodes, "\n\n", &format_node(&1, reverse))
  end

  # Renders one node as a heading line followed by optional detail lines, each
  # continuation line indented by two spaces:
  #
  #   [alias] type · domain[ · **sensitive**]
  #     > first sentence of the description
  #     attrs: ...
  #     actions: ...
  #     rels: ...
  #     compliance: ...
  #     adrs: ...
  #     dl=... · paper_trail · ...
  #
  # `reverse` maps module name => alias; a module without an alias is printed
  # by its full name. Empty or missing parts are left out.
  defp format_node(node, reverse) do
    module = node["module"]
    label = Map.get(reverse, module, module)
    kind = Map.get(@type_abbr, node["type"], node["type"] || "?")
    sensitivity = if node["sensitive"], do: " · **sensitive**", else: ""
    heading = "[#{label}] #{kind} · #{short_domain(node["domain"] || "")}#{sensitivity}"

    description =
      case format_node_description(node) do
        "" -> ""
        text -> "> #{text}"
      end

    # "<title>: a, b, c" for a list-valued node key; no line when empty/missing.
    section = fn key, title, render ->
      case node[key] || [] do
        [] -> []
        items -> ["#{title}: #{Enum.map_join(items, ", ", render)}"]
      end
    end

    data_layer_flag =
      case node["data_layer"] do
        absent when absent in [nil, false] -> []
        data_layer -> ["dl=#{short_data_layer(data_layer)}"]
      end

    # Order here is the order in which the flags appear in the output.
    toggles = [
      {"paper_trail", "paper_trail"},
      {"archival", "archival"},
      {"pending_migrations", "pending!"},
      {"authentication_subject", "auth_subject"},
      {"rate_limited", "rate_limited"}
    ]

    toggle_flags = for {key, tag} <- toggles, node[key], do: tag

    flag_line =
      case data_layer_flag ++ toggle_flags do
        [] -> []
        flags -> [Enum.join(flags, " · ")]
      end

    [
      [heading, description],
      section.("attributes", "attrs", &format_attr/1),
      section.("actions", "actions", & &1["name"]),
      section.("relationships", "rels", &format_relationship(&1, reverse)),
      section.("compliance", "compliance", & &1),
      section.("adrs", "adrs", & &1),
      flag_line
    ]
    |> Enum.concat()
    |> Enum.reject(fn line -> line == "" end)
    |> Enum.join("\n  ")
  end

  # Upper bound, in graphemes, on a node description; longer text is cut off
  # without an ellipsis.
  @node_description_limit 170

  # Reduces a node's "description" to a short single-line blurb: the first line
  # of the trimmed text, with whitespace runs collapsed to one space, cut to its
  # first sentence and then to @node_description_limit characters. Returns ""
  # when the description is missing or blank.
  defp format_node_description(node) do
    raw = node |> Map.get("description", "") |> to_string() |> String.trim()

    if raw == "" do
      ""
    else
      # String.split/3 always returns a non-empty list, so the head is safe.
      [first_line | _rest] = String.split(raw, ~r/\n\s*\n|\n/, parts: 2)

      first_line
      |> String.replace(~r/\s+/, " ")
      |> String.trim()
      |> first_sentence()
      |> String.slice(0, @node_description_limit)
    end
  end

  # Returns the shortest prefix of `text` that ends in ".", "!" or "?" followed
  # by whitespace or the end of the string; returns `text` unchanged when there
  # is no such terminator.
  defp first_sentence(text) do
    with [sentence] <- Regex.run(~r/^.*?[.!?](?=\s|$)/u, text) do
      sentence
    else
      _no_match -> text
    end
  end

  # Renders one attribute as `name:type` followed by `:flag` for each flag that
  # is set, in the fixed order pk, pii, s, m. The type is the last
  # dot-separated segment of attr["type"], abbreviated by abbreviate_type/1.
  # A missing name or type is shown as "?".
  defp format_attr(attr) do
    short_type =
      (attr["type"] || "?")
      |> String.split(".")
      |> List.last()
      |> abbreviate_type()

    flag_keys = [{"primary_key", "pk"}, {"pii", "pii"}, {"sensitive", "s"}, {"money", "m"}]
    flags = for {key, flag} <- flag_keys, attr[key], do: flag

    Enum.join(["#{attr["name"] || "?"}:#{short_type}" | flags], ":")
  end

  # Downcases `type_name` and returns its short code from @attr_type_abbr, or
  # the downcased name itself when no code is defined.
  defp abbreviate_type(type_name) do
    lowered = String.downcase(type_name)
    Map.get(@attr_type_abbr, lowered, lowered)
  end

  # Renders one relationship as `type:target` or, when the relationship has a
  # "source_attribute", `type:target(source_attribute)`. The only type that is
  # shortened is "many_to_many" (to "m2m"). `reverse` maps module name => alias;
  # a target without an alias is printed as given.
  defp format_relationship(rel, reverse) do
    kind =
      case rel["type"] do
        "many_to_many" -> "m2m"
        other -> other
      end

    related = rel["related_resource"]
    base = "#{kind}:#{Map.get(reverse, related, related)}"

    case rel["source_attribute"] do
      absent when absent in [nil, false] -> base
      source_attribute -> "#{base}(#{source_attribute})"
    end
  end

  # Renders the "## Edges" section: one format_edge/2 line per edge.
  # `reverse` maps module name => alias.
  #
  # An empty edge list yields "" so that format/1 drops the section instead of
  # emitting a heading with no body.
  defp format_edges([], _reverse), do: ""

  defp format_edges(edges, reverse) do
    "## Edges\n\n" <> Enum.map_join(edges, "\n", &format_edge(&1, reverse))
  end

  # Renders one edge as `from --rel--> to`, using aliases from `reverse`
  # (module name => alias) and the relation codes in @edge_abbr where they
  # exist. When the edge has a step index, step name or action name, they are
  # appended as ` [seq=..,step=..,act=..]` in that order.
  defp format_edge(edge, reverse) do
    source = Map.get(reverse, edge["from"], edge["from"])
    target = Map.get(reverse, edge["to"], edge["to"])
    relation = Map.get(@edge_abbr, edge["relation"], edge["relation"])

    annotation_keys = [{"step_index", "seq"}, {"step_name", "step"}, {"action_name", "act"}]

    # The `value = edge[key]` filter skips keys whose value is nil or false.
    annotations =
      for {key, label} <- annotation_keys, value = edge[key], do: "#{label}=#{value}"

    suffix =
      case annotations do
        [] -> ""
        _present -> " [#{Enum.join(annotations, ",")}]"
      end

    "#{source} --#{relation}--> #{target}#{suffix}"
  end

  # Renders the "## Spec-Kit" section: the overview followed by one subsection
  # per document category that has entries, separated by blank lines. Returns
  # "" when there is no spec kit, or when every part renders empty.
  defp format_spec_kit(nil), do: ""

  defp format_spec_kit(spec_kit) do
    overview = format_spec_kit_overview(spec_kit)

    categories = [
      {"AGENTS", "agents"},
      {"ADRs", "adrs"},
      {"Runbooks", "runbooks"},
      {"Findings", "findings"},
      {"Regulations", "regulations"},
      {"Usage Rules", "usage_rules"}
    ]

    doc_sections =
      for {heading, key} <- categories, do: format_doc_section(heading, spec_kit[key])

    case Enum.reject([overview | doc_sections], fn part -> part == "" end) do
      [] -> ""
      parts -> "## Spec-Kit\n\n" <> Enum.join(parts, "\n\n")
    end
  end

  # Renders the "### Overview" block of the spec-kit section:
  #   * per-category document counts (categories with zero documents omitted),
  #   * a fixed navigation hint,
  #   * the index token estimate and its warning flag (defaults 0 / false),
  #   * a "Themes:" line with up to eight tags from collect_tags/1, when any.
  defp format_spec_kit_overview(spec_kit) do
    categories = [
      {"AGENTS", "agents"},
      {"ADRs", "adrs"},
      {"Runbooks", "runbooks"},
      {"Findings", "findings"},
      {"Regulations", "regulations"},
      {"Usage Rules", "usage_rules"}
    ]

    count_entries =
      for {label, key} <- categories,
          total = count_docs(spec_kit[key]),
          total > 0,
          do: "#{label}=#{total}"

    counts = Enum.join(count_entries, "  ")

    themes = spec_kit |> collect_tags() |> Enum.take(8) |> Enum.join(", ")
    theme_line = if themes == "", do: "", else: "\nThemes: #{themes}"

    token_count = spec_kit["index_token_count"] || 0
    token_warn = spec_kit["index_token_warn"] || false

    String.trim(
      "### Overview\n\n" <>
        "Counts: #{counts}\n" <>
        "Navigation: Prefer direct node links (`adrs`, `compliance`, `runbook`) before tag-based lookup.\n" <>
        "Token estimate: #{token_count} (warn: #{token_warn})#{theme_line}\n"
    )
  end

  # Renders one spec-kit subsection: a "### <title>" heading followed by one
  # format_doc_entry/1 line per document. Returns "" when `docs` is nil or [].
  defp format_doc_section(_title, docs) when docs in [nil, []], do: ""

  defp format_doc_section(title, docs) do
    "### #{title}\n\n" <> Enum.map_join(docs, "\n", &format_doc_entry/1)
  end

  # Renders one spec-kit document as a bullet:
  #
  #   - <id> · <status> · <title> :: <summary>
  #
  # The id falls back through "id", "filename", "path" and "title" to
  # "unknown". Status (trimmed, downcased) and title (truncated to
  # @doc_title_limit) are left out when blank, and the " :: <summary>" tail is
  # left out when summarize_doc/1 has nothing to show.
  defp format_doc_entry(doc) do
    identifier = doc["id"] || doc["filename"] || doc["path"] || doc["title"] || "unknown"
    title = doc["title"] |> presence_or_nil() |> truncate(@doc_title_limit)
    status = doc["status"] |> presence_or_nil() |> normalize_status()
    summary = summarize_doc(doc)

    label =
      [identifier, status, title]
      |> Enum.reject(&is_nil/1)
      |> Enum.join(" · ")

    if is_nil(summary) do
      "- " <> label
    else
      "- " <> label <> " :: " <> summary
    end
  end

  # Returns the short text shown after a document's label: its "summary",
  # trimmed and truncated to @doc_summary_limit, or - when the summary is
  # missing or blank - its first five tags joined with ", ". Returns nil when
  # neither is available.
  defp summarize_doc(doc) do
    summary = doc["summary"] |> presence_or_nil() |> truncate(@doc_summary_limit)

    # truncate/2 only ever returns nil or a binary, so `||` falls back exactly
    # when there is no usable summary.
    summary || tag_summary(doc["tags"])
  end

  # First five tags joined with ", ", or nil when there are none.
  defp tag_summary(tags) do
    tags
    |> List.wrap()
    |> Enum.take(5)
    |> Enum.join(", ")
    |> presence_or_nil()
  end

  # Trims and downcases a status string; nil stays nil.
  defp normalize_status(nil), do: nil
  defp normalize_status(status), do: String.downcase(String.trim(status))

  # Number of documents in a spec-kit category. nil counts as zero (List.wrap/1
  # turns it into []), and a single non-list value counts as one document.
  defp count_docs(docs) do
    docs
    |> List.wrap()
    |> length()
  end

  # Collects the "tags" of every document under the adrs, runbooks, findings,
  # regulations and usage_rules categories ("agents" is not included) and
  # returns the distinct tags ordered by descending frequency, ties broken
  # alphabetically.
  defp collect_tags(spec_kit) do
    categories = ["adrs", "runbooks", "findings", "regulations", "usage_rules"]

    all_tags =
      for key <- categories,
          doc <- List.wrap(Map.get(spec_kit, key, [])),
          tag <- List.wrap(doc["tags"]),
          do: tag

    all_tags
    |> Enum.frequencies()
    |> Enum.sort_by(fn {tag, occurrences} -> {-occurrences, tag} end)
    |> Enum.map(fn {tag, _occurrences} -> tag end)
  end

  # Returns the trimmed string, or nil when the input is nil or contains only
  # whitespace. Only nil and binaries are accepted.
  defp presence_or_nil(nil), do: nil

  defp presence_or_nil(text) when is_binary(text) do
    trimmed = String.trim(text)
    if trimmed == "", do: nil, else: trimmed
  end

  # Shortens `text` to at most `limit` characters (as counted by
  # String.length/1); when it has to cut, the last three characters of the
  # budget are used for "...". nil stays nil.
  defp truncate(nil, _limit), do: nil

  defp truncate(text, limit) when is_binary(text) do
    case String.length(text) <= limit do
      true -> text
      false -> String.slice(text, 0, limit - 3) <> "..."
    end
  end

  # Builds the alias => module map used to shorten module names in the output.
  #
  # Each node's alias is a two-letter upper-case prefix (see alias_prefix/1)
  # followed by a 1-based counter. The counter runs per *prefix*, over nodes
  # sorted by module name. Counting per domain instead would hand out the same
  # alias (e.g. "AC1") to two domains that share their first two letters, such
  # as Accounts and Activity, and Map.new/1 would then silently drop one of
  # them.
  #
  # Every node is expected to carry a string "module".
  defp build_aliases(nodes) do
    nodes
    |> Enum.sort_by(& &1["module"])
    |> Enum.group_by(&alias_prefix(&1["module"]))
    |> Enum.flat_map(fn {prefix, group} ->
      group
      |> Enum.with_index(1)
      |> Enum.map(fn {node, idx} -> {"#{prefix}#{idx}", node["module"]} end)
    end)
    |> Map.new()
  end

  # Two-letter upper-case prefix for a module name: taken from the second
  # dot-separated segment (the "domain"), or from "Root" when the name has a
  # single segment. String.split/2 never returns an empty list, so these two
  # shapes are exhaustive.
  defp alias_prefix(module) do
    domain =
      case String.split(module, ".") do
        [_root, domain | _rest] -> domain
        [_single] -> "Root"
      end

    domain |> String.slice(0, 2) |> String.upcase()
  end

  # Last dot-separated segment of a domain name, downcased
  # (e.g. "App.Accounts" -> "accounts").
  defp short_domain(domain) do
    segments = String.split(domain, ".")
    String.downcase(List.last(segments))
  end

  # Maps a known data-layer module name to a short label ("postgres", "ets",
  # "mnesia"); any other value is returned unchanged.
  #
  # A module name can arrive with or without the "Elixir." prefix - e.g.
  # stringify_keys/1 turns a module atom into "Elixir.AshPostgres.DataLayer",
  # while an inspected module name has no prefix - so the prefix is ignored
  # when matching. Unknown names are returned exactly as given, prefix included.
  defp short_data_layer(dl) when is_binary(dl) do
    case String.replace_prefix(dl, "Elixir.", "") do
      "AshPostgres.DataLayer" -> "postgres"
      "Ash.DataLayer.Ets" -> "ets"
      "Ash.DataLayer.Mnesia" -> "mnesia"
      _unknown -> dl
    end
  end

  defp short_data_layer(dl), do: dl
end