# lib/llms_txt.ex

defmodule LlmsTxt do
  @moduledoc """
  Reads a project's Hex dependencies from its lock file and turns each one's
  fetched hexdocs into an `llms.txt` file.

  This is the data layer behind the `mix docs.llms` task: it locates the
  packages, resolves the local hexdocs cache, writes one `llms.txt` per package
  and prunes documentation that no longer matches a current dependency.
  """

  alias Mix.Dep.Lock
  alias LlmsTxt.Converter

  @doc """
  Returns the `{app, version}` tuples for every Hex dependency in the lock file.

  Git and path dependencies are skipped because they have no Hex version and no
  documentation on HexDocs. Lock entries tagged `:hex` that are too short to
  carry a version are skipped as well instead of raising.
  """
  @spec hex_packages() :: [{atom(), String.t()}]
  def hex_packages do
    # NOTE(review): the second element of a Hex lock entry appears to be the
    # Hex package name, which can differ from the app key when a dependency
    # uses the `:hex` option. The docs cache is presumably keyed by package
    # name - confirm whether callers need the package name here instead.
    for {app, lock} <- Lock.read(), hex_lock?(lock), do: {app, elem(lock, 2)}
  end

  @doc """
  Returns the base directory of the local hexdocs cache.

  The cache lives under `$HEX_HOME/docs/hexpm`. When `HEX_HOME` is unset or
  empty, `~/.hex` is used as the Hex home.
  """
  @spec hex_docs_base() :: String.t()
  def hex_docs_base do
    hex_home =
      case System.get_env("HEX_HOME") do
        # An empty variable would otherwise yield the relative path
        # "docs/hexpm", resolved against the current working directory.
        unset when unset in [nil, ""] -> Path.join(System.user_home!(), ".hex")
        home -> home
      end

    Path.join([hex_home, "docs", "hexpm"])
  end

  @doc """
  Converts the fetched docs for a single package into a `llms.txt` file.

  The docs are read from `<docs_base>/<name>/<version>` and the result is
  written to `<output_dir>/<name>/<version>/llms.txt`, creating the directory
  if needed.

  Returns `{:ok, path}` with the written file or `{:error, :docs_not_found}`
  when no docs were fetched for the given package and version. Raises if the
  output directory or file cannot be written.
  """
  @spec generate_package_llms(String.Chars.t(), String.Chars.t(), Path.t(), Path.t()) ::
          {:ok, String.t()} | {:error, :docs_not_found}
  def generate_package_llms(name, version, docs_base, output_dir) do
    package_dir = Path.join([docs_base, to_string(name), to_string(version)])

    if File.dir?(package_dir) do
      package_output = Path.join([output_dir, to_string(name), to_string(version)])

      File.mkdir_p!(package_output)
      output_file = Path.join(package_output, "llms.txt")
      File.write!(output_file, Converter.build_markdown(name, version, package_dir))
      {:ok, output_file}
    else
      {:error, :docs_not_found}
    end
  end

  @doc """
  Removes documentation that no longer matches a current dependency.

  Keeps only the `<package>/<version>` directory for each package in
  `packages`, deleting outdated version directories and packages that are no
  longer dependencies. Plain files and dot-prefixed entries are left untouched.

  Returns `:ok`. A missing `output_dir` is treated as already pruned.
  """
  @spec prune_stale_docs(Path.t(), Enumerable.t()) :: :ok
  def prune_stale_docs(output_dir, packages) do
    # Compare on {package, version} names rather than joined path strings so
    # the result does not depend on how `output_dir` happens to be spelled.
    current =
      MapSet.new(packages, fn {name, version} -> {to_string(name), to_string(version)} end)

    for package <- visible_subdirs(output_dir),
        package_dir = Path.join(output_dir, package),
        version <- visible_subdirs(package_dir),
        not MapSet.member?(current, {package, version}) do
      File.rm_rf!(Path.join(package_dir, version))
    end

    remove_empty_package_dirs(output_dir)
  end

  # Deletes package directories left without any entries after pruning.
  defp remove_empty_package_dirs(output_dir) do
    output_dir
    |> visible_subdirs()
    |> Stream.map(&Path.join(output_dir, &1))
    |> Stream.filter(&(File.ls!(&1) == []))
    |> Enum.each(&File.rmdir!/1)
  end

  # Lists the names of the non-hidden subdirectories of `dir`.
  #
  # Uses `File.ls/1` instead of `Path.wildcard/1` so that glob metacharacters
  # in `dir` (`[`, `{`, `*`, `?`) are taken literally. Dot-prefixed entries are
  # skipped, and an unreadable or missing `dir` yields an empty list.
  defp visible_subdirs(dir) do
    case File.ls(dir) do
      {:ok, entries} ->
        Enum.filter(entries, fn entry ->
          not String.starts_with?(entry, ".") and File.dir?(Path.join(dir, entry))
        end)

      {:error, _reason} ->
        []
    end
  end

  # True for lock entries tagged `:hex` that are long enough to hold a version
  # at index 2.
  defp hex_lock?(lock) when is_tuple(lock) and tuple_size(lock) >= 3,
    do: elem(lock, 0) == :hex

  defp hex_lock?(_lock), do: false
end