defmodule Mix.Tasks.Llms.Txt do
  @shortdoc "Fetches hexdocs for all dependencies and converts them to llms.txt files"

  @moduledoc """
  Generates one `llms.txt` file per Hex dependency.

  The HTML documentation of every Hex dependency is fetched at its installed
  version with `mix hex.docs fetch`, each page is converted to Markdown with
  `Htmd`, and the result is written to one `llms.txt` per package.

  After generating, stale documentation is pruned: outdated version
  directories and packages that are no longer dependencies are removed, so the
  output directory always mirrors the current lock file.

  ## Usage

      mix llms.txt

  ## Options

    * `--output DIR` - directory the `llms.txt` files are written to
      (defaults to `priv/llms`). Each package gets its own versioned
      subdirectory: `priv/llms/<package>/<version>/llms.txt`.

    * `--no-fetch` - skip `mix hex.docs fetch` and only (re)convert the docs
      that were already fetched locally.
  """

  use Mix.Task

  @default_output_dir "priv/llms"

  # Command-line switches accepted by the task; `fetch: :boolean` is what makes
  # `--no-fetch` available.
  @switches [output: :string, fetch: :boolean]

  # Task entry point: parses the switches, optionally fetches the docs, converts
  # every package concurrently and finally prunes stale output. Unknown switches
  # make `OptionParser.parse!/2` raise; positional arguments are ignored.
  @impl true
  def run(arguments) do
    {opts, _positional} = OptionParser.parse!(arguments, strict: @switches)

    target_dir = Keyword.get(opts, :output, @default_output_dir)
    fetch? = Keyword.get(opts, :fetch, true)

    if fetch?, do: fetch_hex_docs()

    docs_base = LlmsTxt.hex_docs_base()
    packages = LlmsTxt.hex_packages()

    packages
    |> Task.async_stream(&convert(&1, docs_base, target_dir), ordered: false, timeout: :infinity)
    # Assertive unwrap: anything other than `{:ok, outcome}` raises here.
    |> Stream.map(fn {:ok, outcome} -> outcome end)
    |> Enum.each(&report/1)

    LlmsTxt.prune_stale_docs(target_dir, packages)
  end

  # Runs `mix hex.docs fetch` from within this task.
  defp fetch_hex_docs do
    # The `hex.docs` task lives in the Hex archive, which a custom task does not
    # have on its code path by default, so the archives are appended first.
    Mix.Local.append_archives()
    Mix.Task.run("hex.docs", ["fetch"])
  end

  # Converts a single package and tags the outcome with the package's name and
  # version so it can be reported after leaving the worker process.
  defp convert({name, version}, docs_base, target_dir) do
    outcome = LlmsTxt.generate_package_llms(name, version, docs_base, target_dir)
    {name, version, outcome}
  end

  # Prints the one-line summary for a converted (or skipped) package.
  defp report({name, version, outcome}) do
    Mix.shell().info(summary(name, version, outcome))
  end

  # Builds the message for each known outcome. Any other outcome has no
  # matching clause and raises.
  defp summary(name, version, {:ok, file}) do
    "Generated #{file} for #{name} #{version}"
  end

  defp summary(name, version, {:error, :docs_not_found}) do
    "No docs found for #{name} #{version}, skipping"
  end
end