# lib/mix/tasks/hf.cache.ex

defmodule Mix.Tasks.Hf.Cache do
  @shortdoc "Manage the local HuggingFace Hub cache"
  @moduledoc """
  Manage the local HuggingFace Hub cache directory.

      # Show cache contents and size
      $ mix hf.cache

      # Also list the cached revisions of every repo
      $ mix hf.cache --verbose

      # Delete a specific revision
      $ mix hf.cache --delete-revision gpt2 --revision abc123def

      # Delete all cached files for a repo
      $ mix hf.cache --delete-repo my-org/my-model

      # Show cache directory path
      $ mix hf.cache --dir

  ## Options

    * `--verbose` / `-v` — also list the cached revisions (snapshot names) of each repo
    * `--dir` — print cache directory path and exit
    * `--delete-revision REPO [--revision SHA]` — delete a cached revision
      (`--revision` defaults to `main`)
    * `--delete-repo REPO` — delete all cached files for a repo

  When several actions are given, the first of `--dir`, `--delete-revision` and
  `--delete-repo` (in that order) is executed. Unknown options, and options that
  are missing their value, abort the task instead of being silently ignored.
  """

  use Mix.Task

  @switches [
    verbose: :boolean,
    dir: :boolean,
    delete_revision: :string,
    delete_repo: :string,
    revision: :string
  ]
  @aliases [v: :verbose]

  # Revision used by `--delete-revision` when `--revision` is not given.
  @default_revision "main"

  # Number of leading characters of a snapshot name shown in verbose listings.
  @rev_prefix_length 8

  # Binary unit sizes used when formatting byte counts.
  @kib 1_024
  @mib 1_048_576
  @gib 1_073_741_824

  # Entry point of the task. Parses `args` strictly and refuses to continue when
  # any switch is unknown or lacks its value, so that a typo such as
  # `--delete-repo` without an argument cannot silently fall back to the
  # listing action. Positional arguments are accepted and ignored.
  @impl Mix.Task
  def run(args) do
    case OptionParser.parse(args, aliases: @aliases, strict: @switches) do
      {opts, _argv, []} ->
        execute(opts)

      {_opts, _argv, invalid} ->
        flags = Enum.map_join(invalid, ", ", fn {flag, _value} -> flag end)
        Mix.raise("Invalid or incomplete option(s): #{flags}. See `mix help hf.cache`.")
    end
  end

  # Runs the single action selected by the parsed options.
  defp execute(opts) do
    cache_dir = HuggingfaceClient.Config.cache_dir()

    cond do
      opts[:dir] ->
        Mix.shell().info(cache_dir)

      opts[:delete_revision] ->
        delete_revision(opts[:delete_revision], opts[:revision] || @default_revision)

      opts[:delete_repo] ->
        delete_repo(opts[:delete_repo])

      true ->
        print_cache_info(cache_dir, opts[:verbose] || false)
    end
  end

  # Deletes revision `rev` of `repo` from the cache and reports the outcome.
  defp delete_revision(repo, rev) do
    Mix.shell().info("Deleting revision #{rev} of #{repo}...")

    repo
    |> HuggingfaceClient.Hub.Cache.delete_revision(revision: rev)
    |> report_deletion()
  end

  # Deletes every cached file of `repo` and reports the outcome.
  defp delete_repo(repo) do
    Mix.shell().info("Deleting all cached files for #{repo}...")

    repo
    |> HuggingfaceClient.Hub.Cache.delete_repo_cache()
    |> report_deletion()
  end

  # Anything other than `:ok` from the cache module is treated as a failure and
  # aborts the task.
  defp report_deletion(:ok), do: Mix.shell().info("✓ Deleted")
  defp report_deletion(err), do: Mix.raise("Failed: #{inspect(err)}")

  # Prints the cache summary, or a short notice when the directory is absent.
  defp print_cache_info(cache_dir, verbose) do
    if File.dir?(cache_dir) do
      print_cache_summary(cache_dir, verbose)
    else
      Mix.shell().info("Cache directory not found: #{cache_dir}")
      Mix.shell().info("No models have been cached yet.")
    end
  end

  # Prints the cache location, its total size, the number of cached repos and
  # one entry per repo.
  defp print_cache_summary(cache_dir, verbose) do
    size = dir_size(cache_dir)
    repos = list_cached_repos(cache_dir)

    Mix.shell().info("HuggingFace Hub cache: #{cache_dir}")
    Mix.shell().info("Total size: #{format_bytes(size)}")
    Mix.shell().info("Cached repos: #{length(repos)}")
    Mix.shell().info("")

    Enum.each(repos, fn repo -> print_repo_entry(cache_dir, repo, verbose) end)
  end

  # Prints one repo line (directory name with "--" shown as "/", plus its size)
  # and, in verbose mode, the repo's revisions.
  defp print_repo_entry(cache_dir, repo, verbose) do
    repo_path = Path.join(cache_dir, repo)
    repo_size = dir_size(repo_path)
    human_name = String.replace(repo, "--", "/")

    Mix.shell().info("  #{human_name} (#{format_bytes(repo_size)})")
    if verbose, do: print_repo_revisions(repo_path)
  end

  # Lists the entries of `<repo_path>/snapshots`, one line per revision, in
  # sorted order so the output is stable between runs.
  defp print_repo_revisions(repo_path) do
    snapshots = Path.join(repo_path, "snapshots")

    if File.dir?(snapshots) do
      snapshots
      |> File.ls!()
      |> Enum.sort()
      |> Enum.each(fn snap ->
        Mix.shell().info("    rev: #{abbreviate_revision(snap)}")
      end)
    end
  end

  # Shortens long revision names to their first characters followed by "...";
  # names that already fit are shown unchanged (no misleading ellipsis).
  defp abbreviate_revision(rev) do
    if String.length(rev) > @rev_prefix_length do
      String.slice(rev, 0, @rev_prefix_length) <> "..."
    else
      rev
    end
  end

  # Returns the sorted names of the sub-directories of `cache_dir` whose name
  # contains "--". Returns `[]` when the directory cannot be listed.
  defp list_cached_repos(cache_dir) do
    case File.ls(cache_dir) do
      {:ok, entries} ->
        entries
        |> Enum.filter(fn e ->
          File.dir?(Path.join(cache_dir, e)) and String.contains?(e, "--")
        end)
        |> Enum.sort()

      _ ->
        []
    end
  end

  # Total size in bytes of the regular files below `path`; 0 when `path` is not
  # a directory.
  defp dir_size(path) do
    if File.dir?(path), do: sum_file_sizes(path), else: 0
  end

  # `match_dot: true` is required so that dotfiles and the contents of
  # dot-directories are included; `Path.wildcard/2` skips them by default.
  defp sum_file_sizes(path) do
    path
    |> Path.join("**/*")
    |> Path.wildcard(match_dot: true)
    |> Enum.reduce(0, &add_file_size/2)
  end

  # Adds the size of `file` to `acc` when it is a regular file. `File.lstat/1`
  # does not follow symlinks, so a file that is reachable through a link is not
  # counted in addition to its target. Directories, links and entries that
  # cannot be inspected contribute nothing.
  defp add_file_size(file, acc) do
    case File.lstat(file) do
      {:ok, %File.Stat{type: :regular, size: size}} -> acc + size
      _ -> acc
    end
  end

  # Formats a byte count using binary units, rounded to one decimal place.
  defp format_bytes(bytes) when bytes >= @gib,
    do: "#{Float.round(bytes / @gib, 1)} GB"

  defp format_bytes(bytes) when bytes >= @mib,
    do: "#{Float.round(bytes / @mib, 1)} MB"

  defp format_bytes(bytes) when bytes >= @kib,
    do: "#{Float.round(bytes / @kib, 1)} KB"

  defp format_bytes(bytes), do: "#{bytes} B"
end