defmodule Mix.Tasks.Hf.Cache do
  @shortdoc "Manage the local HuggingFace Hub cache"

  @moduledoc """
  Manage the local HuggingFace Hub cache directory.

  ## Usage

      # Show cache contents and size
      $ mix hf.cache

      # Show detailed file listing
      $ mix hf.cache --verbose

      # Delete a specific revision
      $ mix hf.cache --delete-revision gpt2 --revision abc123def

      # Delete all cached files for a repo
      $ mix hf.cache --delete-repo my-org/my-model

      # Show cache directory path
      $ mix hf.cache --dir

  ## Options

    * `--verbose` / `-v` — show detailed file listing
    * `--dir` — print cache directory path and exit
    * `--delete-revision REPO [--revision SHA]` — delete a cached revision
      (the revision defaults to `main` when `--revision` is not given)
    * `--delete-repo REPO` — delete all cached files for a repo

  Unknown options, options with a missing value and stray positional
  arguments abort the task before anything is deleted.
  """

  use Mix.Task

  @switches [
    verbose: :boolean,
    dir: :boolean,
    delete_revision: :string,
    delete_repo: :string,
    revision: :string
  ]
  @aliases [v: :verbose]

  # Revision used by `--delete-revision` when `--revision` is omitted.
  @default_revision "main"

  # Number of leading characters of a revision name shown in verbose listings.
  @short_revision_length 8

  @doc """
  Runs the task with the raw command-line `args`.

  The first matching action wins, in this order: `--dir`,
  `--delete-revision`, `--delete-repo`, and otherwise the cache summary.

  Raises `Mix.Error` when `args` contain invalid options or unexpected
  positional arguments, or when a delete operation does not return `:ok`.
  """
  @impl Mix.Task
  def run(args) do
    {opts, argv, invalid} = OptionParser.parse(args, aliases: @aliases, strict: @switches)

    # Fail fast: a mistyped flag (e.g. `--revison`) must not silently fall
    # back to the default revision of a destructive command.
    validate_args!(argv, invalid)

    cache_dir = HuggingfaceClient.Config.cache_dir()

    cond do
      opts[:dir] ->
        Mix.shell().info(cache_dir)

      opts[:delete_revision] ->
        delete_revision(opts[:delete_revision], opts[:revision] || @default_revision)

      opts[:delete_repo] ->
        delete_repo(opts[:delete_repo])

      true ->
        print_cache_info(cache_dir, Keyword.get(opts, :verbose, false))
    end
  end

  # Raises `Mix.Error` unless both the positional arguments and the list of
  # invalid options reported by `OptionParser.parse/2` are empty.
  defp validate_args!([], []), do: :ok

  defp validate_args!(_argv, [_ | _] = invalid) do
    Mix.raise("Invalid option(s): " <> Enum.map_join(invalid, ", ", &elem(&1, 0)))
  end

  defp validate_args!([_ | _] = argv, []) do
    Mix.raise("Unexpected argument(s): " <> Enum.join(argv, " "))
  end

  # Deletes revision `rev` of `repo` from the cache and reports the outcome.
  defp delete_revision(repo, rev) do
    Mix.shell().info("Deleting revision #{rev} of #{repo}...")

    repo
    |> HuggingfaceClient.Hub.Cache.delete_revision(revision: rev)
    |> report_deletion()
  end

  # Deletes every cached file of `repo` and reports the outcome.
  defp delete_repo(repo) do
    Mix.shell().info("Deleting all cached files for #{repo}...")

    repo
    |> HuggingfaceClient.Hub.Cache.delete_repo_cache()
    |> report_deletion()
  end

  # Only a bare `:ok` counts as success; any other result aborts the task.
  defp report_deletion(:ok), do: Mix.shell().info("✓ Deleted")
  defp report_deletion(err), do: Mix.raise("Failed: #{inspect(err)}")

  # Prints the cache summary, or a hint when the cache directory is missing.
  defp print_cache_info(cache_dir, verbose) do
    if File.dir?(cache_dir) do
      print_cache_summary(cache_dir, verbose)
    else
      Mix.shell().info("Cache directory not found: #{cache_dir}")
      Mix.shell().info("No models have been cached yet.")
    end
  end

  # Prints the cache location, total size and repo count, then one entry per repo.
  defp print_cache_summary(cache_dir, verbose) do
    size = dir_size(cache_dir)
    repos = list_cached_repos(cache_dir)

    Mix.shell().info("HuggingFace Hub cache: #{cache_dir}")
    Mix.shell().info("Total size: #{format_bytes(size)}")
    Mix.shell().info("Cached repos: #{length(repos)}")
    Mix.shell().info("")

    Enum.each(repos, fn repo -> print_repo_entry(cache_dir, repo, verbose) end)
  end

  # Prints one repo line (directory name with "--" shown as "/", plus size)
  # and, in verbose mode, its revisions.
  defp print_repo_entry(cache_dir, repo, verbose) do
    repo_path = Path.join(cache_dir, repo)
    repo_size = dir_size(repo_path)
    human_name = String.replace(repo, "--", "/")

    Mix.shell().info(" #{human_name} (#{format_bytes(repo_size)})")

    if verbose, do: print_repo_revisions(repo_path)
  end

  # Lists the entries of the repo's "snapshots" directory, sorted by name.
  # A missing or unreadable directory is skipped so one broken repo does not
  # abort the whole listing.
  defp print_repo_revisions(repo_path) do
    case File.ls(Path.join(repo_path, "snapshots")) do
      {:ok, revisions} ->
        revisions
        |> Enum.sort()
        |> Enum.each(fn rev -> Mix.shell().info(" rev: #{short_revision(rev)}") end)

      {:error, _reason} ->
        :ok
    end
  end

  # Abbreviates long revision names; the ellipsis is only added when the
  # name was actually truncated.
  defp short_revision(rev) do
    if String.length(rev) > @short_revision_length do
      String.slice(rev, 0, @short_revision_length) <> "..."
    else
      rev
    end
  end

  # Returns the sorted names of the sub-directories of `cache_dir` whose name
  # contains "--"; returns [] when the directory cannot be listed.
  defp list_cached_repos(cache_dir) do
    case File.ls(cache_dir) do
      {:ok, entries} ->
        entries
        |> Enum.filter(fn entry ->
          File.dir?(Path.join(cache_dir, entry)) and String.contains?(entry, "--")
        end)
        |> Enum.sort()

      _ ->
        []
    end
  end

  # Total size in bytes of the regular files below `path`, or 0 when `path`
  # is not a directory.
  defp dir_size(path) do
    if File.dir?(path), do: sum_file_sizes(path), else: 0
  end

  # Walks `dir` with File.ls/1 rather than a glob so that hidden files are
  # included and glob metacharacters in the path are taken literally.
  # Unreadable directories contribute 0.
  defp sum_file_sizes(dir) do
    case File.ls(dir) do
      {:ok, entries} ->
        Enum.reduce(entries, 0, fn entry, total ->
          total + entry_size(Path.join(dir, entry))
        end)

      {:error, _reason} ->
        0
    end
  end

  # Uses lstat so symlinks are not followed: a file that is also reachable
  # through a link is counted once, and link cycles cannot cause endless
  # recursion.
  defp entry_size(path) do
    case File.lstat(path) do
      {:ok, %File.Stat{type: :regular, size: size}} -> size
      {:ok, %File.Stat{type: :directory}} -> sum_file_sizes(path)
      _other -> 0
    end
  end

  # Formats a byte count using binary units (1 KB = 1024 B), one decimal place.
  defp format_bytes(bytes) when bytes >= 1_073_741_824,
    do: "#{Float.round(bytes / 1_073_741_824, 1)} GB"

  defp format_bytes(bytes) when bytes >= 1_048_576,
    do: "#{Float.round(bytes / 1_048_576, 1)} MB"

  defp format_bytes(bytes) when bytes >= 1_024,
    do: "#{Float.round(bytes / 1_024, 1)} KB"

  defp format_bytes(bytes), do: "#{bytes} B"
end