defmodule CMDCEval.Suites.BFCL do
@moduledoc """
Berkeley Function Calling Leaderboard v3 (BFCL) Suite —— 占位实现 + fixtures 接入路径。
## 数据来源
BFCL fixtures 来自上游公开仓库:
<https://github.com/ShishirPatil/gorilla/tree/main/berkeley-function-call-leaderboard>
v0.1 实现:
- **fixtures 文件存放**:`priv/bfcl/v3/<category>.jsonl`(gitignore 不入 hex 包)
- **`mix cmdc.eval.fetch_bfcl`**:从上游公开仓库 `git clone` + 转换格式
- **`cases/0` 自动 load**:若 fixtures 目录不存在,返回空列表 + warning
- **`assert/2`**:v0.1 仅做文本级比对 —— `tool_called` 检查 reply 文本中是否出现该工具名(忽略大小写),`contains` 检查 reply 是否包含 ground truth 子串;尚未比对工具调用参数
> v0.1 故意保持小范围:10 个用例占位 + 完整 fetch path。
> 更复杂的 sub-category (AST / executable / multi_turn 等) 留 v0.2 扩展。
## 限制
- 不内置 fixtures(许可证 + 文件大小考虑)
- 跑前需手动执行 `mix cmdc.eval.fetch_bfcl`
- v0.1 只覆盖 BFCL "simple" 子类(v1.0 公开 fixtures)
## 替代方案
如果 fixtures fetch 失败,可用 `CMDCEval.Suites.Internal` 做 cmdc kernel
自验证;BFCL 主要用于跨模型 / 跨 Agent kernel 横向比较。
"""
require Logger
@behaviour CMDCEval.Suite
alias CMDCEval.Case
@doc """
Returns the identifier of this suite, the string `"bfcl"`.
"""
@impl true
def name do
  "bfcl"
end
@doc """
Loads the BFCL cases from the configured fixtures directory.

The directory is read from the `:bfcl_fixtures_dir` key of the `:cmdc_eval`
application environment and defaults to `"priv/bfcl/v3"`. When no fixtures
can be loaded, a warning is logged and an empty list is returned.
"""
@impl true
def cases do
  fixtures_dir = Application.get_env(:cmdc_eval, :bfcl_fixtures_dir, "priv/bfcl/v3")
  outcome = load_fixtures(fixtures_dir)

  case outcome do
    {:error, :no_fixtures} ->
      # Fixtures are not shipped with the package and must be fetched by hand,
      # so their absence degrades to "no cases" rather than failing the run.
      Logger.warning(
        "[CMDCEval.Suites.BFCL] fixtures 不存在 (#{fixtures_dir}),返回空 cases 列表;" <>
          "运行 `mix cmdc.eval.fetch_bfcl` 拉取 upstream fixtures"
      )

      []

    {:ok, loaded} ->
      loaded
  end
end
@doc """
Decides whether `reply` satisfies the expectation stored in the case.

  * `%{tool_called: name}` — passes when `reply` mentions `name`, compared
    case-insensitively. This is a text-level approximation; no tool-call
    trace is inspected.
  * `%{contains: text}` — passes when `reply` contains `text` (case-sensitive).
    When the expected value is a list, it passes if any string in the list is
    contained in `reply`.

Returns `false` for every other combination, including a non-binary `reply`
and expected values that are not strings. Expected values are taken verbatim
from decoded fixture JSON, so they are not guaranteed to be strings; such rows
fail the case instead of raising.
"""
@impl true
def assert(%Case{expected: %{tool_called: name}}, reply)
    when is_binary(reply) and is_binary(name) do
  String.contains?(String.downcase(reply), String.downcase(name))
end

def assert(%Case{expected: %{contains: text}}, reply)
    when is_binary(reply) and is_binary(text) do
  String.contains?(reply, text)
end

def assert(%Case{expected: %{contains: texts}}, reply)
    when is_binary(reply) and is_list(texts) do
  # Non-string elements can never match; skipping them keeps a partly
  # malformed list from raising inside String.contains?/2.
  Enum.any?(texts, &(is_binary(&1) and String.contains?(reply, &1)))
end

def assert(_test_case, _reply), do: false
@doc """
Returns the suite's default tool list, which is empty.
"""
@impl true
def default_tools do
  []
end
@doc """
Estimates the cost of a run from its token usage.

Takes a map with `:tokens_in` and `:tokens_out`. Input tokens are weighted at
3.0 and output tokens at 15.0 per one million tokens; the sum is rounded to
six decimal places. The currency and the source of these rates are not stated
in this module.
"""
@impl true
def cost_estimator(%{tokens_in: prompt_tokens, tokens_out: completion_tokens}) do
  input_cost = prompt_tokens * 3.0
  output_cost = completion_tokens * 15.0
  total = (input_cost + output_cost) / 1_000_000.0

  Float.round(total, 6)
end
# ==========================================================================
# 私有 — fixtures 加载
# ==========================================================================
# Reads every `*.jsonl` file directly inside `dir` and concatenates the cases
# parsed from them.
#
# Returns `{:ok, cases}` when at least one case was loaded, and
# `{:error, :no_fixtures}` when `dir` is not a directory or yields no cases.
# `File.ls!/1` raises if the directory exists but cannot be listed.
defp load_fixtures(dir) do
  expanded = Path.expand(dir)

  if File.dir?(expanded) do
    # Directory listings come back in no guaranteed order; sorting keeps the
    # order of cases stable across machines and runs.
    fixture_files =
      expanded
      |> File.ls!()
      |> Enum.filter(&String.ends_with?(&1, ".jsonl"))
      |> Enum.sort()

    case Enum.flat_map(fixture_files, &parse_jsonl(Path.join(expanded, &1))) do
      [] -> {:error, :no_fixtures}
      cases -> {:ok, cases}
    end
  else
    {:error, :no_fixtures}
  end
end
# Parses one JSON Lines file into a list of cases.
#
# Blank lines are skipped. Rows that `row_to_case/1` cannot convert (it
# returns `nil` for them) are dropped so that callers only ever receive cases.
# Raises if the file cannot be read or if a non-blank line is not valid JSON
# (`Jason.decode!/1`).
defp parse_jsonl(path) do
  path
  |> File.stream!()
  |> Stream.map(&String.trim/1)
  |> Stream.reject(&(&1 == ""))
  |> Stream.map(&Jason.decode!/1)
  |> Stream.map(&row_to_case/1)
  # Without this, unrecognized rows would surface as `nil` entries in the
  # suite's case list.
  |> Stream.reject(&is_nil/1)
  |> Enum.to_list()
end
# Converts one decoded fixture row into a case.
#
# A row must be a map with both "id" and "question"; anything else yields
# `nil`. The whole row is kept as the case metadata.
defp row_to_case(%{"id" => id, "question" => question} = row) do
  Case.new(id: id, input: question, expected: expected_for(row), metadata: row)
end

defp row_to_case(_other), do: nil

# Derives the expectation from a row: "function" takes precedence over
# "ground_truth"; a row with neither has no expectation (`nil`).
defp expected_for(%{"function" => function}), do: %{tool_called: function}
defp expected_for(%{"ground_truth" => truth}), do: %{contains: truth}
defp expected_for(_row), do: nil
end