# lib/cmdc_rag_arcana.ex

defmodule CMDCRAGArcana do
  @moduledoc """
  Arcana-backed enterprise RAG adapter for CMDC.

  本包不改变 `cmdc` core,而是通过标准 `CMDC.Tool` 与 `CMDC.Plugin`
  边界把 Arcana 的 search/answer 能力挂入 Agent Runtime。第一版只提供
  只读检索与问答,不允许 Agent 直接 ingest / delete 企业知识库文档。

  ## 快速使用

      {:ok, session} =
        CMDC.create_agent(
          model: "anthropic:claude-sonnet-4-5",
          tools: [
            CMDCRAGArcana.Tool.Search,
            CMDCRAGArcana.Tool.Answer,
            CMDCRAGArcana.Tool.IngestStatus,
            CMDCRAGArcana.Tool.GraphStatus,
            CMDCRAGArcana.Tool.GraphSearch
          ],
          plugins: [
            CMDCRAGArcana.Plugin.AccessControl,
            CMDCRAGArcana.Plugin.CitationAudit
          ],
          user_data: %{
            tenant_id: "tenant-a",
            user_id: "u-1",
            roles: ["ops"],
            cmdc_rag_arcana: %{
              repo: MyApp.Repo,
              llm: "openai:gpt-4o-mini",
              status_backend: MyApp.Knowledge.RAGStatusBackend,
              graph_profiles: [%{id: "contract_graph", mode: :relationship_graph}],
              graph_policies: [%{profile_id: "contract_graph", collections: ["contracts"]}],
              allowed_collections: ["policies"]
            }
          }
        )

  集成测试可以把 `:backend` / `:status_backend` 改为自定义模块,从而避免
  真实 Repo / pgvector / LLM / 企业控制面数据库。
  """

  alias CMDCRAGArcana.{Config, SearchResult, Telemetry}
  alias CMDCRAGArcana.Graph.{Evidence, Preflight, Status}

  # Captured once at compile time. Mix is a build tool and is not part of a
  # release, so calling `Mix.Project.config/0` at runtime fails there; under
  # `mix run` it would describe whichever project is current at that moment
  # rather than necessarily this package.
  @version Mix.Project.config()[:version] || "0.1.0"

  @doc """
  Returns the version string of this package.

  The value is the `:version` entry of the Mix project configuration that was
  in effect when this module was compiled, falling back to `"0.1.0"` when that
  entry is absent.
  """
  @spec version() :: String.t()
  def version, do: @version

  @doc """
  Runs a search through the configured backend and wraps the outcome in a
  `CMDCRAGArcana.SearchResult`.

  `opts` is turned into a config with `Config.new!/1`. The backend call
  `config.backend.search/2` runs inside `Telemetry.span/4` under the `:search`
  label, and the value returned by the span is then normalized:

    * `{:ok, results}` where `results` is a list becomes
      `{:ok, SearchResult.from_search(query, results, config)}`
    * `{:error, reason}` is returned unchanged
  """
  @spec search(String.t(), keyword()) :: {:ok, SearchResult.t()} | {:error, term()}
  def search(query, opts) when is_binary(query) and is_list(opts) do
    config = Config.new!(opts)

    # Deferred so the backend is only invoked from within the telemetry span.
    run_backend = fn -> config.backend.search(query, Config.to_backend_opts(config)) end

    :search
    |> Telemetry.span(query, config, run_backend)
    |> normalize_search(query, config)
  end

  @doc """
  Asks the configured backend to answer `question` and wraps the outcome in a
  `CMDCRAGArcana.SearchResult`.

  `opts` is turned into a config with `Config.new!/1`. The backend call
  `config.backend.answer/2` runs inside `Telemetry.span/4` under the `:answer`
  label, and the value returned by the span is then normalized. Accepted
  success shapes are `{:ok, answer, context}`, `{:ok, %{answer: _, context: _}}`
  and `{:ok, answer}` (treated as having an empty context);
  `{:error, reason}` is returned unchanged.
  """
  @spec answer(String.t(), keyword()) :: {:ok, SearchResult.t()} | {:error, term()}
  def answer(question, opts) when is_binary(question) and is_list(opts) do
    config = Config.new!(opts)

    # Deferred so the backend is only invoked from within the telemetry span.
    run_backend = fn -> config.backend.answer(question, Config.to_backend_opts(config)) end

    :answer
    |> Telemetry.span(question, config, run_backend)
    |> normalize_answer(question, config)
  end

  # Converts a raw backend search outcome into this module's return contract.
  # Errors pass through untouched; a successful list of hits is wrapped in a
  # SearchResult. Any other shape matches no clause and raises.
  defp normalize_search({:error, reason}, _query, _config), do: {:error, reason}

  defp normalize_search({:ok, hits}, query, config) when is_list(hits) do
    result = SearchResult.from_search(query, hits, config)
    {:ok, result}
  end

  # Converts a raw backend answer outcome into this module's return contract.
  # Errors pass through untouched. The 3-tuple clause is the canonical success
  # form; the map and bare-string forms are rewritten into it. Any other shape
  # matches no clause and raises.
  defp normalize_answer({:error, reason}, _question, _config), do: {:error, reason}

  defp normalize_answer({:ok, text, sources}, question, config)
       when is_binary(text) and is_list(sources) do
    {:ok, SearchResult.from_answer(question, text, sources, config)}
  end

  defp normalize_answer({:ok, %{answer: text, context: sources}}, question, config)
       when is_binary(text) and is_list(sources) do
    normalize_answer({:ok, text, sources}, question, config)
  end

  # A bare answer string carries no context, so an empty list is substituted.
  defp normalize_answer({:ok, text}, question, config) when is_binary(text) do
    normalize_answer({:ok, text, []}, question, config)
  end

  @doc """
  Answers `question` through the Arcana pipeline.

  This is a thin delegate to `CMDCRAGArcana.Pipeline.answer/2`; `question` and
  `opts` are forwarded unchanged. Per the original documentation the pipeline
  runs according to an enterprise-preconfigured preset; that selection happens
  inside the pipeline module, not here.
  """
  @spec pipeline_answer(String.t(), keyword()) :: {:ok, SearchResult.t()} | {:error, term()}
  def pipeline_answer(question, opts) when is_binary(question) and is_list(opts),
    do: __MODULE__.Pipeline.answer(question, opts)

  @doc """
  Runs a GraphRAG search and attaches graph evidence to the result.

  Steps, in order; the first one that does not match its expected shape is
  returned as-is:

    1. `Preflight.run/1` must return `{:ok, preflight}`.
    2. A preflight whose status is `:failed` aborts with
       `{:error, {:graph_preflight_failed, failed_check_ids}}`.
    3. `config.backend.search/2` is called with the regular backend options
       plus `graph: true`.
    4. `Evidence.get/2` fetches the evidence for the query.

  On success the raw results are wrapped with `SearchResult.from_search/3` and
  passed through `Evidence.attach/2`.

  This function itself only issues the search and evidence lookups listed
  above. The original documentation states that it does not trigger graph
  rebuild, entity embedding, community detection or summarization.
  """
  @spec graph_search(String.t(), keyword()) :: {:ok, SearchResult.t()} | {:error, term()}
  def graph_search(query, opts) when is_binary(query) and is_list(opts) do
    config = Config.new!(opts)

    with {:ok, preflight} <- Preflight.run(config),
         :ok <- require_graph_preflight(preflight),
         {:ok, hits} <- config.backend.search(query, graph_backend_opts(config)),
         {:ok, graph_evidence} <- Evidence.get(query, config) do
      result =
        query
        |> SearchResult.from_search(hits, config)
        |> Evidence.attach(graph_evidence)

      {:ok, result}
    end
  end

  @doc """
  Looks up the GraphRAG status.

  Delegates to `Status.get/1`, forwarding `opts` (a keyword list or a map)
  unchanged. The original documentation describes this as a read-only query.
  """
  @spec graph_status(keyword() | map()) :: {:ok, Status.t()} | {:error, term()}
  def graph_status(opts) do
    Status.get(opts)
  end

  # Backend options for a graph search: the regular backend options derived
  # from the config, with `graph: true` set (replacing any existing `:graph`).
  defp graph_backend_opts(%Config{} = config) do
    base_opts = Config.to_backend_opts(config)
    Keyword.put(base_opts, :graph, true)
  end

  # Gate for graph search: a preflight with status :failed becomes an error
  # carrying the ids of the failed checks; anything else is let through.
  # NOTE(review): a :failed preflight that has no `checks` key does not match
  # the first clause and therefore yields :ok — confirm Preflight always
  # populates `checks`.
  defp require_graph_preflight(%{status: :failed, checks: checks}) do
    failed_ids = failed_check_ids(checks)
    {:error, {:graph_preflight_failed, failed_ids}}
  end

  defp require_graph_preflight(_other), do: :ok

  # Returns the `id` of every check whose `status` is :failed, in input order.
  defp failed_check_ids(checks) do
    for check <- checks, check.status == :failed, do: check.id
  end
end