# lib/jidoka/eval.ex

defmodule Jidoka.Eval do
  @moduledoc """
  Small deterministic eval runner for Jidoka harness flows.

  The runner intentionally delegates execution to `Jidoka.Harness`. It adds no
  new runtime path; it only packages an agent/request pair with assertions that
  are useful for examples, regression tests, and optional live smoke checks.
  """

  alias Jidoka.Effect
  alias Jidoka.Eval.{Case, Run}
  alias Jidoka.Harness
  alias Jidoka.Runtime.AgentSnapshot
  alias Jidoka.Schema
  alias Jidoka.Turn

  @type case_input :: Case.t() | keyword() | map()

  @doc """
  Runs a single eval case through the harness and packages the outcome.

  `eval_case_input` is normalized with `Case.from_input/2`. Anything other than
  `{:ok, %Case{}}` from that step is handed back to the caller untouched. The same
  `opts` are passed both to the normalization step and to the harness turn.
  """
  @spec run_case(case_input(), keyword()) :: {:ok, Run.t()} | {:error, term()}
  def run_case(eval_case_input, opts \\ []) do
    case Case.from_input(eval_case_input, opts) do
      {:ok, %Case{} = eval_case} ->
        outcome = execute(eval_case, opts)
        build_run(outcome, eval_case)

      other ->
        other
    end
  end

  @doc """
  Checks a completed turn result against the assertions declared on the case.

  The `:contains`, `:equals`, and `:operation_called` assertion keys are looked up
  in that order. A key whose lookup yields `nil` contributes no entries. Returns the
  accumulated assertion entries in evaluation order.
  """
  @spec evaluate(Case.t(), Turn.Result.t()) :: [Run.assertion()]
  def evaluate(%Case{assertions: assertions}, %Turn.Result{} = result) do
    checks = [
      contains: &maybe_assert_contains/3,
      equals: &maybe_assert_equals/3,
      operation_called: &maybe_assert_operation_called/3
    ]

    Enum.reduce(checks, [], fn {key, check}, collected ->
      check.(collected, Schema.get_key(assertions, key), result)
    end)
  end

  # Runs the case's agent/request pair through the harness, forwarding `opts` as given.
  defp execute(%Case{agent: agent, request: request}, opts),
    do: Harness.run_turn(agent, request, opts)

  # Converts a harness outcome into a run record via `Run.new/1`.
  #
  # * `{:ok, result}` - evaluates the case's assertions; the status is `:failed` as
  #   soon as one assertion is not `:passed`, otherwise `:passed` (which includes the
  #   case where no assertions were produced).
  # * `{:hibernate, snapshot}` - reported as `:error` with the projected snapshot.
  # * `{:error, reason}` - reported as `:error` with the normalized error as a map.
  defp build_run({:ok, %Turn.Result{} = result}, %Case{} = eval_case) do
    checks = evaluate(eval_case, result)
    any_failed? = Enum.any?(checks, fn check -> check.status != :passed end)

    Run.new(
      case_id: eval_case.id,
      status: if(any_failed?, do: :failed, else: :passed),
      result: result,
      assertions: checks,
      observations: observations(result),
      metadata: eval_case.metadata
    )
  end

  defp build_run({:hibernate, %AgentSnapshot{} = snapshot}, %Case{} = eval_case) do
    error_run(eval_case, %{reason: :hibernated, snapshot: Jidoka.project(snapshot)})
  end

  defp build_run({:error, reason}, %Case{} = eval_case) do
    error =
      reason
      |> Jidoka.normalize_error(operation: :eval)
      |> Jidoka.error_to_map()

    error_run(eval_case, error)
  end

  # Shared shape for runs that did not produce a turn result.
  defp error_run(%Case{id: case_id, metadata: metadata}, error) do
    Run.new(
      case_id: case_id,
      status: :error,
      error: error,
      assertions: [],
      metadata: metadata
    )
  end

  # Appends one `:contains` assertion per expected substring.
  #
  # `expected` may be a single value or a list. `nil` means the assertion was not
  # requested, and `assertions` is returned unchanged. An entry passes only when both
  # the expected value and the result content are binaries and the content contains
  # the expected text.
  #
  # Non-binary content is reported as a failed assertion rather than raising from
  # `String.contains?/2`, mirroring how non-binary expectations are handled.
  defp maybe_assert_contains(assertions, nil, _result), do: assertions

  defp maybe_assert_contains(assertions, expected, %Turn.Result{content: content}) do
    expected
    |> List.wrap()
    |> Enum.reduce(assertions, fn needle, acc ->
      append_assertion(acc, %{
        name: :contains,
        status: assertion_status(contains_text?(content, needle)),
        expected: needle,
        actual: content
      })
    end)
  end

  # Substring check that treats any non-binary operand as "not contained".
  defp contains_text?(content, needle) when is_binary(content) and is_binary(needle),
    do: String.contains?(content, needle)

  defp contains_text?(_content, _needle), do: false

  # Appends a single `:equals` assertion comparing the result content to `expected`
  # with `==`. A `nil` expectation means the assertion was not requested.
  defp maybe_assert_equals(assertions, nil, _result), do: assertions

  defp maybe_assert_equals(assertions, expected, %Turn.Result{} = result) do
    actual = result.content
    outcome = assertion_status(actual == expected)

    append_assertion(
      assertions,
      %{name: :equals, status: outcome, expected: expected, actual: actual}
    )
  end

  # Appends one `:operation_called` assertion per expected operation.
  #
  # Expected names are normalized with `operation_name/1` and pass when they appear
  # among the operation names recorded on the result. A `nil` expectation means the
  # assertion was not requested.
  # NOTE(review): recorded names are compared as-is, so this presumably relies on
  # them being stored as strings - confirm against the harness.
  defp maybe_assert_operation_called(assertions, nil, _result), do: assertions

  defp maybe_assert_operation_called(assertions, expected, %Turn.Result{} = result) do
    called = operation_names(result)

    entries =
      for wanted <- List.wrap(expected) do
        wanted_name = operation_name(wanted)

        %{
          name: :operation_called,
          status: assertion_status(wanted_name in called),
          expected: wanted_name,
          actual: called
        }
      end

    assertions ++ entries
  end

  # Adds `assertion` at the end of `assertions`, so entries stay in evaluation order.
  defp append_assertion(assertions, assertion) do
    Enum.concat(assertions, [assertion])
  end

  # Translates a boolean check outcome into the assertion status atom.
  defp assertion_status(passed?) when is_boolean(passed?) do
    if passed?, do: :passed, else: :failed
  end

  # Lists the operation recorded on each entry of the agent state's operation
  # results, in order. Entries may be `Effect.OperationResult` structs or maps keyed
  # by `:operation` / `"operation"`. Entries of any other shape, and entries whose
  # operation is `nil`, are skipped.
  defp operation_names(%Turn.Result{agent_state: %{operation_results: recorded}}) do
    Enum.flat_map(recorded, fn entry ->
      case recorded_operation(entry) do
        nil -> []
        operation -> [operation]
      end
    end)
  end

  # Pulls the operation out of one recorded entry, or `nil` for an unknown shape.
  defp recorded_operation(%Effect.OperationResult{operation: operation}), do: operation
  defp recorded_operation(%{operation: operation}), do: operation
  defp recorded_operation(%{"operation" => operation}), do: operation
  defp recorded_operation(_other), do: nil

  # Normalizes an expected operation name: atoms become strings, and every other
  # value (binaries included) is returned untouched.
  defp operation_name(name) do
    if is_atom(name), do: Atom.to_string(name), else: name
  end

  # Summarizes a turn result for the run record: its content, the recorded operation
  # names, the number of events, and the sizes of the journal's intent/result maps.
  defp observations(%Turn.Result{content: content, events: events, journal: journal} = result) do
    %{
      content: content,
      operation_calls: operation_names(result),
      event_count: length(events),
      journal_intents: map_size(journal.intents),
      journal_results: map_size(journal.results)
    }
  end
end