26 changes: 18 additions & 8 deletions src/uipath/_cli/_evals/_runtime.py
@@ -276,6 +276,15 @@ async def initiate_evaluation(
evaluation_set, _ = EvalHelpers.load_eval_set(
    self.context.eval_set, self.context.eval_ids
)
+
+# Validate that resume mode is not used with multiple evaluations
+if self.context.resume and len(evaluation_set.evaluations) > 1:
+    raise ValueError(
+        f"Resume mode is not supported with multiple evaluations. "
+        f"Found {len(evaluation_set.evaluations)} evaluations in the set. "
+        f"Please run with a single evaluation using --eval-ids to specify one evaluation."
+    )
+
evaluators = await self._load_evaluators(evaluation_set, runtime)

await self.event_bus.publish(
@@ -892,15 +901,16 @@ async def execute_runtime(
# 4. Pass this map to the delegate runtime
if self.context.resume:
    logger.info(f"Resuming evaluation {eval_item.id}")
-    options = UiPathExecuteOptions(resume=True)
-    result = await execution_runtime.execute(
-        input=input_overrides if self.context.job_id is None else None,
-        options=options,
-    )
+    input = input_overrides if self.context.job_id is None else None
else:
-    result = await execution_runtime.execute(
-        input=inputs_with_overrides,
-    )
+    input = inputs_with_overrides
+
+# Always pass UiPathExecuteOptions explicitly for consistency with debug flow
+options = UiPathExecuteOptions(resume=self.context.resume)
+result = await execution_runtime.execute(
+    input=input,
+    options=options,
+)

# Log suspend status if applicable
if result.status == UiPathRuntimeStatus.SUSPENDED:
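Taken together, the two hunks above make resume a single-evaluation flow whose delegate always receives explicit execute options. A rough caller-side sketch of the intended usage, reusing only names that appear in this diff and its tests; the factory/delegate wiring, the eval set path, and the list shape of eval_ids are assumptions for illustration, not part of this PR:

from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime
from uipath._events._event_bus import EventBus
from uipath.core.tracing import UiPathTraceManager

async def resume_single_eval(factory, delegate_runtime):
    # Hypothetical driver; factory and delegate_runtime are placeholders for real runtime objects.
    context = UiPathEvalContext()
    context.eval_set = "evals/eval-sets/default.json"  # any eval set file
    context.eval_ids = ["eval-1"]  # assumed shape: narrow the set to exactly one evaluation
    context.resume = True          # with more than one evaluation this now raises ValueError

    eval_runtime = UiPathEvalRuntime(context, factory, UiPathTraceManager(), EventBus())
    # The delegate's execute() is now always invoked with options=UiPathExecuteOptions(resume=True).
    await eval_runtime.initiate_evaluation(delegate_runtime)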
37 changes: 37 additions & 0 deletions tests/cli/eval/evals/eval-sets/multiple-evals.json
@@ -0,0 +1,37 @@
{
  "version": "1.0",
  "id": "multiple-eval-set-id",
  "name": "Multiple Evaluations Set",
  "evaluatorRefs": [
    "ExactMatchEvaluator"
  ],
  "evaluations": [
    {
      "id": "eval-1",
      "name": "First Evaluation",
      "inputs": {"foo": "bar"},
      "evaluationCriterias": {
        "ExactMatchEvaluator": {
          "expectedOutput": {
            "foo": "bar"
          }
        }
      }
    },
    {
      "id": "eval-2",
      "name": "Second Evaluation",
      "inputs": {"baz": "qux"},
      "evaluationCriterias": {
        "ExactMatchEvaluator": {
          "expectedOutput": {
            "baz": "qux"
          }
        }
      }
    }
  ],
  "modelSettings": [],
  "createdAt": "2025-09-04T18:54:58.379Z",
  "updatedAt": "2025-09-04T18:55:55.416Z"
}
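This fixture exists purely to exercise the new guard: two evaluations in one set, loaded with resume enabled. A quick self-contained sanity check of its shape (assuming the repository root as working directory; the eval runtime itself is not involved here):

import json
from pathlib import Path

data = json.loads(Path("tests/cli/eval/evals/eval-sets/multiple-evals.json").read_text())
assert len(data["evaluations"]) == 2  # "eval-1" and "eval-2"
assert data["evaluatorRefs"] == ["ExactMatchEvaluator"]
# With resume enabled, initiate_evaluation() now rejects such a set:
#   ValueError: Resume mode is not supported with multiple evaluations. Found 2 evaluations ...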
193 changes: 193 additions & 0 deletions tests/cli/eval/test_eval_resume_flow.py
@@ -0,0 +1,193 @@
"""Unit tests for eval resume flow to ensure UiPathExecuteOptions is passed correctly."""

from pathlib import Path
from unittest.mock import AsyncMock, patch

import pytest
from uipath.core.tracing import UiPathTraceManager
from uipath.runtime import (
    UiPathExecuteOptions,
    UiPathRuntimeFactoryProtocol,
    UiPathRuntimeProtocol,
    UiPathRuntimeResult,
    UiPathRuntimeStatus,
)

from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime
from uipath._events._event_bus import EventBus

# ============================================================================
# Direct unit tests using mocks to verify the specific code path we changed
# ============================================================================
#
# These tests directly verify that UiPathExecuteOptions is being passed correctly
# in the execute_runtime method, which is the specific code path we modified.
#


@pytest.mark.asyncio
async def test_execute_runtime_method_passes_options_with_resume_false():
    """Direct test of execute_runtime method to verify UiPathExecuteOptions(resume=False) is passed."""
    # Arrange
    from uipath._cli._evals._models._evaluation_set import EvaluationItem

    event_bus = EventBus()
    trace_manager = UiPathTraceManager()
    context = UiPathEvalContext()
    context.eval_set = str(
        Path(__file__).parent / "evals" / "eval-sets" / "default.json"
    )
    context.resume = False  # Test resume=False

    # Create a mock runtime that will be wrapped
    mock_runtime = AsyncMock(spec=UiPathRuntimeProtocol)
    mock_runtime.execute = AsyncMock(
        return_value=UiPathRuntimeResult(
            output={"result": "success"}, status=UiPathRuntimeStatus.SUCCESSFUL
        )
    )

    # Create a mock factory
    mock_factory = AsyncMock(spec=UiPathRuntimeFactoryProtocol)
    mock_factory.new_runtime = AsyncMock(return_value=mock_runtime)

    eval_runtime = UiPathEvalRuntime(context, mock_factory, trace_manager, event_bus)

    eval_item = EvaluationItem(
        id="test-eval",
        name="Test Evaluation",
        inputs={"foo": "bar"},
        evaluation_criterias={},
    )

    # Act - Call execute_runtime directly
    with patch(
        "uipath._cli._evals._runtime.UiPathExecutionRuntime"
    ) as mock_execution_runtime_class:
        # Set up the mock to capture the execute call
        mock_execution_runtime_instance = AsyncMock()
        mock_execution_runtime_instance.execute = AsyncMock(
            return_value=UiPathRuntimeResult(
                output={"result": "success"}, status=UiPathRuntimeStatus.SUCCESSFUL
            )
        )
        mock_execution_runtime_class.return_value = mock_execution_runtime_instance

        await eval_runtime.execute_runtime(
            eval_item=eval_item, execution_id="test-exec-id", runtime=mock_runtime
        )

    # Assert - Verify that execute was called with UiPathExecuteOptions(resume=False)
    assert mock_execution_runtime_instance.execute.called
    call_args = mock_execution_runtime_instance.execute.call_args

    # Extract the options argument
    options = call_args.kwargs.get("options") or (
        call_args.args[1] if len(call_args.args) > 1 else None
    )

    # Assert that options were passed and resume=False
    assert options is not None, "UiPathExecuteOptions should be passed explicitly"
    assert isinstance(options, UiPathExecuteOptions)
    assert options.resume is False, (
        "resume should be False when context.resume=False"
    )


@pytest.mark.asyncio
async def test_execute_runtime_method_passes_options_with_resume_true():
    """Direct test of execute_runtime method to verify UiPathExecuteOptions(resume=True) is passed."""
    # Arrange
    from uipath._cli._evals._models._evaluation_set import EvaluationItem

    event_bus = EventBus()
    trace_manager = UiPathTraceManager()
    context = UiPathEvalContext()
    context.eval_set = str(
        Path(__file__).parent / "evals" / "eval-sets" / "default.json"
    )
    context.resume = True  # Test resume=True

    # Create a mock runtime
    mock_runtime = AsyncMock(spec=UiPathRuntimeProtocol)
    mock_runtime.execute = AsyncMock(
        return_value=UiPathRuntimeResult(
            output={"result": "success"}, status=UiPathRuntimeStatus.SUCCESSFUL
        )
    )

    # Create a mock factory
    mock_factory = AsyncMock(spec=UiPathRuntimeFactoryProtocol)
    mock_factory.new_runtime = AsyncMock(return_value=mock_runtime)

    eval_runtime = UiPathEvalRuntime(context, mock_factory, trace_manager, event_bus)

    eval_item = EvaluationItem(
        id="test-eval",
        name="Test Evaluation",
        inputs={"foo": "bar"},
        evaluation_criterias={},
    )

    # Act - Call execute_runtime directly
    with patch(
        "uipath._cli._evals._runtime.UiPathExecutionRuntime"
    ) as mock_execution_runtime_class:
        # Set up the mock to capture the execute call
        mock_execution_runtime_instance = AsyncMock()
        mock_execution_runtime_instance.execute = AsyncMock(
            return_value=UiPathRuntimeResult(
                output={"result": "success"}, status=UiPathRuntimeStatus.SUCCESSFUL
            )
        )
        mock_execution_runtime_class.return_value = mock_execution_runtime_instance

        await eval_runtime.execute_runtime(
            eval_item=eval_item, execution_id="test-exec-id", runtime=mock_runtime
        )

    # Assert - Verify that execute was called with UiPathExecuteOptions(resume=True)
    assert mock_execution_runtime_instance.execute.called
    call_args = mock_execution_runtime_instance.execute.call_args

    # Extract the options argument
    options = call_args.kwargs.get("options") or (
        call_args.args[1] if len(call_args.args) > 1 else None
    )

    # Assert that options were passed and resume=True
    assert options is not None, "UiPathExecuteOptions should be passed explicitly"
    assert isinstance(options, UiPathExecuteOptions)
    assert options.resume is True, "resume should be True when context.resume=True"


@pytest.mark.asyncio
async def test_resume_with_multiple_evaluations_raises_error():
    """Test that resume mode with multiple evaluations raises a ValueError."""
    # Arrange
    event_bus = EventBus()
    trace_manager = UiPathTraceManager()
    context = UiPathEvalContext()
    context.eval_set = str(
        Path(__file__).parent / "evals" / "eval-sets" / "multiple-evals.json"
    )
    context.resume = True  # Enable resume mode

    # Create a mock factory
    mock_factory = AsyncMock(spec=UiPathRuntimeFactoryProtocol)
    mock_runtime = AsyncMock(spec=UiPathRuntimeProtocol)
    mock_factory.new_runtime = AsyncMock(return_value=mock_runtime)

    eval_runtime = UiPathEvalRuntime(
        context=context,
        factory=mock_factory,
        event_bus=event_bus,
        trace_manager=trace_manager,
    )

    # Act & Assert
    with pytest.raises(
        ValueError,
        match=r"Resume mode is not supported with multiple evaluations.*Found 2 evaluations",
    ):
        await eval_runtime.initiate_evaluation(mock_runtime)
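All three tests are coroutines, so they assume an asyncio-capable pytest configuration (e.g. pytest-asyncio) is already in place for this repository; with that assumption, the new module can be exercised on its own with python -m pytest tests/cli/eval/test_eval_resume_flow.py -v.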