diff --git a/pyproject.toml b/pyproject.toml index 395539a26..d331ae2f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.5.4" +version = "2.5.5" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/src/uipath/_cli/_evals/_progress_reporter.py b/src/uipath/_cli/_evals/_progress_reporter.py index 6c924ccc6..4bf4e74a8 100644 --- a/src/uipath/_cli/_evals/_progress_reporter.py +++ b/src/uipath/_cli/_evals/_progress_reporter.py @@ -422,8 +422,8 @@ async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None: try: eval_run_id = self.eval_run_ids.get(payload.execution_id) - # Use evalRunId as the trace_id for agent execution spans - # This makes all agent spans children of the eval run trace + # Use evalRunId as the trace_id for agent execution and evaluator spans + # This makes all spans children of the eval run trace if eval_run_id: self.spans_exporter.trace_id = eval_run_id else: @@ -433,8 +433,14 @@ async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None: self.eval_set_execution_id ) + # Export agent execution spans self.spans_exporter.export(payload.spans) + # Export evaluator spans (including LLM calls made by evaluators) + # with the same trace_id so they can be fetched together + if payload.evaluator_spans: + self.spans_exporter.export(payload.evaluator_spans) + for eval_result in payload.eval_results: evaluator_id = eval_result.evaluator_id if evaluator_id in self.evaluator_scores: diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 9b21f4d60..85141ea6a 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -778,6 +778,10 @@ async def _execute_eval( ) ) + # Collect evaluator spans after all evaluators have run + # These spans include LLM calls made by evaluators + evaluator_spans, _ = self._get_and_clear_execution_data(execution_id) + exception_details = None agent_output = agent_execution_output.result.output if agent_execution_output.result.status == UiPathRuntimeStatus.FAULTED: @@ -803,6 +807,7 @@ async def _execute_eval( agent_output=agent_output, agent_execution_time=agent_execution_output.execution_time, spans=agent_execution_output.spans, + evaluator_spans=evaluator_spans, logs=agent_execution_output.logs, exception_details=exception_details, ), @@ -821,6 +826,11 @@ async def _execute_eval( ) ) + # Collect any evaluator spans that were generated before the exception + exception_evaluator_spans, _ = self._get_and_clear_execution_data( + execution_id + ) + eval_run_updated_event = EvalRunUpdatedEvent( execution_id=execution_id, eval_item=eval_item, @@ -830,6 +840,7 @@ async def _execute_eval( agent_execution_time=0.0, exception_details=exception_details, spans=[], + evaluator_spans=exception_evaluator_spans, logs=[], ) if isinstance(e, EvaluationRuntimeException): diff --git a/src/uipath/_events/_events.py b/src/uipath/_events/_events.py index 3ac0c4967..632b1ce5f 100644 --- a/src/uipath/_events/_events.py +++ b/src/uipath/_events/_events.py @@ -49,6 +49,7 @@ class EvalRunUpdatedEvent(BaseModel): agent_output: Any agent_execution_time: float spans: list[ReadableSpan] + evaluator_spans: list[ReadableSpan] = [] logs: list[logging.LogRecord] exception_details: EvalItemExceptionDetails | None = None diff --git a/uv.lock b/uv.lock index c63e780a6..1cecf049f 100644 --- a/uv.lock +++ b/uv.lock @@ -2486,7 +2486,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.5.4" +version = "2.5.5" source = { editable = "." } dependencies = [ { name = "applicationinsights" },