diff --git a/pyproject.toml b/pyproject.toml
index 395539a26..d331ae2f6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath"
-version = "2.5.4"
+version = "2.5.5"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/src/uipath/_cli/_evals/_progress_reporter.py b/src/uipath/_cli/_evals/_progress_reporter.py
index 6c924ccc6..4bf4e74a8 100644
--- a/src/uipath/_cli/_evals/_progress_reporter.py
+++ b/src/uipath/_cli/_evals/_progress_reporter.py
@@ -422,8 +422,8 @@ async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
         try:
             eval_run_id = self.eval_run_ids.get(payload.execution_id)
 
-            # Use evalRunId as the trace_id for agent execution spans
-            # This makes all agent spans children of the eval run trace
+            # Use evalRunId as the trace_id for both agent execution and evaluator spans
+            # so that every exported span is grouped under the eval run's trace
             if eval_run_id:
                 self.spans_exporter.trace_id = eval_run_id
             else:
@@ -433,8 +433,14 @@ async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None:
                         self.eval_set_execution_id
                     )
 
+            # Export agent execution spans
             self.spans_exporter.export(payload.spans)
 
+            # Export evaluator spans (including LLM calls made by evaluators)
+            # with the same trace_id so they can be fetched together
+            if payload.evaluator_spans:
+                self.spans_exporter.export(payload.evaluator_spans)
+
             for eval_result in payload.eval_results:
                 evaluator_id = eval_result.evaluator_id
                 if evaluator_id in self.evaluator_scores:
diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py
index 9b21f4d60..85141ea6a 100644
--- a/src/uipath/_cli/_evals/_runtime.py
+++ b/src/uipath/_cli/_evals/_runtime.py
@@ -778,6 +778,10 @@ async def _execute_eval(
                         )
                     )
 
+                # Collect evaluator spans after all evaluators have run
+                # These spans include LLM calls made by evaluators
+                evaluator_spans, _ = self._get_and_clear_execution_data(execution_id)
+
                 exception_details = None
                 agent_output = agent_execution_output.result.output
                 if agent_execution_output.result.status == UiPathRuntimeStatus.FAULTED:
@@ -803,6 +807,7 @@ async def _execute_eval(
                         agent_output=agent_output,
                         agent_execution_time=agent_execution_output.execution_time,
                         spans=agent_execution_output.spans,
+                        evaluator_spans=evaluator_spans,
                         logs=agent_execution_output.logs,
                         exception_details=exception_details,
                     ),
@@ -821,6 +826,11 @@ async def _execute_eval(
                         )
                     )
 
+                # Collect any evaluator spans that were generated before the exception
+                exception_evaluator_spans, _ = self._get_and_clear_execution_data(
+                    execution_id
+                )
+
                 eval_run_updated_event = EvalRunUpdatedEvent(
                     execution_id=execution_id,
                     eval_item=eval_item,
@@ -830,6 +840,7 @@ async def _execute_eval(
                     agent_execution_time=0.0,
                     exception_details=exception_details,
                     spans=[],
+                    evaluator_spans=exception_evaluator_spans,
                     logs=[],
                 )
                 if isinstance(e, EvaluationRuntimeException):
diff --git a/src/uipath/_events/_events.py b/src/uipath/_events/_events.py
index 3ac0c4967..632b1ce5f 100644
--- a/src/uipath/_events/_events.py
+++ b/src/uipath/_events/_events.py
@@ -49,6 +49,7 @@ class EvalRunUpdatedEvent(BaseModel):
     agent_output: Any
     agent_execution_time: float
     spans: list[ReadableSpan]
+    evaluator_spans: list[ReadableSpan] = []
     logs: list[logging.LogRecord]
     exception_details: EvalItemExceptionDetails | None = None
 
diff --git a/uv.lock b/uv.lock
index c63e780a6..1cecf049f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2486,7 +2486,7 @@ wheels = [
 
 [[package]]
 name = "uipath"
-version = "2.5.4"
+version = "2.5.5"
 source = { editable = "." }
 dependencies = [
     { name = "applicationinsights" },