Skip to content

Commit 5aeebe5

Browse files
committed
fix the
1 parent 4fc1e22 commit 5aeebe5

File tree

8 files changed

+160
-87
lines changed

8 files changed

+160
-87
lines changed

src/uipath/_cli/_evals/_evaluate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ async def evaluate(
1616
trace_manager: UiPathTraceManager,
1717
eval_context: UiPathEvalContext,
1818
event_bus: EventBus,
19-
live_tracking_exporter: LlmOpsHttpExporter | None = None,
19+
live_tracking_exporter: LlmOpsHttpExporter,
2020
) -> UiPathRuntimeResult:
2121
async with UiPathEvalRuntime(
2222
factory=runtime_factory,

src/uipath/_cli/_evals/_progress_reporter.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ async def wrapper(self, *args, **kwargs):
7676
class StudioWebProgressReporter:
7777
"""Handles reporting evaluation progress to StudioWeb."""
7878

79-
def __init__(self, live_tracking_exporter: LlmOpsHttpExporter | None = None):
79+
def __init__(self, live_tracking_exporter: LlmOpsHttpExporter):
8080
self.live_tracking_exporter = live_tracking_exporter
8181
logging.getLogger("uipath._cli.middlewares").setLevel(logging.CRITICAL)
8282
console_logger = ConsoleLogger.get_instance()
@@ -240,6 +240,9 @@ async def create_eval_set_run_sw(
240240
scoped="org" if self._is_localhost() else "tenant",
241241
)
242242
eval_set_run_id = json.loads(response.content)["id"]
243+
logger.info(
244+
f"[TraceID] StudioWeb backend generated eval_set_run_id = {eval_set_run_id}"
245+
)
243246
return eval_set_run_id
244247

245248
@gracefully_handle_errors
@@ -376,15 +379,28 @@ async def handle_create_eval_set_run(self, payload: EvalSetRunCreatedEvent) -> N
376379
evaluators=payload.evaluators,
377380
is_coded=is_coded,
378381
)
382+
logger.info(
383+
f"[TraceID] Generated new eval_set_run_id = {eval_set_run_id} (not provided by user)"
384+
)
385+
else:
386+
logger.info(
387+
f"[TraceID] Received eval_set_run_id from event = {eval_set_run_id} (from payload)"
388+
)
389+
390+
# Always set trace_id to ensure it matches eval_set_run_id
391+
# This ensures consistency whether the ID was provided by user or generated
392+
if self.live_tracking_exporter and eval_set_run_id:
393+
logger.info(
394+
f"[TraceID] Setting live_tracking_exporter.trace_id = {eval_set_run_id} "
395+
f"(current value: {getattr(self.live_tracking_exporter, 'trace_id', None)})"
396+
)
397+
self.live_tracking_exporter.trace_id = eval_set_run_id
398+
379399
self.eval_set_run_ids[payload.execution_id] = eval_set_run_id
380400
current_span = trace.get_current_span()
381401
if current_span.is_recording():
382402
current_span.set_attribute("eval_set_run_id", eval_set_run_id)
383403

384-
# Set trace_id for live tracking to group all spans under eval_set_run_id
385-
if self.live_tracking_exporter and eval_set_run_id:
386-
self.live_tracking_exporter.trace_id = eval_set_run_id
387-
388404
logger.debug(
389405
f"Created eval set run with ID: {eval_set_run_id} (coded={is_coded})"
390406
)

src/uipath/_cli/_evals/_runtime.py

Lines changed: 47 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,9 @@
88
from time import time
99
from typing import (
1010
Any,
11-
Awaitable,
12-
Iterable,
1311
Iterator,
1412
Protocol,
1513
Sequence,
16-
Tuple,
1714
runtime_checkable,
1815
)
1916

@@ -203,6 +200,15 @@ def _upsert_span_async(
203200

204201
def _upsert():
205202
try:
203+
span_type = (
204+
span.attributes.get("span_type") if span.attributes else "unknown"
205+
)
206+
span_name = span.name if hasattr(span, "name") else "unknown"
207+
exporter_trace_id = getattr(self.exporter, "trace_id", None)
208+
logger.debug(
209+
f"[TraceID] Upserting span '{span_name}' (type={span_type}) "
210+
f"with exporter.trace_id = {exporter_trace_id}"
211+
)
206212
if status_override:
207213
self.exporter.upsert_span(span, status_override=status_override)
208214
else:
@@ -337,7 +343,7 @@ def __init__(
337343
factory: UiPathRuntimeFactoryProtocol,
338344
trace_manager: UiPathTraceManager,
339345
event_bus: EventBus,
340-
live_tracking_exporter: "LlmOpsHttpExporter | None" = None,
346+
live_tracking_exporter: LlmOpsHttpExporter,
341347
):
342348
self.context: UiPathEvalContext = context
343349
# Wrap the factory to support model settings overrides
@@ -353,8 +359,11 @@ def __init__(
353359
self.trace_manager.tracer_provider.add_span_processor(span_processor)
354360

355361
# Live tracking processor for real-time span updates
356-
# Use provided exporter or create a new one
357-
self.live_tracking_exporter = live_tracking_exporter or LlmOpsHttpExporter()
362+
self.live_tracking_exporter = live_tracking_exporter
363+
logger.info(
364+
f"[TraceID] UiPathEvalRuntime initialized with live_tracking_exporter.trace_id = "
365+
f"{getattr(live_tracking_exporter, 'trace_id', None)}"
366+
)
358367
live_tracking_processor = LiveTrackingSpanProcessor(self.live_tracking_exporter)
359368
self.trace_manager.tracer_span_processors.append(live_tracking_processor)
360369
self.trace_manager.tracer_provider.add_span_processor(live_tracking_processor)
@@ -400,23 +409,38 @@ def _mocker_cache(self) -> Iterator[None]:
400409
cache_manager.flush()
401410
cache_manager_context.set(None)
402411

403-
async def initiate_evaluation(
404-
self,
405-
runtime: UiPathRuntimeProtocol,
406-
) -> Tuple[
407-
EvaluationSet,
408-
list[BaseEvaluator[Any, Any, Any]],
409-
Iterable[Awaitable[EvaluationRunResult]],
410-
]:
412+
async def execute(self) -> UiPathRuntimeResult:
413+
logger.info("=" * 80)
414+
logger.info("EVAL RUNTIME: Starting evaluation execution")
415+
logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}")
416+
logger.info(f"EVAL RUNTIME: Job ID: {self.context.job_id}")
417+
logger.info(f"EVAL RUNTIME: Resume mode: {self.context.resume}")
418+
if self.context.resume:
419+
logger.info(
420+
"🟢 EVAL RUNTIME: RESUME MODE ENABLED - Will resume from suspended state"
421+
)
422+
logger.info("=" * 80)
423+
424+
# Configure model settings override before creating runtime
425+
await self._configure_model_settings_override()
426+
427+
runtime = await self.factory.new_runtime(
428+
entrypoint=self.context.entrypoint or "",
429+
runtime_id=self.execution_id,
430+
)
431+
432+
# CRITICAL: Load eval set and publish CREATE_EVAL_SET_RUN event BEFORE creating any spans
433+
# This ensures eval_set_run_id is created and trace_id is set on the exporter early
411434
if self.context.eval_set is None:
412435
raise ValueError("eval_set must be provided for evaluation runs")
413436

414-
# Load eval set (path is already resolved in cli_eval.py)
415437
evaluation_set, _ = EvalHelpers.load_eval_set(
416438
self.context.eval_set, self.context.eval_ids
417439
)
418440
evaluators = await self._load_evaluators(evaluation_set, runtime)
419441

442+
# Publish CREATE_EVAL_SET_RUN event and WAIT for it to complete
443+
# This creates the eval_set_run_id in progress reporter and sets trace_id on exporter
420444
await self.event_bus.publish(
421445
EvaluationEvents.CREATE_EVAL_SET_RUN,
422446
EvalSetRunCreatedEvent(
@@ -429,33 +453,9 @@ async def initiate_evaluation(
429453
),
430454
)
431455

432-
return (
433-
evaluation_set,
434-
evaluators,
435-
(
436-
self._execute_eval(eval_item, evaluators, runtime)
437-
for eval_item in evaluation_set.evaluations
438-
),
439-
)
440-
441-
async def execute(self) -> UiPathRuntimeResult:
442-
logger.info("=" * 80)
443-
logger.info("EVAL RUNTIME: Starting evaluation execution")
444-
logger.info(f"EVAL RUNTIME: Execution ID: {self.execution_id}")
445-
logger.info(f"EVAL RUNTIME: Job ID: {self.context.job_id}")
446-
logger.info(f"EVAL RUNTIME: Resume mode: {self.context.resume}")
447-
if self.context.resume:
448-
logger.info(
449-
"🟢 EVAL RUNTIME: RESUME MODE ENABLED - Will resume from suspended state"
450-
)
451-
logger.info("=" * 80)
452-
453-
# Configure model settings override before creating runtime
454-
await self._configure_model_settings_override()
455-
456-
runtime = await self.factory.new_runtime(
457-
entrypoint=self.context.entrypoint or "",
458-
runtime_id=self.execution_id,
456+
logger.info(
457+
f"[TraceID] After CREATE_EVAL_SET_RUN event, exporter.trace_id = "
458+
f"{getattr(self.live_tracking_exporter, 'trace_id', None)}"
459459
)
460460
try:
461461
with self._mocker_cache():
@@ -475,11 +475,11 @@ async def execute(self) -> UiPathRuntimeResult:
475475
"Evaluation Set Run", attributes=span_attributes
476476
) as span:
477477
try:
478-
(
479-
evaluation_set,
480-
evaluators,
481-
evaluation_iterable,
482-
) = await self.initiate_evaluation(runtime)
478+
# Evaluation set and evaluators already loaded and event published above
479+
evaluation_iterable = (
480+
self._execute_eval(eval_item, evaluators, runtime)
481+
for eval_item in evaluation_set.evaluations
482+
)
483483
workers = self.context.workers or 1
484484
assert workers >= 1
485485
eval_run_result_list = await execute_parallel(

src/uipath/_cli/cli_eval.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
import ast
22
import asyncio
3+
import logging
34
import os
45
from typing import Any
56

67
import click
78
from uipath.core.tracing import UiPathTraceManager
89
from uipath.runtime import UiPathRuntimeContext, UiPathRuntimeFactoryRegistry
910

11+
logger = logging.getLogger(__name__)
12+
1013
from uipath._cli._evals._console_progress_reporter import ConsoleProgressReporter
1114
from uipath._cli._evals._evaluate import evaluate
1215
from uipath._cli._evals._progress_reporter import StudioWebProgressReporter
@@ -209,6 +212,9 @@ async def execute_eval():
209212
# Set trace_id early if eval_set_run_id is already known
210213
if eval_context.eval_set_run_id:
211214
live_tracking_exporter.trace_id = eval_context.eval_set_run_id
215+
logger.info(
216+
f"[TraceID] Set live_tracking_exporter.trace_id = {eval_context.eval_set_run_id} (user-provided via --eval-set-run-id)"
217+
)
212218

213219
if should_register_progress_reporter:
214220
progress_reporter = StudioWebProgressReporter(
@@ -233,11 +239,7 @@ async def execute_eval():
233239
eval_context.job_id = ctx.job_id
234240

235241
if ctx.job_id:
236-
# Create exporter with trace_id set to eval_set_run_id
237-
job_exporter = LlmOpsHttpExporter()
238-
if eval_context.eval_set_run_id:
239-
job_exporter.trace_id = eval_context.eval_set_run_id
240-
trace_manager.add_span_exporter(job_exporter)
242+
trace_manager.add_span_exporter(live_tracking_exporter)
241243

242244
if trace_file:
243245
trace_manager.add_span_exporter(

tests/cli/eval/test_eval_runtime_metadata.py

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@
2929
UiPathEvalRuntime,
3030
)
3131
from uipath._events._event_bus import EventBus
32+
from uipath.tracing import LlmOpsHttpExporter
33+
34+
35+
@pytest.fixture
36+
def live_tracking_exporter():
37+
"""Create a live tracking exporter for tests."""
38+
return LlmOpsHttpExporter()
3239

3340

3441
class MockRuntimeSchema(UiPathRuntimeSchema):
@@ -164,7 +171,9 @@ async def create_runtime():
164171
return BaseTestRuntime()
165172

166173
factory = MockFactory(create_runtime)
167-
return UiPathEvalRuntime(context, factory, trace_manager, event_bus)
174+
return UiPathEvalRuntime(
175+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
176+
)
168177

169178
def test_finds_model_in_direct_runtime(self, eval_runtime):
170179
"""Test finding agent model directly on runtime."""
@@ -228,7 +237,9 @@ async def create_runtime():
228237
factory = MockFactory(create_runtime)
229238
event_bus = EventBus()
230239
trace_manager = UiPathTraceManager()
231-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
240+
eval_runtime = UiPathEvalRuntime(
241+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
242+
)
232243

233244
runtime = await create_runtime()
234245
model = await eval_runtime._get_agent_model(runtime)
@@ -243,7 +254,9 @@ async def create_runtime():
243254
factory = MockFactory(create_runtime)
244255
event_bus = EventBus()
245256
trace_manager = UiPathTraceManager()
246-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
257+
eval_runtime = UiPathEvalRuntime(
258+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
259+
)
247260

248261
runtime = await create_runtime()
249262
model = await eval_runtime._get_agent_model(runtime)
@@ -258,7 +271,9 @@ async def create_runtime():
258271
factory = MockFactory(create_runtime)
259272
event_bus = EventBus()
260273
trace_manager = UiPathTraceManager()
261-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
274+
eval_runtime = UiPathEvalRuntime(
275+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
276+
)
262277

263278
runtime = await create_runtime()
264279

@@ -277,7 +292,9 @@ async def create_good_runtime():
277292
factory = MockFactory(create_good_runtime)
278293
event_bus = EventBus()
279294
trace_manager = UiPathTraceManager()
280-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
295+
eval_runtime = UiPathEvalRuntime(
296+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
297+
)
281298

282299
# Create a bad runtime that raises during get_agent_model
283300
class BadRuntime(BaseTestRuntime):
@@ -310,7 +327,9 @@ async def create_runtime():
310327
factory = MockFactory(create_runtime)
311328
event_bus = EventBus()
312329
trace_manager = UiPathTraceManager()
313-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
330+
eval_runtime = UiPathEvalRuntime(
331+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
332+
)
314333

315334
runtime = await create_runtime()
316335
schema = await eval_runtime.get_schema(runtime)
@@ -326,7 +345,9 @@ async def create_runtime():
326345
factory = MockFactory(create_runtime)
327346
event_bus = EventBus()
328347
trace_manager = UiPathTraceManager()
329-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
348+
eval_runtime = UiPathEvalRuntime(
349+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
350+
)
330351

331352
runtime = await create_runtime()
332353

@@ -346,7 +367,9 @@ async def create_runtime():
346367
factory = MockFactory(create_runtime)
347368
event_bus = EventBus()
348369
trace_manager = UiPathTraceManager()
349-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
370+
eval_runtime = UiPathEvalRuntime(
371+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
372+
)
350373

351374
runtime = await create_runtime()
352375

@@ -393,7 +416,9 @@ async def create_runtime():
393416
factory = MockFactory(create_runtime)
394417
event_bus = EventBus()
395418
trace_manager = UiPathTraceManager()
396-
eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus)
419+
eval_runtime = UiPathEvalRuntime(
420+
context, factory, trace_manager, event_bus, LlmOpsHttpExporter()
421+
)
397422

398423
model = await eval_runtime._get_agent_model(resumable_runtime)
399424
assert model == "gpt-4o-from-agent-json"

0 commit comments

Comments
 (0)