Skip to content

Commit a583ab4

Browse files
committed
fix: decouple mocking from evals
1 parent 5e7912c commit a583ab4

File tree

13 files changed

+234
-193
lines changed

13 files changed

+234
-193
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath"
3-
version = "2.5.9"
3+
version = "2.5.10"
44
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

src/uipath/_cli/_evals/_models/_evaluation_set.py

Lines changed: 9 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,15 @@
1-
from enum import Enum, IntEnum
2-
from typing import Annotated, Any, Literal, Union
1+
from enum import IntEnum
2+
from typing import Any, Literal
33

44
from pydantic import BaseModel, ConfigDict, Field
55
from pydantic.alias_generators import to_camel
66

7+
from uipath._cli._evals.mocks.strategy import (
8+
InputMockingStrategy,
9+
MockingStrategy,
10+
ToolSimulation,
11+
)
12+
713

814
class EvaluatorReference(BaseModel):
915
"""Reference to an evaluator with optional weight.
@@ -52,32 +58,6 @@ def serialize(instance: "EvaluatorReference") -> Any:
5258
)
5359

5460

55-
class EvaluationSimulationTool(BaseModel):
56-
name: str = Field(..., alias="name")
57-
58-
59-
class MockingStrategyType(str, Enum):
60-
LLM = "llm"
61-
MOCKITO = "mockito"
62-
UNKNOWN = "unknown"
63-
64-
65-
class BaseMockingStrategy(BaseModel):
66-
pass
67-
68-
69-
class ModelSettings(BaseModel):
70-
"""Model Generation Parameters."""
71-
72-
model: str = Field(..., alias="model")
73-
temperature: float | str | None = Field(default=None, alias="temperature")
74-
top_p: float | None = Field(default=None, alias="topP")
75-
top_k: int | None = Field(default=None, alias="topK")
76-
frequency_penalty: float | None = Field(default=None, alias="frequencyPenalty")
77-
presence_penalty: float | None = Field(default=None, alias="presencePenalty")
78-
max_tokens: int | None = Field(default=None, alias="maxTokens")
79-
80-
8161
class EvaluationSetModelSettings(BaseModel):
8262
"""Model setting overrides within evaluation sets with ID."""
8363

@@ -88,75 +68,6 @@ class EvaluationSetModelSettings(BaseModel):
8868
temperature: float | str | None = Field(default=None, alias="temperature")
8969

9070

91-
class LLMMockingStrategy(BaseMockingStrategy):
92-
type: Literal[MockingStrategyType.LLM] = MockingStrategyType.LLM
93-
prompt: str = Field(..., alias="prompt")
94-
tools_to_simulate: list[EvaluationSimulationTool] = Field(
95-
..., alias="toolsToSimulate"
96-
)
97-
model: ModelSettings | None = Field(None, alias="model")
98-
99-
model_config = ConfigDict(
100-
validate_by_name=True, validate_by_alias=True, extra="allow"
101-
)
102-
103-
104-
class InputMockingStrategy(BaseModel):
105-
prompt: str = Field(..., alias="prompt")
106-
model: ModelSettings | None = Field(None, alias="model")
107-
108-
model_config = ConfigDict(
109-
validate_by_name=True, validate_by_alias=True, extra="allow"
110-
)
111-
112-
113-
class MockingArgument(BaseModel):
114-
args: list[Any] = Field(default_factory=lambda: [], alias="args")
115-
kwargs: dict[str, Any] = Field(default_factory=lambda: {}, alias="kwargs")
116-
117-
118-
class MockingAnswerType(str, Enum):
119-
RETURN = "return"
120-
RAISE = "raise"
121-
122-
123-
class MockingAnswer(BaseModel):
124-
type: MockingAnswerType
125-
value: Any = Field(..., alias="value")
126-
127-
128-
class MockingBehavior(BaseModel):
129-
function: str = Field(..., alias="function")
130-
arguments: MockingArgument = Field(..., alias="arguments")
131-
then: list[MockingAnswer] = Field(..., alias="then")
132-
133-
134-
class MockitoMockingStrategy(BaseMockingStrategy):
135-
type: Literal[MockingStrategyType.MOCKITO] = MockingStrategyType.MOCKITO
136-
behaviors: list[MockingBehavior] = Field(..., alias="config")
137-
138-
model_config = ConfigDict(
139-
validate_by_name=True, validate_by_alias=True, extra="allow"
140-
)
141-
142-
143-
KnownMockingStrategy = Annotated[
144-
Union[LLMMockingStrategy, MockitoMockingStrategy],
145-
Field(discriminator="type"),
146-
]
147-
148-
149-
class UnknownMockingStrategy(BaseMockingStrategy):
150-
type: str = Field(..., alias="type")
151-
152-
model_config = ConfigDict(
153-
validate_by_name=True, validate_by_alias=True, extra="allow"
154-
)
155-
156-
157-
MockingStrategy = Union[KnownMockingStrategy, UnknownMockingStrategy]
158-
159-
16071
class EvaluationItem(BaseModel):
16172
"""Individual evaluation item within an evaluation set."""
16273

@@ -201,7 +112,7 @@ class LegacyEvaluationItem(BaseModel):
201112
simulation_instructions: str | None = Field(
202113
default=None, alias="simulationInstructions"
203114
)
204-
tools_to_simulate: list[EvaluationSimulationTool] = Field(
115+
tools_to_simulate: list[ToolSimulation] = Field(
205116
default_factory=list, alias="toolsToSimulate"
206117
)
207118

src/uipath/_cli/_evals/_runtime.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -614,7 +614,9 @@ async def _execute_eval(
614614
eval_item, runtime
615615
)
616616

617-
set_execution_context(eval_item, self.span_collector, execution_id)
617+
set_execution_context(
618+
eval_item.mocking_strategy, self.span_collector, execution_id
619+
)
618620

619621
await self.event_bus.publish(
620622
EvaluationEvents.CREATE_EVAL_RUN,
@@ -868,8 +870,16 @@ async def _generate_input_for_eval(
868870
self, eval_item: EvaluationItem, runtime: UiPathRuntimeProtocol
869871
) -> EvaluationItem:
870872
"""Use LLM to generate a mock input for an evaluation item."""
873+
expected_output = (
874+
getattr(eval_item, "evaluation_criterias", None)
875+
or getattr(eval_item, "expected_output", None)
876+
or {}
877+
)
871878
generated_input = await generate_llm_input(
872-
eval_item, (await self.get_schema(runtime)).input
879+
eval_item.input_mocking_strategy,
880+
(await self.get_schema(runtime)).input,
881+
expected_behavior=eval_item.expected_agent_behavior or "",
882+
expected_output=expected_output,
873883
)
874884
updated_eval_item = eval_item.model_copy(update={"inputs": generated_input})
875885
return updated_eval_item

src/uipath/_cli/_evals/mocks/input_mocker.py

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,9 @@
44
from datetime import datetime
55
from typing import Any
66

7-
from uipath._cli._evals._models._evaluation_set import EvaluationItem
7+
from uipath._cli._evals.mocks.strategy import (
8+
InputMockingStrategy,
9+
)
810
from uipath.platform import UiPath
911
from uipath.tracing import traced
1012

@@ -54,8 +56,10 @@ def get_input_mocking_prompt(
5456

5557
@traced(name="__mocker__", recording=False)
5658
async def generate_llm_input(
57-
evaluation_item: EvaluationItem,
59+
mocking_strategy: InputMockingStrategy,
5860
input_schema: dict[str, Any],
61+
expected_behavior: str,
62+
expected_output: dict[str, Any],
5963
) -> dict[str, Any]:
6064
"""Generate synthetic input using an LLM based on the evaluation context."""
6165
from .mocks import cache_manager_context
@@ -68,18 +72,12 @@ async def generate_llm_input(
6872
if "additionalProperties" not in input_schema:
6973
input_schema["additionalProperties"] = False
7074

71-
expected_output = (
72-
getattr(evaluation_item, "evaluation_criterias", None)
73-
or getattr(evaluation_item, "expected_output", None)
74-
or {}
75-
)
76-
7775
prompt_generation_args = {
7876
"input_schema": json.dumps(input_schema),
79-
"input_generation_instructions": evaluation_item.input_mocking_strategy.prompt
80-
if evaluation_item.input_mocking_strategy
77+
"input_generation_instructions": mocking_strategy.prompt
78+
if mocking_strategy
8179
else "",
82-
"expected_behavior": evaluation_item.expected_agent_behavior or "",
80+
"expected_behavior": expected_behavior or "",
8381
"expected_output": json.dumps(expected_output),
8482
}
8583

@@ -94,11 +92,7 @@ async def generate_llm_input(
9492
},
9593
}
9694

97-
model_parameters = (
98-
evaluation_item.input_mocking_strategy.model
99-
if evaluation_item.input_mocking_strategy
100-
else None
101-
)
95+
model_parameters = mocking_strategy.model if mocking_strategy else None
10296
completion_kwargs = (
10397
model_parameters.model_dump(by_alias=False, exclude_none=True)
10498
if model_parameters

src/uipath/_cli/_evals/mocks/llm_mocker.py

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,13 @@
66

77
from pydantic import BaseModel, TypeAdapter
88

9+
from uipath._cli._evals.mocks.strategy import (
10+
LLMMockingStrategy,
11+
MockingStrategy,
12+
)
913
from uipath.tracing import traced
1014
from uipath.tracing._utils import _SpanUtils
1115

12-
from .._models._evaluation_set import EvaluationItem, LLMMockingStrategy
1316
from .._models._mocks import ExampleCall
1417
from .mocker import (
1518
Mocker,
@@ -74,29 +77,27 @@ def pydantic_to_dict_safe(obj: Any) -> Any:
7477
class LLMMocker(Mocker):
7578
"""LLM Based Mocker."""
7679

77-
def __init__(self, evaluation_item: EvaluationItem):
80+
def __init__(self, mocking_strategy: MockingStrategy):
7881
"""LLM Mocker constructor."""
79-
self.evaluation_item = evaluation_item
80-
assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy)
82+
self.mocking_strategy = mocking_strategy
83+
assert isinstance(self.mocking_strategy, LLMMockingStrategy)
8184

8285
@traced(name="__mocker__", recording=False)
8386
async def response(
8487
self, func: Callable[[T], R], params: dict[str, Any], *args: T, **kwargs
8588
) -> R:
8689
"""Respond with mocked response generated by an LLM."""
87-
assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy)
90+
assert isinstance(self.mocking_strategy, LLMMockingStrategy)
8891

8992
function_name = params.get("name") or func.__name__
90-
if function_name in [
91-
x.name for x in self.evaluation_item.mocking_strategy.tools_to_simulate
92-
]:
93+
if function_name in [x.name for x in self.mocking_strategy.tools_to_simulate]:
9394
from uipath.platform import UiPath
9495
from uipath.platform.chat._llm_gateway_service import _cleanup_schema
9596

9697
from .mocks import (
9798
cache_manager_context,
98-
evaluation_context,
9999
execution_id_context,
100+
mocking_strategy_context,
100101
span_collector_context,
101102
)
102103

@@ -127,10 +128,10 @@ async def response(
127128
]
128129

129130
test_run_history = "(empty)"
130-
eval_item = evaluation_context.get()
131+
strategy = mocking_strategy_context.get()
131132
span_collector = span_collector_context.get()
132133
execution_id = execution_id_context.get()
133-
if eval_item and span_collector and execution_id:
134+
if strategy and span_collector and execution_id:
134135
spans = span_collector.get_spans(execution_id)
135136
test_run_history = _SpanUtils.spans_to_llm_context(spans)
136137

@@ -155,16 +156,16 @@ async def response(
155156
},
156157
"agentInfo": { # This is incomplete
157158
# "agentName": self.evaluation_item.name, # to be obtained.
158-
"actionName": self.evaluation_item.name, # Not sure if this is right?
159-
"userInput": self.evaluation_item.inputs,
159+
# "actionName": self.evaluation_item.name, # Not sure if this is right?
160+
# "userInput": self.evaluation_item.inputs,
160161
},
161-
"testRunProctorInstructions": self.evaluation_item.mocking_strategy.prompt,
162+
"testRunProctorInstructions": self.mocking_strategy.prompt,
162163
}
163164
prompt_generation_args = {
164165
k: json.dumps(pydantic_to_dict_safe(v))
165166
for k, v in prompt_input.items()
166167
}
167-
model_parameters = self.evaluation_item.mocking_strategy.model
168+
model_parameters = self.mocking_strategy.model
168169
completion_kwargs = (
169170
model_parameters.model_dump(by_alias=False, exclude_none=True)
170171
if model_parameters
Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,25 +1,25 @@
11
"""Mocker Factory."""
22

3-
from uipath._cli._evals._models._evaluation_set import (
4-
EvaluationItem,
5-
LLMMockingStrategy,
6-
MockitoMockingStrategy,
7-
)
83
from uipath._cli._evals.mocks.llm_mocker import LLMMocker
94
from uipath._cli._evals.mocks.mocker import Mocker
105
from uipath._cli._evals.mocks.mockito_mocker import MockitoMocker
6+
from uipath._cli._evals.mocks.strategy import (
7+
LLMMockingStrategy,
8+
MockingStrategy,
9+
MockitoMockingStrategy,
10+
)
1111

1212

1313
class MockerFactory:
1414
"""Mocker factory."""
1515

1616
@staticmethod
17-
def create(evaluation_item: EvaluationItem) -> Mocker:
17+
def create(strategy: MockingStrategy) -> Mocker:
1818
"""Create a mocker instance."""
19-
match evaluation_item.mocking_strategy:
19+
match strategy:
2020
case LLMMockingStrategy():
21-
return LLMMocker(evaluation_item)
21+
return LLMMocker(strategy)
2222
case MockitoMockingStrategy():
23-
return MockitoMocker(evaluation_item)
23+
return MockitoMocker(strategy)
2424
case _:
2525
raise ValueError("Unknown mocking strategy")

0 commit comments

Comments
 (0)