 from magentic_ui.types import CheckpointEvent
 from magentic_ui.agents import WebSurfer, CoderAgent, FileSurfer
 from magentic_ui.teams import GroupChat
-from magentic_ui.agents.users import MetadataUserProxy
+from magentic_ui.agents.users import MetadataUserProxy, DummyUserProxy
 from magentic_ui.tools.playwright.browser import VncDockerPlaywrightBrowser
 from magentic_ui.tools.playwright.browser.utils import get_available_port
 from magentic_ui.approval_guard import (
@@ -106,7 +106,11 @@ def __init__(
         self,
         name: str = "MagenticUISimUserSystem",
         simulated_user_type: Literal[
-            "co-planning", "co-execution", "co-planning-and-execution", "none"
+            "co-planning",
+            "co-execution",
+            "co-planning-and-execution",
+            "none",
+            "dummy",
         ] = "none",
         how_helpful_user_proxy: Literal["strict", "soft", "no_hints"] = "soft",
         web_surfer_only: bool = False,
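Note: with the new "dummy" literal, callers opt into the mode through this constructor. A minimal usage sketch, assuming the remaining parameters keep the defaults shown above (the full signature is not visible in this hunk):

    # Usage sketch; only simulated_user_type="dummy" is new in this commit.
    system = MagenticUISimUserSystem(
        simulated_user_type="dummy",
        how_helpful_user_proxy="soft",
        web_surfer_only=False,
    )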
@@ -217,7 +221,7 @@ async def _runner() -> Tuple[str, List[str]]:
             if self.simulated_user_type in ["co-execution", "none"]
             else True,
             autonomous_execution=True
-            if self.simulated_user_type in ["co-planning", "none"]
+            if self.simulated_user_type in ["co-planning", "none", "dummy"]
             else False,
             allow_follow_up_input=False,
             final_answer_prompt=FINAL_ANSWER_PROMPT,
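Note: the stacked conditional expressions are easy to misread. The changed one simply means that "dummy" runs fully autonomously, exactly like "co-planning" and "none". Restated as a small predicate (the helper name is hypothetical, for illustration only):

    # Behaviorally equivalent to the updated conditional expression above:
    def is_autonomous(simulated_user_type: str) -> bool:
        return simulated_user_type in ["co-planning", "none", "dummy"]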
@@ -312,6 +316,10 @@ def get_model_client(

         if self.simulated_user_type == "none":
             user_proxy = None
+        elif self.simulated_user_type == "dummy":
+            user_proxy = DummyUserProxy(
+                name="user_proxy",
+            )
         else:
             user_proxy = MetadataUserProxy(
                 name="user_proxy",
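Note: the diff only shows DummyUserProxy being constructed, not its definition. As a rough illustration of the idea (a user stand-in that never consults a model or a human), something along these lines would do; the names and interface below are assumptions, not magentic_ui's actual code:

    # Illustrative sketch only; not the real magentic_ui implementation.
    class DummyUserProxy:
        """A simulated user that always gives a fixed, neutral reply."""

        def __init__(self, name: str = "user_proxy") -> None:
            self.name = name

        async def reply(self, request: str) -> str:
            # No LLM call, no hints, no human input: just let the team proceed.
            return "No further input."

Consistent with that reading, the branch above passes only a name, whereas the MetadataUserProxy branch presumably continues with model-client and metadata arguments beyond this hunk.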
@@ -346,7 +354,7 @@ def get_model_client(
         from autogen_core import CancellationToken
         from autogen_core.models import UserMessage

-        prompt = f"""Rewrite the following helpful hints to help solve the task, but remove any information that directly reveals the answer. \nKeep the hints as close to the original as possible but remove any information that directly reveals the answer.\nHelpful hints: {task_metadata}\n\nAnswer: {getattr(task, 'ground_truth', '')}\n\nDo not include anything else in your response except the rewritten hints.\nRewritten helpful hints:"""
+        prompt = f"""Rewrite the following helpful hints to help solve the task, but remove any information that directly reveals the answer. \nKeep the hints as close to the original as possible but remove any information that directly reveals the answer.\nHelpful hints: {task_metadata}\n\nAnswer: {getattr(task, "ground_truth", "")}\n\nDo not include anything else in your response except the rewritten hints.\nRewritten helpful hints:"""
         result = await model_client_orch.create(
             messages=[UserMessage(content=prompt, source="user")],
             cancellation_token=CancellationToken(),
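Note: the only change in this hunk is quote style inside the getattr call. Because the enclosing f-string is triple-double-quoted, a lone double quote inside the replacement field is legal on every Python version, so this is purely cosmetic, likely from an automated formatter pass. A self-contained check (the Task class here is a hypothetical stand-in):

    class Task:
        ground_truth = "42"

    task = Task()
    # Both spellings evaluate identically; the commit only normalizes quotes.
    assert getattr(task, 'ground_truth', '') == getattr(task, "ground_truth", "")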
@@ -410,16 +418,17 @@ def get_model_client(
                 # Convert list of logevent objects to list of dicts
                 messages_json = [msg.model_dump() for msg in messages_so_far]
                 await f.write(json.dumps(messages_json, indent=2))
+                await f.flush()  # Flush to disk immediately
             # how the final answer is formatted: "Final Answer: FINAL ANSWER: Actual final answer"

             if message_str.startswith("Final Answer:"):
                 answer = message_str[len("Final Answer:") :].strip()
                 # remove the "FINAL ANSWER:" part and get the string after it
                 answer = answer.split("FINAL ANSWER:")[1].strip()

-                assert isinstance(
-                    answer, str
-                ), f"Expected answer to be a string, got {type(answer)}"
+                assert isinstance(answer, str), (
+                    f"Expected answer to be a string, got {type(answer)}"
+                )

             # save the usage of each of the client in a usage json file
             def get_usage(model_client: ChatCompletionClient) -> Dict[str, int]:
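Note: the assert rewrite is cosmetic (the parenthesized message matches newer Black/ruff formatting); the one behavioral addition is `await f.flush()`, which pushes each checkpoint write to disk immediately so partial message logs survive a crash. The final-answer handling strips two nested prefixes; a worked example of the parsing above:

    message_str = "Final Answer: FINAL ANSWER: 42"
    answer = message_str[len("Final Answer:"):].strip()  # "FINAL ANSWER: 42"
    answer = answer.split("FINAL ANSWER:")[1].strip()    # "42"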
@@ -447,8 +456,8 @@ def get_usage(model_client: ChatCompletionClient) -> Dict[str, int]:
                 if key != "user_proxy"
             ),
         }
-        with open(f"{output_dir}/model_tokens_usage.json", "w") as f:
-            json.dump(usage_json, f)
+        async with aiofiles.open(f"{output_dir}/model_tokens_usage.json", "w") as f:
+            await f.write(json.dumps(usage_json, indent=2))

         await team.close()
         # Step 5: Prepare the screenshots
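Note: the token-usage dump switches from a blocking open/json.dump to aiofiles so the write no longer stalls the event loop. Because json.dump expects a synchronous .write method, which aiofiles handles do not provide, the JSON is serialized up front with json.dumps and written in one awaited call (the change also adds indent=2). The pattern in isolation, with hypothetical stand-in names:

    import json
    import aiofiles

    async def dump_usage(usage_json: dict, output_dir: str) -> None:
        # Serialize first, then hand the whole string to the async file handle.
        async with aiofiles.open(f"{output_dir}/model_tokens_usage.json", "w") as f:
            await f.write(json.dumps(usage_json, indent=2))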