diff --git a/src/cuga/backend/cuga_graph/nodes/task_decomposition_planning/plan_controller.py b/src/cuga/backend/cuga_graph/nodes/task_decomposition_planning/plan_controller.py index 39b8290f..955ca244 100644 --- a/src/cuga/backend/cuga_graph/nodes/task_decomposition_planning/plan_controller.py +++ b/src/cuga/backend/cuga_graph/nodes/task_decomposition_planning/plan_controller.py @@ -71,6 +71,9 @@ async def node_handler( # Final answer ifs if state.sender == "TaskDecompositionAgent": + # Reset iteration count when entering from task decomposition + state.plan_controller_iteration_count = 0 + # Add forced apps to api_intent_relevant_apps when arriving from task decomposition force_lite_apps = getattr(settings.advanced_features, 'force_lite_mode_apps', []) if force_lite_apps: @@ -161,6 +164,21 @@ async def node_handler( # Else is loop return logger.debug("returning from planner or api agent") + + # Increment and check iteration count to prevent unbounded loops + state.plan_controller_iteration_count += 1 + max_iterations = getattr(settings.advanced_features, 'max_plan_iterations', 15) + if state.plan_controller_iteration_count > max_iterations: + logger.warning( + f"PlanController exceeded max iterations ({max_iterations}). " + f"Forcing task conclusion to prevent unbounded loop." + ) + state.last_planner_answer = ( + f"Task execution stopped: exceeded maximum of {max_iterations} planning iterations. " + f"Partial progress: {state.sub_tasks_progress}" + ) + return Command(update=state.model_dump(), goto="FinalAnswerAgent") + if ignore_controller and state.last_planner_answer: state.messages.append( AIMessage( diff --git a/src/cuga/backend/cuga_graph/state/agent_state.py b/src/cuga/backend/cuga_graph/state/agent_state.py index 668991be..6373c759 100644 --- a/src/cuga/backend/cuga_graph/state/agent_state.py +++ b/src/cuga/backend/cuga_graph/state/agent_state.py @@ -969,6 +969,9 @@ class AgentState(BaseModel): tool_calls: List[Dict[str, Any]] = Field( default_factory=list ) # List of tracked tool calls (when track_tool_calls is enabled) + plan_controller_iteration_count: int = ( + 0 # Tracks PlanController loop iterations to prevent unbounded loops + ) @property def variables_manager(self) -> 'StateVariablesManager': diff --git a/src/cuga/sdk.py b/src/cuga/sdk.py index 9de1d755..fbd9cd67 100644 --- a/src/cuga/sdk.py +++ b/src/cuga/sdk.py @@ -1633,6 +1633,8 @@ async def invoke( run_config = config or {} if "configurable" not in run_config: run_config["configurable"] = {} + if "recursion_limit" not in run_config: + run_config["recursion_limit"] = 135 # Match agent_loop.py to prevent unbounded recursion # Pass track_tool_calls flag via configurable run_config["configurable"]["track_tool_calls"] = track_tool_calls @@ -1851,6 +1853,8 @@ async def stream( run_config = config or {} if "configurable" not in run_config: run_config["configurable"] = {} + if "recursion_limit" not in run_config: + run_config["recursion_limit"] = 135 # Match agent_loop.py to prevent unbounded recursion # Handle resume case (message is None or action_response is provided) if message is None or action_response is not None: diff --git a/src/cuga/settings.toml b/src/cuga/settings.toml index a4f7e857..423ca6b7 100644 --- a/src/cuga/settings.toml +++ b/src/cuga/settings.toml @@ -48,6 +48,7 @@ sub_task_keep_last_n = 5 # Number of most recent generated variables to keep wh code_executor_keep_last_n = -1 # Number of variables to keep after code execution: -1 = keep all, 1 = keep last 1, 2 = keep last 2, etc. cuga_lite_max_steps = 70 # Maximum number of steps (call_model + sandbox cycles) before returning error path_segment_index = 1 # Which path segment to use for operation naming (1 = first, 2 = second, 3 = third) +max_plan_iterations = 15 # Maximum PlanController iterations before forcing task conclusion force_autonomous_mode = false tool_call_timeout = 30 # Timeout in seconds for tool/API calls (sandbox operations). If exceeded, raises TimeoutError execution_output_max_length = 3500 # Maximum characters to show in execution output (prevents token overflow) diff --git a/src/system_tests/unit/test_plan_controller_step_limit.py b/src/system_tests/unit/test_plan_controller_step_limit.py new file mode 100644 index 00000000..6053e9c8 --- /dev/null +++ b/src/system_tests/unit/test_plan_controller_step_limit.py @@ -0,0 +1,168 @@ +""" +Tests for PlanController iteration limit to prevent unbounded agent loops. + +Verifies that PlanController forces task conclusion when max_plan_iterations is exceeded, +preventing the agent from running 800+ steps (issue #21). +""" + +import pytest +from unittest.mock import AsyncMock, patch, MagicMock + +from langchain_core.messages import AIMessage +from langgraph.types import Command + +from cuga.backend.cuga_graph.state.agent_state import AgentState +from cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller import PlanControllerNode +from cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller_agent.prompts.load_prompt import ( + PlanControllerOutput, +) + + +def _make_state(**overrides) -> AgentState: + """Create a minimal AgentState for testing.""" + defaults = { + "input": "test task", + "url": "", + "sender": "APIPlannerAgent", + "plan_controller_iteration_count": 0, + "sub_tasks_progress": [], + } + defaults.update(overrides) + return AgentState(**defaults) + + +def _make_llm_response(conclude_task=False) -> AIMessage: + """Create a mock PlanControllerAgent LLM response.""" + output = PlanControllerOutput( + thoughts=["thinking"], + next_subtask="do something" if not conclude_task else "", + subtasks_progress=["in-progress"], + conclude_task=conclude_task, + conclude_final_answer="done" if conclude_task else "", + next_subtask_app="test_app" if not conclude_task else "", + next_subtask_type="api" if not conclude_task else "", + ) + return AIMessage(content=output.model_dump_json()) + + +@pytest.mark.asyncio +async def test_plan_controller_forces_conclusion_when_max_iterations_exceeded(): + """PlanController should route to FinalAnswerAgent when iteration count exceeds max_plan_iterations.""" + state = _make_state( + plan_controller_iteration_count=15, # Already at limit + sender="APIPlannerAgent", + last_planner_answer=None, + ) + + # Need at least 2 subtasks so ignore_controller is False + mock_task_decomposition = MagicMock() + mock_task_decomposition.task_decomposition = [MagicMock(), MagicMock()] + state.task_decomposition = mock_task_decomposition + + mock_agent = AsyncMock() + mock_config = MagicMock() + + with patch( + "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.settings" + ) as mock_settings: + mock_settings.advanced_features.max_plan_iterations = 15 + + result = await PlanControllerNode.node_handler( + state=state, + agent=mock_agent, + name="PlanControllerAgent", + config=mock_config, + ) + + assert isinstance(result, Command) + assert result.goto == "FinalAnswerAgent" + assert state.plan_controller_iteration_count == 16 + assert "exceeded maximum" in state.last_planner_answer + # LLM should NOT have been called + mock_agent.run.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_plan_controller_resets_count_on_first_entry(): + """PlanController should reset iteration count when entering from TaskDecompositionAgent.""" + state = _make_state( + plan_controller_iteration_count=10, # Leftover from previous run + sender="TaskDecompositionAgent", + api_intent_relevant_apps=[], + ) + + # Single subtask so ignore_controller is True -> takes fast path + mock_subtask = MagicMock() + mock_subtask.task = "do something" + mock_subtask.app = "test_app" + mock_subtask.type = "api" + mock_task_decomposition = MagicMock() + mock_task_decomposition.task_decomposition = [mock_subtask] + state.task_decomposition = mock_task_decomposition + + mock_agent = AsyncMock() + mock_config = MagicMock() + + with patch( + "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.settings" + ) as mock_settings: + mock_settings.advanced_features.max_plan_iterations = 15 + mock_settings.advanced_features.force_lite_mode_apps = [] + + with patch( + "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.get_apis", + new_callable=AsyncMock, + return_value=[], + ): + await PlanControllerNode.node_handler( + state=state, + agent=mock_agent, + name="PlanControllerAgent", + config=mock_config, + ) + + # Count should have been reset to 0 + assert state.plan_controller_iteration_count == 0 + + +@pytest.mark.asyncio +async def test_plan_controller_allows_iterations_within_limit(): + """PlanController should proceed normally when iteration count is within limit.""" + state = _make_state( + plan_controller_iteration_count=5, # Well within limit + sender="APIPlannerAgent", + last_planner_answer=None, + ) + + mock_task_decomposition = MagicMock() + mock_task_decomposition.task_decomposition = [MagicMock(), MagicMock()] + state.task_decomposition = mock_task_decomposition + + # LLM returns conclude_task=True + mock_agent = AsyncMock() + mock_agent.run.return_value = _make_llm_response(conclude_task=True) + + mock_config = MagicMock() + + with patch( + "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.settings" + ) as mock_settings: + mock_settings.advanced_features.max_plan_iterations = 15 + + with patch( + "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.tracker" + ) as mock_tracker: + mock_tracker.collect_step = MagicMock() + + result = await PlanControllerNode.node_handler( + state=state, + agent=mock_agent, + name="PlanControllerAgent", + config=mock_config, + ) + + assert isinstance(result, Command) + assert result.goto == "FinalAnswerAgent" + assert state.plan_controller_iteration_count == 6 + # LLM should have been called (iteration was within limit) + mock_agent.run.assert_awaited_once()