Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ async def node_handler(
# Final answer ifs

if state.sender == "TaskDecompositionAgent":
# Reset iteration count when entering from task decomposition
state.plan_controller_iteration_count = 0

# Add forced apps to api_intent_relevant_apps when arriving from task decomposition
force_lite_apps = getattr(settings.advanced_features, 'force_lite_mode_apps', [])
if force_lite_apps:
Expand Down Expand Up @@ -161,6 +164,21 @@ async def node_handler(

# Else is loop return
logger.debug("returning from planner or api agent")

# Increment and check iteration count to prevent unbounded loops
state.plan_controller_iteration_count += 1
max_iterations = getattr(settings.advanced_features, 'max_plan_iterations', 15)
if state.plan_controller_iteration_count > max_iterations:
logger.warning(
f"PlanController exceeded max iterations ({max_iterations}). "
f"Forcing task conclusion to prevent unbounded loop."
)
state.last_planner_answer = (
f"Task execution stopped: exceeded maximum of {max_iterations} planning iterations. "
f"Partial progress: {state.sub_tasks_progress}"
)
return Command(update=state.model_dump(), goto="FinalAnswerAgent")

if ignore_controller and state.last_planner_answer:
state.messages.append(
AIMessage(
Expand Down
3 changes: 3 additions & 0 deletions src/cuga/backend/cuga_graph/state/agent_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,9 @@ class AgentState(BaseModel):
tool_calls: List[Dict[str, Any]] = Field(
default_factory=list
) # List of tracked tool calls (when track_tool_calls is enabled)
plan_controller_iteration_count: int = (
0 # Tracks PlanController loop iterations to prevent unbounded loops
)

@property
def variables_manager(self) -> 'StateVariablesManager':
Expand Down
4 changes: 4 additions & 0 deletions src/cuga/sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -1633,6 +1633,8 @@ async def invoke(
run_config = config or {}
if "configurable" not in run_config:
run_config["configurable"] = {}
if "recursion_limit" not in run_config:
run_config["recursion_limit"] = 135 # Match agent_loop.py to prevent unbounded recursion

# Pass track_tool_calls flag via configurable
run_config["configurable"]["track_tool_calls"] = track_tool_calls
Expand Down Expand Up @@ -1851,6 +1853,8 @@ async def stream(
run_config = config or {}
if "configurable" not in run_config:
run_config["configurable"] = {}
if "recursion_limit" not in run_config:
run_config["recursion_limit"] = 135 # Match agent_loop.py to prevent unbounded recursion

# Handle resume case (message is None or action_response is provided)
if message is None or action_response is not None:
Expand Down
1 change: 1 addition & 0 deletions src/cuga/settings.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ sub_task_keep_last_n = 5 # Number of most recent generated variables to keep wh
code_executor_keep_last_n = -1 # Number of variables to keep after code execution: -1 = keep all, 1 = keep last 1, 2 = keep last 2, etc.
cuga_lite_max_steps = 70 # Maximum number of steps (call_model + sandbox cycles) before returning error
path_segment_index = 1 # Which path segment to use for operation naming (1 = first, 2 = second, 3 = third)
max_plan_iterations = 15 # Maximum PlanController iterations before forcing task conclusion
force_autonomous_mode = false
tool_call_timeout = 30 # Timeout in seconds for tool/API calls (sandbox operations). If exceeded, raises TimeoutError
execution_output_max_length = 3500 # Maximum characters to show in execution output (prevents token overflow)
Expand Down
168 changes: 168 additions & 0 deletions src/system_tests/unit/test_plan_controller_step_limit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
"""
Tests for PlanController iteration limit to prevent unbounded agent loops.

Verifies that PlanController forces task conclusion when max_plan_iterations is exceeded,
preventing the agent from running 800+ steps (issue #21).
"""

import pytest
from unittest.mock import AsyncMock, patch, MagicMock

from langchain_core.messages import AIMessage
from langgraph.types import Command

from cuga.backend.cuga_graph.state.agent_state import AgentState
from cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller import PlanControllerNode
from cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller_agent.prompts.load_prompt import (
PlanControllerOutput,
)


def _make_state(**overrides) -> AgentState:
    """Build a minimal ``AgentState`` for the PlanController tests.

    Any keyword argument is forwarded as an ``AgentState`` field and takes
    precedence over the baseline value defined here.
    """
    fields = {
        "input": "test task",
        "url": "",
        "sender": "APIPlannerAgent",
        "plan_controller_iteration_count": 0,
        "sub_tasks_progress": [],
        # Caller-supplied values win over the baseline entries above.
        **overrides,
    }
    return AgentState(**fields)


def _make_llm_response(conclude_task=False) -> AIMessage:
    """Return an ``AIMessage`` carrying a JSON-serialized ``PlanControllerOutput``.

    When ``conclude_task`` is truthy the output has a final answer and empty
    next-subtask fields; otherwise it describes a follow-up API subtask and
    leaves the final answer empty.
    """
    if conclude_task:
        branch_fields = {
            "next_subtask": "",
            "conclude_final_answer": "done",
            "next_subtask_app": "",
            "next_subtask_type": "",
        }
    else:
        branch_fields = {
            "next_subtask": "do something",
            "conclude_final_answer": "",
            "next_subtask_app": "test_app",
            "next_subtask_type": "api",
        }

    payload = PlanControllerOutput(
        thoughts=["thinking"],
        subtasks_progress=["in-progress"],
        conclude_task=conclude_task,
        **branch_fields,
    )
    return AIMessage(content=payload.model_dump_json())


@pytest.mark.asyncio
async def test_plan_controller_forces_conclusion_when_max_iterations_exceeded():
    """Going past max_plan_iterations must route to FinalAnswerAgent without running the agent."""
    settings_target = "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.settings"

    state = _make_state(
        sender="APIPlannerAgent",
        plan_controller_iteration_count=15,  # counter already sits at the configured cap
        last_planner_answer=None,
    )
    # Two subtasks are supplied so that ignore_controller is False.
    state.task_decomposition = MagicMock(task_decomposition=[MagicMock(), MagicMock()])

    agent = AsyncMock()
    run_config = MagicMock()

    with patch(settings_target) as patched_settings:
        patched_settings.advanced_features.max_plan_iterations = 15

        outcome = await PlanControllerNode.node_handler(
            state=state,
            agent=agent,
            name="PlanControllerAgent",
            config=run_config,
        )

    assert isinstance(outcome, Command)
    assert outcome.goto == "FinalAnswerAgent"
    # The handler increments before comparing, so 15 becomes 16 and trips the limit.
    assert state.plan_controller_iteration_count == 16
    assert "exceeded maximum" in state.last_planner_answer
    # The limit check short-circuits, so the LLM-backed agent must never be awaited.
    agent.run.assert_not_awaited()


@pytest.mark.asyncio
async def test_plan_controller_resets_count_on_first_entry():
    """Arriving from TaskDecompositionAgent must zero the iteration counter."""
    state = _make_state(
        sender="TaskDecompositionAgent",
        plan_controller_iteration_count=10,  # stale value left over from an earlier run
        api_intent_relevant_apps=[],
    )

    # A single subtask makes ignore_controller True, so the handler takes its fast path.
    only_subtask = MagicMock()
    only_subtask.task = "do something"
    only_subtask.app = "test_app"
    only_subtask.type = "api"
    state.task_decomposition = MagicMock(task_decomposition=[only_subtask])

    agent = AsyncMock()
    run_config = MagicMock()

    settings_patcher = patch(
        "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.settings"
    )
    get_apis_patcher = patch(
        "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.get_apis",
        new_callable=AsyncMock,
        return_value=[],
    )

    with settings_patcher as patched_settings, get_apis_patcher:
        patched_settings.advanced_features.max_plan_iterations = 15
        patched_settings.advanced_features.force_lite_mode_apps = []

        await PlanControllerNode.node_handler(
            state=state,
            agent=agent,
            name="PlanControllerAgent",
            config=run_config,
        )

    # The leftover count of 10 must have been cleared.
    assert state.plan_controller_iteration_count == 0


@pytest.mark.asyncio
async def test_plan_controller_allows_iterations_within_limit():
    """Below the iteration cap the handler must still consult the agent as usual."""
    state = _make_state(
        sender="APIPlannerAgent",
        plan_controller_iteration_count=5,  # comfortably under the cap of 15
        last_planner_answer=None,
    )
    state.task_decomposition = MagicMock(task_decomposition=[MagicMock(), MagicMock()])

    # The mocked agent answers with conclude_task=True.
    agent = AsyncMock()
    agent.run.return_value = _make_llm_response(conclude_task=True)

    run_config = MagicMock()

    settings_patcher = patch(
        "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.settings"
    )
    tracker_patcher = patch(
        "cuga.backend.cuga_graph.nodes.task_decomposition_planning.plan_controller.tracker"
    )

    with settings_patcher as patched_settings, tracker_patcher as patched_tracker:
        patched_settings.advanced_features.max_plan_iterations = 15
        patched_tracker.collect_step = MagicMock()

        outcome = await PlanControllerNode.node_handler(
            state=state,
            agent=agent,
            name="PlanControllerAgent",
            config=run_config,
        )

    assert isinstance(outcome, Command)
    assert outcome.goto == "FinalAnswerAgent"
    assert state.plan_controller_iteration_count == 6
    # Within the limit the agent is expected to be awaited exactly once.
    agent.run.assert_awaited_once()
Loading