Skip to content

Commit a8aeadf

Browse files
committed
fix: set data processing timeout to 1h
1 parent cb8889f commit a8aeadf

File tree

5 files changed

+11
-4
lines changed

5 files changed

+11
-4
lines changed

rdagent/app/finetune/llm/conf.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ class LLMFinetunePropSetting(ExtendedBaseSettings):
3939
# Timeouts (longer for LLM training; all are used as Docker container timeouts)
4040
full_timeout: int = 360000
4141
"""Full training timeout in seconds (default 100 hours, env: FT_FULL_TIMEOUT). Used in running stage for complete model training."""
42-
data_processing_timeout: int = 14400
43-
"""Data processing script timeout in seconds (default 4 hours, env: FT_DATA_PROCESSING_TIMEOUT)."""
42+
data_processing_timeout: int = 3600
43+
"""Data processing script timeout in seconds (default 1 hour, env: FT_DATA_PROCESSING_TIMEOUT). Used for full data processing in running stage."""
44+
debug_data_processing_timeout: int = 1200
45+
"""Debug data processing timeout in seconds (default 20 minutes, env: FT_DEBUG_DATA_PROCESSING_TIMEOUT). Used for --debug mode in coding stage."""
4446
micro_batch_timeout: int = 1800
4547
"""Micro-batch test timeout in seconds (default 30 minutes, env: FT_MICRO_BATCH_TIMEOUT)."""
4648

rdagent/components/coder/finetune/conf.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ def get_ft_env(
197197
# Select timeout based on operation type
198198
timeout_map = {
199199
"data_processing": FT_RD_SETTING.data_processing_timeout,
200+
"debug_data_processing": FT_RD_SETTING.debug_data_processing_timeout,
200201
"micro_batch": FT_RD_SETTING.micro_batch_timeout,
201202
"full_training": FT_RD_SETTING.full_timeout,
202203
}
@@ -229,6 +230,7 @@ def get_ft_env(
229230

230231
def get_data_processing_env(
231232
enable_cache: bool | None = None,
233+
is_debug: bool = False,
232234
) -> tuple[Env, dict]:
233235
"""Get environment for data processing scripts with LLM API access.
234236
@@ -239,13 +241,14 @@ def get_data_processing_env(
239241
240242
Args:
241243
enable_cache: Whether to enable Docker caching
244+
is_debug: Whether to run in debug mode, which uses the shorter debug timeout (default 20 minutes instead of 1 hour)
242245
243246
Returns:
244247
Tuple of (env, env_vars) where env_vars contains LLM API keys
245248
to be passed to env.run() as the env parameter
246249
"""
247250
env = get_ft_env(
248-
operation="data_processing",
251+
operation="debug_data_processing" if is_debug else "data_processing",
249252
enable_cache=enable_cache,
250253
)
251254

rdagent/components/coder/finetune/eval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def evaluate(
7777
# So we clear the workspace every time.
7878

7979
# Step 3: Execute script in DEBUG mode (generates ~10 samples for fast validation)
80-
env, env_vars = get_data_processing_env()
80+
env, env_vars = get_data_processing_env(is_debug=True)
8181

8282
# Clear workspace (except logs and file_dict items) before data processing
8383
clear_workspace(implementation, env=env)

rdagent/scenarios/finetune/scen/prompts.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ scenario_description: |-
6969
7070
## Timeout Constraints
7171
- Full Training Timeout: {{ full_timeout }}
72+
- Data Processing Timeout: {{ data_processing_timeout }}
7273
{% endif %}
7374
7475
## (Very important!)Sample Size Control (Code-Based, No LLM)

rdagent/scenarios/finetune/scen/scenario.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ def get_scenario_all_desc(self, enable_dataset_description: bool = False) -> str
298298
dataset_config=self.dataset_config,
299299
model_info=self.model_info,
300300
full_timeout=f"{self.real_full_timeout() / 60 / 60:.2f} hours",
301+
data_processing_timeout=f"{FT_RD_SETTING.data_processing_timeout / 60:.0f} minutes",
301302
enable_dataset_description=enable_dataset_description,
302303
upper_data_size_limit=FT_RD_SETTING.upper_data_size_limit,
303304
)

0 commit comments

Comments
 (0)