Skip to content

Commit ddb3b61

Browse files
committed
Fixing worker-state initialization bug
1 parent 6edf3a0 commit ddb3b61

File tree

2 files changed

+152
-17
lines changed

2 files changed

+152
-17
lines changed

runpod/serverless/modules/worker_state.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -67,36 +67,41 @@ class JobsProgress:
6767
"""Track the state of current jobs in progress using shared memory."""
6868

6969
_instance: Optional['JobsProgress'] = None
70-
_manager: SyncManager
71-
_shared_data: Any
72-
_lock: Any
73-
70+
# Singleton
7471
def __new__(cls):
7572
if cls._instance is None:
76-
instance = object.__new__(cls)
77-
# Initialize instance variables
78-
instance._manager = Manager()
79-
instance._shared_data = instance._manager.dict()
80-
instance._shared_data['jobs'] = instance._manager.list()
81-
instance._lock = instance._manager.Lock()
82-
cls._instance = instance
73+
cls._instance = super().__new__(cls)
8374
return cls._instance
8475

8576
def __init__(self):
86-
# Everything is already initialized in __new__
87-
pass
77+
if not hasattr(self, '_initialized'):
78+
self._manager: Optional[SyncManager] = None
79+
self._shared_data: Optional[Any] = None
80+
self._lock: Optional[Any] = None
81+
self._initialized = True
82+
83+
def _ensure_initialized(self):
84+
"""Initialize the multiprocessing manager and shared data structures only when needed."""
85+
if self._manager is None:
86+
self._manager = Manager()
87+
self._shared_data = self._manager.dict()
88+
self._shared_data['jobs'] = self._manager.list()
89+
self._lock = self._manager.Lock()
8890

8991
def __repr__(self) -> str:
9092
return f"<{self.__class__.__name__}>: {self.get_job_list()}"
9193

9294
def clear(self) -> None:
95+
self._ensure_initialized()
9396
with self._lock:
9497
self._shared_data['jobs'][:] = []
9598

9699
def add(self, element: Any):
97100
"""
98101
Adds a Job object to the set.
99102
"""
103+
self._ensure_initialized()
104+
100105
if isinstance(element, str):
101106
job_dict = {'id': element}
102107
elif isinstance(element, dict):
@@ -123,6 +128,8 @@ def get(self, element: Any) -> Optional[Job]:
123128
124129
If the element is a string, searches for Job with that id.
125130
"""
131+
self._ensure_initialized()
132+
126133
if isinstance(element, str):
127134
search_id = element
128135
elif isinstance(element, Job):
@@ -142,6 +149,8 @@ def remove(self, element: Any):
142149
"""
143150
Removes a Job object from the set.
144151
"""
152+
self._ensure_initialized()
153+
145154
if isinstance(element, str):
146155
job_id = element
147156
elif isinstance(element, dict):
@@ -153,7 +162,6 @@ def remove(self, element: Any):
153162

154163
with self._lock:
155164
job_list = self._shared_data['jobs']
156-
# Find and remove the job
157165
for i, job_dict in enumerate(job_list):
158166
if job_dict['id'] == job_id:
159167
del job_list[i]
@@ -164,6 +172,9 @@ def get_job_list(self) -> Optional[str]:
164172
"""
165173
Returns the list of job IDs as comma-separated string.
166174
"""
175+
if self._manager is None:
176+
return None
177+
167178
with self._lock:
168179
job_list = list(self._shared_data['jobs'])
169180

@@ -177,11 +188,17 @@ def get_job_count(self) -> int:
177188
"""
178189
Returns the number of jobs.
179190
"""
191+
if self._manager is None:
192+
return 0
193+
180194
with self._lock:
181195
return len(self._shared_data['jobs'])
182196

183197
def __iter__(self):
184198
"""Make the class iterable - returns Job objects"""
199+
if self._manager is None:
200+
return iter([])
201+
185202
with self._lock:
186203
# Create a snapshot of jobs to avoid holding lock during iteration
187204
job_dicts = list(self._shared_data['jobs'])
@@ -195,14 +212,15 @@ def __len__(self):
195212

196213
def __contains__(self, element: Any) -> bool:
197214
"""Support 'in' operator"""
215+
if self._manager is None:
216+
return False
217+
198218
if isinstance(element, str):
199219
search_id = element
200220
elif isinstance(element, Job):
201221
search_id = element.id
202-
elif isinstance(element, dict):
203-
search_id = element.get('id')
204222
else:
205-
return False
223+
raise TypeError("Only Job objects can be checked in JobsProgress.")
206224

207225
with self._lock:
208226
for job_dict in self._shared_data['jobs']:

tests/test_cli/test_cli_sanity.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
"""
2+
CLI Sanity Checks
3+
4+
These tests ensure that basic CLI operations work correctly and efficiently.
5+
"""
6+
7+
import subprocess
8+
import sys
9+
import unittest
10+
from click.testing import CliRunner
11+
12+
from runpod.cli.entry import runpod_cli
13+
14+
15+
class TestCLISanity(unittest.TestCase):
16+
"""Test basic CLI functionality and import safety"""
17+
18+
def test_help_command_works(self):
19+
"""
20+
Test that --help commands work correctly for all CLI commands.
21+
"""
22+
runner = CliRunner()
23+
24+
# Test main help
25+
result = runner.invoke(runpod_cli, ["--help"])
26+
self.assertEqual(result.exit_code, 0, f"Main --help failed: {result.output}")
27+
self.assertIn("A collection of CLI functions for RunPod", result.output)
28+
29+
# Test subcommand help
30+
result = runner.invoke(runpod_cli, ["pod", "--help"])
31+
self.assertEqual(result.exit_code, 0, f"Pod --help failed: {result.output}")
32+
self.assertIn("Manage and interact with pods", result.output)
33+
34+
result = runner.invoke(runpod_cli, ["config", "--help"])
35+
self.assertEqual(result.exit_code, 0, f"Config --help failed: {result.output}")
36+
37+
result = runner.invoke(runpod_cli, ["project", "--help"])
38+
self.assertEqual(result.exit_code, 0, f"Project --help failed: {result.output}")
39+
40+
result = runner.invoke(runpod_cli, ["ssh", "--help"])
41+
self.assertEqual(result.exit_code, 0, f"SSH --help failed: {result.output}")
42+
43+
result = runner.invoke(runpod_cli, ["exec", "--help"])
44+
self.assertEqual(result.exit_code, 0, f"Exec --help failed: {result.output}")
45+
46+
def test_help_command_subprocess(self):
47+
"""
48+
Test --help commands using subprocess to ensure they work in real-world usage.
49+
"""
50+
# Test main help using the installed runpod command
51+
result = subprocess.run(
52+
["runpod", "--help"],
53+
capture_output=True,
54+
text=True,
55+
timeout=10 # Prevent hanging
56+
)
57+
self.assertEqual(result.returncode, 0,
58+
f"Subprocess --help failed: {result.stderr}")
59+
self.assertIn("A collection of CLI functions for RunPod", result.stdout)
60+
61+
# Test pod help
62+
result = subprocess.run(
63+
["runpod", "pod", "--help"],
64+
capture_output=True,
65+
text=True,
66+
timeout=10
67+
)
68+
self.assertEqual(result.returncode, 0,
69+
f"Subprocess pod --help failed: {result.stderr}")
70+
self.assertIn("Manage and interact with pods", result.stdout)
71+
72+
def test_import_safety(self):
73+
"""
74+
Test that importing runpod modules works correctly.
75+
"""
76+
# Test importing main package
77+
try:
78+
import runpod
79+
self.assertTrue(True, "Main runpod import successful")
80+
except Exception as e:
81+
self.fail(f"Failed to import runpod: {e}")
82+
83+
# Test importing serverless modules
84+
try:
85+
from runpod.serverless.modules.worker_state import JobsProgress
86+
jobs = JobsProgress()
87+
# Ensure lazy initialization is working
88+
self.assertIsNone(jobs._manager,
89+
"Manager should not be created until first use")
90+
self.assertTrue(True, "JobsProgress import and instantiation successful")
91+
except Exception as e:
92+
self.fail(f"Failed to import/instantiate JobsProgress: {e}")
93+
94+
# Test that read-only operations work efficiently
95+
try:
96+
from runpod.serverless.modules.worker_state import JobsProgress
97+
jobs = JobsProgress()
98+
count = jobs.get_job_count() # Should work without heavy initialization
99+
self.assertEqual(count, 0)
100+
self.assertIsNone(jobs._manager,
101+
"Manager should not be created for read-only operations")
102+
except Exception as e:
103+
self.fail(f"Read-only operations failed: {e}")
104+
105+
def test_cli_entry_point_import(self):
106+
"""
107+
Test that the CLI entry point can be imported without issues.
108+
"""
109+
try:
110+
from runpod.cli.entry import runpod_cli
111+
self.assertTrue(callable(runpod_cli), "runpod_cli should be callable")
112+
except Exception as e:
113+
self.fail(f"Failed to import CLI entry point: {e}")
114+
115+
116+
if __name__ == "__main__":
117+
unittest.main()

0 commit comments

Comments
 (0)