Skip to content

Commit 24db742

Browse files
committed
Added metrics to run_tests and slurm job.
1 parent 4062b5f commit 24db742

File tree

4 files changed

+596
-99
lines changed

4 files changed

+596
-99
lines changed

llumnix_slurm_job.sh

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,12 @@ source .venv/bin/activate
1717

1818
echo "HOST=$(hostname) JOBID=$SLURM_JOB_ID TASK=$SLURM_ARRAY_TASK_ID" >&2
1919

20-
python3 run_tests.py --index "$SLURM_ARRAY_TASK_ID"
20+
# Run Llumnix plots and Llumnix vs LOR comparisons in parallel for this task index.
# Plots stay Llumnix-only; compare runs paired Llumnix/LOR metrics (no plots).
# NOTE(review): both modes write under simulator_output/; confirm that run-dir
# detection in run_tests.py is safe when the two processes run concurrently.
python3 run_tests.py --mode plots --index "$SLURM_ARRAY_TASK_ID" &
PLOTS_PID=$!

python3 run_tests.py --mode compare --index "$SLURM_ARRAY_TASK_ID" &
COMPARE_PID=$!

# `wait` with several PIDs reports only the exit status of the LAST one, so a
# failed plots run would be hidden by a successful compare run. Wait on each
# job separately and make the Slurm task fail if either of them failed.
STATUS=0
wait "$PLOTS_PID" || STATUS=$?
wait "$COMPARE_PID" || STATUS=$?
exit "$STATUS"

run_tests.py

Lines changed: 197 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,29 @@
11
"""
2-
Run all Llumnix latency scenarios, generate plots, and log results to Weights & Biases.
3-
4-
For each scenario in vidur.metrics.latency_config.LATENCY_TESTS:
5-
1) Execute the simulator with a scenario-specific output root.
6-
2) Run latency_analysis to produce plots under <run_dir>/plots.
7-
3) Log summaries + plots to wandb under a test-name namespace.
8-
9-
Environment:
10-
- Set WANDB_PROJECT / WANDB_ENTITY / WANDB_MODE as needed for logging.
11-
- Metrics tracing must be enabled (already set in the base command).
12-
13-
Look at the PRIORITY_DISTRIBUTION.md file and make plots (you need to change the variable
14-
--synthetic_request_generator_config_priority_distribution_type in the config
15-
files used in the different tests (latency_config.py)) to compare the performance over different priority distributions.
16-
In addition, we can now change the amount of priority levels used in the tests (for example from 2 to 5).
17-
This can be changed in the config files used in the different tests by changing the variable
18-
--synthetic_request_generator_config_num_priority_levels. Please keep the same structure in the config files
19-
(latency_config.py) as before, where we have several tests with a specific name, hardness, etc etc. Keep having descriptions
20-
for each test that explain what is being tested.
21-
22-
Also make the plot compare the different TTFT and TBT of the different priority levels for each distribution.
2+
Run Llumnix/Llumlet plots or Llumnix vs LOR+vLLM metric comparisons.
3+
4+
Two modes:
5+
- plots: generate latency plots for Llumnix+Llumlet only (existing behavior).
6+
- compare: run matched scenarios for Llumnix+Llumlet and LOR+vLLM, then compute
7+
aggregate metrics + speedups via vidur.metrics.system_metrics.
238
"""
249

2510
from __future__ import annotations
2611

12+
import argparse
2713
import os
2814
import subprocess
2915
from pathlib import Path
30-
from typing import Dict, List, Tuple, Optional
16+
from typing import Dict, List, Optional
3117

3218
import pandas as pd
3319
import wandb
3420

35-
from vidur.metrics.latency_config import LATENCY_TESTS
21+
from vidur.metrics.latency_config import LATENCY_TESTS_BY_SYSTEM, TEST_SCENARIO_MATRIX
3622
from vidur.metrics import latency_analysis as la
23+
from vidur.metrics import system_metrics as sm
3724

38-
import sys
39-
40-
# Support Slurm job-array mode: run a single test if --index is provided.
41-
if "--index" in sys.argv:
42-
idx = int(sys.argv[sys.argv.index("--index") + 1])
43-
from vidur.metrics.latency_config import LATENCY_TESTS as _ALL_TESTS
44-
LATENCY_TESTS = [_ALL_TESTS[idx]]
25+
SYSTEM_LLUMNIX = "llumnix_llumlet"
26+
SYSTEM_LOR = "lor_vllm"
4527

4628

4729
def _run_command(cmd: str) -> None:
@@ -136,12 +118,110 @@ def _load_wandb_api_key(env_path: Path = Path(".env")) -> Optional[str]:
136118
return key or None
137119

138120

139-
def run_all_tests() -> None:
121+
def _parse_args() -> argparse.Namespace:
122+
parser = argparse.ArgumentParser(
123+
description="Run Llumnix plots or Llumnix vs LOR+vLLM comparisons."
124+
)
125+
parser.add_argument(
126+
"--mode",
127+
choices=["plots", "compare"],
128+
default="plots",
129+
help="plots: Llumnix-only plots. compare: run Llumnix+Llumlet vs LOR+vLLM comparisons.",
130+
)
131+
parser.add_argument(
132+
"--index",
133+
type=int,
134+
default=None,
135+
help="Optional scenario index (0-based) to run a single scenario.",
136+
)
137+
parser.add_argument(
138+
"--latency-target",
139+
type=float,
140+
default=None,
141+
help="Override latency target when computing cost-vs-latency metrics.",
142+
)
143+
parser.add_argument(
144+
"--skip-plots",
145+
action="store_true",
146+
help="Skip latency plot generation (mostly useful in compare mode).",
147+
)
148+
return parser.parse_args()
149+
150+
151+
def _select_tests(tests: List[dict], index: Optional[int]) -> List[dict]:
152+
if index is None:
153+
return tests
154+
if index < 0 or index >= len(tests):
155+
raise IndexError(f"Index {index} out of range for {len(tests)} tests.")
156+
return [tests[index]]
157+
158+
159+
def _extract_flag_value(cmd: str, flag: str) -> Optional[str]:
160+
"""Return the value following a CLI flag inside a command string."""
161+
tokens = cmd.split()
162+
for i, tok in enumerate(tokens):
163+
if tok == flag and i + 1 < len(tokens):
164+
return tokens[i + 1]
165+
return None
166+
167+
168+
def _derive_compare_run_name(llumnix_cmd: str) -> str:
    """Build the wandb run name ``comparison_qps_X_req_Y`` from the Llumnix command.

    X is the value of ``--poisson_request_interval_generator_config_qps`` and
    Y the value of ``--synthetic_request_generator_config_num_requests``;
    either becomes ``unknown`` when the flag lookup yields nothing.
    """
    name_flags = (
        "--poisson_request_interval_generator_config_qps",
        "--synthetic_request_generator_config_num_requests",
    )
    # Strip surrounding whitespace and any trailing punctuation/commas.
    qps_part, req_part = (
        str(_extract_flag_value(llumnix_cmd, flag) or "unknown").strip().strip(",")
        for flag in name_flags
    )
    return f"comparison_qps_{qps_part}_req_{req_part}"
180+
181+
182+
def _execute_test(test: dict, generate_plots: bool, step: int, wandb_run=None) -> Path:
    """Run one scenario, optionally build its plots, and log the result.

    Args:
        test: Scenario definition; ``name`` and ``cmd`` are required,
            ``description`` is optional.
        generate_plots: When true, run the latency analysis on the new run
            directory and collect the PNG plots plus a summary.
        step: Step index forwarded to ``_log_to_wandb``.
        wandb_run: Run object forwarded to ``_log_to_wandb`` (may be ``None``).

    Returns:
        The run directory reported by ``_find_new_run_dir`` for this execution.
    """
    test_name = test["name"]
    output_root = Path("simulator_output") / test_name

    # Snapshot the run directories that already exist BEFORE launching, so the
    # directory produced by this execution can be told apart afterwards.
    existing_runs = set(filter(Path.is_dir, output_root.glob("*")))
    output_root.mkdir(parents=True, exist_ok=True)

    full_cmd = f"{test['cmd']} --metrics_config_output_dir {output_root}"
    _run_command(full_cmd)

    run_dir = _find_new_run_dir(output_root, existing_runs)
    print(f"[info] Latest run dir for {test_name}: {run_dir}")

    # NOTE(review): the summary is assumed to be built only when plots are
    # generated (it defaults to an empty dict otherwise) - confirm.
    if generate_plots:
        la.main(str(run_dir))
        plots: List[Path] = sorted((run_dir / "plots").glob("*.png"))
        summary: Dict[str, float] = _build_summary(run_dir)
    else:
        plots, summary = [], {}

    _log_to_wandb(
        wandb_run,
        test_name=test_name,
        description=test.get("description", ""),
        cmd=full_cmd,
        run_dir=run_dir,
        plots=plots,
        summary=summary,
        step=step,
    )
    return run_dir
215+
216+
217+
def run_llumnix_plots(args: argparse.Namespace) -> None:
140218
api_key = _load_wandb_api_key()
141219
if api_key:
142220
wandb.login(key=api_key)
143221

144-
for idx, test in enumerate(LATENCY_TESTS):
222+
tests = _select_tests(LATENCY_TESTS_BY_SYSTEM[SYSTEM_LLUMNIX], args.index)
223+
224+
for idx, test in enumerate(tests):
145225
name = test["name"]
146226
desc = test.get("description", "")
147227
run_name = os.getenv("WANDB_RUN_NAME", name)
@@ -152,41 +232,102 @@ def run_all_tests() -> None:
152232
mode=os.getenv("WANDB_MODE", "online"),
153233
name=run_name,
154234
group=os.getenv("WANDB_GROUP"),
155-
config={"test_name": name, "description": desc, "num_tests": len(LATENCY_TESTS)},
235+
config={
236+
"test_name": name,
237+
"description": desc,
238+
"num_tests": len(tests),
239+
"system": SYSTEM_LLUMNIX,
240+
},
156241
)
157242

158-
# Direct outputs for this scenario under simulator_output/<name>/...
159-
base_root = Path("simulator_output") / name
160-
before_dirs = {p for p in base_root.glob("*") if p.is_dir()}
161-
base_root.mkdir(parents=True, exist_ok=True)
243+
_execute_test(test, generate_plots=not args.skip_plots, step=idx, wandb_run=wandb_run)
162244

163-
cmd = f"{test['cmd']} --metrics_config_output_dir {base_root}"
164-
_run_command(cmd)
245+
if wandb_run:
246+
wandb_run.finish()
165247

166-
run_dir = _find_new_run_dir(base_root, before_dirs)
167-
print(f"[info] Latest run dir for {name}: {run_dir}")
168248

169-
# Generate plots
170-
la.main(str(run_dir))
249+
def run_comparison(args: argparse.Namespace) -> None:
    """Run matched Llumnix+Llumlet and LOR+vLLM scenarios and record the comparison.

    For every scenario in ``TEST_SCENARIO_MATRIX`` (sorted by scenario id, or
    only the one at ``args.index``), both systems are executed without plots,
    their metrics are computed via ``system_metrics`` and compared, and the
    result is printed, logged to wandb and collected. All collected rows are
    finally written to ``simulator_output/comparison_metrics.csv``.

    Args:
        args: Parsed CLI options; ``index`` and ``latency_target`` are read.

    Raises:
        IndexError: If ``args.index`` is outside the range of scenarios.
    """
    wandb_key = _load_wandb_api_key()
    if wandb_key:
        wandb.login(key=wandb_key)

    scenario_items = sorted(TEST_SCENARIO_MATRIX.items())
    selected = args.index
    if selected is not None:
        if not 0 <= selected < len(scenario_items):
            raise IndexError(f"Index {args.index} out of range for {len(scenario_items)} scenarios.")
        scenario_items = [scenario_items[selected]]

    rows = []
    for step, (scenario_id, system_tests) in enumerate(scenario_items):
        # A comparison needs both systems; scenarios lacking one are skipped.
        has_both = SYSTEM_LLUMNIX in system_tests and SYSTEM_LOR in system_tests
        if not has_both:
            print(f"[warn] Skipping scenario {scenario_id} because one system is missing.")
            continue
        llumnix_test = system_tests[SYSTEM_LLUMNIX]
        lor_test = system_tests[SYSTEM_LOR]

        wandb_run = wandb.init(
            project=os.getenv("WANDB_PROJECT", "llumnix"),
            entity=os.getenv("WANDB_ENTITY"),
            mode=os.getenv("WANDB_MODE", "online"),
            name=_derive_compare_run_name(llumnix_test["cmd"]),
            group=os.getenv("WANDB_GROUP", "comparison"),
            config={
                "scenario": scenario_id,
                "llumnix_command": llumnix_test["cmd"],
                "lor_command": lor_test["cmd"],
            },
        )

        # Llumnix runs first, then LOR; both share the same wandb run and step.
        llumnix_run_dir = _execute_test(
            llumnix_test, generate_plots=False, step=step, wandb_run=wandb_run
        )
        lor_run_dir = _execute_test(
            lor_test, generate_plots=False, step=step, wandb_run=wandb_run
        )

        # Only the second element of compute_run_metrics' result is used here.
        llumnix_metrics = sm.compute_run_metrics(
            llumnix_run_dir, SYSTEM_LLUMNIX, llumnix_test["name"], latency_target=args.latency_target
        )[1]
        lor_metrics = sm.compute_run_metrics(
            lor_run_dir, SYSTEM_LOR, lor_test["name"], latency_target=args.latency_target
        )[1]
        comparison = sm.compare_runs(llumnix_metrics, lor_metrics)

        run_info = {
            "scenario": scenario_id,
            "llumnix_run_dir": str(llumnix_run_dir),
            "lor_run_dir": str(lor_run_dir),
        }
        rows.append({**run_info, **comparison})

        print(f"[info] Scenario {scenario_id} speedups:")
        for metric, value in comparison.items():
            print(f" {metric}: {value}")

        if wandb_run:
            # Metrics that came back as None are left out of the wandb payload.
            payload = dict(run_info)
            for key, val in comparison.items():
                if val is not None:
                    payload[key] = val
            wandb.log(payload, step=step)
            wandb_run.finish()

    if not rows:
        return

    # NOTE(review): the output path is fixed, so runs launched with different
    # --index values overwrite the same file - confirm this is intended.
    output_path = Path("simulator_output") / "comparison_metrics.csv"
    output_path.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(rows).to_csv(output_path, index=False)
    print(f"[info] Wrote comparison metrics to {output_path}")
326+
190327

191328
if __name__ == "__main__":
    # Script entry point: "compare" runs the Llumnix vs LOR comparison, any
    # other mode (i.e. the default "plots") runs the Llumnix-only plots.
    args = _parse_args()
    entry_point = run_comparison if args.mode == "compare" else run_llumnix_plots
    entry_point(args)

0 commit comments

Comments
 (0)