"""
Run Llumnix/Llumlet plots or Llumnix vs LOR+vLLM metric comparisons.

Two modes:
- plots: generate latency plots for Llumnix+Llumlet only (existing behavior).
- compare: run matched scenarios for Llumnix+Llumlet and LOR+vLLM, then compute
  aggregate metrics + speedups via vidur.metrics.system_metrics.
"""
249
from __future__ import annotations

import argparse
import os
import subprocess
from pathlib import Path
from typing import Dict, List, Optional

import pandas as pd
import wandb

from vidur.metrics import latency_analysis as la
from vidur.metrics import system_metrics as sm
from vidur.metrics.latency_config import LATENCY_TESTS_BY_SYSTEM, TEST_SCENARIO_MATRIX
3724
38- import sys
39-
40- # Support Slurm job-array mode: run a single test if --index is provided.
41- if "--index" in sys .argv :
42- idx = int (sys .argv [sys .argv .index ("--index" ) + 1 ])
43- from vidur .metrics .latency_config import LATENCY_TESTS as _ALL_TESTS
44- LATENCY_TESTS = [_ALL_TESTS [idx ]]
# System identifiers; used as keys into LATENCY_TESTS_BY_SYSTEM and into each
# TEST_SCENARIO_MATRIX entry's per-system test dict.
SYSTEM_LLUMNIX = "llumnix_llumlet"
SYSTEM_LOR = "lor_vllm"
4527
4628
4729def _run_command (cmd : str ) -> None :
@@ -136,12 +118,110 @@ def _load_wandb_api_key(env_path: Path = Path(".env")) -> Optional[str]:
136118 return key or None
137119
138120
139- def run_all_tests () -> None :
121+ def _parse_args () -> argparse .Namespace :
122+ parser = argparse .ArgumentParser (
123+ description = "Run Llumnix plots or Llumnix vs LOR+vLLM comparisons."
124+ )
125+ parser .add_argument (
126+ "--mode" ,
127+ choices = ["plots" , "compare" ],
128+ default = "plots" ,
129+ help = "plots: Llumnix-only plots. compare: run Llumnix+Llumlet vs LOR+vLLM comparisons." ,
130+ )
131+ parser .add_argument (
132+ "--index" ,
133+ type = int ,
134+ default = None ,
135+ help = "Optional scenario index (0-based) to run a single scenario." ,
136+ )
137+ parser .add_argument (
138+ "--latency-target" ,
139+ type = float ,
140+ default = None ,
141+ help = "Override latency target when computing cost-vs-latency metrics." ,
142+ )
143+ parser .add_argument (
144+ "--skip-plots" ,
145+ action = "store_true" ,
146+ help = "Skip latency plot generation (mostly useful in compare mode)." ,
147+ )
148+ return parser .parse_args ()
149+
150+
151+ def _select_tests (tests : List [dict ], index : Optional [int ]) -> List [dict ]:
152+ if index is None :
153+ return tests
154+ if index < 0 or index >= len (tests ):
155+ raise IndexError (f"Index { index } out of range for { len (tests )} tests." )
156+ return [tests [index ]]
157+
158+
159+ def _extract_flag_value (cmd : str , flag : str ) -> Optional [str ]:
160+ """Return the value following a CLI flag inside a command string."""
161+ tokens = cmd .split ()
162+ for i , tok in enumerate (tokens ):
163+ if tok == flag and i + 1 < len (tokens ):
164+ return tokens [i + 1 ]
165+ return None
166+
167+
def _derive_compare_run_name(llumnix_cmd: str) -> str:
    """Build wandb run name like comparison_qps_X_req_Y from the Llumnix command."""

    def clean(raw) -> str:
        # Guard against stray whitespace or trailing commas around CLI values.
        return str(raw).strip().strip(",")

    qps = _extract_flag_value(
        llumnix_cmd, "--poisson_request_interval_generator_config_qps"
    ) or "unknown"
    num_requests = _extract_flag_value(
        llumnix_cmd, "--synthetic_request_generator_config_num_requests"
    ) or "unknown"
    return f"comparison_qps_{clean(qps)}_req_{clean(num_requests)}"
180+
181+
def _execute_test(test: dict, generate_plots: bool, step: int, wandb_run=None) -> Path:
    """Run one scenario command, optionally build plots/summary, and log results.

    Returns the newly created run directory under simulator_output/<name>/.
    """
    scenario_name = test["name"]
    description = test.get("description", "")

    output_root = Path("simulator_output") / scenario_name
    # Snapshot pre-existing run dirs (before mkdir) so the fresh one can be
    # identified after the simulator finishes.
    preexisting = {entry for entry in output_root.glob("*") if entry.is_dir()}
    output_root.mkdir(parents=True, exist_ok=True)

    full_cmd = f"{test['cmd']} --metrics_config_output_dir {output_root}"
    _run_command(full_cmd)

    run_dir = _find_new_run_dir(output_root, preexisting)
    print(f"[info] Latest run dir for {scenario_name}: {run_dir}")

    plot_paths: List[Path] = []
    metrics_summary: Dict[str, float] = {}
    if generate_plots:
        la.main(str(run_dir))
        plot_paths = sorted((run_dir / "plots").glob("*.png"))
        metrics_summary = _build_summary(run_dir)

    _log_to_wandb(
        wandb_run,
        test_name=scenario_name,
        description=description,
        cmd=full_cmd,
        run_dir=run_dir,
        plots=plot_paths,
        summary=metrics_summary,
        step=step,
    )
    return run_dir
215+
216+
217+ def run_llumnix_plots (args : argparse .Namespace ) -> None :
140218 api_key = _load_wandb_api_key ()
141219 if api_key :
142220 wandb .login (key = api_key )
143221
144- for idx , test in enumerate (LATENCY_TESTS ):
222+ tests = _select_tests (LATENCY_TESTS_BY_SYSTEM [SYSTEM_LLUMNIX ], args .index )
223+
224+ for idx , test in enumerate (tests ):
145225 name = test ["name" ]
146226 desc = test .get ("description" , "" )
147227 run_name = os .getenv ("WANDB_RUN_NAME" , name )
@@ -152,41 +232,102 @@ def run_all_tests() -> None:
152232 mode = os .getenv ("WANDB_MODE" , "online" ),
153233 name = run_name ,
154234 group = os .getenv ("WANDB_GROUP" ),
155- config = {"test_name" : name , "description" : desc , "num_tests" : len (LATENCY_TESTS )},
235+ config = {
236+ "test_name" : name ,
237+ "description" : desc ,
238+ "num_tests" : len (tests ),
239+ "system" : SYSTEM_LLUMNIX ,
240+ },
156241 )
157242
158- # Direct outputs for this scenario under simulator_output/<name>/...
159- base_root = Path ("simulator_output" ) / name
160- before_dirs = {p for p in base_root .glob ("*" ) if p .is_dir ()}
161- base_root .mkdir (parents = True , exist_ok = True )
243+ _execute_test (test , generate_plots = not args .skip_plots , step = idx , wandb_run = wandb_run )
162244
163- cmd = f" { test [ 'cmd' ] } --metrics_config_output_dir { base_root } "
164- _run_command ( cmd )
245+ if wandb_run :
246+ wandb_run . finish ( )
165247
166- run_dir = _find_new_run_dir (base_root , before_dirs )
167- print (f"[info] Latest run dir for { name } : { run_dir } " )
168248
169- # Generate plots
170- la .main (str (run_dir ))
def run_comparison(args: argparse.Namespace) -> None:
    """Run matched Llumnix+Llumlet vs LOR+vLLM scenarios and log speedup metrics.

    For every scenario present for both systems: execute both runs, compute
    per-run metrics and the cross-system comparison via system_metrics, log to
    wandb, and finally persist all rows to simulator_output/comparison_metrics.csv.
    """
    api_key = _load_wandb_api_key()
    if api_key:
        wandb.login(key=api_key)

    scenarios = sorted(TEST_SCENARIO_MATRIX.items())
    if args.index is not None:
        if not 0 <= args.index < len(scenarios):
            raise IndexError(
                f"Index {args.index} out of range for {len(scenarios)} scenarios."
            )
        scenarios = [scenarios[args.index]]

    rows = []
    for step, (scenario_id, system_tests) in enumerate(scenarios):
        # Both systems must be configured for an apples-to-apples comparison.
        if SYSTEM_LLUMNIX not in system_tests or SYSTEM_LOR not in system_tests:
            print(f"[warn] Skipping scenario {scenario_id} because one system is missing.")
            continue

        llumnix_test = system_tests[SYSTEM_LLUMNIX]
        lor_test = system_tests[SYSTEM_LOR]
        wandb_run = wandb.init(
            project=os.getenv("WANDB_PROJECT", "llumnix"),
            entity=os.getenv("WANDB_ENTITY"),
            mode=os.getenv("WANDB_MODE", "online"),
            name=_derive_compare_run_name(llumnix_test["cmd"]),
            group=os.getenv("WANDB_GROUP", "comparison"),
            config={
                "scenario": scenario_id,
                "llumnix_command": llumnix_test["cmd"],
                "lor_command": lor_test["cmd"],
            },
        )

        # Plots are skipped here; compare mode only needs the raw metrics.
        llumnix_dir = _execute_test(
            llumnix_test, generate_plots=False, step=step, wandb_run=wandb_run
        )
        lor_dir = _execute_test(
            lor_test, generate_plots=False, step=step, wandb_run=wandb_run
        )

        _, llumnix_metrics = sm.compute_run_metrics(
            llumnix_dir, SYSTEM_LLUMNIX, llumnix_test["name"], latency_target=args.latency_target
        )
        _, lor_metrics = sm.compute_run_metrics(
            lor_dir, SYSTEM_LOR, lor_test["name"], latency_target=args.latency_target
        )
        speedups = sm.compare_runs(llumnix_metrics, lor_metrics)

        rows.append(
            {
                "scenario": scenario_id,
                "llumnix_run_dir": str(llumnix_dir),
                "lor_run_dir": str(lor_dir),
                **speedups,
            }
        )

        print(f"[info] Scenario {scenario_id} speedups:")
        for metric, value in speedups.items():
            print(f"  {metric}: {value}")

        if wandb_run:
            payload = {
                "scenario": scenario_id,
                "llumnix_run_dir": str(llumnix_dir),
                "lor_run_dir": str(lor_dir),
            }
            # Drop unavailable (None) metrics before logging.
            payload.update({k: v for k, v in speedups.items() if v is not None})
            wandb.log(payload, step=step)
            wandb_run.finish()

    if rows:
        csv_path = Path("simulator_output") / "comparison_metrics.csv"
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        pd.DataFrame(rows).to_csv(csv_path, index=False)
        print(f"[info] Wrote comparison metrics to {csv_path}")
190327
if __name__ == "__main__":
    cli_args = _parse_args()
    # Dispatch on the requested mode; anything other than "compare" falls back
    # to the plots path, matching the argparse default.
    handlers = {"compare": run_comparison}
    handlers.get(cli_args.mode, run_llumnix_plots)(cli_args)