Commit 72d0f49

Update shapes in microbenchmarks to reflect more realistic data (#3438)
1 parent 7bfc8da commit 72d0f49

3 files changed: +62 -5 lines changed

.github/workflows/dashboard_perf_test.yml

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ on:
 
 jobs:
   benchmark:
+    timeout-minutes: 500
     runs-on: linux.aws.a100
     strategy:
       matrix:

benchmarks/dashboard/microbenchmark_quantization_config.yml

Lines changed: 13 additions & 4 deletions
@@ -1,7 +1,7 @@
 # Benchmark configuration for microbenchmarks
 benchmark_mode: "inference"
 quantization_config_recipe_names: # Will run a baseline inference for model by default, without quantization for comparison
-  # - "int8wo" TODO: Re-enable once we debug the delay in the benchmark
+  # - "int8wo"
   - "int8dq"
   - "float8dq-tensor"
   - "float8dq-row"
@@ -10,9 +10,18 @@ output_dir: "benchmarks/microbenchmarks/results"
 model_params:
   - name: "small_bf16_linear"
     matrix_shapes:
-      - name: "small_sweep"
-        min_power: 10
-        max_power: 15
+      - name: "llama4"
+      - name: "deepseek_v3_236b"
+      - name: "deepseek_v3_671b"
+      - name: "qwen3_32b"
+      - name: "gemma3_27b"
+      - name: "custom"
+        shapes: [
+          [1920, 3072, 3072],
+          [1920, 3072, 9216],
+          [1920, 3072, 14336],
+          [1920, 14336, 3072]
+        ]
     high_precision_dtype: "torch.bfloat16"
     torch_compile_mode: "max-autotune"
     device: "cuda"

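Each matrix_shapes entry above expands into a list of (M, K, N) GEMM shapes for the linear microbenchmark: the named entries (llama4, deepseek_v3_236b, ...) map to hard-coded shape lists in benchmark_runner.py, while the custom entry passes its shapes through directly. As a minimal illustration of the convention these shapes imply (M = tokens, K = in_features, N = out_features), one custom shape corresponds to a bf16 linear layer roughly as sketched below; this is an illustrative example under those assumptions, not code from the repository.

# Illustrative sketch only (not from the repo): exercising one "custom" shape
# as a bf16 linear layer, assuming M = tokens, K = in_features, N = out_features.
import torch

M, K, N = 1920, 3072, 9216  # one entry from the custom shapes list above
linear = torch.nn.Linear(K, N, bias=False, device="cuda", dtype=torch.bfloat16)
x = torch.randn(M, K, device="cuda", dtype=torch.bfloat16)

with torch.no_grad():
    y = linear(x)  # the (M, K) x (K, N) GEMM that the quantization recipes are compared on

print(y.shape)  # torch.Size([1920, 9216])
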
benchmarks/microbenchmarks/benchmark_runner.py

Lines changed: 48 additions & 1 deletion
@@ -60,6 +60,53 @@ def get_shapes_for_config(
                 "ffn.w2": (M, 3584, 8192),
             }
             shapes.extend([(f"{name}_{k}", v) for k, v in llama_shapes.items()])
+        elif name == "llama4":
+            # LLaMa 4 shapes
+            llama4_shapes = [
+                ("FFN", (16384, 8192, 5120)),
+                ("QO_proj", (16384, 8192, 8192)),
+                ("KV_proj", (16384, 8192, 1024)),
+                ("FFN", (128000, 8192, 5120)),
+                ("QO_proj", (128000, 8192, 8192)),
+                ("KV_proj", (128000, 8192, 1024)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in llama4_shapes])
+        elif name == "deepseek_v3_236b":
+            # DeepSeek V3 236B shapes
+            deepseek_v3_236b_shapes = [
+                ("FFN", (16384, 1536, 5120)),
+                ("QKVO_proj", (16384, 7168, 7168)),
+                ("FFN", (128000, 1536, 5120)),
+                ("QKVO_proj", (128000, 7168, 7168)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in deepseek_v3_236b_shapes])
+        elif name == "deepseek_v3_671b":
+            # DeepSeek V3 671B shapes
+            deepseek_v3_671b_shapes = [
+                ("FFN", (16384, 2048, 7168)),
+                ("QKVO_proj", (16384, 7168, 7168)),
+                ("FFN", (128000, 2048, 7168)),
+                ("QKVO_proj", (128000, 7168, 7168)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in deepseek_v3_671b_shapes])
+        elif name == "qwen3_32b":
+            # Qwen3 32B shapes
+            qwen3_32b_shapes = [
+                ("QO_proj", (16384, 5120, 5120)),
+                ("KV_proj", (16384, 5120, 640)),
+                ("QO_proj", (128000, 5120, 5120)),
+                ("KV_proj", (128000, 5120, 640)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in qwen3_32b_shapes])
+        elif name == "gemma3_27b":
+            # Gemma3 27B shapes
+            gemma3_27b_shapes = [
+                ("QO_proj", (16384, 4096, 4096)),
+                ("KV_proj", (16384, 4096, 1024)),
+                ("QO_proj", (128000, 4096, 4096)),
+                ("KV_proj", (128000, 4096, 1024)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in gemma3_27b_shapes])
         elif name == "pow2":
             # Generate shapes with dimensions that are powers of 2
             min_power_of_2 = shape_config.get("min_power", 10)  # 1024
@@ -105,7 +152,7 @@ def get_shapes_for_config(
                     counter += 1
         else:
             raise NotImplementedError(
-                f"Shape config {name} not supported. Supported options: custom, llama, pow2, pow2_extended, sweep."
+                f"Shape config {name} not supported. Supported options: custom, llama, llama4, deepseek_v3_236b, deepseek_v3_671b, qwen3_32b, gemma3_27b, pow2, pow2_extended, sweep."
             )
     return shapes

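A hypothetical usage sketch of the extended get_shapes_for_config follows. It assumes the function accepts a list of shape-config dicts mirroring the matrix_shapes entries in the YAML config and returns (label, shape) pairs; the import path, signature, and return format are assumptions to be checked against benchmark_runner.py, not details guaranteed by this diff.

# Hypothetical usage sketch -- import path, signature, and return format are
# assumptions inferred from the config file and this diff, not confirmed here.
from benchmarks.microbenchmarks.benchmark_runner import get_shapes_for_config

shapes = get_shapes_for_config(
    [
        {"name": "llama4"},
        {"name": "custom", "shapes": [[1920, 3072, 3072], [1920, 14336, 3072]]},
    ]
)
for label, (M, K, N) in shapes:
    # e.g. "llama4_FFN 16384 8192 5120", "custom 1920 3072 3072", ...
    print(label, M, K, N)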