Commit 72d0f49

Update shapes in microbenchmarks to reflect more realistic data (#3438)
1 parent 7bfc8da commit 72d0f49

3 files changed: +62 -5 lines changed

.github/workflows/dashboard_perf_test.yml

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@ on:
 
 jobs:
   benchmark:
+    timeout-minutes: 500
     runs-on: linux.aws.a100
     strategy:
       matrix:

benchmarks/dashboard/microbenchmark_quantization_config.yml

Lines changed: 13 additions & 4 deletions
@@ -1,7 +1,7 @@
 # Benchmark configuration for microbenchmarks
 benchmark_mode: "inference"
 quantization_config_recipe_names: # Will run a baseline inference for model by default, without quantization for comparison
-  # - "int8wo" TODO: Re-enable once we debug the delay in the benchmark
+  # - "int8wo"
   - "int8dq"
   - "float8dq-tensor"
   - "float8dq-row"
@@ -10,9 +10,18 @@ output_dir: "benchmarks/microbenchmarks/results"
 model_params:
   - name: "small_bf16_linear"
     matrix_shapes:
-      - name: "small_sweep"
-        min_power: 10
-        max_power: 15
+      - name: "llama4"
+      - name: "deepseek_v3_236b"
+      - name: "deepseek_v3_671b"
+      - name: "qwen3_32b"
+      - name: "gemma3_27b"
+      - name: "custom"
+        shapes: [
+          [1920, 3072, 3072],
+          [1920, 3072, 9216],
+          [1920, 3072, 14336],
+          [1920, 14336, 3072]
+        ]
     high_precision_dtype: "torch.bfloat16"
     torch_compile_mode: "max-autotune"
     device: "cuda"

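Each matrix_shapes entry above expands into a list of (M, K, N) GEMM shapes for the linear microbenchmark: the named entries (llama4, deepseek_v3_236b, ...) map to hard-coded shape lists in benchmark_runner.py, while the custom entry passes its shapes through directly. As a minimal illustration of the convention these shapes imply (M = tokens, K = in_features, N = out_features), one custom shape corresponds to a bf16 linear layer roughly as sketched below; this is an illustrative example under those assumptions, not code from the repository.

# Illustrative sketch only (not from the repo): exercising one "custom" shape
# as a bf16 linear layer, assuming M = tokens, K = in_features, N = out_features.
import torch

M, K, N = 1920, 3072, 9216  # one entry from the custom shapes list above
linear = torch.nn.Linear(K, N, bias=False, device="cuda", dtype=torch.bfloat16)
x = torch.randn(M, K, device="cuda", dtype=torch.bfloat16)

with torch.no_grad():
    y = linear(x)  # the (M, K) x (K, N) GEMM that the quantization recipes are compared on

print(y.shape)  # torch.Size([1920, 9216])
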
benchmarks/microbenchmarks/benchmark_runner.py

Lines changed: 48 additions & 1 deletion
@@ -60,6 +60,53 @@ def get_shapes_for_config(
                 "ffn.w2": (M, 3584, 8192),
             }
             shapes.extend([(f"{name}_{k}", v) for k, v in llama_shapes.items()])
+        elif name == "llama4":
+            # LLaMa 4 shapes
+            llama4_shapes = [
+                ("FFN", (16384, 8192, 5120)),
+                ("QO_proj", (16384, 8192, 8192)),
+                ("KV_proj", (16384, 8192, 1024)),
+                ("FFN", (128000, 8192, 5120)),
+                ("QO_proj", (128000, 8192, 8192)),
+                ("KV_proj", (128000, 8192, 1024)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in llama4_shapes])
+        elif name == "deepseek_v3_236b":
+            # DeepSeek V3 236B shapes
+            deepseek_v3_236b_shapes = [
+                ("FFN", (16384, 1536, 5120)),
+                ("QKVO_proj", (16384, 7168, 7168)),
+                ("FFN", (128000, 1536, 5120)),
+                ("QKVO_proj", (128000, 7168, 7168)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in deepseek_v3_236b_shapes])
+        elif name == "deepseek_v3_671b":
+            # DeepSeek V3 671B shapes
+            deepseek_v3_671b_shapes = [
+                ("FFN", (16384, 2048, 7168)),
+                ("QKVO_proj", (16384, 7168, 7168)),
+                ("FFN", (128000, 2048, 7168)),
+                ("QKVO_proj", (128000, 7168, 7168)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in deepseek_v3_671b_shapes])
+        elif name == "qwen3_32b":
+            # Qwen3 32B shapes
+            qwen3_32b_shapes = [
+                ("QO_proj", (16384, 5120, 5120)),
+                ("KV_proj", (16384, 5120, 640)),
+                ("QO_proj", (128000, 5120, 5120)),
+                ("KV_proj", (128000, 5120, 640)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in qwen3_32b_shapes])
+        elif name == "gemma3_27b":
+            # Gemma3 27B shapes
+            gemma3_27b_shapes = [
+                ("QO_proj", (16384, 4096, 4096)),
+                ("KV_proj", (16384, 4096, 1024)),
+                ("QO_proj", (128000, 4096, 4096)),
+                ("KV_proj", (128000, 4096, 1024)),
+            ]
+            shapes.extend([(f"{name}_{k}", v) for k, v in gemma3_27b_shapes])
         elif name == "pow2":
             # Generate shapes with dimensions that are powers of 2
             min_power_of_2 = shape_config.get("min_power", 10)  # 1024
@@ -105,7 +152,7 @@ def get_shapes_for_config(
                     counter += 1
         else:
             raise NotImplementedError(
-                f"Shape config {name} not supported. Supported options: custom, llama, pow2, pow2_extended, sweep."
+                f"Shape config {name} not supported. Supported options: custom, llama, llama4, deepseek_v3_236b, deepseek_v3_671b, qwen3_32b, gemma3_27b, pow2, pow2_extended, sweep."
             )
     return shapes

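A hypothetical usage sketch of the extended get_shapes_for_config follows. It assumes the function accepts a list of shape-config dicts mirroring the matrix_shapes entries in the YAML config and returns (label, shape) pairs; the import path, signature, and return format are assumptions to be checked against benchmark_runner.py, not details guaranteed by this diff.

# Hypothetical usage sketch -- import path, signature, and return format are
# assumptions inferred from the config file and this diff, not confirmed here.
from benchmarks.microbenchmarks.benchmark_runner import get_shapes_for_config

shapes = get_shapes_for_config(
    [
        {"name": "llama4"},
        {"name": "custom", "shapes": [[1920, 3072, 3072], [1920, 14336, 3072]]},
    ]
)
for label, (M, K, N) in shapes:
    # e.g. "llama4_FFN 16384 8192 5120", "custom 1920 3072 3072", ...
    print(label, M, K, N)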