Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
265 changes: 265 additions & 0 deletions plot_grid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
import argparse
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from matplotlib.lines import Line2D
from scipy.stats.mstats import gmean
from npbench.infrastructure import utilities as util

# Command-line options controlling the subplot grid shape.
_cli = argparse.ArgumentParser()
for _flags, _kwargs in (
        (("-r", "--rows"), dict(type=int, default=6, help="Number of rows in the grid")),
        (("-c", "--cols"), dict(type=int, default=9, help="Number of columns in the grid")),
):
    _cli.add_argument(*_flags, **_kwargs)
args = _cli.parse_args()


def bootstrap_ci(data, statfunction=np.median, alpha=0.05, n_samples=300):
    """Percentile bootstrap confidence interval of *statfunction* over *data*.

    Draws ``n_samples`` resamples (with replacement) from the 1-D input and
    returns a length-2 array ``[lower, upper]`` at the ``alpha/2`` and
    ``1 - alpha/2`` percentiles. Uses the global NumPy RNG, so results vary
    between calls unless the caller fixes the seed.

    Raises ValueError if *data* is not one-dimensional.
    """
    arr = np.array(data)
    # A 1-D (possibly trivially-shaped) array has prod(shape) == max(shape).
    if np.prod(arr.shape) != max(arr.shape):
        raise ValueError("Data must be 1D")
    arr = arr.ravel()

    n = arr.shape[0]
    # One statistic per bootstrap resample of the same size as the input.
    stats = np.asarray([
        statfunction(arr[np.random.randint(n, size=(n, ))])
        for _ in range(n_samples)
    ])
    stats.sort(axis=0)

    # Indices of the two requested percentiles in the sorted statistics.
    quantiles = np.array([alpha / 2, 1 - alpha / 2])
    picks = np.round((n_samples - 1) * quantiles).astype(int)
    return stats[picks]


# --- Load raw timings from the npbench results database ----------------------
database = r"npbench.db"
conn = util.create_connection(database)
data = pd.read_sql_query("SELECT * FROM results", conn)

# Metadata columns irrelevant for the speedup comparison.
data = data.drop(['timestamp', 'kind', 'dwarf', 'version'],
                 axis=1).reset_index(drop=True)

# Drop rows with no domain label.
data = data[data["domain"] != ""]

# Only runs executed with the 'paper' preset are plotted.
data = data[data['preset'] == 'paper']
data = data.drop(['preset'], axis=1).reset_index(drop=True)

# For each (benchmark, domain, framework, mode), pick the 'details' variant
# with the lowest median runtime, then keep every raw timing row that belongs
# to that best variant (inner merge on the group keys + details).
aggdata = data.groupby(["benchmark", "domain", "framework", "mode", "details"],
                       dropna=False).agg({
                           "time": "median",
                           "validated": "first"
                       }).reset_index()
best = aggdata.sort_values("time").groupby(
    ["benchmark", "domain", "framework", "mode"],
    dropna=False).first().reset_index()
bestgroup = best.drop(["time", "validated"], axis=1)
data = pd.merge(left=bestgroup,
                right=data,
                on=["benchmark", "domain", "framework", "mode", "details"],
                how="inner")
data = data.drop(['mode', 'details'], axis=1).reset_index(drop=True)

# Frameworks deliberately excluded from this plot.
data = data[data['framework'] != 'dace_cpu']
data = data[data['framework'] != 'pythran']
data = data[data['framework'] != 'numba']

# numpy is the baseline, so it is removed from the plotted frameworks.
frmwrks = list(data['framework'].unique())
assert ('numpy' in frmwrks)
frmwrks.remove('numpy')
frmwrks.sort()

benchmarks_unsorted = list(data['benchmark'].unique())
results = []

# For every benchmark/framework pair, compute the median speedup over the
# numpy baseline plus a 95% bootstrap CI of that median.
for benchmark in benchmarks_unsorted:
    bench_data = data[data['benchmark'] == benchmark]

    # Benchmarks without a numpy baseline cannot yield a speedup.
    baseline = bench_data.loc[bench_data['framework'] == 'numpy', 'time'].values
    if baseline.size == 0:
        continue
    baseline_median = np.median(baseline)

    for framework in frmwrks:
        fw_rows = bench_data[bench_data['framework'] == framework]
        fw_times = fw_rows['time'].values
        if fw_times.size == 0:
            continue

        # A framework counts as validated only if every run validated.
        all_validated = fw_rows['validated'].all()

        ratios = baseline_median / fw_times
        center = np.median(ratios)

        # Asymmetric error-bar lengths derived from the CI endpoints.
        lo, hi = bootstrap_ci(ratios, statfunction=np.median, alpha=0.05, n_samples=300)

        results.append({
            'benchmark': benchmark,
            'framework': framework,
            'speedup': center,
            'ci_lower': lo,
            'ci_upper': hi,
            'error_lower': center - lo,
            'error_upper': hi - center,
            'validated': all_validated,
        })

results_df = pd.DataFrame(results)

# Order the grid by triton speedup, fastest first; benchmarks without a
# triton result are appended afterwards in alphabetical order.
_triton = results_df.loc[results_df['framework'] == 'triton', ['benchmark', 'speedup']]
benchmarks = _triton.sort_values('speedup', ascending=False)['benchmark'].tolist()
benchmarks += sorted(b for b in benchmarks_unsorted if b not in benchmarks)
n_benchmarks = len(benchmarks)

n_cols = args.cols
n_rows = args.rows
fig_width = 2.5 * n_cols
fig_height = 2.5 * n_rows
# squeeze=False guarantees a 2-D ndarray of Axes for every grid shape.
# (The previous reshape-based special-casing crashed for a 1x1 grid, where
# plt.subplots returns a bare Axes object that has no .reshape.)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(fig_width, fig_height),
                         squeeze=False)

# Compute a shared y-range per grid row so subplots in the same row are
# directly comparable: the range covers every CI endpoint in the row,
# padded out to the surrounding powers of ten.
row_y_limits = []
for row in range(n_rows):
    row_min = float('inf')
    row_max = float('-inf')
    for col in range(n_cols):
        idx = row * n_cols + col
        if idx >= len(benchmarks):
            continue
        benchmark = benchmarks[idx]
        bench_results = results_df[results_df['benchmark'] == benchmark]
        for fw in frmwrks:
            fw_data = bench_results[bench_results['framework'] == fw]
            if len(fw_data) > 0:
                speedup = fw_data['speedup'].values[0]
                err_low = fw_data['error_lower'].values[0]
                err_up = fw_data['error_upper'].values[0]
                row_min = min(row_min, speedup - err_low)
                row_max = max(row_max, speedup + err_up)
    # Fallback range for rows that contain no data at all.
    if row_min == float('inf'):
        row_min = 0.1
    if row_max == float('-inf'):
        row_max = 10
    # Pad ~3x above / ~3.3x below and snap to powers of ten; the lower
    # bound is clamped at 1e-3 so the log axis stays finite.
    y_max = 10 ** math.ceil(math.log10(row_max * 3))
    y_min = 10 ** math.floor(math.log10(max(row_min * 0.3, 0.001)))
    row_y_limits.append((y_min, y_max))

axes_flat = axes.flatten()

# Fixed color per framework; anything not listed falls back to gray.
framework_colors = {
    'cupy': '#17becf',
    'dace_cpu': '#1f77b4',
    'dace_gpu': '#9467bd',
    'numba': '#1f77b4',
    'pythran': '#2ca02c',
    'triton': '#ff7f0e',
    'jax': '#d62728',
}
color_map = {}
for fw in frmwrks:
    color_map[fw] = framework_colors.get(fw, '#808080')

# --- Draw one subplot per benchmark ------------------------------------------
for idx, benchmark in enumerate(benchmarks):
    if idx >= n_rows * n_cols:
        break  # more benchmarks than grid cells: truncate

    row = idx // n_cols
    ax = axes_flat[idx]
    bench_results = results_df[results_df['benchmark'] == benchmark]

    # Collect per-framework values for this benchmark; frameworks with no
    # result are skipped, compacting the bars to the left.
    x_positions = []
    speedups = []
    bar_colors = []
    labels = []
    errors_lower = []
    errors_upper = []
    validated_list = []

    # NOTE: unused enumerate index removed from the original loop.
    for fw in frmwrks:
        fw_data = bench_results[bench_results['framework'] == fw]
        if len(fw_data) > 0:
            x_positions.append(len(labels))
            speedups.append(fw_data['speedup'].values[0])
            bar_colors.append(color_map[fw])
            labels.append(fw)
            errors_lower.append(fw_data['error_lower'].values[0])
            errors_upper.append(fw_data['error_upper'].values[0])
            validated_list.append(fw_data['validated'].values[0])

    if speedups:
        # Bars grow away from the y=1 baseline: upward for speedups >= 1,
        # downward (towards zero) for slowdowns.
        bottoms = []
        heights = []
        for s in speedups:
            if s >= 1:
                bottoms.append(1)
                heights.append(s - 1)
            else:
                bottoms.append(s)
                heights.append(1 - s)

        ax.bar(x_positions, heights, bottom=bottoms, color=bar_colors, width=0.7)
        ax.axhline(y=1, color='black', linestyle='--', linewidth=0.5, alpha=0.5)
        ax.set_yscale('log')

        # Show the decade exponent instead of the raw tick value.
        ax.yaxis.set_major_formatter(FuncFormatter(lambda y, _: f'{int(np.log10(y))}' if y > 0 else ''))

        # Mark results that failed validation with a red 'x'.
        for pos, speedup, validated in zip(x_positions, speedups, validated_list):
            if not validated:
                ax.scatter(pos, speedup, marker='x', s=50, c='red', linewidths=1.5, zorder=5)

        # 95% bootstrap confidence interval of the median speedup.
        for pos, speedup, err_low, err_up in zip(x_positions, speedups, errors_lower, errors_upper):
            ax.errorbar(pos, speedup, yerr=[[err_low], [err_up]], fmt='none',
                        ecolor='black', capsize=2, capthick=1, elinewidth=1, zorder=10)

        # Numeric label beside each bar; scientific notation below 0.1x.
        for pos, speedup in zip(x_positions, speedups):
            label_y = speedup * 1.3 if speedup >= 1 else speedup * 0.7
            if speedup < 0.1:
                label = f'{speedup:.0e}x'
            else:
                label = f'{speedup:.1f}x'
            ax.text(pos, label_y, label, ha='left', va='bottom' if speedup >= 1 else 'top',
                    fontsize=5, fontweight='bold', rotation=45)

        # Shared y-range for the whole grid row (precomputed above).
        ax.set_ylim(row_y_limits[row])

    ax.set_title(benchmark, fontsize=9, fontweight='bold')
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels([fw[:3] for fw in labels], fontsize=7, rotation=45)
    ax.tick_params(axis='y', labelsize=7)
    ax.set_ylabel('log10(Speedup)', fontsize=6)
    ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.3)

# Hide any leftover grid cells beyond the number of benchmarks.
for idx in range(len(benchmarks), n_rows * n_cols):
    axes_flat[idx].axis('off')

fig.suptitle('log10(Speedup) over NumPy by Benchmark', fontsize=16, fontweight='bold', y=0.995)

# Figure-level legend: one colored patch per framework, plus proxy artists
# for the 'not validated' marker and the CI error bars.
legend_elements = [plt.Rectangle((0,0),1,1, facecolor=color_map[fw], label=fw) for fw in frmwrks]
legend_elements.append(Line2D([0], [0], marker='x', color='w', markerfacecolor='red',
                              markeredgecolor='red', markersize=8, markeredgewidth=2, label='Not validated'))
legend_elements.append(Line2D([0], [0], color='black', marker='_', markersize=10,
                              markeredgewidth=1.5, label='95% CI'))
legend = fig.legend(handles=legend_elements, loc='lower center', ncol=len(frmwrks) + 2,
                    fontsize=12, bbox_to_anchor=(0.5, 0.01), frameon=True,
                    fancybox=True, shadow=True, borderpad=1)
legend.get_frame().set_linewidth(1.5)
legend.get_frame().set_edgecolor('gray')

# Reserve space at the bottom for the legend and at the top for the suptitle.
plt.tight_layout(rect=[0, 0.04, 1, 0.98])

# Write both vector (pdf) and raster (png) versions of the figure.
filename_base = f'benchmark_grid_{n_rows}x{n_cols}'
plt.savefig(f'{filename_base}.pdf', dpi=300, bbox_inches='tight')
print(f"Saved {filename_base}.pdf")
plt.savefig(f'{filename_base}.png', dpi=300, bbox_inches='tight')
print(f"Saved {filename_base}.png")