Skip to content

Commit 57bbc4d

Browse files
committed
add rusty timings
1 parent f40aa83 commit 57bbc4d

4 files changed

Lines changed: 55 additions & 3 deletions

File tree

examples/debug/plot_timing.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import matplotlib.pyplot as plt
33

4-
# Timings on Frontier
4+
# Timings on Frontier & Rusty
55

66
resolutions = np.array(
77
[
@@ -23,6 +23,8 @@
2323
]
2424
)
2525

26+
runtime_rusty = np.array([1.9, 3.0, np.nan, np.nan]) # , np.nan])
27+
2628
num_nodes = np.array([1, 1, 8, 64]) # 512])
2729
num_gpus = np.array([1, 8, 64, 512]) # 4096]) # really the number of GCDs (= 2 * number of GPUs)
2830

@@ -31,15 +33,23 @@
3133

3234
def main():
3335
# Compute billion cell updates per second
34-
bcups = 10 * (resolutions**3) / (runtime * 1.0e9)
36+
nsteps = 10
37+
bcups = nsteps * (resolutions**3) / (runtime * 1.0e9)
3538
# bcups_no_awsofirccl = 10 * (resolutions**3) / (runtime_no_awsofirccl * 1.0e9)
39+
bcups_rusty = nsteps * (resolutions**3) / (runtime_rusty * 1.0e9)
3640

3741
plt.figure(figsize=(4, 4))
3842
plt.plot(
3943
resolutions,
4044
bcups,
4145
marker="s",
42-
label="MI250X",
46+
label="MI250X (Frontier)",
47+
)
48+
plt.plot(
49+
resolutions,
50+
bcups_rusty,
51+
marker="o",
52+
label="H100 (Rusty)",
4353
)
4454
for i, (res, bcup, nodes, gpus) in enumerate(
4555
zip(resolutions, bcups, num_nodes, num_gpus)

examples/debug/sbatch_rusty_M.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/bash
2+
#SBATCH --job-name=jaxionM
3+
#SBATCH --output=slurm-%j.out
4+
#SBATCH --error=slurm-%j.err
5+
#SBATCH --partition gpu
6+
#SBATCH --constraint=h100
7+
#SBATCH --nodes=1
8+
#SBATCH --ntasks-per-node=8
9+
#SBATCH --gpus-per-task=1
10+
#SBATCH --cpus-per-task=8
11+
#SBATCH --mem=80G
12+
#SBATCH --time=00-00:05
13+
14+
module purge
15+
module load python/3.12.9
16+
17+
export PYTHONUNBUFFERED=TRUE
18+
19+
source $VENVDIR/jaxion-venv/bin/activate
20+
21+
srun --gpu-bind=none --cpu-bind=cores python debug.py --res=16 --distributed

examples/debug/sbatch_rusty_S.sh

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/bash
2+
#SBATCH --job-name=jaxionS
3+
#SBATCH --output=slurm-%j.out
4+
#SBATCH --error=slurm-%j.err
5+
#SBATCH --partition gpu
6+
#SBATCH --constraint=h100
7+
#SBATCH --nodes=1
8+
#SBATCH --ntasks-per-node=1
9+
#SBATCH --gpus-per-task=1
10+
#SBATCH --cpus-per-task=8
11+
#SBATCH --mem=80G
12+
#SBATCH --time=00-00:05
13+
14+
module purge
15+
module load python/3.12.9
16+
17+
export PYTHONUNBUFFERED=TRUE
18+
19+
source $VENVDIR/jaxion-venv/bin/activate
20+
21+
srun --gpu-bind=none --cpu-bind=cores python debug.py --res=16

examples/debug/timing.png

14.6 KB
Loading

0 commit comments

Comments
 (0)