Skip to content

Commit ea2a7d6

Browse files
authored
Add device limit for B200 GPU (#207)
1 parent ff637c1 commit ea2a7d6

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

autoparallel/compute_estimation.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,21 @@ class DeviceLimit:
7676
torch.int8: 3958 // 2,
7777
},
7878
),
79+
DeviceLimit(
80+
"B200",
81+
"https://nvdam.widen.net/s/wwnsxrhm2w/blackwell-datasheet-3384703",
82+
sm=(10, 0),
83+
gmem_bandwidth=7.7 * (1024**4),
84+
gemm_tflops={
85+
torch.float64: 37,
86+
# NOTE: NVIDIA gives the numbers below "with 2:4 sparsity"
87+
# but we want the dense (non-sparse) GEMM numbers, hence the `// 2`
88+
torch.float32: 2200 // 2,
89+
torch.float16: 4500 // 2,
90+
torch.bfloat16: 4500 // 2,
91+
torch.int8: 9000 // 2,
92+
},
93+
),
7994
DeviceLimit(
8095
"A100",
8196
"https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf",

0 commit comments

Comments
 (0)