We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent ff637c1, commit ea2a7d6 (Copy full SHA for ea2a7d6)
autoparallel/compute_estimation.py
@@ -76,6 +76,21 @@ class DeviceLimit:
76
torch.int8: 3958 // 2,
77
},
78
),
79
+ DeviceLimit(
80
+ "B200",
81
+ "https://nvdam.widen.net/s/wwnsxrhm2w/blackwell-datasheet-3384703",
82
+ sm=(10, 0),
83
+ gmem_bandwidth=7.7 * (1024**4),
84
+ gemm_tflops={
85
+ torch.float64: 37,
86
+ # NOTE: NVIDIA quotes the numbers below "with 2:4 sparsity",
87
+ # but we want the dense (non-sparse) GEMM numbers, hence the // 2
88
+ torch.float32: 2200 // 2,
89
+ torch.float16: 4500 // 2,
90
+ torch.bfloat16: 4500 // 2,
91
+ torch.int8: 9000 // 2,
92
+ },
93
+ ),
94
DeviceLimit(
95
"A100",
96
"https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf",
0 commit comments