Skip to content

Commit ad36e40

Browse files
committed
feat: H20 support
Signed-off-by: LyleLuo <[email protected]>
1 parent d11c0cf commit ad36e40

File tree

10 files changed

+57
-0
lines changed

10 files changed

+57
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:babed36b408253a4ba9ea5031e9314f5cea2f7dd0b849f475d758bf36a45c9fa
3+
size 35171
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:4f0fe8ca8f79fd0b68d567b9cd1576599bb9c53eb5239a9161ca7c6efc56444a
3+
size 6374897
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:abdc05a5446f6f6aca47ab284237e1556cc0543469a5d18e74cb67b2405ec73e
3+
size 459376
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:ffc0b4cd20fd98e79b9d455f3f4a5f7322fec8b8f68ff0ed62a9040afb6eefe6
3+
size 5787
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:29ae0a32a2e37223971db4dc7bb93212787e520117ceea4b1751b0ee58e5ff48
3+
size 9486670
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:12cacb85ed0375d42a95af8a7ee5679e7071cc27dcabd8df81bba676b89f6e8c
3+
size 4543889
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:1824c4fa7a9dfdefa0d63d714081a1ad49bff81b65b064f739697b9bc518ee36
3+
size 993378
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:c843b2e7088a9db80f4a054215456861056731fd02bb6193004ef00d3e6ec660
3+
size 136602
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:358c2b6ed8e4e1a47be248a582d8a7e61dd16c8f8a3c4fb76e34714cfdb98ff1
3+
size 667000
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
data_dir: data/h20_3e # relative to systems_dir
5+
gpu:
6+
mem_bw: 4917000000000 # 4917GB/s
7+
mem_bw_empirical_scaling_factor: 0.8 # unofficial empirical correction based on observations; tune it against your own measurements
8+
mem_empirical_constant_latency: 0.000003 # 3us; unofficial empirical correction based on observations; tune it against your own measurements
9+
mem_capacity: 151397597184 # 141GiB
10+
float16_tc_flops: 148000000000000 # 148TFLOPS
11+
int8_tc_flops: 296000000000000 # 296TFLOPS
12+
fp8_tc_flops: 296000000000000 # 296TFLOPS
13+
power: 500 # Watt
14+
sm_version: 90
15+
16+
node:
17+
num_gpus_per_node: 8
18+
inter_node_bw: 25000000000 # Byte/s per GPU, single direction, 1:1 CX7 per node
19+
intra_node_bw: 450000000000 # Byte/s per gpu, single direction
20+
pcie_bw: 64000000000 # Byte/s, single direction, pcie 5.0
21+
p2p_latency: 0.00001 # 10us; unofficial empirical correction based on observations; tune it against your own measurements
22+
23+
misc:
24+
nccl_mem: # unofficial empirical correction based on observations; tune it against your own measurements
25+
1: 0
26+
2: 358612992 # 342MiB
27+
4: 411041792 # 392MiB
28+
8: 411041792 # 392MiB
29+
other_mem: 3758096384 # increased from 551MB to 3.5GiB for safer deployment; this covers part of the inaccuracy in the memory calculation
30+
nccl_version: '2.27.3'

0 commit comments

Comments
 (0)