Skip to content

Commit d9ee5ab

Browse files
authored
Merge pull request #5344 from Akshay-Venkatesh/topic/update-cuda-ipc-iface
UCT/CUDA: Update cuda-ipc bandwidth for Ampere
2 parents 41c8475 + a2f46a5 commit d9ee5ab

File tree

2 files changed

+12
-5
lines changed

2 files changed

+12
-5
lines changed

src/uct/cuda/base/cuda_iface.h

+3 -2
Original file line number | Diff line number | Diff line change
@@ -76,8 +76,9 @@
7676

7777

7878
typedef enum uct_cuda_base_gen {
79-
UCT_CUDA_BASE_GEN_PASCAL = 6,
80-
UCT_CUDA_BASE_GEN_VOLTA = 7
79+
UCT_CUDA_BASE_GEN_P100 = 6,
80+
UCT_CUDA_BASE_GEN_V100 = 7,
81+
UCT_CUDA_BASE_GEN_A100 = 8
8182
} uct_cuda_base_gen_t;
8283

8384

src/uct/cuda/cuda_ipc/cuda_ipc_iface.c

+9 -3
Original file line number | Diff line number | Diff line change
@@ -100,12 +100,18 @@ static double uct_cuda_ipc_iface_get_bw()
100100
return 0;
101101
}
102102

103-
/* TODO: Detect nvswitch */
103+
/*
104+
* TODO: Detect nvswitch
105+
* TODO: Not reporting peak unidirectional bandwidth to avoid dropping other
106+
* transports like cma/knem/ib in rma_bw_lanes
107+
*/
104108
switch (major_version) {
105-
case UCT_CUDA_BASE_GEN_PASCAL:
109+
case UCT_CUDA_BASE_GEN_P100:
106110
return 20000.0 * UCS_MBYTE;
107-
case UCT_CUDA_BASE_GEN_VOLTA:
111+
case UCT_CUDA_BASE_GEN_V100:
108112
return 25000.0 * UCS_MBYTE;
113+
case UCT_CUDA_BASE_GEN_A100:
114+
return 30000.0 * UCS_MBYTE;
109115
default:
110116
return 6911.0 * UCS_MBYTE;
111117
}

0 commit comments

Comments
 (0)