Skip to content

Commit 9a56917

Browse files
authored
Merge pull request #10539 from iyastreb/ucp/proto/lane-selection-mini
UCP/PROTO: Minimal version of protocol lane selection
2 parents a5ade35 + 36a7447 commit 9a56917

File tree

15 files changed

+393
-116
lines changed

15 files changed

+393
-116
lines changed

Diff for: src/ucp/proto/proto_common.c

+6-50
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,6 @@ int ucp_proto_common_init_check_err_handling(
4848
UCP_ERR_HANDLING_MODE_NONE);
4949
}
5050

/*
 * Removed side of the diff: this out-of-line definition is deleted from
 * proto_common.c by this commit; an identical body is added to
 * proto_common.inl as a static always-inline function.
 *
 * Returns the rsc_index stored for 'lane' in params->ep_config_key->lanes[].
 * The lane index is only validated by ucs_assert(lane < UCP_MAX_LANES).
 */
51-
ucp_rsc_index_t
52-
ucp_proto_common_get_rsc_index(const ucp_proto_init_params_t *params,
53-
ucp_lane_index_t lane)
54-
{
55-
ucs_assert(lane < UCP_MAX_LANES);
56-
return params->ep_config_key->lanes[lane].rsc_index;
57-
}
58-
5951
static size_t
6052
ucp_proto_common_get_seg_size(const ucp_proto_common_init_params_t *params,
6153
ucp_lane_index_t lane)
@@ -372,6 +364,7 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
372364
UCT_PERF_ATTR_FIELD_SEND_POST_OVERHEAD |
373365
UCT_PERF_ATTR_FIELD_RECV_OVERHEAD |
374366
UCT_PERF_ATTR_FIELD_BANDWIDTH |
367+
UCT_PERF_ATTR_FIELD_PATH_BANDWIDTH |
375368
UCT_PERF_ATTR_FIELD_LATENCY;
376369
perf_attr.operation = params->send_op;
377370

@@ -385,6 +378,9 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
385378
tl_perf->recv_overhead = perf_attr.recv_overhead + params->overhead;
386379
tl_perf->bandwidth = ucp_tl_iface_bandwidth(context,
387380
&perf_attr.bandwidth);
381+
tl_perf->path_ratio = ucp_tl_iface_bandwidth(context,
382+
&perf_attr.path_bandwidth) /
383+
tl_perf->bandwidth;
388384
tl_perf->latency = ucp_tl_iface_latency(context,
389385
&perf_attr.latency) +
390386
params->latency;
@@ -456,41 +452,6 @@ ucp_proto_common_get_lane_perf(const ucp_proto_common_init_params_t *params,
456452
return status;
457453
}
458454

459-
/*
460-
* TODO: This is a quickfix, needed to select lanes for multi-lane RNDV
461-
* protocol in the order of rma_bw_lanes (RMA_BW lanes sorted by score).
462-
* The proper solution is to have a generic mechanism to sort lanes based on
463-
* the calculated performance, implemented in proto_multi.
464-
* This function should be removed once the proper solution is implemented.
465-
*/
/*
 * Removed side of the diff: lane iterator deleted by this commit (the caller
 * now uses ucs_for_each_bit over lane_map instead).
 *
 * Advances to the next candidate lane at or after position 'start' and
 * stores it in *lane. The meaning of the returned position depends on
 * lane_type:
 *
 * - start >= UCP_MAX_LANES: returns UCP_MAX_LANES and leaves *lane untouched.
 * - UCP_LANE_TYPE_RMA_BW: 'start' indexes ep_config_key->rma_bw_lanes[]. The
 *   scan stops at the first entry that is UCP_NULL_LANE or whose bit is set
 *   in lane_map, and the position of that entry is returned. If no entry
 *   matches, the return value is >= ep_config_key->num_lanes and *lane holds
 *   the last entry examined (or is unchanged if the loop body never ran).
 * - any other lane type: 'start' is a lane index; bits below it are cleared
 *   from lane_map and the result of ucs_ffs64_safe() on the remainder is both
 *   stored in *lane and returned.
 *   NOTE(review): the value produced for an empty map depends on
 *   ucs_ffs64_safe(), which is defined elsewhere - confirm.
 */
466-
static inline ucp_lane_index_t
467-
ucp_proto_common_lanes_iter(const ucp_ep_config_key_t *ep_config_key,
468-
ucp_lane_map_t lane_map, ucp_lane_type_t lane_type,
469-
ucp_lane_index_t start, ucp_lane_index_t *lane)
470-
{
471-
if (start >= UCP_MAX_LANES) {
472-
return UCP_MAX_LANES;
473-
}
474-
475-
if (lane_type == UCP_LANE_TYPE_RMA_BW) {
476-
for (; start < ep_config_key->num_lanes; ++start) {
477-
*lane = ep_config_key->rma_bw_lanes[start];
478-
if ((*lane == UCP_NULL_LANE) || (lane_map & UCS_BIT(*lane))) {
479-
break;
480-
}
481-
}
482-
return start;
483-
}
484-
485-
/*
486-
* By default iterate over all lanes in lane_map
487-
* Reset lane_map bits below start position, then find first bit set
488-
*/
489-
lane_map &= ~((1ULL << start) - 1);
490-
*lane = ucs_ffs64_safe(lane_map);
491-
return *lane;
492-
}
493-
494455
ucp_lane_index_t
495456
ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
496457
unsigned flags, ptrdiff_t max_iov_offs,
@@ -505,7 +466,7 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
505466
const ucp_rkey_config_key_t *rkey_config_key = params->rkey_config_key;
506467
const ucp_proto_select_param_t *select_param = params->select_param;
507468
const uct_iface_attr_t *iface_attr;
508-
ucp_lane_index_t lane, num_lanes, i;
469+
ucp_lane_index_t lane, num_lanes;
509470
const uct_md_attr_v2_t *md_attr;
510471
const uct_component_attr_t *cmpt_attr;
511472
ucp_rsc_index_t rsc_index;
@@ -538,12 +499,7 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
538499
}
539500

540501
lane_map = UCS_MASK(ep_config_key->num_lanes) & ~exclude_map;
541-
lane = 0;
542-
for (i = ucp_proto_common_lanes_iter(ep_config_key, lane_map, lane_type,
543-
0, &lane);
544-
(i < ep_config_key->num_lanes) && (lane != UCP_NULL_LANE);
545-
i = ucp_proto_common_lanes_iter(ep_config_key, lane_map, lane_type,
546-
i + 1, &lane)) {
502+
ucs_for_each_bit(lane, lane_map) {
547503
if (num_lanes >= max_lanes) {
548504
break;
549505
}

Diff for: src/ucp/proto/proto_common.h

+25-5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,20 @@
2626
#define UCP_PROTO_MULTI_FRAG_DESC "multi-frag"
2727

2828

/*
 * printf-style format fragment describing a single lane: the lane index
 * ("lane[%d]"), followed by the pieces expanded from
 * UCT_TL_RESOURCE_DESC_FMT, " bw " UCP_PROTO_PERF_FUNC_BW_FMT and
 * UCP_PROTO_TIME_FMT(latency). Those helper macros are defined elsewhere, so
 * the exact conversion specifiers are not visible here.
 * Intended to be paired with UCP_PROTO_LANE_ARG, which supplies the values in
 * the same order.
 */
29+
#define UCP_PROTO_LANE_FMT \
30+
"lane[%d] " UCT_TL_RESOURCE_DESC_FMT " bw " UCP_PROTO_PERF_FUNC_BW_FMT \
31+
UCP_PROTO_TIME_FMT(latency)
32+
33+
/*
 * Argument list matching UCP_PROTO_LANE_FMT, in order: the lane index, the
 * transport resource descriptor of the lane (looked up in
 * worker->context->tl_rscs[] through ucp_proto_common_get_rsc_index()), the
 * lane bandwidth divided by UCS_MBYTE, and the lane latency wrapped in
 * UCP_PROTO_TIME_ARG.
 *
 * _params    - protocol init params; must provide ->worker and be acceptable
 *              to ucp_proto_common_get_rsc_index()
 * _lane      - lane index
 * _lane_perf - pointer to a structure with 'bandwidth' and 'latency' fields
 *
 * Note: _params and _lane each appear twice in the expansion, so arguments
 * with side effects would be evaluated more than once.
 * NOTE(review): this header macro calls ucp_proto_common_get_rsc_index(),
 * whose prototype is removed from this header by the same commit - users
 * presumably need proto_common.inl included; confirm.
 */
34+
#define UCP_PROTO_LANE_ARG(_params, _lane, _lane_perf) \
35+
(_lane), \
36+
UCT_TL_RESOURCE_DESC_ARG( \
37+
&(_params)->worker->context->tl_rscs[ \
38+
ucp_proto_common_get_rsc_index(_params, _lane)].tl_rsc), \
39+
(_lane_perf)->bandwidth / UCS_MBYTE, \
40+
UCP_PROTO_TIME_ARG((_lane_perf)->latency)
41+
42+
2943
typedef enum {
3044
/* Send buffer is used by zero-copy operations */
3145
UCP_PROTO_COMMON_INIT_FLAG_SEND_ZCOPY = UCS_BIT(0),
@@ -148,6 +162,9 @@ typedef struct {
148162
/* Transport bandwidth (without protocol memory copies) */
149163
double bandwidth;
150164

165+
/* Ratio of the single-path bandwidth to the full transport bandwidth */
166+
double path_ratio;
167+
151168
/* Network latency */
152169
double latency;
153170

@@ -162,6 +179,14 @@ typedef struct {
162179
} ucp_proto_common_tl_perf_t;
163180

164181

/*
 * Lane selection record added by this commit. Its users are not visible in
 * this diff, so the field descriptions below are assumptions to be confirmed:
 *
 * lane_map  - presumably a bitmap of the selected lanes
 * lanes     - presumably the selected lane indexes, up to UCP_PROTO_MAX_LANES
 * num_lanes - presumably the number of valid entries in lanes[]
 * dev_count - per-entry 8-bit counters, UCP_MAX_RESOURCES entries; presumably
 *             indexed by device index to count selected lanes per device
 */
182+
typedef struct {
183+
ucp_lane_map_t lane_map;
184+
ucp_lane_index_t lanes[UCP_PROTO_MAX_LANES];
185+
ucp_lane_index_t num_lanes;
186+
uint8_t dev_count[UCP_MAX_RESOURCES];
187+
} ucp_proto_lane_selection_t;
188+
189+
165190
/* Private data per lane */
166191
typedef struct {
167192
ucp_lane_index_t lane; /* Lane index in the endpoint */
@@ -211,11 +236,6 @@ ucp_memory_info_t ucp_proto_common_select_param_mem_info(
211236
int ucp_proto_common_init_check_err_handling(
212237
const ucp_proto_common_init_params_t *init_params);
213238

214-
215-
ucp_rsc_index_t
216-
ucp_proto_common_get_rsc_index(const ucp_proto_init_params_t *params,
217-
ucp_lane_index_t lane);
218-
219239
void ucp_proto_common_lane_priv_init(const ucp_proto_common_init_params_t *params,
220240
ucp_md_map_t md_map, ucp_lane_index_t lane,
221241
ucp_proto_common_lane_priv_t *lane_priv);

Diff for: src/ucp/proto/proto_common.inl

+16
Original file line numberDiff line numberDiff line change
@@ -391,4 +391,20 @@ ucp_proto_request_pack_rkey(ucp_request_t *req, ucp_md_map_t md_map,
391391
return packed_rkey_size;
392392
}
393393

/*
 * Returns the resource index recorded for the given lane in the endpoint
 * configuration key (params->ep_config_key->lanes[lane].rsc_index).
 *
 * params - protocol init params providing ep_config_key
 * lane   - lane index; only checked by ucs_assert(lane < UCP_MAX_LANES)
 *
 * Added here as a static always-inline function; the same body is removed
 * from proto_common.c by this commit.
 */
394+
static UCS_F_ALWAYS_INLINE ucp_rsc_index_t
395+
ucp_proto_common_get_rsc_index(const ucp_proto_init_params_t *params,
396+
ucp_lane_index_t lane)
397+
{
398+
ucs_assert(lane < UCP_MAX_LANES);
399+
return params->ep_config_key->lanes[lane].rsc_index;
400+
}
401+
/*
 * Returns the dev_index of the transport resource used by the given lane:
 * resolves the lane to a resource index with
 * ucp_proto_common_get_rsc_index(), then reads
 * params->worker->context->tl_rscs[rsc_index].dev_index.
 *
 * params - protocol init params providing ep_config_key and worker
 * lane   - lane index
 *
 * NOTE(review): rsc_index is used to index tl_rscs[] without any check in
 * this function - confirm callers only pass lanes bound to a valid resource.
 */
402+
static UCS_F_ALWAYS_INLINE ucp_rsc_index_t
403+
ucp_proto_common_get_dev_index(const ucp_proto_init_params_t *params,
404+
ucp_lane_index_t lane)
405+
{
406+
ucp_rsc_index_t rsc_index = ucp_proto_common_get_rsc_index(params, lane);
407+
return params->worker->context->tl_rscs[rsc_index].dev_index;
408+
}
409+
394410
#endif

0 commit comments

Comments
 (0)