Skip to content

Commit 1c1b1bd

Browse files
authored
Merge pull request #10401 from rakhmets/topic/proto-consider-rndv-perf-diff
UCP/PROTO: Consider RNDV_PERF_DIFF
2 parents 969ed1d + 8d06777 commit 1c1b1bd

File tree

4 files changed

+99
-18
lines changed

4 files changed

+99
-18
lines changed

Diff for: src/ucp/proto/proto_perf.c

+54 -12
Original file line number | Diff line number | Diff line change
@@ -156,26 +156,42 @@ static ucs_status_t ucp_proto_perf_segment_split(const ucp_proto_perf_t *perf,
156156
return UCS_OK;
157157
}
158158

159+
/*
 * Record one named performance factor on a performance node.
 *
 * @param perf_node         Node handed to ucp_proto_perf_node_update_data().
 * @param perf_factor_name  Name under which the factor is recorded.
 * @param perf_factor       Linear function describing the factor.
 *
 * Nothing is recorded when ucs_linear_func_is_zero() reports @a perf_factor
 * as zero within UCP_PROTO_PERF_EPSILON.
 */
static void ucp_proto_perf_node_update_factor(ucp_proto_perf_node_t *perf_node,
160+
const char *perf_factor_name,
161+
ucs_linear_func_t perf_factor)
162+
{
163+
/* Zero factors are skipped, so the node data is left untouched */
if (ucs_linear_func_is_zero(perf_factor, UCP_PROTO_PERF_EPSILON)) {
164+
return;
165+
}
166+
167+
ucp_proto_perf_node_update_data(perf_node, perf_factor_name, perf_factor);
168+
}
169+
159170
/*
 * Record every entry of @a perf_factors on @a perf_node, walking factor ids
 * from 0 up to (but not including) UCP_PROTO_PERF_FACTOR_LAST. Each factor is
 * recorded under its name from ucp_proto_perf_factor_names[].
 *
 * In this diff the lines marked "-" are the removed inline zero check and
 * node update; the lines marked "+" replace them with a call to
 * ucp_proto_perf_node_update_factor(), which performs the same zero check.
 */
static void
160171
ucp_proto_perf_node_update_factors(ucp_proto_perf_node_t *perf_node,
161172
const ucp_proto_perf_factors_t perf_factors)
162173
{
163174
ucp_proto_perf_factor_id_t factor_id;
164-
ucs_linear_func_t perf_factor;
165175

166176
/* Add the functions to the segment and the performance node */
167177
for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST; ++factor_id) {
168-
perf_factor = perf_factors[factor_id];
169-
if (ucs_linear_func_is_zero(perf_factor, UCP_PROTO_PERF_EPSILON)) {
170-
continue;
171-
}
172-
173-
ucp_proto_perf_node_update_data(perf_node,
174-
ucp_proto_perf_factor_names[factor_id],
175-
perf_factors[factor_id]);
178+
ucp_proto_perf_node_update_factor(perf_node,
179+
ucp_proto_perf_factor_names[factor_id],
180+
perf_factors[factor_id]);
176181
}
177182
}
178183

184+
/*
 * Set one performance factor of a segment and mirror it on the segment's
 * performance node.
 *
 * @param seg          Segment whose perf_factors[] entry is overwritten.
 * @param factor_id    Index of the factor to set; also selects the name from
 *                     ucp_proto_perf_factor_names[].
 * @param perf_factor  New value of the factor.
 *
 * The segment entry is always overwritten. The node is updated through
 * ucp_proto_perf_node_update_factor(), which returns without recording
 * anything for a zero function.
 * NOTE(review): if a factor becomes zero, the node presumably keeps whatever
 * was recorded before - confirm this is intended.
 */
static void
185+
ucp_proto_perf_segment_update_factor(ucp_proto_perf_segment_t *seg,
186+
ucp_proto_perf_factor_id_t factor_id,
187+
ucs_linear_func_t perf_factor)
188+
{
189+
seg->perf_factors[factor_id] = perf_factor;
190+
ucp_proto_perf_node_update_factor(seg->node,
191+
ucp_proto_perf_factor_names[factor_id],
192+
perf_factor);
193+
}
194+
179195
static void
180196
ucp_proto_perf_segment_add_funcs(ucp_proto_perf_t *perf,
181197
ucp_proto_perf_segment_t *seg,
@@ -190,11 +206,12 @@ ucp_proto_perf_segment_add_funcs(ucp_proto_perf_t *perf,
190206

191207
/* Add the functions to the segment and the performance node */
192208
for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST; ++factor_id) {
193-
ucs_linear_func_add_inplace(&seg->perf_factors[factor_id],
194-
perf_factors[factor_id]);
209+
ucp_proto_perf_segment_update_factor(
210+
seg, factor_id,
211+
ucs_linear_func_add(seg->perf_factors[factor_id],
212+
perf_factors[factor_id]));
195213
}
196214

197-
ucp_proto_perf_node_update_factors(seg->node, seg->perf_factors);
198215
ucp_proto_perf_node_add_child(seg->node, perf_node);
199216
}
200217

@@ -430,6 +447,31 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
430447
return ucp_proto_perf_aggregate(name, perf_elems, 2, perf_p);
431448
}
432449

450+
/*
 * Apply @a func to every performance factor of every segment of @a perf.
 *
 * For each segment, every factor is replaced with
 * ucs_linear_func_compose(func, <current factor>). A new data node named
 * @a name, described by @a desc_fmt and the variadic arguments, is then
 * created and passed to ucp_proto_perf_node_own_child() together with the
 * segment's node.
 */
void ucp_proto_perf_apply_func(ucp_proto_perf_t *perf, ucs_linear_func_t func,
451+
const char *name, const char *desc_fmt, ...)
452+
{
453+
ucp_proto_perf_segment_t *seg;
454+
ucp_proto_perf_factor_id_t factor_id;
455+
va_list ap;
456+
ucp_proto_perf_node_t *func_node;
457+
458+
ucp_proto_perf_segment_foreach(seg, perf) {
459+
for (factor_id = 0; factor_id < UCP_PROTO_PERF_FACTOR_LAST;
460+
++factor_id) {
461+
ucp_proto_perf_segment_update_factor(
462+
seg, factor_id,
463+
ucs_linear_func_compose(func,
464+
seg->perf_factors[factor_id]));
465+
}
466+
467+
/* The va_list is restarted for every segment: once it has been passed to
 * another function its value is indeterminate, and a separate node is
 * created per segment. */
va_start(ap, desc_fmt);
468+
func_node = ucp_proto_perf_node_new_data(name, desc_fmt, ap);
469+
va_end(ap);
470+
471+
ucp_proto_perf_node_own_child(seg->node, &func_node);
472+
}
473+
}
474+
433475
/* TODO:
434476
* Reconsider correctness of PPLN perf estimation logic since in case of async
435477
* operations it seems wrong to choose the longest factor without paying

Diff for: src/ucp/proto/proto_perf.h

+15
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,21 @@ ucs_status_t ucp_proto_perf_aggregate2(const char *name,
159159
ucp_proto_perf_t **perf_p);
160160

161161

162+
/**
163+
* Apply function to the performance factors of the given performance structure.
164+
*
165+
* @param [in,out] perf Performance data structure to update.
166+
* @param [in] func Function to apply to the performance factors of the
167+
* @a perf performance structure.
168+
* @param [in] name Name for the performance node that would be created
169+
* to represent the impact of @a func.
170+
* @param [in] desc_fmt Formatted description for the performance node that
171+
* would be created to represent the impact of @a func.
* @param [in] ... Arguments for the @a desc_fmt format string.
172+
*/
173+
void ucp_proto_perf_apply_func(ucp_proto_perf_t *perf, ucs_linear_func_t func,
174+
const char *name, const char *desc_fmt, ...);
175+
176+
162177
/**
163178
* Expand given perf by estimation that all messages on interval
164179
* [end of @a frag_seg + 1, @a max_length] would be sent in a pipeline async

Diff for: src/ucp/rndv/proto_rndv.c

+7
Original file line number | Diff line number | Diff line change
@@ -403,6 +403,13 @@ static void ucp_proto_rndv_ctrl_variant_probe(
403403
cfg_thresh = remote_proto->cfg_thresh;
404404
}
405405

406+
if (fabs(params->perf_bias) > UCP_PROTO_PERF_EPSILON) {
407+
ucp_proto_perf_apply_func(perf,
408+
ucs_linear_func_make(0.0,
409+
1.0 - params->perf_bias),
410+
"bias", "%.2f %%", params->perf_bias);
411+
}
412+
406413
ucp_proto_select_add_proto(&params->super.super, cfg_thresh, cfg_priority,
407414
perf, rpriv, priv_size);
408415

Diff for: test/gtest/ucp/test_ucp_proto_mock.cc

+23 -6
Original file line number | Diff line number | Diff line change
@@ -366,14 +366,31 @@ class test_ucp_proto_mock_rcx : public test_ucp_proto_mock {
366366
}
367367
};
368368

369-
UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane,
370-
"IB_NUM_PATHS?=1", "MAX_RNDV_LANES=1")
369+
/*
 * Checks the sender endpoint configuration for an AM send key
 * (op_id_flags = UCP_OP_ID_AM_SEND, op_attr = 0) when the test runs with
 * IB_NUM_PATHS?=1 and MAX_RNDV_LANES=1.
 * NOTE(review): each table row presumably reads
 * {min size, max size, protocol description, transport/device} - confirm
 * against check_ep_config().
 */
UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane, "IB_NUM_PATHS?=1",
370+
"MAX_RNDV_LANES=1")
371371
{
372372
ucp_proto_select_key_t key = any_key();
373373
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
374374
key.param.op_attr = 0;
375375

376376
/* Prefer mock_0:1 iface for RNDV because it has larger BW */
377+
check_ep_config(sender(), {
378+
{0, 200, "short", "rc_mlx5/mock_1:1"},
379+
{201, 6650, "copy-in", "rc_mlx5/mock_1:1"},
380+
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1"},
381+
{8247, 21991, "multi-frag zero-copy", "rc_mlx5/mock_1:1"},
382+
{21992, INF, "rendezvous zero-copy read from remote",
383+
"rc_mlx5/mock_0:1"},
384+
}, key);
385+
}
386+
387+
UCS_TEST_P(test_ucp_proto_mock_rcx, zero_rndv_perf_diff, "IB_NUM_PATHS?=1",
388+
"MAX_RNDV_LANES=1", "RNDV_PERF_DIFF=0")
389+
{
390+
ucp_proto_select_key_t key = any_key();
391+
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
392+
key.param.op_attr = 0;
393+
377394
check_ep_config(sender(), {
378395
{0, 200, "short", "rc_mlx5/mock_1:1"},
379396
{201, 6650, "copy-in", "rc_mlx5/mock_1:1"},
@@ -384,8 +401,8 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_1_lane,
384401
}, key);
385402
}
386403

387-
UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes,
388-
"IB_NUM_PATHS?=2", "MAX_RNDV_LANES=2")
404+
UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes, "IB_NUM_PATHS?=2",
405+
"MAX_RNDV_LANES=2")
389406
{
390407
ucp_proto_select_key_t key = any_key();
391408
key.param.op_id_flags = UCP_OP_ID_AM_SEND;
@@ -396,8 +413,8 @@ UCS_TEST_P(test_ucp_proto_mock_rcx, rndv_2_lanes,
396413
{0, 200, "short", "rc_mlx5/mock_1:1/path0"},
397414
{201, 6650, "copy-in", "rc_mlx5/mock_1:1/path0"},
398415
{6651, 8246, "zero-copy", "rc_mlx5/mock_1:1/path0"},
399-
{8247, 20300, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
400-
{20301, INF, "rendezvous zero-copy read from remote",
416+
{8247, 19883, "multi-frag zero-copy", "rc_mlx5/mock_1:1/path0"},
417+
{19884, INF, "rendezvous zero-copy read from remote",
401418
"47% on rc_mlx5/mock_1:1/path0 and 53% on rc_mlx5/mock_0:1/path0"},
402419
}, key);
403420
}

0 commit comments

Comments
 (0)