Skip to content

Commit 47a5c47

Browse files
committed
UCT/IB/EFA: Add EFA MD as an IB module with UD support
1 parent 55290bb commit 47a5c47

36 files changed

+656
-136
lines changed

configure.ac

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ AS_IF([test "x$with_docs_only" = xyes],
185185
AM_CONDITIONAL([HAVE_STATS], [false])
186186
AM_CONDITIONAL([HAVE_TUNING], [false])
187187
AM_CONDITIONAL([HAVE_IB], [false])
188+
AM_CONDITIONAL([HAVE_EFA], [false])
188189
AM_CONDITIONAL([HAVE_MLX5_HW_UD], [false])
189190
AM_CONDITIONAL([HAVE_MLX5_DV], [false])
190191
AM_CONDITIONAL([HAVE_MLX5_MMO], [false])

contrib/buildrpm.sh

+1
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ if [ $opt_binrpm -eq 1 ]; then
116116
with_args+=" $(with_arg fuse)"
117117
with_args+=" $(with_arg mad)"
118118
with_args+=" $(with_arg mlx5)"
119+
with_args+=" $(with_arg efa)"
119120

120121
echo rpmbuild -bb $rpmmacros $rpmopts $rpmspec $defines $with_args | bash -eEx
121122
fi

src/ucp/am/eager_single.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ ucp_am_eager_short_probe_common(const ucp_proto_init_params_t *init_params,
100100
.super.cfg_priority = 0,
101101
.super.min_length = 0,
102102
.super.max_length = SIZE_MAX,
103-
.super.min_iov = 0,
103+
.super.min_iov = 3,
104104
.super.min_frag_offs = UCP_PROTO_COMMON_OFFSET_INVALID,
105105
.super.max_frag_offs = ucs_offsetof(uct_iface_attr_t, cap.am.max_short),
106-
.super.max_iov_offs = UCP_PROTO_COMMON_OFFSET_INVALID,
106+
.super.max_iov_offs = ucs_offsetof(uct_iface_attr_t, cap.am.max_iov),
107107
.super.hdr_size = ucp_am_eager_single_hdr_size(op_id),
108108
.super.send_op = UCT_EP_OP_AM_SHORT,
109109
.super.memtype_op = UCT_EP_OP_LAST,

src/ucp/core/ucp_am.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@ ucp_am_send_short(ucp_ep_h ep, uint16_t id, uint16_t flags, const void *header,
479479
int is_reply)
480480
{
481481
size_t iov_cnt = 0ul;
482-
uct_iov_t iov[4];
482+
uct_iov_t iov[UCP_AM_SEND_SHORT_MIN_IOV];
483483
uint8_t am_id;
484484
ucp_am_hdr_t am_hdr;
485485
ucp_am_reply_ftr_t ftr;

src/ucp/core/ucp_am.h

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
((ucp_am_hdr_t*)&(_rts)->hdr); \
2020
})
2121

22+
/*
23+
* Apart from protov1/v2, UCP can try to send the specified number of iovs in one
24+
* uct_ep_am_short_iov() call
25+
*/
26+
#define UCP_AM_SEND_SHORT_MIN_IOV 4
2227

2328
enum {
2429
UCP_AM_CB_PRIV_FIRST_FLAG = UCS_BIT(15),

src/ucp/core/ucp_context.c

+67-49
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,10 @@ static ucs_config_field_t ucp_config_table[] = {
650650
ucs_offsetof(ucp_config_t, ctx),
651651
UCS_CONFIG_TYPE_TABLE(ucp_context_config_table)},
652652

653+
{"MAX_COMPONENT_MDS", "16",
654+
"Maximum number of memory domains per component to use.",
655+
ucs_offsetof(ucp_config_t, max_component_mds), UCS_CONFIG_TYPE_ULUNITS},
656+
653657
{NULL}
654658
};
655659
UCS_CONFIG_DECLARE_TABLE(ucp_config_table, "UCP context", NULL, ucp_config_t)
@@ -1561,6 +1565,7 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
15611565
const ucs_string_set_t *aux_tls)
15621566
{
15631567
const ucp_tl_cmpt_t *tl_cmpt = &context->tl_cmpts[cmpt_index];
1568+
size_t avail_mds = config->max_component_mds;
15641569
uct_component_attr_t uct_component_attr;
15651570
unsigned num_tl_resources;
15661571
ucs_status_t status;
@@ -1572,7 +1577,8 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
15721577
const uct_md_attr_v2_t *md_attr;
15731578

15741579
/* List memory domain resources */
1575-
uct_component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES;
1580+
uct_component_attr.field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES |
1581+
UCT_COMPONENT_ATTR_FIELD_NAME;
15761582
uct_component_attr.md_resources =
15771583
ucs_alloca(tl_cmpt->attr.md_resource_count *
15781584
sizeof(*uct_component_attr.md_resources));
@@ -1584,6 +1590,14 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
15841590
/* Open all memory domains */
15851591
mem_type_mask = UCS_BIT(UCS_MEMORY_TYPE_HOST);
15861592
for (i = 0; i < tl_cmpt->attr.md_resource_count; ++i) {
1593+
if (avail_mds == 0) {
1594+
ucs_debug("only first %zu domains kept for component %s with %u "
1595+
"memory domains resources",
1596+
config->max_component_mds, uct_component_attr.name,
1597+
tl_cmpt->attr.md_resource_count);
1598+
break;
1599+
}
1600+
15871601
md_index = context->num_mds;
15881602
md_attr = &context->tl_mds[md_index].attr;
15891603

@@ -1603,67 +1617,71 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
16031617
goto out;
16041618
}
16051619

1606-
if (num_tl_resources > 0) {
1607-
/* List of memory type MDs */
1608-
mem_type_bitmap = md_attr->detect_mem_types;
1609-
if (~mem_type_mask & mem_type_bitmap) {
1610-
context->mem_type_detect_mds[context->num_mem_type_detect_mds] = md_index;
1611-
++context->num_mem_type_detect_mds;
1612-
mem_type_mask |= mem_type_bitmap;
1613-
}
1620+
if (num_tl_resources == 0) {
1621+
/* If the MD does not have transport resources (device or sockaddr),
1622+
* don't use it */
1623+
ucs_debug("closing md %s because it has no selected transport resources",
1624+
context->tl_mds[md_index].rsc.md_name);
1625+
uct_md_close(context->tl_mds[md_index].md);
1626+
continue;
1627+
}
16141628

1615-
ucs_memory_type_for_each(mem_type) {
1616-
if (md_attr->flags & UCT_MD_FLAG_REG) {
1617-
if ((context->config.ext.reg_nb_mem_types & UCS_BIT(mem_type)) &&
1618-
!(md_attr->reg_nonblock_mem_types & UCS_BIT(mem_type))) {
1619-
if (md_attr->reg_mem_types & UCS_BIT(mem_type)) {
1620-
/* Keep map of MDs supporting blocking registration
1621-
* if non-blocking registration is requested for the
1622-
* given memory type. In some cases blocking
1623-
* registration maybe required anyway (e.g. internal
1624-
* staging buffers for rndv pipeline protocols). */
1625-
context->reg_block_md_map[mem_type] |= UCS_BIT(md_index);
1626-
}
1627-
continue;
1628-
}
1629+
avail_mds--;
1630+
1631+
/* List of memory type MDs */
1632+
mem_type_bitmap = md_attr->detect_mem_types;
1633+
if (~mem_type_mask & mem_type_bitmap) {
1634+
context->mem_type_detect_mds[context->num_mem_type_detect_mds] = md_index;
1635+
++context->num_mem_type_detect_mds;
1636+
mem_type_mask |= mem_type_bitmap;
1637+
}
16291638

1639+
ucs_memory_type_for_each(mem_type) {
1640+
if (md_attr->flags & UCT_MD_FLAG_REG) {
1641+
if ((context->config.ext.reg_nb_mem_types & UCS_BIT(mem_type)) &&
1642+
!(md_attr->reg_nonblock_mem_types & UCS_BIT(mem_type))) {
16301643
if (md_attr->reg_mem_types & UCS_BIT(mem_type)) {
1631-
context->reg_md_map[mem_type] |= UCS_BIT(md_index);
1644+
/* Keep map of MDs supporting blocking registration
1645+
* if non-blocking registration is requested for the
1646+
* given memory type. In some cases blocking
1647+
* registration may be required anyway (e.g. internal
1648+
* staging buffers for rndv pipeline protocols). */
1649+
context->reg_block_md_map[mem_type] |= UCS_BIT(md_index);
16321650
}
1651+
continue;
1652+
}
16331653

1634-
if (md_attr->cache_mem_types & UCS_BIT(mem_type)) {
1635-
context->cache_md_map[mem_type] |= UCS_BIT(md_index);
1636-
}
1654+
if (md_attr->reg_mem_types & UCS_BIT(mem_type)) {
1655+
context->reg_md_map[mem_type] |= UCS_BIT(md_index);
1656+
}
16371657

1638-
if ((context->config.ext.gva_enable != UCS_CONFIG_OFF) &&
1639-
(md_attr->gva_mem_types & UCS_BIT(mem_type))) {
1640-
context->gva_md_map[mem_type] |= UCS_BIT(md_index);
1641-
}
1658+
if (md_attr->cache_mem_types & UCS_BIT(mem_type)) {
1659+
context->cache_md_map[mem_type] |= UCS_BIT(md_index);
16421660
}
1643-
}
16441661

1645-
if (md_attr->flags & UCT_MD_FLAG_EXPORTED_MKEY) {
1646-
context->export_md_map |= UCS_BIT(md_index);
1662+
if ((context->config.ext.gva_enable != UCS_CONFIG_OFF) &&
1663+
(md_attr->gva_mem_types & UCS_BIT(mem_type))) {
1664+
context->gva_md_map[mem_type] |= UCS_BIT(md_index);
1665+
}
16471666
}
1667+
}
16481668

1649-
if (md_attr->flags & UCT_MD_FLAG_REG_DMABUF) {
1650-
context->dmabuf_reg_md_map |= UCS_BIT(md_index);
1651-
}
1669+
if (md_attr->flags & UCT_MD_FLAG_EXPORTED_MKEY) {
1670+
context->export_md_map |= UCS_BIT(md_index);
1671+
}
16521672

1653-
ucs_for_each_bit(mem_type, md_attr->dmabuf_mem_types) {
1654-
/* In case of multiple providers, take the first one */
1655-
if (context->dmabuf_mds[mem_type] == UCP_NULL_RESOURCE) {
1656-
context->dmabuf_mds[mem_type] = md_index;
1657-
}
1673+
if (md_attr->flags & UCT_MD_FLAG_REG_DMABUF) {
1674+
context->dmabuf_reg_md_map |= UCS_BIT(md_index);
1675+
}
1676+
1677+
ucs_for_each_bit(mem_type, md_attr->dmabuf_mem_types) {
1678+
/* In case of multiple providers, take the first one */
1679+
if (context->dmabuf_mds[mem_type] == UCP_NULL_RESOURCE) {
1680+
context->dmabuf_mds[mem_type] = md_index;
16581681
}
1659-
++context->num_mds;
1660-
} else {
1661-
/* If the MD does not have transport resources (device or sockaddr),
1662-
* don't use it */
1663-
ucs_debug("closing md %s because it has no selected transport resources",
1664-
context->tl_mds[md_index].rsc.md_name);
1665-
uct_md_close(context->tl_mds[md_index].md);
16661682
}
1683+
1684+
++context->num_mds;
16671685
}
16681686

16691687
context->mem_type_mask |= mem_type_mask;

src/ucp/core/ucp_context.h

+2
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ struct ucp_config {
240240
ucs_list_link_t cached_key_list;
241241
/** This config environment prefix */
242242
char *env_prefix;
243+
/** Maximum number of memory domains to use per component */
244+
size_t max_component_mds;
243245
};
244246

245247

src/ucp/core/ucp_ep.c

+10-7
Original file line numberDiff line numberDiff line change
@@ -2872,9 +2872,11 @@ ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config,
28722872
iface_attr->cap.am.max_short, sizeof(ucp_am_hdr_t),
28732873
config->am.zcopy_thresh[0], &config->rndv.am_thresh);
28742874

2875-
ucp_ep_config_set_memtype_thresh(&config->am_u.max_eager_short,
2876-
am_max_eager_short,
2877-
context->num_mem_type_detect_mds);
2875+
if (iface_attr->cap.am.max_iov >= UCP_AM_SEND_SHORT_MIN_IOV) {
2876+
ucp_ep_config_set_memtype_thresh(
2877+
&config->am_u.max_eager_short, am_max_eager_short,
2878+
context->num_mem_type_detect_mds);
2879+
}
28782880

28792881
/* All keys must fit in RNDV packet.
28802882
* TODO remove some MDs if they don't
@@ -2907,9 +2909,11 @@ ucs_status_t ucp_ep_config_init(ucp_worker_h worker, ucp_ep_config_t *config,
29072909
sizeof(ucp_am_hdr_t) + sizeof(ucp_am_reply_ftr_t),
29082910
config->am.zcopy_thresh[0], &config->rndv.am_thresh);
29092911

2910-
ucp_ep_config_set_memtype_thresh(&config->am_u.max_reply_eager_short,
2911-
am_max_eager_short,
2912-
context->num_mem_type_detect_mds);
2912+
if (iface_attr->cap.am.max_iov >= UCP_AM_SEND_SHORT_MIN_IOV) {
2913+
ucp_ep_config_set_memtype_thresh(
2914+
&config->am_u.max_reply_eager_short, am_max_eager_short,
2915+
context->num_mem_type_detect_mds);
2916+
}
29132917
} else {
29142918
/* Stub endpoint */
29152919
config->am.max_bcopy = UCP_MIN_BCOPY;
@@ -3889,7 +3893,6 @@ static void ucp_ep_config_proto_init(ucp_worker_h worker,
38893893
{
38903894
ucp_ep_config_t *ep_config = ucp_worker_ep_config(worker, cfg_index);
38913895
ucp_ep_config_key_t *key = &ep_config->key;
3892-
38933896
ucp_memtype_thresh_t *tag_max_short;
38943897
ucp_lane_index_t tag_exp_lane;
38953898
unsigned tag_proto_flags;

src/uct/base/uct_iface.h

-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
* is extended by a system namespace information */
3434
#define UCT_IFACE_LOCAL_ADDR_FLAG_NS UCS_BIT(63)
3535

36-
3736
enum {
3837
UCT_EP_STAT_AM,
3938
UCT_EP_STAT_PUT,

src/uct/ib/Makefile.am

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
if HAVE_IB
77

8-
SUBDIRS = . mlx5 rdmacm
8+
SUBDIRS = . mlx5 efa rdmacm
99

1010
module_LTLIBRARIES = libuct_ib.la
1111
libuct_ib_la_CPPFLAGS = $(BASE_CPPFLAGS) $(IBVERBS_CPPFLAGS)

src/uct/ib/base/ib_device.c

+5
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,11 @@ ucs_status_t uct_ib_device_init(uct_ib_device_t *dev,
569569

570570
dev->async_events = async_events;
571571

572+
if (!dev->req_notify_cq_support) {
573+
ucs_trace("%s does not support async event handling",
574+
uct_ib_device_name(dev));
575+
}
576+
572577
uct_ib_device_get_locality(ibv_get_device_name(ibv_device),
573578
&dev->local_cpus);
574579

src/uct/ib/base/ib_device.h

+7
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,13 @@ typedef struct uct_ib_device {
238238
uint8_t pci_cswap_arg_sizes;
239239
uint8_t atomic_align;
240240
uint8_t lag_level;
241+
uint8_t req_notify_cq_support; /* Also indicates
242+
IBV_SEND_SOLICITED
243+
support */
244+
uint8_t ordered_send_comp;
245+
uint64_t mr_access_flags;
246+
uint32_t max_inline_data;
247+
241248
/* AH hash */
242249
khash_t(uct_ib_ah) ah_hash;
243250
ucs_recursive_spinlock_t ah_lock;

0 commit comments

Comments
 (0)