@@ -650,6 +650,10 @@ static ucs_config_field_t ucp_config_table[] = {
650
650
ucs_offsetof (ucp_config_t , ctx ),
651
651
UCS_CONFIG_TYPE_TABLE (ucp_context_config_table )},
652
652
653
+ {"MAX_COMPONENT_MDS" , "16" ,
654
+ "Maximum number of memory domains per component to use." ,
655
+ ucs_offsetof (ucp_config_t , max_component_mds ), UCS_CONFIG_TYPE_ULUNITS },
656
+
653
657
{NULL }
654
658
};
655
659
UCS_CONFIG_DECLARE_TABLE (ucp_config_table , "UCP context" , NULL , ucp_config_t )
@@ -1561,6 +1565,7 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
1561
1565
const ucs_string_set_t * aux_tls )
1562
1566
{
1563
1567
const ucp_tl_cmpt_t * tl_cmpt = & context -> tl_cmpts [cmpt_index ];
1568
+ size_t avail_mds = config -> max_component_mds ;
1564
1569
uct_component_attr_t uct_component_attr ;
1565
1570
unsigned num_tl_resources ;
1566
1571
ucs_status_t status ;
@@ -1572,7 +1577,8 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
1572
1577
const uct_md_attr_v2_t * md_attr ;
1573
1578
1574
1579
/* List memory domain resources */
1575
- uct_component_attr .field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES ;
1580
+ uct_component_attr .field_mask = UCT_COMPONENT_ATTR_FIELD_MD_RESOURCES |
1581
+ UCT_COMPONENT_ATTR_FIELD_NAME ;
1576
1582
uct_component_attr .md_resources =
1577
1583
ucs_alloca (tl_cmpt -> attr .md_resource_count *
1578
1584
sizeof (* uct_component_attr .md_resources ));
@@ -1584,6 +1590,14 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
1584
1590
/* Open all memory domains */
1585
1591
mem_type_mask = UCS_BIT (UCS_MEMORY_TYPE_HOST );
1586
1592
for (i = 0 ; i < tl_cmpt -> attr .md_resource_count ; ++ i ) {
1593
+ if (avail_mds == 0 ) {
1594
+ ucs_debug ("only first %zu domains kept for component %s with %u "
1595
+ "memory domains resources" ,
1596
+ config -> max_component_mds , uct_component_attr .name ,
1597
+ tl_cmpt -> attr .md_resource_count );
1598
+ break ;
1599
+ }
1600
+
1587
1601
md_index = context -> num_mds ;
1588
1602
md_attr = & context -> tl_mds [md_index ].attr ;
1589
1603
@@ -1603,67 +1617,71 @@ ucp_add_component_resources(ucp_context_h context, ucp_rsc_index_t cmpt_index,
1603
1617
goto out ;
1604
1618
}
1605
1619
1606
- if (num_tl_resources > 0 ) {
1607
- /* List of memory type MDs */
1608
- mem_type_bitmap = md_attr -> detect_mem_types ;
1609
- if (~ mem_type_mask & mem_type_bitmap ) {
1610
- context -> mem_type_detect_mds [ context -> num_mem_type_detect_mds ] = md_index ;
1611
- ++ context -> num_mem_type_detect_mds ;
1612
- mem_type_mask |= mem_type_bitmap ;
1613
- }
1620
+ if (num_tl_resources == 0 ) {
1621
+ /* If the MD does not have transport resources (device or sockaddr),
1622
+ * don't use it */
1623
+ ucs_debug ( "closing md %s because it has no selected transport resources" ,
1624
+ context -> tl_mds [ md_index ]. rsc . md_name ) ;
1625
+ uct_md_close ( context -> tl_mds [ md_index ]. md ) ;
1626
+ continue ;
1627
+ }
1614
1628
1615
- ucs_memory_type_for_each (mem_type ) {
1616
- if (md_attr -> flags & UCT_MD_FLAG_REG ) {
1617
- if ((context -> config .ext .reg_nb_mem_types & UCS_BIT (mem_type )) &&
1618
- !(md_attr -> reg_nonblock_mem_types & UCS_BIT (mem_type ))) {
1619
- if (md_attr -> reg_mem_types & UCS_BIT (mem_type )) {
1620
- /* Keep map of MDs supporting blocking registration
1621
- * if non-blocking registration is requested for the
1622
- * given memory type. In some cases blocking
1623
- * registration maybe required anyway (e.g. internal
1624
- * staging buffers for rndv pipeline protocols). */
1625
- context -> reg_block_md_map [mem_type ] |= UCS_BIT (md_index );
1626
- }
1627
- continue ;
1628
- }
1629
+ avail_mds -- ;
1630
+
1631
+ /* List of memory type MDs */
1632
+ mem_type_bitmap = md_attr -> detect_mem_types ;
1633
+ if (~mem_type_mask & mem_type_bitmap ) {
1634
+ context -> mem_type_detect_mds [context -> num_mem_type_detect_mds ] = md_index ;
1635
+ ++ context -> num_mem_type_detect_mds ;
1636
+ mem_type_mask |= mem_type_bitmap ;
1637
+ }
1629
1638
1639
+ ucs_memory_type_for_each (mem_type ) {
1640
+ if (md_attr -> flags & UCT_MD_FLAG_REG ) {
1641
+ if ((context -> config .ext .reg_nb_mem_types & UCS_BIT (mem_type )) &&
1642
+ !(md_attr -> reg_nonblock_mem_types & UCS_BIT (mem_type ))) {
1630
1643
if (md_attr -> reg_mem_types & UCS_BIT (mem_type )) {
1631
- context -> reg_md_map [mem_type ] |= UCS_BIT (md_index );
1644
+ /* Keep map of MDs supporting blocking registration
1645
+ * if non-blocking registration is requested for the
1646
+ * given memory type. In some cases blocking
1647
+ * registration may be required anyway (e.g. internal
1648
+ * staging buffers for rndv pipeline protocols). */
1649
+ context -> reg_block_md_map [mem_type ] |= UCS_BIT (md_index );
1632
1650
}
1651
+ continue ;
1652
+ }
1633
1653
1634
- if (md_attr -> cache_mem_types & UCS_BIT (mem_type )) {
1635
- context -> cache_md_map [mem_type ] |= UCS_BIT (md_index );
1636
- }
1654
+ if (md_attr -> reg_mem_types & UCS_BIT (mem_type )) {
1655
+ context -> reg_md_map [mem_type ] |= UCS_BIT (md_index );
1656
+ }
1637
1657
1638
- if ((context -> config .ext .gva_enable != UCS_CONFIG_OFF ) &&
1639
- (md_attr -> gva_mem_types & UCS_BIT (mem_type ))) {
1640
- context -> gva_md_map [mem_type ] |= UCS_BIT (md_index );
1641
- }
1658
+ if (md_attr -> cache_mem_types & UCS_BIT (mem_type )) {
1659
+ context -> cache_md_map [mem_type ] |= UCS_BIT (md_index );
1642
1660
}
1643
- }
1644
1661
1645
- if (md_attr -> flags & UCT_MD_FLAG_EXPORTED_MKEY ) {
1646
- context -> export_md_map |= UCS_BIT (md_index );
1662
+ if ((context -> config .ext .gva_enable != UCS_CONFIG_OFF ) &&
1663
+ (md_attr -> gva_mem_types & UCS_BIT (mem_type ))) {
1664
+ context -> gva_md_map [mem_type ] |= UCS_BIT (md_index );
1665
+ }
1647
1666
}
1667
+ }
1648
1668
1649
- if (md_attr -> flags & UCT_MD_FLAG_REG_DMABUF ) {
1650
- context -> dmabuf_reg_md_map |= UCS_BIT (md_index );
1651
- }
1669
+ if (md_attr -> flags & UCT_MD_FLAG_EXPORTED_MKEY ) {
1670
+ context -> export_md_map |= UCS_BIT (md_index );
1671
+ }
1652
1672
1653
- ucs_for_each_bit (mem_type , md_attr -> dmabuf_mem_types ) {
1654
- /* In case of multiple providers, take the first one */
1655
- if (context -> dmabuf_mds [mem_type ] == UCP_NULL_RESOURCE ) {
1656
- context -> dmabuf_mds [mem_type ] = md_index ;
1657
- }
1673
+ if (md_attr -> flags & UCT_MD_FLAG_REG_DMABUF ) {
1674
+ context -> dmabuf_reg_md_map |= UCS_BIT (md_index );
1675
+ }
1676
+
1677
+ ucs_for_each_bit (mem_type , md_attr -> dmabuf_mem_types ) {
1678
+ /* In case of multiple providers, take the first one */
1679
+ if (context -> dmabuf_mds [mem_type ] == UCP_NULL_RESOURCE ) {
1680
+ context -> dmabuf_mds [mem_type ] = md_index ;
1658
1681
}
1659
- ++ context -> num_mds ;
1660
- } else {
1661
- /* If the MD does not have transport resources (device or sockaddr),
1662
- * don't use it */
1663
- ucs_debug ("closing md %s because it has no selected transport resources" ,
1664
- context -> tl_mds [md_index ].rsc .md_name );
1665
- uct_md_close (context -> tl_mds [md_index ].md );
1666
1682
}
1683
+
1684
+ ++ context -> num_mds ;
1667
1685
}
1668
1686
1669
1687
context -> mem_type_mask |= mem_type_mask ;
0 commit comments