diff --git a/src/tbbbind/tbb_bind.cpp b/src/tbbbind/tbb_bind.cpp
index 143d143344..5c7d2c449a 100644
--- a/src/tbbbind/tbb_bind.cpp
+++ b/src/tbbbind/tbb_bind.cpp
@@ -1,5 +1,6 @@
 /*
-    Copyright (c) 2019-2024 Intel Corporation
+    Copyright (c) 2019-2025 Intel Corporation
+    Copyright (c) 2025 UXL Foundation Contributors
 
     Licensed under the Apache License, Version 2.0 (the "License");
     you may not use this file except in compliance with the License.
@@ -200,15 +201,12 @@ class system_topology {
         bool core_types_parsing_broken = core_types_number <= 0;
         if (!core_types_parsing_broken) {
             core_types_affinity_masks_list.resize(core_types_number);
-            int efficiency{-1};
 
             for (int core_type = 0; core_type < core_types_number; ++core_type) {
                 hwloc_cpuset_t& current_mask = core_types_affinity_masks_list[core_type];
                 current_mask = hwloc_bitmap_alloc();
 
-                if (!hwloc_cpukinds_get_info(topology, core_type, current_mask, &efficiency, nullptr, nullptr, 0)
-                    && efficiency >= 0
-                ) {
+                if (!hwloc_cpukinds_get_info(topology, core_type, current_mask, nullptr, nullptr, nullptr, 0)) {
                     hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
 
                     if (hwloc_bitmap_weight(current_mask) > 0) {
@@ -221,6 +219,35 @@ class system_topology {
                 }
             }
         }
+        // On hybrid CPUs, check if there are cores without L3 cache.
+        if (!core_types_parsing_broken && core_types_number > 1) {
+            // The first core type mask (least performant cores)
+            hwloc_cpuset_t& front = core_types_affinity_masks_list.front();
+            hwloc_cpuset_t lp_mask = hwloc_bitmap_dup(front);
+
+            // Iterate through all L3 cache objects and remove their cores from lp_mask.
+            hwloc_obj_t l3_package = nullptr;
+            while ((l3_package = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_L3CACHE, l3_package)) != nullptr ) {
+                hwloc_bitmap_andnot(lp_mask, lp_mask, l3_package->cpuset);
+            }
+
+            if (hwloc_bitmap_iszero(lp_mask)) {
+                // All cores in the front mask have L3 cache, so no need to create a separate core type.
+                hwloc_bitmap_free(lp_mask);
+            } else {
+                hwloc_bitmap_andnot(front, front, lp_mask);
+                if (hwloc_bitmap_iszero(front)) {
+                    // No cores with L3 cache in the front mask, so replace it with the L3-less cores.
+                    hwloc_bitmap_free(front);
+                    front = lp_mask;
+                } else {
+                    // The front mask has SOME cores with L3 cache.
+                    // Create a new least performant (L3-less) core type and add it to the front.
+                    core_types_affinity_masks_list.insert(core_types_affinity_masks_list.begin(), lp_mask);
+                    core_types_indexes_list.push_back(core_types_number++);
+                }
+            }
+        }
 #else /*!__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/
         bool core_types_parsing_broken{true};
 #endif /*__TBBBIND_HWLOC_HYBRID_CPUS_INTERFACES_PRESENT*/
diff --git a/test/common/common_arena_constraints.h b/test/common/common_arena_constraints.h
index 22c0d05309..ab68d4807f 100644
--- a/test/common/common_arena_constraints.h
+++ b/test/common/common_arena_constraints.h
@@ -1,5 +1,6 @@
 /*
-    Copyright (c) 2019-2024 Intel Corporation
+    Copyright (c) 2019-2025 Intel Corporation
+    Copyright (c) 2025 UXL Foundation Contributors
 
     Licensed under the Apache License, Version 2.0 (the "License");
     you may not use this file except in compliance with the License.
@@ -226,7 +227,6 @@ class system_info {
             "HWLOC cannot detect the number of cpukinds.(reference)");
         core_types_parsing_broken = num_cpu_kinds == 0;
 
-        int current_efficiency = -1;
         cpu_kind_infos.resize(num_cpu_kinds);
         for (auto kind_index = 0; kind_index < num_cpu_kinds; ++kind_index) {
             auto& cki = cpu_kind_infos[kind_index];
@@ -238,13 +238,9 @@ class system_info {
             );
 
             hwloc_require_ex(
-                hwloc_cpukinds_get_info, topology, kind_index, cki.cpuset, &current_efficiency,
+                hwloc_cpukinds_get_info, topology, kind_index, cki.cpuset, /*efficiency*/nullptr,
                 /*nr_infos*/nullptr, /*infos*/nullptr, /*flags*/0
             );
-            if (current_efficiency < 0) {
-                core_types_parsing_broken = true;
-                break;
-            }
 
             hwloc_bitmap_and(cki.cpuset, cki.cpuset, process_cpuset);
             cki.index = hwloc_cpukinds_get_by_cpuset(topology, cki.cpuset, /*flags*/0);
@@ -253,6 +249,43 @@ class system_info {
 
             cki.concurrency = hwloc_bitmap_weight(cki.cpuset);
         }
+        // On hybrid CPUs, check if there are cores without L3 cache.
+        if (!core_types_parsing_broken && num_cpu_kinds > 1) {
+            // The first core type mask (least performant cores)
+            auto& cki = cpu_kind_infos.front();
+            hwloc_cpuset_t lp_mask = hwloc_bitmap_dup(cki.cpuset);
+
+            // Iterate through all L3 cache objects and remove their cores from lp_mask.
+            hwloc_obj_t l3_package = nullptr;
+            while ((l3_package = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_L3CACHE, l3_package)) != nullptr ) {
+                hwloc_bitmap_andnot(lp_mask, lp_mask, l3_package->cpuset);
+            }
+
+            if (hwloc_bitmap_iszero(lp_mask)) {
+                // All cores in the front mask have L3 cache, so no need to create a separate core type.
+                hwloc_bitmap_free(lp_mask);
+            } else {
+                hwloc_bitmap_andnot(cki.cpuset, cki.cpuset, lp_mask);
+                if (hwloc_bitmap_iszero(cki.cpuset)) {
+                    // No cores with L3 cache in the front mask, so replace it with the L3-less cores.
+                    hwloc_bitmap_free(cki.cpuset);
+                    cki.cpuset = lp_mask;
+                } else {
+                    // The front mask has SOME cores with L3 cache.
+                    cki.concurrency = hwloc_bitmap_weight(cki.cpuset);
+
+                    // Create a new least performant (L3-less) core type and add it to the front.
+                    auto& lp_cki = *cpu_kind_infos.emplace(cpu_kind_infos.begin());
+                    lp_cki.cpuset = lp_mask;
+                    lp_cki.concurrency = hwloc_bitmap_weight(lp_cki.cpuset);
+
+                    // Increment all CPU kind indices after inserting a new one at beginning.
+                    for (auto& i : cpu_kind_infos) {
+                        i.index++;
+                    }
+                }
+            }
+        }
 #endif /*__HYBRID_CPUS_TESTING*/
 
         if (core_types_parsing_broken) {
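
For context, both hunks apply the same hwloc recipe: take the least performant CPU kind (kind 0, which the patch's comments call the front mask), subtract the cpuset of every HWLOC_OBJ_L3CACHE object from it, and treat whatever remains as an extra L3-less core type. Below is a minimal standalone sketch of that recipe; it is illustrative only (the file name and build line are hypothetical), assumes a recent hwloc with the cpukinds API (2.4+), and drops the error handling and mask bookkeeping the patch itself performs.

// probe_l3less.cpp (hypothetical name) -- build with something like:
//   c++ probe_l3less.cpp $(pkg-config --cflags --libs hwloc)
#include <hwloc.h>
#include <cstdio>

int main() {
    hwloc_topology_t topology;
    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);

    // As the patch's comments note, the first CPU kind holds the least
    // performant cores; it plays the role of the "front" mask above.
    int num_kinds = hwloc_cpukinds_get_nr(topology, /*flags*/0);
    if (num_kinds > 1) {
        hwloc_bitmap_t kind0 = hwloc_bitmap_alloc();
        // Same call shape as the patch: efficiency/nr_infos/infos are not needed.
        hwloc_cpukinds_get_info(topology, 0, kind0, nullptr, nullptr, nullptr, /*flags*/0);

        // Subtract every L3 cache object's cpuset; what remains has no L3 cache.
        hwloc_bitmap_t l3less = hwloc_bitmap_dup(kind0);
        hwloc_obj_t l3 = nullptr;
        while ((l3 = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_L3CACHE, l3)) != nullptr) {
            hwloc_bitmap_andnot(l3less, l3less, l3->cpuset);
        }

        if (hwloc_bitmap_iszero(l3less)) {
            std::printf("all PUs of the least performant kind are covered by an L3 cache\n");
        } else {
            std::printf("%d PUs of the least performant kind have no L3 cache\n",
                        hwloc_bitmap_weight(l3less));
        }
        hwloc_bitmap_free(l3less);
        hwloc_bitmap_free(kind0);
    }
    hwloc_topology_destroy(topology);
    return 0;
}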