Skip to content

Commit 6938263

Browse files
authored
Merge pull request #3 from lipari/hwloc_version_upgrade
Hwloc version upgrade
2 parents 6e8e661 + 6bd2644 commit 6938263

File tree

1 file changed

+33
-10
lines changed

1 file changed

+33
-10
lines changed

mpibind.c

+33-10
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,14 @@ static char *get_cuda_str (int32_t gpus, uint32_t gpu_bits)
509509
int slurm_spank_init (spank_t sp, int ac, char **av)
510510
{
511511
int i;
512+
uint32_t hwloc_version = hwloc_get_api_version ();
513+
514+
if (hwloc_version != HWLOC_API_VERSION) {
515+
if (verbose)
516+
slurm_error ("mpibind plugin written for hwloc API 0x%x but running"
517+
"with hwloc library 0x%x", HWLOC_API_VERSION,
518+
hwloc_version);
519+
}
512520

513521
if (!spank_remote (sp))
514522
return (0);
@@ -671,29 +679,35 @@ int slurm_spank_task_init (spank_t sp, int32_t ac, char **av)
671679
}
672680
}
673681

674-
for(obj = hwloc_get_next_osdev (topology, NULL); obj;
675-
obj = hwloc_get_next_osdev (topology, obj)) {
682+
for (obj = hwloc_get_next_osdev (topology, NULL); obj;
683+
obj = hwloc_get_next_osdev (topology, obj)) {
676684
if (!strncmp (obj->name, "ib0", 3)) {
677685
/* NIC Affinity support goes here */
678686
}
679687
}
680688

681689
/* count the GPUs */
682-
for(obj = hwloc_get_next_pcidev (topology, NULL); obj;
683-
obj = hwloc_get_next_pcidev (topology, obj)) {
684-
if (!strncmp (obj->name, "NVIDIA", 6)) {
690+
for (obj = hwloc_get_next_osdev (topology, NULL); obj;
691+
obj = hwloc_get_next_osdev (topology, obj)) {
692+
if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_GPU) {
685693
gpus++;
686694
}
687695
}
688696

689697
if (gpus) {
690698
gpusets = calloc (gpus, sizeof (hwloc_cpuset_t));
691699
gpus = 0;
692-
for(obj = hwloc_get_next_pcidev (topology, NULL); obj;
693-
obj = hwloc_get_next_pcidev (topology, obj)) {
694-
if (!strncmp (obj->name, "NVIDIA", 6)) {
695-
hwloc_obj_t numaobj = hwloc_get_ancestor_obj_by_type (topology,
696-
HWLOC_OBJ_NODE, obj);
700+
for (obj = hwloc_get_next_osdev (topology, NULL); obj;
701+
obj = hwloc_get_next_osdev (topology, obj)) {
702+
if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_GPU) {
703+
hwloc_obj_t numaobj;
704+
#if HWLOC_API_VERSION < 0x00010b00
705+
numaobj = hwloc_get_ancestor_obj_by_type (topology,
706+
HWLOC_OBJ_NODE, obj);
707+
#else
708+
numaobj = hwloc_get_ancestor_obj_by_type (topology,
709+
HWLOC_OBJ_NUMANODE, obj);
710+
#endif
697711
if (numaobj) {
698712
gpusets[gpus] = hwloc_bitmap_dup (numaobj->cpuset);
699713
gpus++;
@@ -704,7 +718,11 @@ int slurm_spank_task_init (spank_t sp, int32_t ac, char **av)
704718
}
705719
}
706720
}
721+
#if HWLOC_API_VERSION < 0x00010b00
707722
numaobjs = hwloc_get_nbobjs_by_type (topology, HWLOC_OBJ_NODE);
723+
#else
724+
numaobjs = hwloc_get_nbobjs_by_type (topology, HWLOC_OBJ_NUMANODE);
725+
#endif
708726
decimate_gpusets (gpusets, numaobjs, gpus);
709727
}
710728

@@ -767,8 +785,13 @@ int slurm_spank_task_init (spank_t sp, int32_t ac, char **av)
767785

768786
if (verbose) {
769787
/* An MPI task with threads should not span more than one NUMA domain */
788+
#if HWLOC_API_VERSION < 0x00010b00
770789
numaobjs = hwloc_get_nbobjs_inside_cpuset_by_type (topology, cpuset,
771790
HWLOC_OBJ_NODE);
791+
#else
792+
numaobjs = hwloc_get_nbobjs_inside_cpuset_by_type (topology, cpuset,
793+
HWLOC_OBJ_NUMANODE);
794+
#endif
772795
if ((local_size < numaobjs) && (num_threads > 1)) {
773796
slurm_error ("mpibind: rank %d spans %d NUMA domains",
774797
local_rank, numaobjs);

0 commit comments

Comments
 (0)