Skip to content

Commit a508d8a

Browse files
authored
Merge pull request #4 from lipari/misc_fixes
Misc fixes in mpibind
2 parents 6938263 + b77b998 commit a508d8a

File tree

1 file changed

+15
-9
lines changed

1 file changed

+15
-9
lines changed

mpibind.c

+15 -9
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ mpibind: Automatically assign CPU and GPU affinity using best-guess defaults.\n\
4444
\n\
4545
The default behavior attempts to bind MPI tasks to specific processing\n\
4646
units. If OMP_NUM_THREADS is set, each thread will be similarly bound\n\
47-
to a processing unit. MPI+OpenMP programs must set OMP_NUM_THREADS.\n\
47+
to a processing unit.\n\
4848
\n\
4949
Option Usage: --mpibind[=args...]\n\
5050
where args... is a period (.) separated list of one or more of the\n\
@@ -649,6 +649,7 @@ int slurm_spank_task_init (spank_t sp, int32_t ac, char **av)
649649
cpusets[j] = hwloc_bitmap_dup (obj->cpuset);
650650
} else {
651651
slurm_error ("mpibind: failed to get core %d", i);
652+
return (ESPANK_ERROR);
652653
}
653654
j++;
654655
}
@@ -675,6 +676,7 @@ int slurm_spank_task_init (spank_t sp, int32_t ac, char **av)
675676
} else {
676677
slurm_error ("mpibind: failed to get object %d at depth %d", i,
677678
depth);
679+
return (ESPANK_ERROR);
678680
}
679681
}
680682
}
@@ -700,21 +702,25 @@ int slurm_spank_task_init (spank_t sp, int32_t ac, char **av)
700702
for (obj = hwloc_get_next_osdev (topology, NULL); obj;
701703
obj = hwloc_get_next_osdev (topology, obj)) {
702704
if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_GPU) {
703-
hwloc_obj_t numaobj;
705+
hwloc_obj_t ancestor;
704706
#if HWLOC_API_VERSION < 0x00010b00
705-
numaobj = hwloc_get_ancestor_obj_by_type (topology,
707+
ancestor = hwloc_get_ancestor_obj_by_type (topology,
706708
HWLOC_OBJ_NODE, obj);
707709
#else
708-
numaobj = hwloc_get_ancestor_obj_by_type (topology,
710+
ancestor = hwloc_get_ancestor_obj_by_type (topology,
709711
HWLOC_OBJ_NUMANODE, obj);
710712
#endif
711-
if (numaobj) {
712-
gpusets[gpus] = hwloc_bitmap_dup (numaobj->cpuset);
713+
if (!ancestor)
714+
/* The parent of GPUs on KNL nodes may be the
715+
* machine instead of a NUMA node*/
716+
ancestor = hwloc_get_ancestor_obj_by_type (topology,
717+
HWLOC_OBJ_MACHINE, obj);
718+
if (ancestor) {
719+
gpusets[gpus] = hwloc_bitmap_dup (ancestor->cpuset);
713720
gpus++;
714721
} else {
715-
slurm_error ("mpibind: failed to get numa parent of NVIDIA "
716-
"obj");
717-
break;
722+
slurm_error ("mpibind: failed to find ancestor of GPU obj");
723+
return (ESPANK_ERROR);
718724
}
719725
}
720726
}

0 commit comments

Comments
 (0)