Skip to content

Commit 1268c51

Browse files
committed
config: add SVE detection alongside NEON in aarch64 op component
- Introduce AC_CACHE_CHECK probes for ARM Scalable Vector Extension (SVE) using both a default compile test and a second test with __attribute__((__target__("+sve"))). - Define variables op_cv_sve_support and op_cv_sve_add_flags - Update AM_CONDITIONAL and AC_DEFINE to expose SVE support macros (OMPI_MCA_OP_HAVE_SVE, OMPI_MCA_OP_SVE_EXTRA_FLAGS). - Extend final AS_IF to enable the component when either NEON or SVE is available. - Add a preprocessor guard around SVE-specific function attributes - Encapsulate the +sve attribute behind OMPI_MCA_OP_SVE_EXTRA_FLAGS, ensuring that only builds which detected and enabled compiler SVE support will compile with SVE-targeted code paths. - Simplifies later code by using SVE_ATTR in function declarations instead of repeating the attribute clause. - apply SVE_ATTR macro in C source for conditional +sve targeting Signed-off-by: Marco Vogel <[email protected]>
1 parent f6fe1d4 commit 1268c51

File tree

4 files changed

+68
-18
lines changed

4 files changed

+68
-18
lines changed

ompi/mca/op/aarch64/configure.m4

+59-17
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#
1414

1515
# MCA_ompi_op_arm_CONFIG([action-if-can-compile],
16-
# [action-if-cant-compile])
16+
# [action-if-cant-compile])
1717
# ------------------------------------------------
1818
AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
1919
AC_CONFIG_FILES([ompi/mca/op/aarch64/Makefile])
@@ -74,33 +74,70 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
7474
#
7575
# Check for SVE support
7676
#
77-
AC_CACHE_CHECK([for SVE support], op_cv_sve_support,
78-
[AS_IF([test "$op_cv_neon_support" = "yes"],
79-
[
80-
AC_LINK_IFELSE(
81-
[AC_LANG_PROGRAM([[
77+
AC_CACHE_CHECK([for SVE support], [op_cv_sve_support], [
78+
AC_MSG_RESULT([])
79+
# initialize result variables
80+
op_cv_sve_support=no
81+
op_cv_sve_add_flags=no
82+
83+
# first attempt: no extra flags
84+
AC_MSG_CHECKING([for SVE support (no additional flags)])
85+
AC_LINK_IFELSE(
86+
[AC_LANG_SOURCE([[
8287
#if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
8388
#include <arm_sve.h>
8489
#else
8590
#error "No support for __aarch64__ or SVE"
8691
#endif
87-
]],
88-
[[
89-
#if defined(__aarch64__) && defined(_ARM_FEATURE_SVE)
90-
svfloat32_t vA;
91-
vA = svdup_n_f32(0)
92+
93+
int main(void) {
94+
svfloat32_t vA;
95+
vA = svdup_n_f32(0);
96+
return 0;
97+
}
98+
]])],
99+
[ op_cv_sve_support=yes
100+
AC_MSG_RESULT([yes]) ],
101+
[ AC_MSG_RESULT([no ]) ]
102+
)
103+
104+
# second attempt: use +sve attribute
105+
AS_IF([test "$op_cv_sve_support" = "no"],[
106+
AC_MSG_CHECKING([for SVE support (with +sve)])
107+
AC_LINK_IFELSE(
108+
[AC_LANG_SOURCE([[
109+
#if defined(__aarch64__)
110+
#include <arm_sve.h>
111+
#else
112+
#error "not on aarch64"
92113
#endif
93-
]])],
94-
[op_cv_sve_support=yes],
95-
[op_cv_sve_support=no])])])
96-
])
97114

115+
__attribute__((__target__("+sve")))
116+
int main(void) {
117+
svbool_t pg = svptrue_b32();
118+
svuint32_t a = svdup_u32(0);
119+
svuint32_t b = svdup_u32(0);
120+
svuint32_t c = svadd_u32_m(pg, a, b);
121+
return (int)svaddv_u32(pg, c);
122+
}
123+
]])],
124+
[ op_cv_sve_support=yes
125+
op_cv_sve_add_flags=yes
126+
AC_MSG_RESULT([yes]) ],
127+
[ AC_MSG_RESULT([no ]) ]
128+
)
129+
])
130+
])
131+
132+
AC_LANG_POP
133+
])
98134
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_support],
99135
[test "$op_cv_neon_support" = "yes"])
100136
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_neon_fp_support],
101137
[test "$op_cv_neon_fp_support" = "yes"])
102138
AM_CONDITIONAL([MCA_BUILD_ompi_op_has_sve_support],
103139
[test "$op_cv_sve_support" = "yes"])
140+
104141
AC_SUBST(MCA_BUILD_ompi_op_has_neon_support)
105142
AC_SUBST(MCA_BUILD_ompi_op_has_neon_fp_support)
106143
AC_SUBST(MCA_BUILD_ompi_op_has_sve_support)
@@ -111,9 +148,14 @@ AC_DEFUN([MCA_ompi_op_aarch64_CONFIG],[
111148
[AC_DEFINE([OMPI_MCA_OP_HAVE_NEON_FP], [1],[NEON FP supported in the current build])])
112149
AS_IF([test "$op_cv_sve_support" = "yes"],
113150
[AC_DEFINE([OMPI_MCA_OP_HAVE_SVE], [1],[SVE supported in the current build])])
151+
AS_IF([test "$op_cv_sve_add_flags" = "yes"],
152+
[AC_DEFINE([OMPI_MCA_OP_SVE_EXTRA_FLAGS], [1],[SVE supported with additional compile attributes])],
153+
[AC_DEFINE([OMPI_MCA_OP_SVE_EXTRA_FLAGS], [0],[SVE not supported])])
114154

115-
# If we have at least support for Neon
116-
AS_IF([test "$op_cv_neon_support" = "yes"],
155+
156+
157+
# If we have at least support for Neon or SVE
158+
AS_IF([test "$op_cv_neon_support" = "yes" || test "$op_cv_sve_support" = "yes" ],
117159
[$1],
118160
[$2])
119161
])dnl

ompi/mca/op/aarch64/op_aarch64.h

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424

2525
BEGIN_C_DECLS
2626

27+
#if OMPI_MCA_OP_SVE_EXTRA_FLAGS
28+
#define SVE_ATTR __attribute__ ((__target__ ("+sve")))
29+
#else
30+
#define SVE_ATTR
31+
#endif
32+
2733
/**
2834
* Derive a struct from the base op component struct, allowing us to
2935
* cache some component-specific information on our well-known

ompi/mca/op/aarch64/op_aarch64_component.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static int mca_op_aarch64_component_close(void)
101101
/*
102102
* Register MCA params.
103103
*/
104-
static int mca_op_aarch64_component_register(void)
104+
SVE_ATTR static int mca_op_aarch64_component_register(void)
105105
{
106106

107107
mca_op_aarch64_component.hardware_available = 1; /* Check for Neon */

ompi/mca/op/aarch64/op_aarch64_functions.c

+2
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ _Generic((*(out)), \
137137
}
138138
#elif defined(GENERATE_SVE_CODE)
139139
#define OP_AARCH64_FUNC(name, type_name, type_size, type_cnt, type, op) \
140+
SVE_ATTR \
140141
static void OP_CONCAT(ompi_op_aarch64_2buff_##name##_##type##type_size##_t, APPEND) \
141142
(const void *_in, void *_out, int *count, \
142143
struct ompi_datatype_t **dtype, \
@@ -303,6 +304,7 @@ static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPE
303304
}
304305
#elif defined(GENERATE_SVE_CODE)
305306
#define OP_AARCH64_FUNC_3BUFF(name, type_name, type_size, type_cnt, type, op) \
307+
SVE_ATTR \
306308
static void OP_CONCAT(ompi_op_aarch64_3buff_##name##_##type##type_size##_t, APPEND) \
307309
(const void *_in1, const void *_in2, void *_out, int *count, \
308310
struct ompi_datatype_t **dtype, \

0 commit comments

Comments
 (0)