Skip to content

Commit f770beb

Browse files
derek-gerstmannDerek Gerstmann
and
Derek Gerstmann
authored
[bug] Fix halide_get_cpu_features() linkage to avoid name mangling issues (#8573)
* Move halide_get_cpu_features() into extern "C" linkage to avoid name mangling issues. Change CpuFeatures to POD struct passed by pointer. Update all backend cpu feature implementations.
* Clang format pass
* Push halide_get_cpu_features back into the Halide::Runtime::Internal namespace.
* Clang format pass
* Abort if halide_get_cpu_features fails (since we can't return a proper error code from can_use_target_features).

---------

Co-authored-by: Derek Gerstmann <[email protected]>
1 parent 4815121 commit f770beb

9 files changed

+121
-128
lines changed

src/runtime/aarch64_cpu_features.cpp

+22-23
Original file line numberDiff line numberDiff line change
@@ -21,24 +21,24 @@ extern "C" unsigned long getauxval(unsigned long type);
2121

2222
namespace {
2323

24-
void set_platform_features(CpuFeatures &features) {
24+
void set_platform_features(CpuFeatures *features) {
2525
unsigned long hwcaps = getauxval(AT_HWCAP);
2626
unsigned long hwcaps2 = getauxval(AT_HWCAP2);
2727

2828
if (hwcaps & HWCAP_ASIMDDP) {
29-
features.set_available(halide_target_feature_arm_dot_prod);
29+
halide_set_available_cpu_feature(features, halide_target_feature_arm_dot_prod);
3030
}
3131

3232
if (hwcaps & HWCAP_ASIMDHP) {
33-
features.set_available(halide_target_feature_arm_fp16);
33+
halide_set_available_cpu_feature(features, halide_target_feature_arm_fp16);
3434
}
3535

3636
if (hwcaps & HWCAP_SVE) {
37-
features.set_available(halide_target_feature_sve);
37+
halide_set_available_cpu_feature(features, halide_target_feature_sve);
3838
}
3939

4040
if (hwcaps2 & HWCAP2_SVE2) {
41-
features.set_available(halide_target_feature_sve2);
41+
halide_set_available_cpu_feature(features, halide_target_feature_sve2);
4242
}
4343
}
4444

@@ -56,13 +56,13 @@ bool sysctl_is_set(const char *name) {
5656
return sysctlbyname(name, &enabled, &enabled_len, nullptr, 0) == 0 && enabled;
5757
}
5858

59-
void set_platform_features(CpuFeatures &features) {
59+
void set_platform_features(CpuFeatures *features) {
6060
if (sysctl_is_set("hw.optional.arm.FEAT_DotProd")) {
61-
features.set_available(halide_target_feature_arm_dot_prod);
61+
halide_set_available_cpu_feature(features, halide_target_feature_arm_dot_prod);
6262
}
6363

6464
if (sysctl_is_set("hw.optional.arm.FEAT_FP16")) {
65-
features.set_available(halide_target_feature_arm_fp16);
65+
halide_set_available_cpu_feature(features, halide_target_feature_arm_fp16);
6666
}
6767
}
6868

@@ -84,20 +84,20 @@ extern "C" BOOL IsProcessorFeaturePresent(DWORD feature);
8484

8585
namespace {
8686

87-
void set_platform_features(CpuFeatures &features) {
87+
void set_platform_features(CpuFeatures *features) {
8888
// This is the strategy used by Google's cpuinfo library for
8989
// detecting fp16 arithmetic support on Windows.
9090
if (!IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED) &&
9191
IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE)) {
92-
features.set_available(halide_target_feature_arm_fp16);
92+
halide_set_available_cpu_feature(features, halide_target_feature_arm_fp16);
9393
}
9494

9595
if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) {
96-
features.set_available(halide_target_feature_arm_dot_prod);
96+
halide_set_available_cpu_feature(features, halide_target_feature_arm_dot_prod);
9797
}
9898

9999
if (IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE)) {
100-
features.set_available(halide_target_feature_sve);
100+
halide_set_available_cpu_feature(features, halide_target_feature_sve);
101101
}
102102
}
103103

@@ -107,28 +107,27 @@ void set_platform_features(CpuFeatures &features) {
107107

108108
namespace {
109109

110-
void set_platform_features(CpuFeatures &) {
110+
void set_platform_features(CpuFeatures *) {
111111
}
112112

113113
} // namespace
114114

115115
#endif
116116

117-
WEAK CpuFeatures halide_get_cpu_features() {
118-
CpuFeatures features;
119-
features.set_known(halide_target_feature_arm_dot_prod);
120-
features.set_known(halide_target_feature_arm_fp16);
121-
features.set_known(halide_target_feature_armv7s);
122-
features.set_known(halide_target_feature_no_neon);
123-
features.set_known(halide_target_feature_sve);
124-
features.set_known(halide_target_feature_sve2);
117+
extern "C" WEAK int halide_get_cpu_features(CpuFeatures *features) {
118+
halide_set_known_cpu_feature(features, halide_target_feature_arm_dot_prod);
119+
halide_set_known_cpu_feature(features, halide_target_feature_arm_fp16);
120+
halide_set_known_cpu_feature(features, halide_target_feature_armv7s);
121+
halide_set_known_cpu_feature(features, halide_target_feature_no_neon);
122+
halide_set_known_cpu_feature(features, halide_target_feature_sve);
123+
halide_set_known_cpu_feature(features, halide_target_feature_sve2);
125124

126125
// All ARM architectures support "No Neon".
127-
features.set_available(halide_target_feature_no_neon);
126+
halide_set_available_cpu_feature(features, halide_target_feature_no_neon);
128127

129128
set_platform_features(features);
130129

131-
return features;
130+
return halide_error_code_success;
132131
}
133132

134133
} // namespace Internal

src/runtime/arm_cpu_features.cpp

+17-18
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ extern "C" unsigned long getauxval(unsigned long type);
1818

1919
namespace {
2020

21-
void set_platform_features(CpuFeatures &features) {
21+
void set_platform_features(CpuFeatures *features) {
2222
unsigned long hwcaps = getauxval(AT_HWCAP);
2323

2424
if (hwcaps & HWCAP_ASIMDDP) {
25-
features.set_available(halide_target_feature_arm_dot_prod);
25+
halide_set_available_cpu_feature(features, halide_target_feature_arm_dot_prod);
2626
}
2727

2828
if (hwcaps & HWCAP_ASIMDHP) {
29-
features.set_available(halide_target_feature_arm_fp16);
29+
halide_set_available_cpu_feature(features, halide_target_feature_arm_fp16);
3030
}
3131
}
3232

@@ -68,17 +68,17 @@ bool is_armv7s() {
6868
return type == CPU_TYPE_ARM && subtype == CPU_SUBTYPE_ARM_V7S;
6969
}
7070

71-
void set_platform_features(CpuFeatures &features) {
71+
void set_platform_features(CpuFeatures *features) {
7272
if (is_armv7s()) {
73-
features.set_available(halide_target_feature_armv7s);
73+
halide_set_available_cpu_feature(features, halide_target_feature_armv7s);
7474
}
7575

7676
if (sysctl_is_set("hw.optional.arm.FEAT_DotProd")) {
77-
features.set_available(halide_target_feature_arm_dot_prod);
77+
halide_set_available_cpu_feature(features, halide_target_feature_arm_dot_prod);
7878
}
7979

8080
if (sysctl_is_set("hw.optional.arm.FEAT_FP16")) {
81-
features.set_available(halide_target_feature_arm_fp16);
81+
halide_set_available_cpu_feature(features, halide_target_feature_arm_fp16);
8282
}
8383
}
8484

@@ -88,28 +88,27 @@ void set_platform_features(CpuFeatures &features) {
8888

8989
namespace {
9090

91-
void set_platform_features(CpuFeatures &) {
91+
void set_platform_features(CpuFeatures *) {
9292
}
9393

9494
} // namespace
9595

9696
#endif
9797

98-
WEAK CpuFeatures halide_get_cpu_features() {
99-
CpuFeatures features;
100-
features.set_known(halide_target_feature_arm_dot_prod);
101-
features.set_known(halide_target_feature_arm_fp16);
102-
features.set_known(halide_target_feature_armv7s);
103-
features.set_known(halide_target_feature_no_neon);
104-
features.set_known(halide_target_feature_sve);
105-
features.set_known(halide_target_feature_sve2);
98+
extern "C" WEAK int halide_get_cpu_features(CpuFeatures *features) {
99+
halide_set_known_cpu_feature(features, halide_target_feature_arm_dot_prod);
100+
halide_set_known_cpu_feature(features, halide_target_feature_arm_fp16);
101+
halide_set_known_cpu_feature(features, halide_target_feature_armv7s);
102+
halide_set_known_cpu_feature(features, halide_target_feature_no_neon);
103+
halide_set_known_cpu_feature(features, halide_target_feature_sve);
104+
halide_set_known_cpu_feature(features, halide_target_feature_sve2);
106105

107106
// All ARM architectures support "No Neon".
108-
features.set_available(halide_target_feature_no_neon);
107+
halide_set_available_cpu_feature(features, halide_target_feature_no_neon);
109108

110109
set_platform_features(features);
111110

112-
return features;
111+
return halide_error_code_success;
113112
}
114113

115114
} // namespace Internal

src/runtime/can_use_target.cpp

+6-4
Original file line numberDiff line numberDiff line change
@@ -40,21 +40,23 @@ WEAK int halide_default_can_use_target_features(int count, const uint64_t *featu
4040

4141
static_assert(sizeof(halide_cpu_features_storage) == sizeof(CpuFeatures), "CpuFeatures Mismatch");
4242
if (!halide_cpu_features_initialized) {
43-
CpuFeatures tmp = halide_get_cpu_features();
43+
CpuFeatures tmp;
44+
int error = halide_get_cpu_features(&tmp);
45+
halide_abort_if_false(nullptr, error == halide_error_code_success);
4446
memcpy(&halide_cpu_features_storage, &tmp, sizeof(tmp));
4547
halide_cpu_features_initialized = true;
4648
}
4749
}
4850

49-
if (count != CpuFeatures::kWordCount) {
51+
if (count != cpu_feature_mask_size) {
5052
// This should not happen unless our runtime is out of sync with the rest of libHalide.
5153
#ifdef DEBUG_RUNTIME
52-
debug(nullptr) << "count " << count << " CpuFeatures::kWordCount " << CpuFeatures::kWordCount << "\n";
54+
debug(nullptr) << "count " << count << " cpu_feature_mask_size " << cpu_feature_mask_size << "\n";
5355
#endif
5456
halide_error(nullptr, "Internal error: wrong structure size passed to halide_can_use_target_features()\n");
5557
}
5658
const CpuFeatures *cpu_features = reinterpret_cast<const CpuFeatures *>(&halide_cpu_features_storage[0]);
57-
for (int i = 0; i < CpuFeatures::kWordCount; ++i) {
59+
for (int i = 0; i < cpu_feature_mask_size; ++i) {
5860
uint64_t m;
5961
if ((m = (features[i] & cpu_features->known[i])) != 0) {
6062
if ((m & cpu_features->available[i]) != m) {

src/runtime/cpu_features.h

+26-31
Original file line numberDiff line numberDiff line change
@@ -8,42 +8,37 @@ namespace Halide {
88
namespace Runtime {
99
namespace Internal {
1010

11-
// Return two masks:
11+
// Size of CPU feature mask large enough to cover all Halide target features
12+
static constexpr int cpu_feature_mask_size = (halide_target_feature_end + 63) / (sizeof(uint64_t) * 8);
13+
14+
// Contains two masks:
1215
// One with all the CPU-specific features that might possibly be available on this architecture ('known'),
1316
// and one with the subset that are actually present ('available').
1417
struct CpuFeatures {
15-
static const int kWordCount = (halide_target_feature_end + 63) / (sizeof(uint64_t) * 8);
16-
17-
ALWAYS_INLINE void set_known(int i) {
18-
known[i >> 6] |= ((uint64_t)1) << (i & 63);
19-
}
20-
21-
ALWAYS_INLINE void set_available(int i) {
22-
available[i >> 6] |= ((uint64_t)1) << (i & 63);
23-
}
24-
25-
ALWAYS_INLINE bool test_known(int i) const {
26-
return (known[i >> 6] & ((uint64_t)1) << (i & 63)) != 0;
27-
}
28-
29-
ALWAYS_INLINE bool test_available(int i) const {
30-
return (available[i >> 6] & ((uint64_t)1) << (i & 63)) != 0;
31-
}
32-
33-
ALWAYS_INLINE
34-
CpuFeatures() {
35-
for (int i = 0; i < kWordCount; ++i) {
36-
known[i] = 0;
37-
available[i] = 0;
38-
}
39-
}
40-
41-
uint64_t known[kWordCount]; // mask of the CPU features we know how to detect
42-
uint64_t available[kWordCount]; // mask of the CPU features that are available
43-
// (always a subset of 'known')
18+
uint64_t known[cpu_feature_mask_size] = {0}; // mask of the CPU features we know how to detect
19+
uint64_t available[cpu_feature_mask_size] = {0}; // mask of the CPU features that are available
20+
// (always a subset of 'known')
4421
};
4522

46-
extern WEAK CpuFeatures halide_get_cpu_features();
23+
ALWAYS_INLINE void halide_set_known_cpu_feature(CpuFeatures *features, int i) {
24+
features->known[i >> 6] |= ((uint64_t)1) << (i & 63);
25+
}
26+
27+
ALWAYS_INLINE void halide_set_available_cpu_feature(CpuFeatures *features, int i) {
28+
features->available[i >> 6] |= ((uint64_t)1) << (i & 63);
29+
}
30+
31+
ALWAYS_INLINE bool halide_test_known_cpu_feature(CpuFeatures *features, int i) {
32+
return (features->known[i >> 6] & ((uint64_t)1) << (i & 63)) != 0;
33+
}
34+
35+
ALWAYS_INLINE bool halide_test_available_cpu_feature(CpuFeatures *features, int i) {
36+
return (features->available[i >> 6] & ((uint64_t)1) << (i & 63)) != 0;
37+
}
38+
39+
// NOTE: This method is not part of the public API, but we push it into extern "C" to
40+
// avoid name mangling mismatches between platforms. See: https://github.com/halide/Halide/issues/8565
41+
extern "C" WEAK int halide_get_cpu_features(CpuFeatures *features);
4742

4843
} // namespace Internal
4944
} // namespace Runtime

src/runtime/hexagon_cpu_features.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ namespace Halide {
55
namespace Runtime {
66
namespace Internal {
77

8-
WEAK CpuFeatures halide_get_cpu_features() {
8+
extern "C" WEAK int halide_get_cpu_features(CpuFeatures *features) {
99
// Hexagon has no CPU-specific Features.
10-
return CpuFeatures();
10+
return halide_error_code_success;
1111
}
1212

1313
} // namespace Internal

src/runtime/powerpc_cpu_features.cpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -8,27 +8,29 @@
88

99
#define PPC_FEATURE2_ARCH_2_07 0x80000000
1010

11-
extern "C" unsigned long int getauxval(unsigned long int);
11+
extern "C" {
12+
13+
unsigned long int getauxval(unsigned long int);
14+
}
1215

1316
namespace Halide {
1417
namespace Runtime {
1518
namespace Internal {
1619

17-
WEAK CpuFeatures halide_get_cpu_features() {
18-
CpuFeatures features;
19-
features.set_known(halide_target_feature_vsx);
20-
features.set_known(halide_target_feature_power_arch_2_07);
20+
extern "C" WEAK int halide_get_cpu_features(CpuFeatures *features) {
21+
halide_set_known_cpu_feature(features, halide_target_feature_vsx);
22+
halide_set_known_cpu_feature(features, halide_target_feature_power_arch_2_07);
2123

2224
const unsigned long hwcap = getauxval(AT_HWCAP);
2325
const unsigned long hwcap2 = getauxval(AT_HWCAP2);
2426

2527
if (hwcap & PPC_FEATURE_HAS_VSX) {
26-
features.set_available(halide_target_feature_vsx);
28+
halide_set_available_cpu_feature(features, halide_target_feature_vsx);
2729
}
2830
if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
29-
features.set_available(halide_target_feature_power_arch_2_07);
31+
halide_set_available_cpu_feature(features, halide_target_feature_power_arch_2_07);
3032
}
31-
return features;
33+
return halide_error_code_success;
3234
}
3335

3436
} // namespace Internal

src/runtime/riscv_cpu_features.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ namespace Halide {
55
namespace Runtime {
66
namespace Internal {
77

8-
WEAK CpuFeatures halide_get_cpu_features() {
8+
extern "C" WEAK int halide_get_cpu_features(Halide::Runtime::Internal::CpuFeatures *features) {
99
// For now, no version-specific features, though RISC-V promises to have many.
10-
return CpuFeatures();
10+
return halide_error_code_success;
1111
}
1212

1313
} // namespace Internal

src/runtime/wasm_cpu_features.cpp

+4-7
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@ namespace Halide {
55
namespace Runtime {
66
namespace Internal {
77

8-
WEAK CpuFeatures halide_get_cpu_features() {
9-
CpuFeatures features;
10-
8+
extern "C" WEAK int halide_get_cpu_features(CpuFeatures *features) {
119
// There isn't a way to determine what features are available --
1210
// if a feature we need isn't available, we couldn't
1311
// even load. So just declare that all wasm-related features are
1412
// known and available.
15-
features.set_known(halide_target_feature_wasm_simd128);
16-
features.set_available(halide_target_feature_wasm_simd128);
17-
18-
return features;
13+
halide_set_known_cpu_feature(features, halide_target_feature_wasm_simd128);
14+
halide_set_available_cpu_feature(features, halide_target_feature_wasm_simd128);
15+
return halide_error_code_success;
1916
}
2017

2118
} // namespace Internal

0 commit comments

Comments
 (0)