Skip to content

Commit 9542577

Browse files
authored
Add feature detection for ARM/MacOS (#41924)
1 parent cc345f6 commit 9542577

File tree

1 file changed

+59
-19
lines changed

1 file changed

+59
-19
lines changed

src/processor_arm.cpp

+59-19
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,9 @@
1919
# undef USE_DYN_GETAUXVAL
2020
# include <sys/auxv.h>
2121
# endif
22+
#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_
23+
#include <sys/sysctl.h>
24+
#include <string.h>
2225
#endif
2326

2427
namespace ARM {
@@ -160,6 +163,8 @@ enum class CPU : uint32_t {
160163
apple_a11,
161164
apple_a12,
162165
apple_a13,
166+
apple_a14,
167+
apple_m1,
163168
apple_s4,
164169
apple_s5,
165170

@@ -240,6 +245,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
240245
constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
241246
constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
242247
constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
248+
constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2);
243249
constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
244250

245251
// For ARM cores, the features required can be found in the technical reference manual
@@ -342,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
342348
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
343349
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
344350
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
351+
constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
352+
constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
353+
// Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
354+
// and sysctl -a hw.optional
345355
constexpr auto apple_s4 = apple_a12;
346356
constexpr auto apple_s5 = apple_a12;
347357

@@ -420,6 +430,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
420430
{"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
421431
{"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
422432
{"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
433+
{"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
434+
{"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
423435
{"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
424436
{"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
425437
{"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
@@ -662,13 +674,47 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
662674
{"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2},
663675
{"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3},
664676
{"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4},
665-
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000,
666-
Feature::samsung_exynos_m5},
677+
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5},
667678
{"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7},
668679
};
669680
#endif
670681
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
671682

683+
// Look up a CPU by its numeric id in this file's `cpus` table.
// Forwards to the global-scope ::find_cpu, supplying the table and its
// entry count (`ncpu_names`); the result is passed through unchanged.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU,feature_sz> *const spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}
687+
688+
// Look up a CPU by name in this file's `cpus` table.
// Forwards to the global-scope ::find_cpu, supplying the table and its
// entry count (`ncpu_names`); the result is passed through unchanged.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU,feature_sz> *const spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}
692+
693+
static inline const char *find_cpu_name(uint32_t cpu)
694+
{
695+
return ::find_cpu_name(cpu, cpus, ncpu_names);
696+
}
697+
698+
#if defined _CPU_AARCH64_ && defined _OS_DARWIN_
699+
700+
// Detect the host CPU on AArch64 macOS.
//
// Reads the `machdep.cpu.brand_string` sysctl and maps it to a (CPU id,
// feature list) pair. Every recognised brand string ("Apple M1",
// "Apple M1 Pro", "Apple M1 Max") and the fallback for anything else all
// resolve to `apple_m1`, so the returned value is always
// (CPU::apple_m1, Feature::apple_m1).
//
// The query result is checked: if sysctlbyname fails, the brand string is
// treated as empty instead of comparing against uninitialised stack memory.
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
{
    // Zero-initialise so the buffer is always a valid NUL-terminated string.
    char buffer[128] = {};
    // Reserve the final byte so the terminator can never be overwritten.
    size_t bufferlen = sizeof(buffer) - 1;
    if (sysctlbyname("machdep.cpu.brand_string", buffer, &bufferlen, nullptr, 0) != 0) {
        // On failure the buffer contents are not reliable; discard them.
        buffer[0] = '\0';
    }

    // "Apple M1", "Apple M1 Pro" and "Apple M1 Max" share this prefix and
    // are all treated as the same core.
    static const char m1_prefix[] = "Apple M1";
    if (strncmp(buffer, m1_prefix, sizeof(m1_prefix) - 1) == 0)
        return std::make_pair(static_cast<uint32_t>(CPU::apple_m1), Feature::apple_m1);

    // Unknown or unreadable brand string: fall back to apple_m1 as well.
    return std::make_pair(static_cast<uint32_t>(CPU::apple_m1), Feature::apple_m1);
}
715+
716+
#else
717+
672718
// auxval reader
673719

674720
#ifndef AT_HWCAP
@@ -974,7 +1020,7 @@ static CPU get_cpu_name(CPUID cpuid)
9741020
default: return CPU::generic;
9751021
}
9761022
case 0x61: // 'a': Apple
977-
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
1023+
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
9781024
switch (cpuid.part) {
9791025
case 0x0: // Swift
9801026
return CPU::apple_swift;
@@ -1002,6 +1048,12 @@ static CPU get_cpu_name(CPUID cpuid)
10021048
case 0x12: // Lightning
10031049
case 0x13: // Thunder
10041050
return CPU::apple_a13;
1051+
case 0x20: // Icestorm
1052+
case 0x21: // Firestorm
1053+
return CPU::apple_a14;
1054+
case 0x22: // Icestorm m1
1055+
case 0x23: // Firestorm m1
1056+
return CPU::apple_m1;
10051057
default: return CPU::generic;
10061058
}
10071059
case 0x68: // 'h': Huaxintong Semiconductor
@@ -1019,6 +1071,9 @@ static CPU get_cpu_name(CPUID cpuid)
10191071
}
10201072
}
10211073

1074+
1075+
1076+
10221077
namespace {
10231078

10241079
struct arm_arch {
@@ -1062,21 +1117,6 @@ static arm_arch get_elf_arch(void)
10621117
#endif
10631118
}
10641119

1065-
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
1066-
{
1067-
return ::find_cpu(cpu, cpus, ncpu_names);
1068-
}
1069-
1070-
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
1071-
{
1072-
return ::find_cpu(name, cpus, ncpu_names);
1073-
}
1074-
1075-
static inline const char *find_cpu_name(uint32_t cpu)
1076-
{
1077-
return ::find_cpu_name(cpu, cpus, ncpu_names);
1078-
}
1079-
10801120
static arm_arch feature_arch_version(const FeatureList<feature_sz> &feature)
10811121
{
10821122
#ifdef _CPU_AARCH64_
@@ -1303,9 +1343,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
13031343
}
13041344
// Ignore feature bits that we are not interested in.
13051345
mask_features(feature_masks, &features[0]);
1306-
13071346
return std::make_pair(cpu, features);
13081347
}
1348+
#endif
13091349

13101350
static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu()
13111351
{

0 commit comments

Comments
 (0)