[compiler-rt] [compiler-rt][AArch64][FMV] Use the hw.optional.arm.caps fast path (PR #95275)
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 10:39:26 PDT 2024
https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/95275
>From 9235a34b9110f881a52ef05c937f1b869fa9d71c Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 12 Jun 2024 10:29:50 -0700
Subject: [PATCH 1/2] [compiler-rt][AArch64][FMV] Use the hw.optional.arm.caps
fast path when available.
macOS 15.0 and iOS 18.0 added a new sysctl to fetch a bitvector of all the
hw.optional.arm.FEAT_* flags in one go. Using this has a perf advantage over
doing multiple round-trips to the kernel and back, but since it's not present
in older OSes, we still need the slow fallback.
---
.../builtins/cpu_model/aarch64/fmv/apple.inc | 62 +++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 6fef109567b61..6c6a71eed49d0 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -2,6 +2,11 @@
#if TARGET_OS_OSX || TARGET_OS_IPHONE
#include <sys/sysctl.h>
+#if __has_include(<arm/cpu_capabilities_public.h>)
+# include <arm/cpu_capabilities_public.h>
+# define HAS_CPU_CAPABILITIES_PUBLIC_H 1
+#endif
+
static bool isKnownAndSupported(const char *name) {
int32_t val = 0;
size_t size = sizeof(val);
@@ -21,6 +26,63 @@ void __init_cpu_features_resolver(void) {
uint64_t features = 0;
+#ifdef HAS_CPU_CAPABILITIES_PUBLIC_H
+ uint8_t feats_bitvec[(CAP_BIT_NB + 7) / 8] = {0};
+ size_t len = sizeof(feats_bitvec);
+ // When hw.optional.arm.caps is available (macOS 15.0+, iOS 18.0+), use the
+ // fast path to get all the feature bits, otherwise fall back to the slow
+ // ~20-something sysctls path.
+ if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) {
+
+ #define CHECK_BIT(FROM, TO) do { \
+ _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE); \
+ if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \
+ features |= (1ULL << TO); \
+ } \
+ } while(0)
+
+ CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM);
+ CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2);
+ CHECK_BIT(CAP_BIT_FEAT_FHM, FEAT_FP16FML);
+ CHECK_BIT(CAP_BIT_FEAT_DotProd, FEAT_DOTPROD);
+ CHECK_BIT(CAP_BIT_FEAT_SHA3, FEAT_SHA3);
+ CHECK_BIT(CAP_BIT_FEAT_RDM, FEAT_RDM);
+ CHECK_BIT(CAP_BIT_FEAT_LSE, FEAT_LSE);
+ CHECK_BIT(CAP_BIT_FEAT_SHA256, FEAT_SHA2);
+ CHECK_BIT(CAP_BIT_FEAT_SHA1, FEAT_SHA1);
+ CHECK_BIT(CAP_BIT_FEAT_AES, FEAT_AES);
+ CHECK_BIT(CAP_BIT_FEAT_PMULL, FEAT_PMULL);
+ CHECK_BIT(CAP_BIT_FEAT_SPECRES, FEAT_PREDRES);
+ CHECK_BIT(CAP_BIT_FEAT_SB, FEAT_SB);
+ CHECK_BIT(CAP_BIT_FEAT_FRINTTS, FEAT_FRINTTS);
+ CHECK_BIT(CAP_BIT_FEAT_LRCPC, FEAT_RCPC);
+ CHECK_BIT(CAP_BIT_FEAT_LRCPC2, FEAT_RCPC2);
+ CHECK_BIT(CAP_BIT_FEAT_FCMA, FEAT_FCMA);
+ CHECK_BIT(CAP_BIT_FEAT_JSCVT, FEAT_JSCVT);
+ CHECK_BIT(CAP_BIT_FEAT_DPB, FEAT_DPB);
+ CHECK_BIT(CAP_BIT_FEAT_DPB2, FEAT_DPB2);
+ CHECK_BIT(CAP_BIT_FEAT_BF16, FEAT_BF16);
+ CHECK_BIT(CAP_BIT_FEAT_I8MM, FEAT_I8MM);
+ CHECK_BIT(CAP_BIT_FEAT_DIT, FEAT_DIT);
+ CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16);
+ CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2);
+ CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI);
+ features |= (1ULL << FEAT_FP);
+ CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD);
+ CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC);
+ CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME);
+ CHECK_BIT(CAP_BIT_FEAT_SME2, FEAT_SME2);
+ CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64);
+ CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64);
+
+ features |= (1ULL << FEAT_INIT);
+
+ __atomic_store(&__aarch64_cpu_features.features, &features,
+ __ATOMIC_RELAXED);
+ return;
+ }
+#endif
+
// https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
static const struct {
const char *sysctl_name;
>From 69c48867fd47b613fc882e26dd87096c56a71051 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 12 Jun 2024 10:39:10 -0700
Subject: [PATCH 2/2] clang-format
---
.../builtins/cpu_model/aarch64/fmv/apple.inc | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 6c6a71eed49d0..0d6ab123d1052 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -3,8 +3,8 @@
#include <sys/sysctl.h>
#if __has_include(<arm/cpu_capabilities_public.h>)
-# include <arm/cpu_capabilities_public.h>
-# define HAS_CPU_CAPABILITIES_PUBLIC_H 1
+#include <arm/cpu_capabilities_public.h>
+#define HAS_CPU_CAPABILITIES_PUBLIC_H 1
#endif
static bool isKnownAndSupported(const char *name) {
@@ -34,12 +34,13 @@ void __init_cpu_features_resolver(void) {
// ~20-something sysctls path.
if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) {
- #define CHECK_BIT(FROM, TO) do { \
- _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE); \
- if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \
- features |= (1ULL << TO); \
- } \
- } while(0)
+#define CHECK_BIT(FROM, TO) \
+ do { \
+ _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE); \
+ if (feats_bitvec[FROM / 8] & (1u << ((FROM)&7))) { \
+ features |= (1ULL << TO); \
+ } \
+ } while (0)
CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM);
CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2);
@@ -78,7 +79,7 @@ void __init_cpu_features_resolver(void) {
features |= (1ULL << FEAT_INIT);
__atomic_store(&__aarch64_cpu_features.features, &features,
- __ATOMIC_RELAXED);
+ __ATOMIC_RELAXED);
return;
}
#endif
More information about the llvm-commits
mailing list