[compiler-rt] [compiler-rt][AArch64][FMV] Use the hw.optional.arm.caps fast path (PR #95275)

Jon Roelofs via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 07:06:17 PDT 2024


https://github.com/jroelofs updated https://github.com/llvm/llvm-project/pull/95275

>From 9235a34b9110f881a52ef05c937f1b869fa9d71c Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 12 Jun 2024 10:29:50 -0700
Subject: [PATCH 1/5] [compiler-rt][AArch64][FMV] Use the hw.optional.arm.caps
 fast path when available.

macOS 15.0 and iOS 18.0 added a new sysctl to fetch a bitvector of all the
hw.optional.arm.FEAT_* values in one go.  Using this has a perf advantage over
doing multiple round-trips to the kernel and back, but since it's not present
in older OSes, we still need the slow fallback.
---
 .../builtins/cpu_model/aarch64/fmv/apple.inc  | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 6fef109567b61..6c6a71eed49d0 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -2,6 +2,11 @@
 #if TARGET_OS_OSX || TARGET_OS_IPHONE
 #include <sys/sysctl.h>
 
+#if __has_include(<arm/cpu_capabilities_public.h>)
+#  include <arm/cpu_capabilities_public.h>
+#  define HAS_CPU_CAPABILITIES_PUBLIC_H 1
+#endif
+
 static bool isKnownAndSupported(const char *name) {
   int32_t val = 0;
   size_t size = sizeof(val);
@@ -21,6 +26,63 @@ void __init_cpu_features_resolver(void) {
 
   uint64_t features = 0;
 
+#ifdef HAS_CPU_CAPABILITIES_PUBLIC_H
+  uint8_t feats_bitvec[(CAP_BIT_NB + 7) / 8] = {0};
+  size_t len = sizeof(feats_bitvec);
+  // When hw.optional.arm.caps is available (macOS 15.0+, iOS 18.0+), use the
+  // fast path to get all the feature bits, otherwise fall back to the slow
+  // ~20-something sysctls path.
+  if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) {
+
+    #define CHECK_BIT(FROM, TO) do { \
+      _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE); \
+      if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \
+        features |= (1ULL << TO); \
+      } \
+    } while(0)
+
+    CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM);
+    CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2);
+    CHECK_BIT(CAP_BIT_FEAT_FHM, FEAT_FP16FML);
+    CHECK_BIT(CAP_BIT_FEAT_DotProd, FEAT_DOTPROD);
+    CHECK_BIT(CAP_BIT_FEAT_SHA3, FEAT_SHA3);
+    CHECK_BIT(CAP_BIT_FEAT_RDM, FEAT_RDM);
+    CHECK_BIT(CAP_BIT_FEAT_LSE, FEAT_LSE);
+    CHECK_BIT(CAP_BIT_FEAT_SHA256, FEAT_SHA2);
+    CHECK_BIT(CAP_BIT_FEAT_SHA1, FEAT_SHA1);
+    CHECK_BIT(CAP_BIT_FEAT_AES, FEAT_AES);
+    CHECK_BIT(CAP_BIT_FEAT_PMULL, FEAT_PMULL);
+    CHECK_BIT(CAP_BIT_FEAT_SPECRES, FEAT_PREDRES);
+    CHECK_BIT(CAP_BIT_FEAT_SB, FEAT_SB);
+    CHECK_BIT(CAP_BIT_FEAT_FRINTTS, FEAT_FRINTTS);
+    CHECK_BIT(CAP_BIT_FEAT_LRCPC, FEAT_RCPC);
+    CHECK_BIT(CAP_BIT_FEAT_LRCPC2, FEAT_RCPC2);
+    CHECK_BIT(CAP_BIT_FEAT_FCMA, FEAT_FCMA);
+    CHECK_BIT(CAP_BIT_FEAT_JSCVT, FEAT_JSCVT);
+    CHECK_BIT(CAP_BIT_FEAT_DPB, FEAT_DPB);
+    CHECK_BIT(CAP_BIT_FEAT_DPB2, FEAT_DPB2);
+    CHECK_BIT(CAP_BIT_FEAT_BF16, FEAT_BF16);
+    CHECK_BIT(CAP_BIT_FEAT_I8MM, FEAT_I8MM);
+    CHECK_BIT(CAP_BIT_FEAT_DIT, FEAT_DIT);
+    CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16);
+    CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2);
+    CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI);
+    features |= (1ULL << FEAT_FP);
+    CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD);
+    CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC);
+    CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME);
+    CHECK_BIT(CAP_BIT_FEAT_SME2, FEAT_SME2);
+    CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64);
+    CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64);
+
+    features |= (1ULL << FEAT_INIT);
+
+    __atomic_store(&__aarch64_cpu_features.features, &features,
+                    __ATOMIC_RELAXED);
+    return;
+  }
+#endif
+
   // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
   static const struct {
     const char *sysctl_name;

>From 69c48867fd47b613fc882e26dd87096c56a71051 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 12 Jun 2024 10:39:10 -0700
Subject: [PATCH 2/5] clang-format

---
 .../builtins/cpu_model/aarch64/fmv/apple.inc  | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 6c6a71eed49d0..0d6ab123d1052 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -3,8 +3,8 @@
 #include <sys/sysctl.h>
 
 #if __has_include(<arm/cpu_capabilities_public.h>)
-#  include <arm/cpu_capabilities_public.h>
-#  define HAS_CPU_CAPABILITIES_PUBLIC_H 1
+#include <arm/cpu_capabilities_public.h>
+#define HAS_CPU_CAPABILITIES_PUBLIC_H 1
 #endif
 
 static bool isKnownAndSupported(const char *name) {
@@ -34,12 +34,13 @@ void __init_cpu_features_resolver(void) {
   // ~20-something sysctls path.
   if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) {
 
-    #define CHECK_BIT(FROM, TO) do { \
-      _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE); \
-      if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \
-        features |= (1ULL << TO); \
-      } \
-    } while(0)
+#define CHECK_BIT(FROM, TO)                                                    \
+  do {                                                                         \
+    _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE);                     \
+    if (feats_bitvec[FROM / 8] & (1u << ((FROM)&7))) {                         \
+      features |= (1ULL << TO);                                                \
+    }                                                                          \
+  } while (0)
 
     CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM);
     CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2);
@@ -78,7 +79,7 @@ void __init_cpu_features_resolver(void) {
     features |= (1ULL << FEAT_INIT);
 
     __atomic_store(&__aarch64_cpu_features.features, &features,
-                    __ATOMIC_RELAXED);
+                   __ATOMIC_RELAXED);
     return;
   }
 #endif

>From b8384610d485301aeaaa500f657ba7bcaf3222db Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 12 Jun 2024 10:40:34 -0700
Subject: [PATCH 3/5] remove internal static assert

---
 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 0d6ab123d1052..678fad5e23a4e 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -36,7 +36,6 @@ void __init_cpu_features_resolver(void) {
 
 #define CHECK_BIT(FROM, TO)                                                    \
   do {                                                                         \
-    _Static_assert(FROM < CAP_BIT_NB, BROKEN_ABI_MESSAGE);                     \
     if (feats_bitvec[FROM / 8] & (1u << ((FROM)&7))) {                         \
       features |= (1ULL << TO);                                                \
     }                                                                          \

>From 09cea6930f68a8600eb4c8533cdfd999d1b0c4c6 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Wed, 12 Jun 2024 11:33:02 -0700
Subject: [PATCH 4/5] clang-format again

---
 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 678fad5e23a4e..458c282c05b5b 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -36,7 +36,7 @@ void __init_cpu_features_resolver(void) {
 
 #define CHECK_BIT(FROM, TO)                                                    \
   do {                                                                         \
-    if (feats_bitvec[FROM / 8] & (1u << ((FROM)&7))) {                         \
+    if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) {                       \
       features |= (1ULL << TO);                                                \
     }                                                                          \
   } while (0)

>From 7d922f997ffacb93e0fc2a233a3e1f274f17df4b Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Thu, 13 Jun 2024 07:05:40 -0700
Subject: [PATCH 5/5] ssbs2 implies ssbs. fp is always enabled

---
 .../builtins/cpu_model/aarch64/fmv/apple.inc  | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
index 458c282c05b5b..6f4b9ab37e36b 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -15,6 +15,19 @@ static bool isKnownAndSupported(const char *name) {
   return val;
 }
 
+static uint64_t deriveImplicitFeatures(uint64_t features) {
+  // FEAT_SSBS2 implies FEAT_SSBS
+  if ((1ULL << FEAT_SSBS2) & features)
+    features |= (1ULL << FEAT_SSBS);
+
+  // FEAT_FP is always enabled
+  features |= (1ULL << FEAT_FP);
+
+  features |= (1ULL << FEAT_INIT);
+
+  return features;
+}
+
 void __init_cpu_features_resolver(void) {
   // On Darwin platforms, this may be called concurrently by multiple threads
   // because the resolvers that use it are called lazily at runtime (unlike on
@@ -67,7 +80,6 @@ void __init_cpu_features_resolver(void) {
     CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16);
     CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2);
     CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI);
-    features |= (1ULL << FEAT_FP);
     CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD);
     CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC);
     CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME);
@@ -75,7 +87,7 @@ void __init_cpu_features_resolver(void) {
     CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64);
     CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64);
 
-    features |= (1ULL << FEAT_INIT);
+    features = deriveImplicitFeatures(features);
 
     __atomic_store(&__aarch64_cpu_features.features, &features,
                    __ATOMIC_RELAXED);
@@ -94,7 +106,6 @@ void __init_cpu_features_resolver(void) {
       {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD},
       {"hw.optional.arm.FEAT_RDM", FEAT_RDM},
       {"hw.optional.arm.FEAT_LSE", FEAT_LSE},
-      {"hw.optional.floatingpoint", FEAT_FP},
       {"hw.optional.AdvSIMD", FEAT_SIMD},
       {"hw.optional.armv8_crc32", FEAT_CRC},
       {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1},
@@ -124,7 +135,7 @@ void __init_cpu_features_resolver(void) {
     if (isKnownAndSupported(feature_checks[I].sysctl_name))
       features |= (1ULL << feature_checks[I].feature);
 
-  features |= (1ULL << FEAT_INIT);
+  features = deriveImplicitFeatures(features);
 
   __atomic_store(&__aarch64_cpu_features.features, &features,
                   __ATOMIC_RELAXED);



More information about the llvm-commits mailing list