[compiler-rt] [llvm] [AArch64] Extend Windows CPU feature detection with more features. (PR #171930)

Eli Friedman via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 11 15:46:24 PST 2025


https://github.com/efriedma-quic created https://github.com/llvm/llvm-project/pull/171930

Mostly adding feature flags from the newest SDK.

(Note that in addition to the obvious, this also affects the compiler-rt SME ABI routines, which rely on FEAT_SME and FEAT_SME2.)

>From cdec18cd48cdfe37f81e3b209bb6cd0d0e4e2eae Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma at qti.qualcomm.com>
Date: Thu, 11 Dec 2025 15:32:07 -0800
Subject: [PATCH] [AArch64] Extend Windows CPU feature detection with more
 features.

Mostly adding feature flags from the newest SDK.
---
 .../cpu_model/aarch64/fmv/windows.inc         | 49 +++++++++++++++++--
 llvm/lib/TargetParser/Host.cpp                | 49 +++++++++++++++++--
 2 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index 2ca18242fba3e..cf245818a0d11 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -18,24 +18,48 @@
 #ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
 #endif
+#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
+#endif
 #ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
 #endif
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
+#endif
 #ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
 #endif
 #ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
 #endif
-#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
-#endif
 #ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
 #endif
 #ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
 #endif
+#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
+#endif
+#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
+#endif
+#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
+#endif
+#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
+#endif
+#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
+#endif
+#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
+#endif
+#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
+#endif
 
 void __init_cpu_features_resolver(unsigned long hwcap,
                                   const __ifunc_arg_t *arg) {}
@@ -68,15 +92,30 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
       {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
       {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
       {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
+      {PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE, FEAT_SVE2_1},
       {PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128},
+      {PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BITPERM},
       {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
       {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
       {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
       {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
-      // There is no I8MM flag, but when SVE_I8MM is available, I8MM is too.
-      {PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
+      {PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
+      {PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE, FEAT_FP16},
+      {PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE, FEAT_BF16},
+      {PF_ARM_SME_INSTRUCTIONS_AVAILABLE, FEAT_SME},
+      {PF_ARM_SME2_INSTRUCTIONS_AVAILABLE, FEAT_SME2},
+      {PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE, FEAT_SME_I64},
+      {PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE, FEAT_SME_F64},
   };
 
+  // The following features are never detected because there is no known way
+  // to detect them on Windows:
+  //
+  // FEAT_RNG, FEAT_FLAGM, FEAT_FLAGM2, FEAT_FP16FML, FEAT_RDM, FEAT_CSSC,
+  // FEAT_DIT, FEAT_DPB, FEAT_DPB2, FEAT_FCMA, FEAT_RCPC2, FEAT_FRINTTS,
+  // FEAT_MEMTAG2, FEAT_SB, FEAT_SSBS2, FEAT_BTI, FEAT_WFXT, FEAT_RCPC3,
+  // FEAT_MOPS.
+
   for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I)
     if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
       setCPUFeature(FeatMap[I].CPUFeature);
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index cb793d60a286f..aacaaa4879f4d 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -2304,24 +2304,49 @@ StringMap<bool> sys::getHostCPUFeatures() {
 #ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
 #endif
+#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
+#endif
 #ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
 #endif
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
+#endif
 #ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
 #endif
 #ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
 #endif
-#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
-#endif
 #ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
 #endif
 #ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
 #define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
 #endif
+#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
+#endif
+#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
+#endif
+#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
+#endif
+#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
+#endif
+#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
+#endif
+#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
+#endif
+#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
+#endif
+
 StringMap<bool> sys::getHostCPUFeatures() {
   StringMap<bool> Features;
 
@@ -2340,8 +2365,12 @@ StringMap<bool> sys::getHostCPUFeatures() {
       IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE);
   Features["sve2"] =
       IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);
+  Features["sve2p1"] =
+      IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE);
   Features["sve-aes"] =
       IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE);
+  Features["sve-bitperm"] =
+      IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE);
   Features["sve-sha3"] =
       IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE);
   Features["sve-sm4"] =
@@ -2351,7 +2380,19 @@ StringMap<bool> sys::getHostCPUFeatures() {
   Features["f64mm"] =
       IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE);
   Features["i8mm"] =
-      IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE);
+      IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE);
+  Features["fp16"] =
+      IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE);
+  Features["bf16"] =
+      IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE);
+  Features["sme"] =
+      IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE);
+  Features["sme2"] =
+      IsProcessorFeaturePresent(PF_ARM_SME2_INSTRUCTIONS_AVAILABLE);
+  Features["sme-i16i64"] =
+      IsProcessorFeaturePresent(PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE);
+  Features["sme-f64f64"] =
+      IsProcessorFeaturePresent(PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE);
 
   // Avoid inferring "crypto" means more than the traditional AES + SHA2
   bool TradCrypto =



More information about the llvm-commits mailing list