[compiler-rt] [llvm] [AArch64] Extend Windows CPU feature detection with more features. (PR #171930)
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 15:46:24 PST 2025
https://github.com/efriedma-quic created https://github.com/llvm/llvm-project/pull/171930
Mostly adding feature flags from the newest SDK.
(Note that in addition to the obvious, this also affects the compiler-rt SME ABI routines, which rely on FEAT_SME and FEAT_SME2.)
From cdec18cd48cdfe37f81e3b209bb6cd0d0e4e2eae Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma at qti.qualcomm.com>
Date: Thu, 11 Dec 2025 15:32:07 -0800
Subject: [PATCH] [AArch64] Extend Windows CPU feature detection with more
features.
Mostly adding feature flags from the newest SDK.
---
.../cpu_model/aarch64/fmv/windows.inc | 49 +++++++++++++++++--
llvm/lib/TargetParser/Host.cpp | 49 +++++++++++++++++--
2 files changed, 89 insertions(+), 9 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index 2ca18242fba3e..cf245818a0d11 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -18,24 +18,48 @@
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
#endif
+#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
+#endif
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
#endif
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
+#endif
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
#endif
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
#endif
-#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
-#endif
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
#endif
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
#endif
+#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
+#endif
+#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
+#endif
+#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
+#endif
+#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
+#endif
+#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
+#endif
+#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
+#endif
+#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
+#endif
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {}
@@ -68,15 +92,30 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
{PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
{PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
{PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
+ {PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE, FEAT_SVE2_1},
{PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128},
+ {PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BITPERM},
{PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
{PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
{PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
{PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
- // There is no I8MM flag, but when SVE_I8MM is available, I8MM is too.
- {PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
+ {PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
+ {PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE, FEAT_FP16},
+ {PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE, FEAT_BF16},
+ {PF_ARM_SME_INSTRUCTIONS_AVAILABLE, FEAT_SME},
+ {PF_ARM_SME2_INSTRUCTIONS_AVAILABLE, FEAT_SME2},
+ {PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE, FEAT_SME_I64},
+ {PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE, FEAT_SME_F64},
};
+ // The following features are never detected because there is no known way
+ // to detect them on Windows:
+ //
+ // FEAT_RNG, FEAT_FLAGM, FEAT_FLAGM2, FEAT_FP16FML, FEAT_RDM, FEAT_CSSC,
+ // FEAT_DIT, FEAT_DPB, FEAT_DPB2, FEAT_FCMA, FEAT_RCPC2, FEAT_FRINTTS,
+ // FEAT_MEMTAG2, FEAT_SB, FEAT_SSBS2, FEAT_BTI, FEAT_WFXT, FEAT_RCPC3,
+ // FEAT_MOPS.
+
for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I)
if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
setCPUFeature(FeatMap[I].CPUFeature);
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index cb793d60a286f..aacaaa4879f4d 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -2304,24 +2304,49 @@ StringMap<bool> sys::getHostCPUFeatures() {
#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
#endif
+#ifndef PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE 48
+#endif
#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
#endif
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
+#endif
#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
#endif
#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
#endif
-#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
-#endif
#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
#endif
#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
#endif
+#ifndef PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE 66
+#endif
+#ifndef PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE 67
+#endif
+#ifndef PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE 68
+#endif
+#ifndef PF_ARM_SME_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_INSTRUCTIONS_AVAILABLE 70
+#endif
+#ifndef PF_ARM_SME2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME2_INSTRUCTIONS_AVAILABLE 71
+#endif
+#ifndef PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE 85
+#endif
+#ifndef PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE 86
+#endif
+
StringMap<bool> sys::getHostCPUFeatures() {
StringMap<bool> Features;
@@ -2340,8 +2365,12 @@ StringMap<bool> sys::getHostCPUFeatures() {
IsProcessorFeaturePresent(PF_ARM_SVE_INSTRUCTIONS_AVAILABLE);
Features["sve2"] =
IsProcessorFeaturePresent(PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE);
+ Features["sve2p1"] =
+ IsProcessorFeaturePresent(PF_ARM_SVE2_1_INSTRUCTIONS_AVAILABLE);
Features["sve-aes"] =
IsProcessorFeaturePresent(PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE);
+ Features["sve-bitperm"] =
+ IsProcessorFeaturePresent(PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE);
Features["sve-sha3"] =
IsProcessorFeaturePresent(PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE);
Features["sve-sm4"] =
@@ -2351,7 +2380,19 @@ StringMap<bool> sys::getHostCPUFeatures() {
Features["f64mm"] =
IsProcessorFeaturePresent(PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE);
Features["i8mm"] =
- IsProcessorFeaturePresent(PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE);
+ IsProcessorFeaturePresent(PF_ARM_V82_I8MM_INSTRUCTIONS_AVAILABLE);
+ Features["fp16"] =
+ IsProcessorFeaturePresent(PF_ARM_V82_FP16_INSTRUCTIONS_AVAILABLE);
+ Features["bf16"] =
+ IsProcessorFeaturePresent(PF_ARM_V86_BF16_INSTRUCTIONS_AVAILABLE);
+ Features["sme"] =
+ IsProcessorFeaturePresent(PF_ARM_SME_INSTRUCTIONS_AVAILABLE);
+ Features["sme2"] =
+ IsProcessorFeaturePresent(PF_ARM_SME2_INSTRUCTIONS_AVAILABLE);
+ Features["sme-i16i64"] =
+ IsProcessorFeaturePresent(PF_ARM_SME_I16I64_INSTRUCTIONS_AVAILABLE);
+ Features["sme-f64f64"] =
+ IsProcessorFeaturePresent(PF_ARM_SME_F64F64_INSTRUCTIONS_AVAILABLE);
// Avoid inferring "crypto" means more than the traditional AES + SHA2
bool TradCrypto =
More information about the llvm-commits
mailing list