[compiler-rt] Add cpu model init for Windows. (PR #111961)

Daniel Kiss via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 01:59:25 PST 2024


https://github.com/DanielKristofKiss updated https://github.com/llvm/llvm-project/pull/111961

>From 889175da54a7769c23f8390b213cfcc22369c365 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Tue, 8 Oct 2024 22:55:07 +0200
Subject: [PATCH 1/7] Add cpu model init for Windows.

---
 compiler-rt/lib/builtins/cpu_model/aarch64.c  |  2 +
 .../cpu_model/aarch64/fmv/windows.inc         | 42 +++++++++++++++++++
 .../lib/builtins/cpu_model/cpu_model.h        | 10 ++++-
 3 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index ea2da23a95278f..def11f88c4854d 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -76,6 +76,8 @@ struct {
 #elif defined(__linux__) && __has_include(<sys/auxv.h>)
 #include "aarch64/fmv/mrs.inc"
 #include "aarch64/fmv/getauxval.inc"
+#elif defined(_WIN32)
+#include "aarch64/fmv/windows.inc"
 #else
 #include "aarch64/fmv/unimplemented.inc"
 #endif
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
new file mode 100644
index 00000000000000..fba4d8aed89bba
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -0,0 +1,42 @@
+#ifndef _ARM64_
+#define _ARM64_
+#endif
+#include <processthreadsapi.h>
+#include <stdint.h>
+
+void __init_cpu_features_resolver(unsigned long hwcap,
+                                  const __ifunc_arg_t *arg) {}
+
+void  CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+  if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+    return;
+
+  #define setCPUFeature(F) features |= 1ULL << F
+
+  uint64_t features = 0;
+
+  setCPUFeature(FEAT_INIT);
+  setCPUFeature(FEAT_FP);
+
+  // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
+    setCPUFeature(FEAT_CRC);
+  if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
+    setCPUFeature(FEAT_LSE);
+  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+    setCPUFeature(FEAT_DOTPROD);
+
+  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
+    setCPUFeature(FEAT_AES);
+    setCPUFeature(FEAT_SHA2);
+    setCPUFeature(FEAT_PMULL);
+  }
+  if (IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE))
+    setCPUFeature(FEAT_JSCVT);
+
+  if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE))
+    setCPUFeature(FEAT_RCPC);
+
+  __atomic_store(&__aarch64_cpu_features.features, &features,
+                  __ATOMIC_RELAXED);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
index 924ca89cf60f5c..5f9079c4e67aea 100644
--- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h
+++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
@@ -31,7 +31,15 @@
 // We're choosing init priority 90 to force our constructors to run before any
 // constructors in the end user application (starting at priority 101). This
 // value matches the libgcc choice for the same functions.
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
+#ifdef _WIN64
+// Contructor that replaces the ifunc runs currently with prio 10, see
+// the LowerIFuncPass. The resolver of FMV depends on the cpu features so set
+// the priority to 9.
+#define CONSTRUCTOR_PRIOTITY 9
+#else
+#define CONSTRUCTOR_PRIOTITY 90
+#endif
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIOTITY)))
 #else
 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
 // this runs during initialization.

>From ab190811c62f089587b65a74a8c87cd94e6259a6 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Fri, 11 Oct 2024 17:52:58 +0200
Subject: [PATCH 2/7] Add more features, refactor

---
 .../cpu_model/aarch64/fmv/windows.inc         | 81 ++++++++++++++++---
 1 file changed, 70 insertions(+), 11 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index fba4d8aed89bba..44ed8510b3515b 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -4,6 +4,52 @@
 #include <processthreadsapi.h>
 #include <stdint.h>
 
+#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE -1
+#endif
+
 void __init_cpu_features_resolver(unsigned long hwcap,
                                   const __ifunc_arg_t *arg) {}
 
@@ -19,23 +65,36 @@ void  CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   setCPUFeature(FEAT_FP);
 
   // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
-  if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
-    setCPUFeature(FEAT_CRC);
-  if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
-    setCPUFeature(FEAT_LSE);
-  if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
-    setCPUFeature(FEAT_DOTPROD);
-
   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
     setCPUFeature(FEAT_AES);
     setCPUFeature(FEAT_SHA2);
     setCPUFeature(FEAT_PMULL);
   }
-  if (IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE))
-    setCPUFeature(FEAT_JSCVT);
 
-  if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE))
-    setCPUFeature(FEAT_RCPC);
+  static const struct ProcessFeatureToFeatMap_t {
+    int WinApiFeature;
+    enum CPUFeatures CPUFeature;
+  } FeatMap[] = {
+    {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC},
+    {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE},
+    {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD},
+    {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT},
+    {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
+    {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
+    {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
+    {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES},
+    {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16},
+    {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16},
+    {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
+    {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
+    {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
+    {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
+  };
+
+  for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]);
+        I != E; ++I)
+    if ((FeatMap[I].WinApiFeature != -1) && IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
+      setCPUFeature(FeatMap[I].CPUFeature);
 
   __atomic_store(&__aarch64_cpu_features.features, &features,
                   __ATOMIC_RELAXED);

>From df55f291786f22610fdcbbc5b8ab87e0bd122bd1 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Fri, 11 Oct 2024 22:53:52 +0200
Subject: [PATCH 3/7] clang-format

---
 .../cpu_model/aarch64/fmv/windows.inc         | 41 +++++++++----------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index 44ed8510b3515b..dc8eaff5fc5290 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -53,11 +53,11 @@
 void __init_cpu_features_resolver(unsigned long hwcap,
                                   const __ifunc_arg_t *arg) {}
 
-void  CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
     return;
 
-  #define setCPUFeature(F) features |= 1ULL << F
+#define setCPUFeature(F) features |= 1ULL << F
 
   uint64_t features = 0;
 
@@ -75,27 +75,26 @@ void  CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
     int WinApiFeature;
     enum CPUFeatures CPUFeature;
   } FeatMap[] = {
-    {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC},
-    {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE},
-    {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD},
-    {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT},
-    {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
-    {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
-    {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
-    {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES},
-    {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16},
-    {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16},
-    {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
-    {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
-    {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
-    {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
+      {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC},
+      {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE},
+      {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD},
+      {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT},
+      {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
+      {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
+      {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
+      {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES},
+      {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16},
+      {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16},
+      {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
+      {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
+      {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
+      {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
   };
 
-  for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]);
-        I != E; ++I)
-    if ((FeatMap[I].WinApiFeature != -1) && IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
+  for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I)
+    if ((FeatMap[I].WinApiFeature != -1) &&
+        IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
       setCPUFeature(FeatMap[I].CPUFeature);
 
-  __atomic_store(&__aarch64_cpu_features.features, &features,
-                  __ATOMIC_RELAXED);
+  __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED);
 }

>From eda389814bc59c5d5063d25be526fc223f3ecc34 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Mon, 14 Oct 2024 17:48:40 +0200
Subject: [PATCH 4/7] Address review comments

---
 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc | 5 ++---
 compiler-rt/lib/builtins/cpu_model/cpu_model.h             | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index dc8eaff5fc5290..109a0b9cdea00e 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -1,6 +1,5 @@
-#ifndef _ARM64_
-#define _ARM64_
-#endif
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
 #include <processthreadsapi.h>
 #include <stdint.h>
 
diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
index 5f9079c4e67aea..5c63175dfe9cf1 100644
--- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h
+++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
@@ -31,7 +31,7 @@
 // We're choosing init priority 90 to force our constructors to run before any
 // constructors in the end user application (starting at priority 101). This
 // value matches the libgcc choice for the same functions.
-#ifdef _WIN64
+#ifdef _WIN32
 // Contructor that replaces the ifunc runs currently with prio 10, see
 // the LowerIFuncPass. The resolver of FMV depends on the cpu features so set
 // the priority to 9.

>From 767064172738daea91eef97c81a1705e8144cdc6 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Tue, 15 Oct 2024 08:30:39 +0200
Subject: [PATCH 5/7] fix spelling

---
 compiler-rt/lib/builtins/cpu_model/cpu_model.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
index 5c63175dfe9cf1..3bc4e63c4f25ab 100644
--- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h
+++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
@@ -35,11 +35,11 @@
 // Contructor that replaces the ifunc runs currently with prio 10, see
 // the LowerIFuncPass. The resolver of FMV depends on the cpu features so set
 // the priority to 9.
-#define CONSTRUCTOR_PRIOTITY 9
+#define CONSTRUCTOR_PRIORITY 9
 #else
-#define CONSTRUCTOR_PRIOTITY 90
+#define CONSTRUCTOR_PRIORITY 90
 #endif
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIOTITY)))
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIORITY)))
 #else
 // FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
 // this runs during initialization.

>From 56ccce4fb033e86b2999742498732d3812ae1bf1 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Tue, 15 Oct 2024 08:32:31 +0200
Subject: [PATCH 6/7] Use SDK values even when they are not defined

---
 .../cpu_model/aarch64/fmv/windows.inc         | 33 +++++++++----------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index 109a0b9cdea00e..d6368c29d98723 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -4,49 +4,49 @@
 #include <stdint.h>
 
 #ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE 43 
 #endif
 #ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE 44
 #endif
 #ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE 45
 #endif
 #ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE 46
 #endif
 #ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE 47
 #endif
 #ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE 49
 #endif
 #ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE 50
 #endif
 #ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE 51
 #endif
 #ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE 52
 #endif
 #ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE 53
 #endif
 #ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE 55
 #endif
 #ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE 56
 #endif
 #ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE 57
 #endif
 #ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE 58
 #endif
 #ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
-#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE -1
+#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE 59
 #endif
 
 void __init_cpu_features_resolver(unsigned long hwcap,
@@ -91,8 +91,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
   };
 
   for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I)
-    if ((FeatMap[I].WinApiFeature != -1) &&
-        IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
+    if (IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
       setCPUFeature(FeatMap[I].CPUFeature);
 
   __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED);

>From 8f784a5452f010a06c9644f38d989f0032c8177d Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Fri, 8 Nov 2024 10:58:17 +0100
Subject: [PATCH 7/7] address review comments

---
 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index d6368c29d98723..dd30667deb686e 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -65,7 +65,6 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
 
   // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
   if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
-    setCPUFeature(FEAT_AES);
     setCPUFeature(FEAT_SHA2);
     setCPUFeature(FEAT_PMULL);
   }
@@ -81,13 +80,13 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
       {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
       {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
       {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
-      {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES},
-      {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16},
-      {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16},
+      {PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE, FEAT_SVE_PMULL128},
       {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
       {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
       {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
       {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
+      // There is no I8MM flag, but when SVE_I8MM is available, I8MM is too.
+      {PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE, FEAT_I8MM},
   };
 
   for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]); I != E; ++I)



More information about the llvm-commits mailing list