[compiler-rt] [compiler-rt] Add cpu model init for Windows. (PR #111961)
Daniel Kiss via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 08:54:17 PDT 2024
https://github.com/DanielKristofKiss updated https://github.com/llvm/llvm-project/pull/111961
>From 889175da54a7769c23f8390b213cfcc22369c365 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Tue, 8 Oct 2024 22:55:07 +0200
Subject: [PATCH 1/2] Add cpu model init for Windows.
---
compiler-rt/lib/builtins/cpu_model/aarch64.c | 2 +
.../cpu_model/aarch64/fmv/windows.inc | 42 +++++++++++++++++++
.../lib/builtins/cpu_model/cpu_model.h | 10 ++++-
3 files changed, 53 insertions(+), 1 deletion(-)
create mode 100644 compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64.c b/compiler-rt/lib/builtins/cpu_model/aarch64.c
index ea2da23a95278f..def11f88c4854d 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64.c
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64.c
@@ -76,6 +76,8 @@ struct {
#elif defined(__linux__) && __has_include(<sys/auxv.h>)
#include "aarch64/fmv/mrs.inc"
#include "aarch64/fmv/getauxval.inc"
+#elif defined(_WIN32)
+#include "aarch64/fmv/windows.inc"
#else
#include "aarch64/fmv/unimplemented.inc"
#endif
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
new file mode 100644
index 00000000000000..fba4d8aed89bba
--- /dev/null
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -0,0 +1,42 @@
+#ifndef _ARM64_
+#define _ARM64_
+#endif
+#include <processthreadsapi.h>
+#include <stdint.h>
+
+void __init_cpu_features_resolver(unsigned long hwcap,
+ const __ifunc_arg_t *arg) {}
+
+void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ #define setCPUFeature(F) features |= 1ULL << F
+
+ uint64_t features = 0;
+
+ setCPUFeature(FEAT_INIT);
+ setCPUFeature(FEAT_FP);
+
+ // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
+ if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
+ setCPUFeature(FEAT_CRC);
+ if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
+ setCPUFeature(FEAT_LSE);
+ if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
+ setCPUFeature(FEAT_DOTPROD);
+
+ if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
+ setCPUFeature(FEAT_AES);
+ setCPUFeature(FEAT_SHA2);
+ setCPUFeature(FEAT_PMULL);
+ }
+ if (IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE))
+ setCPUFeature(FEAT_JSCVT);
+
+ if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE))
+ setCPUFeature(FEAT_RCPC);
+
+ __atomic_store(&__aarch64_cpu_features.features, &features,
+ __ATOMIC_RELAXED);
+}
diff --git a/compiler-rt/lib/builtins/cpu_model/cpu_model.h b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
index 924ca89cf60f5c..5f9079c4e67aea 100644
--- a/compiler-rt/lib/builtins/cpu_model/cpu_model.h
+++ b/compiler-rt/lib/builtins/cpu_model/cpu_model.h
@@ -31,7 +31,15 @@
// We're choosing init priority 90 to force our constructors to run before any
// constructors in the end user application (starting at priority 101). This
// value matches the libgcc choice for the same functions.
-#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(90)))
+#ifdef _WIN64
+// The constructor that replaces the ifunc currently runs with priority 10,
+// see the LowerIFuncPass. The FMV resolver depends on the CPU features, so
+// this constructor must run earlier: set the priority to 9.
+#define CONSTRUCTOR_PRIOTITY 9
+#else
+#define CONSTRUCTOR_PRIOTITY 90
+#endif
+#define CONSTRUCTOR_ATTRIBUTE __attribute__((constructor(CONSTRUCTOR_PRIOTITY)))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
>From ab190811c62f089587b65a74a8c87cd94e6259a6 Mon Sep 17 00:00:00 2001
From: Daniel Kiss <daniel.kiss at arm.com>
Date: Fri, 11 Oct 2024 17:52:58 +0200
Subject: [PATCH 2/2] Add more features, refactor
---
.../cpu_model/aarch64/fmv/windows.inc | 81 ++++++++++++++++---
1 file changed, 70 insertions(+), 11 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
index fba4d8aed89bba..44ed8510b3515b 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/windows.inc
@@ -4,6 +4,52 @@
#include <processthreadsapi.h>
#include <stdint.h>
+#ifndef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_PMULL128_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BITPERM_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_I8MM_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE -1
+#endif
+#ifndef PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE
+#define PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE -1
+#endif
+
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {}
@@ -19,23 +65,36 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
setCPUFeature(FEAT_FP);
// https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
- if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE))
- setCPUFeature(FEAT_CRC);
- if (IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE))
- setCPUFeature(FEAT_LSE);
- if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE))
- setCPUFeature(FEAT_DOTPROD);
-
if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
setCPUFeature(FEAT_AES);
setCPUFeature(FEAT_SHA2);
setCPUFeature(FEAT_PMULL);
}
- if (IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE))
- setCPUFeature(FEAT_JSCVT);
- if (IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE))
- setCPUFeature(FEAT_RCPC);
+ static const struct ProcessFeatureToFeatMap_t {
+ int WinApiFeature;
+ enum CPUFeatures CPUFeature;
+ } FeatMap[] = {
+ {PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE, FEAT_CRC},
+ {PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE, FEAT_LSE},
+ {PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE, FEAT_DOTPROD},
+ {PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE, FEAT_JSCVT},
+ {PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE, FEAT_RCPC},
+ {PF_ARM_SVE_INSTRUCTIONS_AVAILABLE, FEAT_SVE},
+ {PF_ARM_SVE2_INSTRUCTIONS_AVAILABLE, FEAT_SVE2},
+ {PF_ARM_SVE_AES_INSTRUCTIONS_AVAILABLE, FEAT_SVE_AES},
+ {PF_ARM_SVE_BF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_BF16},
+ {PF_ARM_SVE_EBF16_INSTRUCTIONS_AVAILABLE, FEAT_SVE_EBF16},
+ {PF_ARM_SVE_SHA3_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SHA3},
+ {PF_ARM_SVE_SM4_INSTRUCTIONS_AVAILABLE, FEAT_SVE_SM4},
+ {PF_ARM_SVE_F32MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F32MM},
+ {PF_ARM_SVE_F64MM_INSTRUCTIONS_AVAILABLE, FEAT_SVE_F64MM},
+ };
+
+ for (size_t I = 0, E = sizeof(FeatMap) / sizeof(FeatMap[0]);
+ I != E; ++I)
+ if ((FeatMap[I].WinApiFeature != -1) && IsProcessorFeaturePresent(FeatMap[I].WinApiFeature))
+ setCPUFeature(FeatMap[I].CPUFeature);
__atomic_store(&__aarch64_cpu_features.features, &features,
__ATOMIC_RELAXED);
More information about the llvm-commits
mailing list