[compiler-rt] [llvm] Unify get available features (PR #97872)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 5 19:32:07 PDT 2024
https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/97872
This patch unifies the implementations of getAvailableFeatures between LLVM and compiler-rt.
This patch is intended to be a stepping stone towards a unified implementation of several functions shared by compiler-rt and LLVM. The shared code would live in duplicated .inc files, with tests enforcing that the copies stay identical.
>From 1d0935ca48226d4825f6e005ebbc9e1df2d8cc2e Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 5 Jul 2024 20:01:30 +0000
Subject: [PATCH 1/3] [compiler-rt][X86] Better unify ProcessorFeatures and
X86TargetParser
compiler-rt's ProcessorFeatures and the definitions in X86TargetParser
need to be kept in sync to ensure correct functionality. Currently there
are quite a few differences between the two (not currently impacting
functionality). These have primarily stemmed from updates made on the
LLVM side that did not touch the compiler-rt side (like the removal of
the knights landing features). This patch attempts to rectify the
situation and better unify the structs.
---
compiler-rt/lib/builtins/cpu_model/x86.c | 17 ++++++-----------
.../llvm/TargetParser/X86TargetParser.def | 6 +++---
llvm/lib/TargetParser/X86TargetParser.cpp | 18 +++++++++---------
3 files changed, 18 insertions(+), 23 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 7e8acb3e73eda..5dbb1004d0fda 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -125,8 +125,8 @@ enum ProcessorFeatures {
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
- FEATURE_AVX512ER,
- FEATURE_AVX512PF,
+ FEATURE_NF,
+ FEATURE_CF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
@@ -142,7 +142,7 @@ enum ProcessorFeatures {
// FIXME: Below Features has some missings comparing to gcc, it's because gcc
// has some not one-to-one mapped in llvm.
FEATURE_3DNOW,
- // FEATURE_3DNOWP,
+ // FEATURE_3DNOWA,
FEATURE_ADX = 40,
// FEATURE_ABM,
FEATURE_CLDEMOTE = 42,
@@ -171,7 +171,7 @@ enum ProcessorFeatures {
// FEATURE_OSXSAVE,
FEATURE_PCONFIG = 63,
FEATURE_PKU,
- FEATURE_PREFETCHWT1,
+ FEATURE_EVEX512,
FEATURE_PRFCHW,
FEATURE_PTWRITE,
FEATURE_RDPID,
@@ -205,6 +205,7 @@ enum ProcessorFeatures {
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
+ FEATURE_APXF,
FEATURE_AVXIFMA,
FEATURE_AVXVNNIINT8,
FEATURE_AVXNECONVERT,
@@ -217,7 +218,7 @@ enum ProcessorFeatures {
FEATURE_SM3,
FEATURE_SHA512,
FEATURE_SM4,
- FEATURE_APXF,
+ FEATUE_EGPR,
FEATURE_USERMSR,
FEATURE_AVX10_1_256,
FEATURE_AVX10_1_512,
@@ -870,10 +871,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 24) & 1))
setFeature(FEATURE_CLWB);
- if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
- setFeature(FEATURE_AVX512PF);
- if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
- setFeature(FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
@@ -883,8 +880,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VL);
- if (HasLeaf7 && ((ECX >> 0) & 1))
- setFeature(FEATURE_PREFETCHWT1);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI);
if (HasLeaf7 && ((ECX >> 4) & 1))
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 0e4ad873e3639..3fb901fccbd65 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -191,7 +191,7 @@ X86_FEATURE_COMPAT(FSGSBASE, "fsgsbase", 0)
X86_FEATURE (CRC32, "crc32")
X86_FEATURE (INVPCID, "invpcid")
X86_FEATURE (RDPRU, "rdpru")
-X86_FEATURE (SAHF, "sahf")
+X86_FEATURE (LAHF_LM, "sahf")
X86_FEATURE (VZEROUPPER, "vzeroupper")
X86_FEATURE_COMPAT(LWP, "lwp", 0)
X86_FEATURE_COMPAT(LZCNT, "lzcnt", 0)
@@ -250,7 +250,7 @@ X86_FEATURE_COMPAT(SHA512, "sha512", 0)
X86_FEATURE_COMPAT(SM4, "sm4", 0)
X86_FEATURE (EGPR, "egpr")
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
-X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36)
+X86_FEATURE_COMPAT(AVX10_1_256, "avx10.1-256", 36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE (ZU, "zu")
// These features aren't really CPU features, but the frontend can set them.
@@ -264,7 +264,7 @@ X86_MICROARCH_LEVEL(X86_64_BASELINE,"x86-64", 95)
X86_MICROARCH_LEVEL(X86_64_V2, "x86-64-v2", 96)
X86_MICROARCH_LEVEL(X86_64_V3, "x86-64-v3", 97)
X86_MICROARCH_LEVEL(X86_64_V4, "x86-64-v4", 98)
-X86_MICROARCH_LEVEL(APXF, "apxf", 111)
+X86_MICROARCH_LEVEL(APXF, "apxf", 99)
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
#undef X86_MICROARCH_LEVEL
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 141ecb936b708..a5d0fa953979a 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -66,7 +66,7 @@ constexpr FeatureBitset FeaturesNocona =
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
-constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
+constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureLAHF_LM |
FeaturePOPCNT | FeatureCRC32 |
FeatureSSE4_2 | FeatureCMPXCHG16B;
constexpr FeatureBitset FeaturesX86_64_V3 =
@@ -78,7 +78,7 @@ constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 | FeatureEVEX512 |
// Intel Core CPUs
constexpr FeatureBitset FeaturesCore2 =
- FeaturesNocona | FeatureSAHF | FeatureSSSE3;
+ FeaturesNocona | FeatureLAHF_LM | FeatureSSSE3;
constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
constexpr FeatureBitset FeaturesNehalem =
FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2;
@@ -186,14 +186,14 @@ constexpr FeatureBitset FeaturesK8 =
constexpr FeatureBitset FeaturesK8SSE3 = FeaturesK8 | FeatureSSE3;
constexpr FeatureBitset FeaturesAMDFAM10 =
FeaturesK8SSE3 | FeatureCMPXCHG16B | FeatureLZCNT | FeaturePOPCNT |
- FeaturePRFCHW | FeatureSAHF | FeatureSSE4_A;
+ FeaturePRFCHW | FeatureLAHF_LM | FeatureSSE4_A;
// Bobcat architecture processors.
constexpr FeatureBitset FeaturesBTVER1 =
FeatureX87 | FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT |
FeatureFXSR | FeatureLZCNT | FeatureMMX | FeaturePOPCNT | FeaturePRFCHW |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
- FeatureSAHF;
+ FeatureLAHF_LM;
constexpr FeatureBitset FeaturesBTVER2 =
FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 |
FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
@@ -203,7 +203,7 @@ constexpr FeatureBitset FeaturesBDVER1 =
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 |
FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL |
- FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 |
+ FeaturePOPCNT | FeaturePRFCHW | FeatureLAHF_LM | FeatureSSE | FeatureSSE2 |
FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A |
FeatureXOP | FeatureXSAVE;
constexpr FeatureBitset FeaturesBDVER2 =
@@ -221,7 +221,7 @@ constexpr FeatureBitset FeaturesZNVER1 =
FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT |
FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT |
FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
- FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA |
+ FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureLAHF_LM | FeatureSHA |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
FeatureXSAVEOPT | FeatureXSAVES;
@@ -507,7 +507,7 @@ constexpr FeatureBitset ImpliedFeaturesRDPRU = {};
constexpr FeatureBitset ImpliedFeaturesRDRND = {};
constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
constexpr FeatureBitset ImpliedFeaturesRTM = {};
-constexpr FeatureBitset ImpliedFeaturesSAHF = {};
+constexpr FeatureBitset ImpliedFeaturesLAHF_LM = {};
constexpr FeatureBitset ImpliedFeaturesSERIALIZE = {};
constexpr FeatureBitset ImpliedFeaturesSGX = {};
constexpr FeatureBitset ImpliedFeaturesSHSTK = {};
@@ -615,13 +615,13 @@ constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
constexpr FeatureBitset ImpliedFeaturesAVXVNNI = FeatureAVX2;
// AVX10 Features
-constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
+constexpr FeatureBitset ImpliedFeaturesAVX10_1_256 =
FeatureAVX512CD | FeatureAVX512VBMI | FeatureAVX512IFMA |
FeatureAVX512VNNI | FeatureAVX512BF16 | FeatureAVX512VPOPCNTDQ |
FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureVAES | FeatureVPCLMULQDQ |
FeatureAVX512FP16;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 =
- FeatureAVX10_1 | FeatureEVEX512;
+ FeatureAVX10_1_256 | FeatureEVEX512;
// APX Features
constexpr FeatureBitset ImpliedFeaturesEGPR = {};
>From bc5ec931ccc3833192fac26322ea8bf6662285ae Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Sat, 6 Jul 2024 01:49:56 +0000
Subject: [PATCH 2/3] Rename VZEROUPPER
---
llvm/include/llvm/TargetParser/X86TargetParser.def | 2 +-
llvm/lib/TargetParser/X86TargetParser.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 3fb901fccbd65..21628ed9697c2 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -192,7 +192,7 @@ X86_FEATURE (CRC32, "crc32")
X86_FEATURE (INVPCID, "invpcid")
X86_FEATURE (RDPRU, "rdpru")
X86_FEATURE (LAHF_LM, "sahf")
-X86_FEATURE (VZEROUPPER, "vzeroupper")
+X86_FEATURE (LM, "lm")
X86_FEATURE_COMPAT(LWP, "lwp", 0)
X86_FEATURE_COMPAT(LZCNT, "lzcnt", 0)
X86_FEATURE_COMPAT(MOVBE, "movbe", 0)
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index a5d0fa953979a..07e85d3a8f4a8 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -508,6 +508,7 @@ constexpr FeatureBitset ImpliedFeaturesRDRND = {};
constexpr FeatureBitset ImpliedFeaturesRDSEED = {};
constexpr FeatureBitset ImpliedFeaturesRTM = {};
constexpr FeatureBitset ImpliedFeaturesLAHF_LM = {};
+constexpr FeatureBitset ImpliedFeaturesLM = {};
constexpr FeatureBitset ImpliedFeaturesSERIALIZE = {};
constexpr FeatureBitset ImpliedFeaturesSGX = {};
constexpr FeatureBitset ImpliedFeaturesSHSTK = {};
@@ -517,7 +518,6 @@ constexpr FeatureBitset ImpliedFeaturesUINTR = {};
constexpr FeatureBitset ImpliedFeaturesUSERMSR = {};
constexpr FeatureBitset ImpliedFeaturesWAITPKG = {};
constexpr FeatureBitset ImpliedFeaturesWBNOINVD = {};
-constexpr FeatureBitset ImpliedFeaturesVZEROUPPER = {};
constexpr FeatureBitset ImpliedFeaturesX87 = {};
constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
>From d05d91b6d737c6dd046ac93d8c49015f5412a47a Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Sat, 6 Jul 2024 02:29:17 +0000
Subject: [PATCH 3/3] [compiler-rt][X86] unify getAvailableFeatures
This patch unifies the implementations of getAvailableFeatures between
LLVM and compiler-rt.
This patch is intended to be a stepping stone towards a unified
implementation of several functions shared by compiler-rt and LLVM.
The shared code would live in duplicated .inc files, with tests
enforcing that the copies stay identical.
---
compiler-rt/lib/builtins/cpu_model/x86.c | 20 ++-
llvm/lib/TargetParser/Host.cpp | 206 ++++++++++++++++++++---
2 files changed, 197 insertions(+), 29 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 5dbb1004d0fda..50b0f141cf4d3 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -144,8 +144,8 @@ enum ProcessorFeatures {
FEATURE_3DNOW,
// FEATURE_3DNOWA,
FEATURE_ADX = 40,
- // FEATURE_ABM,
- FEATURE_CLDEMOTE = 42,
+ FEATURE_64BIT,
+ FEATURE_CLDEMOTE,
FEATURE_CLFLUSHOPT,
FEATURE_CLWB,
FEATURE_CLZERO,
@@ -157,7 +157,7 @@ enum ProcessorFeatures {
FEATURE_ENQCMD = 48,
FEATURE_F16C,
FEATURE_FSGSBASE,
- // FEATURE_FXSAVE,
+ FEATURE_CRC32,
// FEATURE_HLE,
// FEATURE_IBT,
FEATURE_LAHF_LM = 54,
@@ -805,8 +805,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_CMPXCHG16B);
if ((ECX >> 19) & 1)
setFeature(FEATURE_SSE4_1);
- if ((ECX >> 20) & 1)
+ if ((ECX >> 20) & 1) {
setFeature(FEATURE_SSE4_2);
+ setFeature(FEATURE_CRC32);
+ }
if ((ECX >> 22) & 1)
setFeature(FEATURE_MOVBE);
if ((ECX >> 23) & 1)
@@ -859,8 +861,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_BMI2);
if (HasLeaf7 && ((EBX >> 11) & 1))
setFeature(FEATURE_RTM);
- if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
+ if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
setFeature(FEATURE_AVX512F);
+ setFeature(FEATURE_EVEX512);
+ }
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512DQ);
if (HasLeaf7 && ((EBX >> 18) & 1))
@@ -869,6 +873,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512IFMA);
+ if (HasLeaf7 && ((EBX >> 23) & 1))
+ setFeature(FEATURE_CLFLUSHOPT);
if (HasLeaf7 && ((EBX >> 24) & 1))
setFeature(FEATURE_CLWB);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
@@ -1023,7 +1029,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_MWAITX);
if (((EDX >> 29) & 1))
- setFeature(FEATURE_LM);
+ setFeature(FEATURE_64BIT);
}
bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
@@ -1043,7 +1049,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
setFeature(FEATURE_WIDEKL);
- if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
+ if (hasFeature(FEATURE_64BIT) && hasFeature(FEATURE_SSE2)) {
setFeature(FEATURE_X86_64_BASELINE);
if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
hasFeature(FEATURE_LAHF_LM) && hasFeature(FEATURE_SSE4_2)) {
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 2ea56746aff24..0a1ebd9ec0921 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1225,11 +1225,10 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned *Features) {
- unsigned EAX, EBX;
+ unsigned EAX = 0, EBX = 0;
- auto setFeature = [&](unsigned F) {
- Features[F / 32] |= 1U << (F % 32);
- };
+#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
+#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
if ((EDX >> 15) & 1)
setFeature(X86::FEATURE_CMOV);
@@ -1248,26 +1247,31 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_SSSE3);
if ((ECX >> 12) & 1)
setFeature(X86::FEATURE_FMA);
+ if ((ECX >> 13) & 1)
+ setFeature(X86::FEATURE_CMPXCHG16B);
if ((ECX >> 19) & 1)
setFeature(X86::FEATURE_SSE4_1);
if ((ECX >> 20) & 1) {
setFeature(X86::FEATURE_SSE4_2);
setFeature(X86::FEATURE_CRC32);
}
+ if ((ECX >> 22) & 1)
+ setFeature(X86::FEATURE_MOVBE);
if ((ECX >> 23) & 1)
setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
setFeature(X86::FEATURE_AES);
-
- if ((ECX >> 22) & 1)
- setFeature(X86::FEATURE_MOVBE);
+ if ((ECX >> 29) & 1)
+ setFeature(X86::FEATURE_F16C);
+ if ((ECX >> 30) & 1)
+ setFeature(X86::FEATURE_RDRND);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
- bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
- ((EAX & 0x6) == 0x6);
+ bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
// Darwin lazily saves the AVX512 context on first use: trust that the OS will
// save the AVX512 context if we use AVX512 instructions, even the bit is not
@@ -1275,33 +1279,50 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasAVX512Save = true;
#else
// AVX512 requires additional context to be saved by the OS.
- bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+ bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
+ // AMX requires additional context to be saved by the OS.
+ const unsigned AMXBits = (1 << 17) | (1 << 18);
+ bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
+ bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
- if (HasAVX)
+ if (HasAVXSave)
setFeature(X86::FEATURE_AVX);
+ if (((ECX >> 26) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_XSAVE);
+
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && ((EBX >> 0) & 1))
+ setFeature(X86::FEATURE_FSGSBASE);
+ if (HasLeaf7 && ((EBX >> 2) & 1))
+ setFeature(X86::FEATURE_SGX);
if (HasLeaf7 && ((EBX >> 3) & 1))
setFeature(X86::FEATURE_BMI);
- if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
setFeature(X86::FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 8) & 1))
setFeature(X86::FEATURE_BMI2);
+ if (HasLeaf7 && ((EBX >> 11) & 1))
+ setFeature(X86::FEATURE_RTM);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) {
setFeature(X86::FEATURE_AVX512F);
setFeature(X86::FEATURE_EVEX512);
}
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512DQ);
+ if (HasLeaf7 && ((EBX >> 18) & 1))
+ setFeature(X86::FEATURE_RDSEED);
if (HasLeaf7 && ((EBX >> 19) & 1))
setFeature(X86::FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 23) & 1))
setFeature(X86::FEATURE_CLFLUSHOPT);
+ if (HasLeaf7 && ((EBX >> 24) & 1))
+ setFeature(X86::FEATURE_CLWB);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
@@ -1313,11 +1334,19 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 4) & 1))
+ setFeature(X86::FEATURE_PKU);
+ if (HasLeaf7 && ((ECX >> 5) & 1))
+ setFeature(X86::FEATURE_WAITPKG);
if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 7) & 1))
+ setFeature(X86::FEATURE_SHSTK);
if (HasLeaf7 && ((ECX >> 8) & 1))
setFeature(X86::FEATURE_GFNI);
- if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
+ if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_VAES);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
setFeature(X86::FEATURE_VPCLMULQDQ);
if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VNNI);
@@ -1325,36 +1354,169 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VPOPCNTDQ);
+ if (HasLeaf7 && ((ECX >> 22) & 1))
+ setFeature(X86::FEATURE_RDPID);
+ if (HasLeaf7 && ((ECX >> 23) & 1))
+ setFeature(X86::FEATURE_KL);
+ if (HasLeaf7 && ((ECX >> 25) & 1))
+ setFeature(X86::FEATURE_CLDEMOTE);
+ if (HasLeaf7 && ((ECX >> 27) & 1))
+ setFeature(X86::FEATURE_MOVDIRI);
+ if (HasLeaf7 && ((ECX >> 28) & 1))
+ setFeature(X86::FEATURE_MOVDIR64B);
+ if (HasLeaf7 && ((ECX >> 29) & 1))
+ setFeature(X86::FEATURE_ENQCMD);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 5) & 1))
+ setFeature(X86::FEATURE_UINTR);
if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512VP2INTERSECT);
+ if (HasLeaf7 && ((EDX >> 14) & 1))
+ setFeature(X86::FEATURE_SERIALIZE);
+ if (HasLeaf7 && ((EDX >> 16) & 1))
+ setFeature(X86::FEATURE_TSXLDTRK);
+ if (HasLeaf7 && ((EDX >> 18) & 1))
+ setFeature(X86::FEATURE_PCONFIG);
+ if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
+ setFeature(X86::FEATURE_AMX_BF16);
+ if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
+ setFeature(X86::FEATURE_AVX512FP16);
+ if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
+ setFeature(X86::FEATURE_AMX_TILE);
+ if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
+ setFeature(X86::FEATURE_AMX_INT8);
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
// return all 0s for invalid subleaves so check the limit.
bool HasLeaf7Subleaf1 =
HasLeaf7 && EAX >= 1 &&
!getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
+ setFeature(X86::FEATURE_SHA512);
+ if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
+ setFeature(X86::FEATURE_SM3);
+ if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
+ setFeature(X86::FEATURE_SM4);
+ if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
+ setFeature(X86::FEATURE_RAOINT);
+ if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_AVXVNNI);
if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512BF16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
+ setFeature(X86::FEATURE_CMPCCXADD);
+ if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
+ setFeature(X86::FEATURE_AMX_FP16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
+ setFeature(X86::FEATURE_HRESET);
+ if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_AVXIFMA);
+
+ if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_AVXVNNIINT8);
+ if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_AVXNECONVERT);
+ if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
+ setFeature(X86::FEATURE_AMX_COMPLEX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_AVXVNNIINT16);
+ if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
+ setFeature(X86::FEATURE_PREFETCHI);
+ if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
+ setFeature(X86::FEATURE_USERMSR);
+ if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
+ setFeature(X86::FEATURE_AVX10_1_256);
+ if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
+ setFeature(X86::FEATURE_APXF);
+
+ unsigned MaxLevel;
+ getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
+ bool HasLeafD = MaxLevel >= 0xd &&
+ !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_XSAVEOPT);
+ if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_XSAVEC);
+ if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
+ setFeature(X86::FEATURE_XSAVES);
+
+ bool HasLeaf24 =
+ MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
+ setFeature(X86::FEATURE_AVX10_1_512);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 &&
!getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- if (HasExtLeaf1 && ((ECX >> 6) & 1))
- setFeature(X86::FEATURE_SSE4_A);
- if (HasExtLeaf1 && ((ECX >> 11) & 1))
- setFeature(X86::FEATURE_XOP);
- if (HasExtLeaf1 && ((ECX >> 16) & 1))
- setFeature(X86::FEATURE_FMA4);
-
- if (HasExtLeaf1 && ((EDX >> 29) & 1))
- setFeature(X86::FEATURE_64BIT);
+ if (HasExtLeaf1) {
+ if (ECX & 1)
+ setFeature(X86::FEATURE_LAHF_LM);
+ if ((ECX >> 5) & 1)
+ setFeature(X86::FEATURE_LZCNT);
+ if (((ECX >> 6) & 1))
+ setFeature(X86::FEATURE_SSE4_A);
+ if (((ECX >> 8) & 1))
+ setFeature(X86::FEATURE_PRFCHW);
+ if (((ECX >> 11) & 1))
+ setFeature(X86::FEATURE_XOP);
+ if (((ECX >> 15) & 1))
+ setFeature(X86::FEATURE_LWP);
+ if (((ECX >> 16) & 1))
+ setFeature(X86::FEATURE_FMA4);
+ if (((ECX >> 21) & 1))
+ setFeature(X86::FEATURE_TBM);
+ if (((ECX >> 29) & 1))
+ setFeature(X86::FEATURE_MWAITX);
+
+ if (((EDX >> 29) & 1))
+ setFeature(X86::FEATURE_64BIT);
+ }
+
+ bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+ !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf8 && ((EBX >> 0) & 1))
+ setFeature(X86::FEATURE_CLZERO);
+ if (HasExtLeaf8 && ((EBX >> 9) & 1))
+ setFeature(X86::FEATURE_WBNOINVD);
+
+ bool HasLeaf14 = MaxLevel >= 0x14 &&
+ !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf14 && ((EBX >> 4) & 1))
+ setFeature(X86::FEATURE_PTWRITE);
+
+ bool HasLeaf19 =
+ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
+ setFeature(X86::FEATURE_WIDEKL);
+
+ if (hasFeature(X86::FEATURE_64BIT) && hasFeature(X86::FEATURE_SSE2)) {
+ setFeature(X86::FEATURE_X86_64_BASELINE);
+ if (hasFeature(X86::FEATURE_CMPXCHG16B) &&
+ hasFeature(X86::FEATURE_POPCNT) && hasFeature(X86::FEATURE_LAHF_LM) &&
+ hasFeature(X86::FEATURE_SSE4_2)) {
+ setFeature(X86::FEATURE_X86_64_V2);
+ if (hasFeature(X86::FEATURE_AVX2) && hasFeature(X86::FEATURE_BMI) &&
+ hasFeature(X86::FEATURE_BMI2) && hasFeature(X86::FEATURE_F16C) &&
+ hasFeature(X86::FEATURE_FMA) && hasFeature(X86::FEATURE_LZCNT) &&
+ hasFeature(X86::FEATURE_MOVBE)) {
+ setFeature(X86::FEATURE_X86_64_V3);
+ if (hasFeature(X86::FEATURE_AVX512BW) &&
+ hasFeature(X86::FEATURE_AVX512CD) &&
+ hasFeature(X86::FEATURE_AVX512DQ) &&
+ hasFeature(X86::FEATURE_AVX512VL))
+ setFeature(X86::FEATURE_X86_64_V4);
+ }
+ }
+ }
+
+#undef hasFeature
+#undef setFeature
}
StringRef sys::getHostCPUName() {
More information about the llvm-commits
mailing list