[compiler-rt] [llvm] [clang] [X86] Support more ISAs to enable __builtin_cpu_supports (PR #79086)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 30 01:29:11 PST 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/79086
>From b5c8579c5c8e7ea1e8436348bbf60ecee9c3c799 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 19 Jan 2024 09:22:27 +0800
Subject: [PATCH 1/3] [X86] Support more ISAs to enable __builtin_cpu_supports
This patch also expands support for attribute/target; the priority of
each newly supported ISA is set to zero.
---
clang/test/CodeGen/target-builtin-noerror.c | 59 +++++
compiler-rt/lib/builtins/cpu_model/x86.c | 209 +++++++++++++++++-
.../llvm/TargetParser/X86TargetParser.def | 135 ++++++-----
3 files changed, 324 insertions(+), 79 deletions(-)
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 06bb40a2b71ea..16097fa08cb9c 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -84,6 +84,65 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx512vp2intersect");
(void)__builtin_cpu_supports("f16c");
(void)__builtin_cpu_supports("avx512fp16");
+ (void)__builtin_cpu_supports("3dnow");
+ (void)__builtin_cpu_supports("adx");
+ (void)__builtin_cpu_supports("cldemote");
+ (void)__builtin_cpu_supports("clflushopt");
+ (void)__builtin_cpu_supports("clwb");
+ (void)__builtin_cpu_supports("clzero");
+ (void)__builtin_cpu_supports("cx16");
+ (void)__builtin_cpu_supports("cx8");
+ (void)__builtin_cpu_supports("enqcmd");
+ (void)__builtin_cpu_supports("fsgsbase");
+ (void)__builtin_cpu_supports("lwp");
+ (void)__builtin_cpu_supports("lzcnt");
+ (void)__builtin_cpu_supports("movbe");
+ (void)__builtin_cpu_supports("movdir64b");
+ (void)__builtin_cpu_supports("movdiri");
+ (void)__builtin_cpu_supports("mwaitx");
+ (void)__builtin_cpu_supports("pconfig");
+ (void)__builtin_cpu_supports("pku");
+ (void)__builtin_cpu_supports("prefetchwt1");
+ (void)__builtin_cpu_supports("prfchw");
+ (void)__builtin_cpu_supports("ptwrite");
+ (void)__builtin_cpu_supports("rdpid");
+ (void)__builtin_cpu_supports("rdrnd");
+ (void)__builtin_cpu_supports("rdseed");
+ (void)__builtin_cpu_supports("rtm");
+ (void)__builtin_cpu_supports("serialize");
+ (void)__builtin_cpu_supports("sgx");
+ (void)__builtin_cpu_supports("sha");
+ (void)__builtin_cpu_supports("shstk");
+ (void)__builtin_cpu_supports("tbm");
+ (void)__builtin_cpu_supports("tsxldtrk");
+ (void)__builtin_cpu_supports("vaes");
+ (void)__builtin_cpu_supports("waitpkg");
+ (void)__builtin_cpu_supports("wbnoinvd");
+ (void)__builtin_cpu_supports("xsave");
+ (void)__builtin_cpu_supports("xsavec");
+ (void)__builtin_cpu_supports("xsaveopt");
+ (void)__builtin_cpu_supports("xsaves");
+ (void)__builtin_cpu_supports("amx-tile");
+ (void)__builtin_cpu_supports("amx-int8");
+ (void)__builtin_cpu_supports("amx-bf16");
+ (void)__builtin_cpu_supports("uintr");
+ (void)__builtin_cpu_supports("hreset");
+ (void)__builtin_cpu_supports("kl");
+ (void)__builtin_cpu_supports("widekl");
+ (void)__builtin_cpu_supports("avxvnni");
+ (void)__builtin_cpu_supports("avxifma");
+ (void)__builtin_cpu_supports("avxvnniint8");
+ (void)__builtin_cpu_supports("avxneconvert");
+ (void)__builtin_cpu_supports("cmpccxadd");
+ (void)__builtin_cpu_supports("amx-fp16");
+ (void)__builtin_cpu_supports("prefetchi");
+ (void)__builtin_cpu_supports("raoint");
+ (void)__builtin_cpu_supports("amx-complex");
+ (void)__builtin_cpu_supports("avxvnniint16");
+ (void)__builtin_cpu_supports("sm3");
+ (void)__builtin_cpu_supports("sha512");
+ (void)__builtin_cpu_supports("sm4");
+ (void)__builtin_cpu_supports("usermsr");
}
void verifycpustrings(void) {
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 0750e29f989a8..c499754890343 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -139,20 +139,77 @@ enum ProcessorFeatures {
FEATURE_AVX512BITALG,
FEATURE_AVX512BF16,
FEATURE_AVX512VP2INTERSECT,
+ FEATURE_3DNOW,
+ FEATURE_ADX = 40,
+ FEATURE_CLDEMOTE = 42,
+ FEATURE_CLFLUSHOPT,
+ FEATURE_CLWB,
+ FEATURE_CLZERO,
+ FEATURE_CMPXCHG16B,
+ FEATURE_CMPXCHG8B,
+ FEATURE_ENQCMD,
+ FEATURE_F16C,
+ FEATURE_FSGSBASE,
- FEATURE_CMPXCHG16B = 46,
- FEATURE_F16C = 49,
FEATURE_LAHF_LM = 54,
FEATURE_LM,
- FEATURE_WP,
+ FEATURE_LWP,
FEATURE_LZCNT,
FEATURE_MOVBE,
-
- FEATURE_AVX512FP16 = 94,
+ FEATURE_MOVDIR64B,
+ FEATURE_MOVDIRI,
+ FEATURE_MWAITX,
+
+ FEATURE_PCONFIG = 63,
+ FEATURE_PKU,
+ FEATURE_PREFETCHWT1,
+ FEATURE_PRFCHW,
+ FEATURE_PTWRITE,
+ FEATURE_RDPID,
+ FEATURE_RDRND,
+ FEATURE_RDSEED,
+ FEATURE_RTM,
+ FEATURE_SERIALIZE,
+ FEATURE_SGX,
+ FEATURE_SHA,
+ FEATURE_SHSTK,
+ FEATURE_TBM,
+ FEATURE_TSXLDTRK,
+ FEATURE_VAES,
+ FEATURE_WAITPKG,
+ FEATURE_WBNOINVD,
+ FEATURE_XSAVE,
+ FEATURE_XSAVEC,
+ FEATURE_XSAVEOPT,
+ FEATURE_XSAVES,
+ FEATURE_AMX_TILE,
+ FEATURE_AMX_INT8,
+ FEATURE_AMX_BF16,
+ FEATURE_UINTR,
+ FEATURE_HRESET,
+ FEATURE_KL,
+
+ FEATURE_WIDEKL = 92,
+ FEATURE_AVXVNNI,
+ FEATURE_AVX512FP16,
FEATURE_X86_64_BASELINE,
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
+ FEATURE_AVXIFMA,
+ FEATURE_AVXVNNIINT8,
+ FEATURE_AVXNECONVERT,
+ FEATURE_CMPCCXADD,
+ FEATURE_AMX_FP16,
+ FEATURE_PREFETCHI,
+ FEATURE_RAOINT,
+ FEATURE_AMX_COMPLEX,
+ FEATURE_AVXVNNIINT16,
+ FEATURE_SM3,
+ FEATURE_SHA512,
+ FEATURE_SM4,
+
+ FEATURE_USERMSR = 112,
CPU_FEATURE_MAX
};
@@ -715,6 +772,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
+ if ((EDX >> 8) & 1)
+ setFeature(FEATURE_CMPXCHG8B);
if ((EDX >> 15) & 1)
setFeature(FEATURE_CMOV);
if ((EDX >> 23) & 1)
@@ -746,13 +805,15 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AES);
if ((ECX >> 29) & 1)
setFeature(FEATURE_F16C);
+ if ((ECX >> 30) & 1)
+ setFeature(FEATURE_RDRND);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
- bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
- ((EAX & 0x6) == 0x6);
+ bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
// Darwin lazily saves the AVX512 context on first use: trust that the OS will
// save the AVX512 context if we use AVX512 instructions, even the bit is not
@@ -760,45 +821,76 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasAVX512Save = true;
#else
// AVX512 requires additional context to be saved by the OS.
- bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+ bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
+ // AMX requires additional context to be saved by the OS.
+ const unsigned AMXBits = (1 << 17) | (1 << 18);
+ bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
+ bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
- if (HasAVX)
+ if (HasAVXSave)
setFeature(FEATURE_AVX);
+ if (((ECX >> 26) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVE);
+
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && ((EBX >> 0) & 1))
+ setFeature(FEATURE_FSGSBASE);
+ if (HasLeaf7 && ((EBX >> 2) & 1))
+ setFeature(FEATURE_SGX);
if (HasLeaf7 && ((EBX >> 3) & 1))
setFeature(FEATURE_BMI);
- if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
setFeature(FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 8) & 1))
setFeature(FEATURE_BMI2);
+ if (HasLeaf7 && ((EBX >> 11) & 1))
+ setFeature(FEATURE_RTM);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512F);
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512DQ);
+ if (HasLeaf7 && ((EBX >> 18) & 1))
+ setFeature(FEATURE_RDSEED);
+ if (HasLeaf7 && ((EBX >> 19) & 1))
+ setFeature(FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512IFMA);
+ if (HasLeaf7 && ((EBX >> 24) & 1))
+ setFeature(FEATURE_CLWB);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512CD);
+ if (HasLeaf7 && ((EBX >> 29) & 1))
+ setFeature(FEATURE_SHA);
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BW);
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VL);
+ if (HasLeaf7 && ((ECX >> 0) & 1))
+ setFeature(FEATURE_PREFETCHWT1);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 4) & 1))
+ setFeature(FEATURE_PKU);
+ if (HasLeaf7 && ((ECX >> 5) & 1))
+ setFeature(FEATURE_WAITPKG);
if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 7) & 1))
+ setFeature(FEATURE_SHSTK);
if (HasLeaf7 && ((ECX >> 8) & 1))
setFeature(FEATURE_GFNI);
- if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
+ if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
+ setFeature(FEATURE_VAES);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
setFeature(FEATURE_VPCLMULQDQ);
if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VNNI);
@@ -806,23 +898,92 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VPOPCNTDQ);
+ if (HasLeaf7 && ((ECX >> 22) & 1))
+ setFeature(FEATURE_RDPID);
+ if (HasLeaf7 && ((ECX >> 23) & 1))
+ setFeature(FEATURE_KL);
+ if (HasLeaf7 && ((ECX >> 25) & 1))
+ setFeature(FEATURE_CLDEMOTE);
+ if (HasLeaf7 && ((ECX >> 27) & 1))
+ setFeature(FEATURE_MOVDIRI);
+ if (HasLeaf7 && ((ECX >> 28) & 1))
+ setFeature(FEATURE_MOVDIR64B);
+ if (HasLeaf7 && ((ECX >> 29) & 1))
+ setFeature(FEATURE_ENQCMD);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 5) & 1))
+ setFeature(FEATURE_UINTR);
if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VP2INTERSECT);
+ if (HasLeaf7 && ((EDX >> 14) & 1))
+ setFeature(FEATURE_SERIALIZE);
+ if (HasLeaf7 && ((EDX >> 16) & 1))
+ setFeature(FEATURE_TSXLDTRK);
+ if (HasLeaf7 && ((EDX >> 18) & 1))
+ setFeature(FEATURE_PCONFIG);
+ if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_BF16);
if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512FP16);
+ if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_TILE);
+ if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_INT8);
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
// return all 0s for invalid subleaves so check the limit.
bool HasLeaf7Subleaf1 =
HasLeaf7 && EAX >= 1 &&
!getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
+ setFeature(FEATURE_SHA512);
+ if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
+ setFeature(FEATURE_SM3);
+ if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
+ setFeature(FEATURE_SM4);
+ if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
+ setFeature(FEATURE_RAOINT);
+ if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNI);
if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BF16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
+ setFeature(FEATURE_CMPCCXADD);
+ if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_FP16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
+ setFeature(FEATURE_HRESET);
+ if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXIFMA);
+
+ if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNIINT8);
+ if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXNECONVERT);
+ if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_COMPLEX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNIINT16);
+ if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
+ setFeature(FEATURE_PREFETCHI);
+ if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
+ setFeature(FEATURE_USERMSR);
+
+ unsigned MaxLevel;
+ if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
+ return false;
+ bool HasLeafD = MaxLevel >= 0xd &&
+ !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVEOPT);
+ if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVEC);
+ if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVES);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -836,14 +997,40 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_LZCNT);
if (((ECX >> 6) & 1))
setFeature(FEATURE_SSE4_A);
+ if (((ECX >> 8) & 1))
+ setFeature(FEATURE_PRFCHW);
if (((ECX >> 11) & 1))
setFeature(FEATURE_XOP);
+ if (((ECX >> 15) & 1))
+ setFeature(FEATURE_LWP);
if (((ECX >> 16) & 1))
setFeature(FEATURE_FMA4);
+ if (((ECX >> 21) & 1))
+ setFeature(FEATURE_TBM);
+ if (((ECX >> 29) & 1))
+ setFeature(FEATURE_MWAITX);
+
if (((EDX >> 29) & 1))
setFeature(FEATURE_LM);
}
+ bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+ !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf8 && ((EBX >> 0) & 1))
+ setFeature(FEATURE_CLZERO);
+ if (HasExtLeaf8 && ((EBX >> 9) & 1))
+ setFeature(FEATURE_WBNOINVD);
+
+ bool HasLeaf14 = MaxLevel >= 0x14 &&
+ !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf14 && ((EBX >> 4) & 1))
+ setFeature(FEATURE_PTWRITE);
+
+ bool HasLeaf19 =
+ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
+ setFeature(FEATURE_WIDEKL);
+
if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
setFeature(FEATURE_X86_64_BASELINE);
if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 43162f2b52eba..7f230920351e3 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -173,85 +173,84 @@ X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni", 34)
X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg", 35)
X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 36)
X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 37)
-// Features below here are not in libgcc/compiler-rt.
-X86_FEATURE (3DNOW, "3dnow")
+X86_FEATURE_COMPAT(3DNOW, "3dnow", 0)
X86_FEATURE (3DNOWA, "3dnowa")
+X86_FEATURE_COMPAT(ADX, "adx", 0)
X86_FEATURE (64BIT, "64bit")
-X86_FEATURE (ADX, "adx")
-X86_FEATURE (AMX_BF16, "amx-bf16")
-X86_FEATURE (AMX_COMPLEX, "amx-complex")
-X86_FEATURE (AMX_INT8, "amx-int8")
-X86_FEATURE (AMX_TILE, "amx-tile")
-X86_FEATURE (CLDEMOTE, "cldemote")
-X86_FEATURE (CLFLUSHOPT, "clflushopt")
-X86_FEATURE (CLWB, "clwb")
-X86_FEATURE_COMPAT(F16C, "f16c", 38)
-X86_FEATURE (CLZERO, "clzero")
-X86_FEATURE (CMPXCHG16B, "cx16")
-X86_FEATURE (CMPXCHG8B, "cx8")
+X86_FEATURE_COMPAT(CLDEMOTE, "cldemote", 0)
+X86_FEATURE_COMPAT(CLFLUSHOPT, "clflushopt", 0)
+X86_FEATURE_COMPAT(CLWB, "clwb", 0)
+X86_FEATURE_COMPAT(CLZERO, "clzero", 0)
+X86_FEATURE_COMPAT(CMPXCHG16B, "cx16", 0)
+X86_FEATURE_COMPAT(CMPXCHG8B, "cx8", 0)
+X86_FEATURE_COMPAT(ENQCMD, "enqcmd", 0)
+X86_FEATURE_COMPAT(F16C, "f16c", 0)
+X86_FEATURE_COMPAT(FSGSBASE, "fsgsbase", 0)
X86_FEATURE (CRC32, "crc32")
-X86_FEATURE (ENQCMD, "enqcmd")
-X86_FEATURE (FSGSBASE, "fsgsbase")
-X86_FEATURE (FXSR, "fxsr")
X86_FEATURE (INVPCID, "invpcid")
-X86_FEATURE (KL, "kl")
-X86_FEATURE (WIDEKL, "widekl")
-X86_FEATURE (LWP, "lwp")
-X86_FEATURE (LZCNT, "lzcnt")
-X86_FEATURE (MOVBE, "movbe")
-X86_FEATURE (MOVDIR64B, "movdir64b")
-X86_FEATURE (MOVDIRI, "movdiri")
-X86_FEATURE (MWAITX, "mwaitx")
-X86_FEATURE (PCONFIG, "pconfig")
-X86_FEATURE (PKU, "pku")
-X86_FEATURE (PREFETCHI, "prefetchi")
-X86_FEATURE (PREFETCHWT1, "prefetchwt1")
-X86_FEATURE (PRFCHW, "prfchw")
-X86_FEATURE (PTWRITE, "ptwrite")
-X86_FEATURE (RDPID, "rdpid")
X86_FEATURE (RDPRU, "rdpru")
-X86_FEATURE (RDRND, "rdrnd")
-X86_FEATURE (RDSEED, "rdseed")
-X86_FEATURE (RTM, "rtm")
X86_FEATURE (SAHF, "sahf")
-X86_FEATURE (SERIALIZE, "serialize")
-X86_FEATURE (SGX, "sgx")
-X86_FEATURE (SHA, "sha")
-X86_FEATURE (SHSTK, "shstk")
-X86_FEATURE (TBM, "tbm")
-X86_FEATURE (TSXLDTRK, "tsxldtrk")
-X86_FEATURE (UINTR, "uintr")
-X86_FEATURE (VAES, "vaes")
X86_FEATURE (VZEROUPPER, "vzeroupper")
-X86_FEATURE (WAITPKG, "waitpkg")
-X86_FEATURE (WBNOINVD, "wbnoinvd")
+X86_FEATURE_COMPAT(LWP, "lwp", 0)
+X86_FEATURE_COMPAT(LZCNT, "lzcnt", 0)
+X86_FEATURE_COMPAT(MOVBE, "movbe", 0)
+X86_FEATURE_COMPAT(MOVDIR64B, "movdir64b", 0)
+X86_FEATURE_COMPAT(MOVDIRI, "movdiri", 0)
+X86_FEATURE_COMPAT(MWAITX, "mwaitx", 0)
X86_FEATURE (X87, "x87")
-X86_FEATURE (XSAVE, "xsave")
-X86_FEATURE (XSAVEC, "xsavec")
-X86_FEATURE (XSAVEOPT, "xsaveopt")
-X86_FEATURE (XSAVES, "xsaves")
-X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 39)
-X86_FEATURE (HRESET, "hreset")
-X86_FEATURE (RAOINT, "raoint")
-X86_FEATURE (AMX_FP16, "amx-fp16")
-X86_FEATURE (CMPCCXADD, "cmpccxadd")
-X86_FEATURE (AVXNECONVERT, "avxneconvert")
-X86_FEATURE (AVXVNNI, "avxvnni")
-X86_FEATURE (AVXIFMA, "avxifma")
-X86_FEATURE (AVXVNNIINT8, "avxvnniint8")
-X86_FEATURE (SHA512, "sha512")
-X86_FEATURE (SM3, "sm3")
-X86_FEATURE (SM4, "sm4")
-X86_FEATURE (AVXVNNIINT16, "avxvnniint16")
-X86_FEATURE (EVEX512, "evex512")
-X86_FEATURE (AVX10_1, "avx10.1-256")
-X86_FEATURE (AVX10_1_512, "avx10.1-512")
-X86_FEATURE (USERMSR, "usermsr")
-X86_FEATURE (EGPR, "egpr")
+X86_FEATURE_COMPAT(PCONFIG, "pconfig", 0)
+X86_FEATURE_COMPAT(PKU, "pku", 0)
+X86_FEATURE_COMPAT(PREFETCHWT1, "prefetchwt1", 0)
+X86_FEATURE_COMPAT(PRFCHW, "prfchw", 0)
+X86_FEATURE_COMPAT(PTWRITE, "ptwrite", 0)
+X86_FEATURE_COMPAT(RDPID, "rdpid", 0)
+X86_FEATURE_COMPAT(RDRND, "rdrnd", 0)
+X86_FEATURE_COMPAT(RDSEED, "rdseed", 0)
+X86_FEATURE_COMPAT(RTM, "rtm", 0)
+X86_FEATURE_COMPAT(SERIALIZE, "serialize", 0)
+X86_FEATURE_COMPAT(SGX, "sgx", 0)
+X86_FEATURE_COMPAT(SHA, "sha", 0)
+X86_FEATURE_COMPAT(SHSTK, "shstk", 0)
+X86_FEATURE_COMPAT(TBM, "tbm", 0)
+X86_FEATURE_COMPAT(TSXLDTRK, "tsxldtrk", 0)
+X86_FEATURE_COMPAT(VAES, "vaes", 0)
+X86_FEATURE_COMPAT(WAITPKG, "waitpkg", 0)
+X86_FEATURE_COMPAT(WBNOINVD, "wbnoinvd", 0)
+X86_FEATURE_COMPAT(XSAVE, "xsave", 0)
+X86_FEATURE_COMPAT(XSAVEC, "xsavec", 0)
+X86_FEATURE_COMPAT(XSAVEOPT, "xsaveopt", 0)
+X86_FEATURE_COMPAT(XSAVES, "xsaves", 0)
+X86_FEATURE_COMPAT(AMX_TILE, "amx-tile", 0)
+X86_FEATURE_COMPAT(AMX_INT8, "amx-int8", 0)
+X86_FEATURE_COMPAT(AMX_BF16, "amx-bf16", 0)
+X86_FEATURE_COMPAT(UINTR, "uintr", 0)
+X86_FEATURE_COMPAT(HRESET, "hreset", 0)
+X86_FEATURE_COMPAT(KL, "kl", 0)
+X86_FEATURE (FXSR, "fxsr")
+X86_FEATURE_COMPAT(WIDEKL, "widekl", 0)
+X86_FEATURE_COMPAT(AVXVNNI, "avxvnni", 0)
+X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 0)
+X86_FEATURE (CCMP, "ccmp")
X86_FEATURE (Push2Pop2, "push2pop2")
X86_FEATURE (PPX, "ppx")
X86_FEATURE (NDD, "ndd")
-X86_FEATURE (CCMP, "ccmp")
+X86_FEATURE_COMPAT(AVXIFMA, "avxifma", 0)
+X86_FEATURE_COMPAT(AVXVNNIINT8, "avxvnniint8", 0)
+X86_FEATURE_COMPAT(AVXNECONVERT, "avxneconvert", 0)
+X86_FEATURE_COMPAT(CMPCCXADD, "cmpccxadd", 0)
+X86_FEATURE_COMPAT(AMX_FP16, "amx-fp16", 0)
+X86_FEATURE_COMPAT(PREFETCHI, "prefetchi", 0)
+X86_FEATURE_COMPAT(RAOINT, "raoint", 0)
+X86_FEATURE_COMPAT(AMX_COMPLEX, "amx-complex", 0)
+X86_FEATURE_COMPAT(AVXVNNIINT16, "avxvnniint16", 0)
+X86_FEATURE_COMPAT(SM3, "sm3", 0)
+X86_FEATURE_COMPAT(SHA512, "sha512", 0)
+X86_FEATURE_COMPAT(SM4, "sm4", 0)
+X86_FEATURE (EGPR, "egpr")
+X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
+X86_FEATURE (EVEX512, "evex512")
+X86_FEATURE (AVX10_1, "avx10.1-256")
+X86_FEATURE (AVX10_1_512, "avx10.1-512")
X86_FEATURE (CF, "cf")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
>From 61155068d63ab3bbdd610515927090dd4f4c38b1 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 23 Jan 2024 13:39:33 +0800
Subject: [PATCH 2/3] clang-format and other refinements
---
compiler-rt/lib/builtins/cpu_model/x86.c | 13 ++++++-------
llvm/lib/TargetParser/X86TargetParser.cpp | 7 +++++--
2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index c499754890343..66ec58a5d8a6c 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -772,7 +772,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
#define hasFeature(F) ((Features[F / 32] >> (F % 32)) & 1)
#define setFeature(F) Features[F / 32] |= 1U << (F % 32)
- if ((EDX >> 8) & 1)
+ if ((EDX >> 8) & 1)
setFeature(FEATURE_CMPXCHG8B);
if ((EDX >> 15) & 1)
setFeature(FEATURE_CMOV);
@@ -974,8 +974,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_USERMSR);
unsigned MaxLevel;
- if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1)
- return false;
+ getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
@@ -997,7 +996,7 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_LZCNT);
if (((ECX >> 6) & 1))
setFeature(FEATURE_SSE4_A);
- if (((ECX >> 8) & 1))
+ if (((ECX >> 8) & 1))
setFeature(FEATURE_PRFCHW);
if (((ECX >> 11) & 1))
setFeature(FEATURE_XOP);
@@ -1022,14 +1021,14 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_WBNOINVD);
bool HasLeaf14 = MaxLevel >= 0x14 &&
- !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+ !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf14 && ((EBX >> 4) & 1))
- setFeature(FEATURE_PTWRITE);
+ setFeature(FEATURE_PTWRITE);
bool HasLeaf19 =
MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
- setFeature(FEATURE_WIDEKL);
+ setFeature(FEATURE_WIDEKL);
if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
setFeature(FEATURE_X86_64_BASELINE);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 518fb9d892164..67d8f291824b1 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -750,13 +750,16 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
#ifndef NDEBUG
// Check that priorities are set properly in the .def file. We expect that
// "compat" features are assigned non-duplicate consecutive priorities
- // starting from zero (0, 1, ..., num_features - 1).
+ // starting from one (1, ..., 37) and multiple zeros.
#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY,
unsigned Priorities[] = {
#include "llvm/TargetParser/X86TargetParser.def"
};
std::array<unsigned, std::size(Priorities)> HelperList;
- std::iota(HelperList.begin(), HelperList.end(), 0);
+ const size_t MaxPriority = 37;
+ std::iota(HelperList.begin(), HelperList.begin() + MaxPriority + 1, 0);
+ for (int i = MaxPriority + 1; i != std::size(Priorities); ++i)
+ HelperList[i] = 0;
assert(std::is_permutation(HelperList.begin(), HelperList.end(),
std::begin(Priorities), std::end(Priorities)) &&
"Priorities don't form consecutive range!");
>From 5922fc9ec4064509200d3dd3e14493e94b400eb1 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 30 Jan 2024 17:23:09 +0800
Subject: [PATCH 3/3] Fix lit failure of attr-cpuspecific.c
Remove support for cx8 for now: the current cpu_specific logic
requires that the "generic" CPU not have CMPXCHG8B.
---
clang/test/CodeGen/target-builtin-noerror.c | 1 -
compiler-rt/lib/builtins/cpu_model/x86.c | 4 ++--
llvm/include/llvm/TargetParser/X86TargetParser.def | 2 +-
llvm/lib/TargetParser/X86TargetParser.cpp | 2 +-
4 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 16097fa08cb9c..0dbd5ad9ae774 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -91,7 +91,6 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("clwb");
(void)__builtin_cpu_supports("clzero");
(void)__builtin_cpu_supports("cx16");
- (void)__builtin_cpu_supports("cx8");
(void)__builtin_cpu_supports("enqcmd");
(void)__builtin_cpu_supports("fsgsbase");
(void)__builtin_cpu_supports("lwp");
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 66ec58a5d8a6c..116319f4e4dfd 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -146,8 +146,8 @@ enum ProcessorFeatures {
FEATURE_CLWB,
FEATURE_CLZERO,
FEATURE_CMPXCHG16B,
- FEATURE_CMPXCHG8B,
- FEATURE_ENQCMD,
+
+ FEATURE_ENQCMD = 48,
FEATURE_F16C,
FEATURE_FSGSBASE,
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 7f230920351e3..59521c3c62d6d 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -182,7 +182,7 @@ X86_FEATURE_COMPAT(CLFLUSHOPT, "clflushopt", 0)
X86_FEATURE_COMPAT(CLWB, "clwb", 0)
X86_FEATURE_COMPAT(CLZERO, "clzero", 0)
X86_FEATURE_COMPAT(CMPXCHG16B, "cx16", 0)
-X86_FEATURE_COMPAT(CMPXCHG8B, "cx8", 0)
+X86_FEATURE (CMPXCHG8B, "cx8")
X86_FEATURE_COMPAT(ENQCMD, "enqcmd", 0)
X86_FEATURE_COMPAT(F16C, "f16c", 0)
X86_FEATURE_COMPAT(FSGSBASE, "fsgsbase", 0)
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 67d8f291824b1..21f46f576490a 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -758,7 +758,7 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
std::array<unsigned, std::size(Priorities)> HelperList;
const size_t MaxPriority = 37;
std::iota(HelperList.begin(), HelperList.begin() + MaxPriority + 1, 0);
- for (int i = MaxPriority + 1; i != std::size(Priorities); ++i)
+ for (size_t i = MaxPriority + 1; i != std::size(Priorities); ++i)
HelperList[i] = 0;
assert(std::is_permutation(HelperList.begin(), HelperList.end(),
std::begin(Priorities), std::end(Priorities)) &&
More information about the cfe-commits
mailing list