[clang] f3a4de3 - [X86] Support "f16c" and "avx512fp16" for __builtin_cpu_supports (#78384)

via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 17 17:22:09 PST 2024


Author: Freddy Ye
Date: 2024-01-18T09:22:04+08:00
New Revision: f3a4de395c167aeb8207294222c6ff5719ef6f62

URL: https://github.com/llvm/llvm-project/commit/f3a4de395c167aeb8207294222c6ff5719ef6f62
DIFF: https://github.com/llvm/llvm-project/commit/f3a4de395c167aeb8207294222c6ff5719ef6f62.diff

LOG: [X86] Support "f16c" and "avx512fp16" for __builtin_cpu_supports (#78384)

This resolves issue #65320.
This also helps to differentiate sapphirerapids and cooperlake for
cpu_specific/dispatch.

Added: 
    

Modified: 
    clang/test/CodeGen/target-builtin-noerror.c
    clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
    compiler-rt/lib/builtins/cpu_model/x86.c
    llvm/include/llvm/TargetParser/X86TargetParser.def
    llvm/lib/TargetParser/X86TargetParser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 505f4a3e94565d4..06bb40a2b71ea9a 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -82,6 +82,8 @@ void verifyfeaturestrings(void) {
   (void)__builtin_cpu_supports("avx512bitalg");
   (void)__builtin_cpu_supports("avx512bf16");
   (void)__builtin_cpu_supports("avx512vp2intersect");
+  (void)__builtin_cpu_supports("f16c");
+  (void)__builtin_cpu_supports("avx512fp16");
 }
 
 void verifycpustrings(void) {

diff  --git a/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp b/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
index ef2498bd7e14cec..183eb4fb6ac61e0 100644
--- a/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
+++ b/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
@@ -80,8 +80,8 @@ OutOfLineDefs::foo(int, int, int) {
 // LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.S
 // LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.R
 // LINUX: define weak_odr ptr @_ZN13OutOfLineDefs3fooEiii.resolver()
-// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R
 // LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.S
+// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R
 // LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.O
 // LINUX: call void @llvm.trap
 // LINUX: define linkonce_odr noundef i32 @_ZN13OutOfLineDefs3fooEiii.O
@@ -89,8 +89,8 @@ OutOfLineDefs::foo(int, int, int) {
 // WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"
 // WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"
 // WINDOWS: define weak_odr dso_local i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z"(ptr %0, i32 %1, i32 %2, i32 %3)
-// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3)
 // WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"(ptr %0, i32 %1, i32 %2, i32 %3)
+// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3)
 // WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"(ptr %0, i32 %1, i32 %2, i32 %3)
 // WINDOWS: call void @llvm.trap
 // WINDOWS: define linkonce_odr dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"

diff  --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 9d9a5d3f1542cde..0750e29f989a8d9 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -148,7 +148,8 @@ enum ProcessorFeatures {
   FEATURE_LZCNT,
   FEATURE_MOVBE,
 
-  FEATURE_X86_64_BASELINE = 95,
+  FEATURE_AVX512FP16 = 94,
+  FEATURE_X86_64_BASELINE,
   FEATURE_X86_64_V2,
   FEATURE_X86_64_V3,
   FEATURE_X86_64_V4,
@@ -812,6 +813,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_AVX5124FMAPS);
   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512VP2INTERSECT);
+  if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
+    setFeature(FEATURE_AVX512FP16);
 
   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
   // return all 0s for invalid subleaves so check the limit.

diff  --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index b58feafe4e8c243..43162f2b52ebab4 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -122,6 +122,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont")
 //
 // We cannot just re-sort the list though because its order is dictated by the
 // order of bits in CodeGenFunction::GetX86CpuSupportsMask.
+// We also cannot change the position of any X86_FEATURE_COMPAT entry within the list.
 #ifndef X86_FEATURE_COMPAT
 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR)
 #endif
@@ -184,12 +185,12 @@ X86_FEATURE       (AMX_TILE,        "amx-tile")
 X86_FEATURE       (CLDEMOTE,        "cldemote")
 X86_FEATURE       (CLFLUSHOPT,      "clflushopt")
 X86_FEATURE       (CLWB,            "clwb")
+X86_FEATURE_COMPAT(F16C,            "f16c",                  38)
 X86_FEATURE       (CLZERO,          "clzero")
 X86_FEATURE       (CMPXCHG16B,      "cx16")
 X86_FEATURE       (CMPXCHG8B,       "cx8")
 X86_FEATURE       (CRC32,           "crc32")
 X86_FEATURE       (ENQCMD,          "enqcmd")
-X86_FEATURE       (F16C,            "f16c")
 X86_FEATURE       (FSGSBASE,        "fsgsbase")
 X86_FEATURE       (FXSR,            "fxsr")
 X86_FEATURE       (INVPCID,         "invpcid")
@@ -229,9 +230,9 @@ X86_FEATURE       (XSAVE,           "xsave")
 X86_FEATURE       (XSAVEC,          "xsavec")
 X86_FEATURE       (XSAVEOPT,        "xsaveopt")
 X86_FEATURE       (XSAVES,          "xsaves")
+X86_FEATURE_COMPAT(AVX512FP16,      "avx512fp16",            39)
 X86_FEATURE       (HRESET,          "hreset")
 X86_FEATURE       (RAOINT,          "raoint")
-X86_FEATURE       (AVX512FP16,      "avx512fp16")
 X86_FEATURE       (AMX_FP16,        "amx-fp16")
 X86_FEATURE       (CMPCCXADD,       "cmpccxadd")
 X86_FEATURE       (AVXNECONVERT,    "avxneconvert")

diff  --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index d46ff07ec7340af..518fb9d89216477 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -347,7 +347,7 @@ constexpr ProcInfo Processors[] = {
   // Tigerlake microarchitecture based processors.
   { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake, 'l', false },
   // Sapphire Rapids microarchitecture based processors.
-  { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
+  { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
   // Raptorlake microarchitecture based processors.
@@ -369,12 +369,12 @@ constexpr ProcInfo Processors[] = {
   // Grandridge microarchitecture based processors.
   { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
   // Granite Rapids microarchitecture based processors.
-  { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false },
+  { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512FP16, FeaturesGraniteRapids, 'n', false },
   // Granite Rapids D microarchitecture based processors.
-  { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
-  { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
+  { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
+  { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
   // Emerald Rapids microarchitecture based processors.
-  { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
+  { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
   // Clearwaterforest microarchitecture based processors.
   { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
   // Knights Landing processor.


        


More information about the cfe-commits mailing list