[compiler-rt] [clang] [llvm] [X86] Support "f16c" and "avx512fp16" for __builtin_cpu_supports (PR #78384)

Freddy Ye via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 16 22:04:34 PST 2024


https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/78384

>From 04bbfad594054c2dab033b977c7dfa178fee8568 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 4 Jan 2024 21:00:23 +0800
Subject: [PATCH 1/4] [X86] Support "f16c" and "avx512fp16" for
 __builtin_cpu_supports

This resolves issue #65320.
---
 compiler-rt/lib/builtins/cpu_model/x86.c           | 5 ++++-
 llvm/include/llvm/TargetParser/X86TargetParser.def | 5 +++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 9d9a5d3f1542cd..0750e29f989a8d 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -148,7 +148,8 @@ enum ProcessorFeatures {
   FEATURE_LZCNT,
   FEATURE_MOVBE,
 
-  FEATURE_X86_64_BASELINE = 95,
+  FEATURE_AVX512FP16 = 94,
+  FEATURE_X86_64_BASELINE,
   FEATURE_X86_64_V2,
   FEATURE_X86_64_V3,
   FEATURE_X86_64_V4,
@@ -812,6 +813,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_AVX5124FMAPS);
   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512VP2INTERSECT);
+  if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
+    setFeature(FEATURE_AVX512FP16);
 
   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
   // return all 0s for invalid subleaves so check the limit.
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index b58feafe4e8c24..43162f2b52ebab 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -122,6 +122,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont")
 //
 // We cannot just re-sort the list though because its order is dictated by the
 // order of bits in CodeGenFunction::GetX86CpuSupportsMask.
+// Likewise, X86_FEATURE_COMPAT entries must keep their position in the list.
 #ifndef X86_FEATURE_COMPAT
 #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR)
 #endif
@@ -184,12 +185,12 @@ X86_FEATURE       (AMX_TILE,        "amx-tile")
 X86_FEATURE       (CLDEMOTE,        "cldemote")
 X86_FEATURE       (CLFLUSHOPT,      "clflushopt")
 X86_FEATURE       (CLWB,            "clwb")
+X86_FEATURE_COMPAT(F16C,            "f16c",                  38)
 X86_FEATURE       (CLZERO,          "clzero")
 X86_FEATURE       (CMPXCHG16B,      "cx16")
 X86_FEATURE       (CMPXCHG8B,       "cx8")
 X86_FEATURE       (CRC32,           "crc32")
 X86_FEATURE       (ENQCMD,          "enqcmd")
-X86_FEATURE       (F16C,            "f16c")
 X86_FEATURE       (FSGSBASE,        "fsgsbase")
 X86_FEATURE       (FXSR,            "fxsr")
 X86_FEATURE       (INVPCID,         "invpcid")
@@ -229,9 +230,9 @@ X86_FEATURE       (XSAVE,           "xsave")
 X86_FEATURE       (XSAVEC,          "xsavec")
 X86_FEATURE       (XSAVEOPT,        "xsaveopt")
 X86_FEATURE       (XSAVES,          "xsaves")
+X86_FEATURE_COMPAT(AVX512FP16,      "avx512fp16",            39)
 X86_FEATURE       (HRESET,          "hreset")
 X86_FEATURE       (RAOINT,          "raoint")
-X86_FEATURE       (AVX512FP16,      "avx512fp16")
 X86_FEATURE       (AMX_FP16,        "amx-fp16")
 X86_FEATURE       (CMPCCXADD,       "cmpccxadd")
 X86_FEATURE       (AVXNECONVERT,    "avxneconvert")

>From 79bf68d976ef53d5a404cd9b7a0bd485c9937ba0 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 17 Jan 2024 12:06:38 +0800
Subject: [PATCH 2/4] add test

---
 clang/test/CodeGen/target-builtin-noerror.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 505f4a3e94565d..06bb40a2b71ea9 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -82,6 +82,8 @@ void verifyfeaturestrings(void) {
   (void)__builtin_cpu_supports("avx512bitalg");
   (void)__builtin_cpu_supports("avx512bf16");
   (void)__builtin_cpu_supports("avx512vp2intersect");
+  (void)__builtin_cpu_supports("f16c");
+  (void)__builtin_cpu_supports("avx512fp16");
 }
 
 void verifycpustrings(void) {

>From 400b00d4aaf4a11978dc5ddd58acc2a2cb36ed1f Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 17 Jan 2024 13:40:27 +0800
Subject: [PATCH 3/4] update lit test

---
 clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp b/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
index ef2498bd7e14ce..183eb4fb6ac61e 100644
--- a/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
+++ b/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp
@@ -80,8 +80,8 @@ OutOfLineDefs::foo(int, int, int) {
 // LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.S
 // LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.R
 // LINUX: define weak_odr ptr @_ZN13OutOfLineDefs3fooEiii.resolver()
-// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R
 // LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.S
+// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R
 // LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.O
 // LINUX: call void @llvm.trap
 // LINUX: define linkonce_odr noundef i32 @_ZN13OutOfLineDefs3fooEiii.O
@@ -89,8 +89,8 @@ OutOfLineDefs::foo(int, int, int) {
 // WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"
 // WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"
 // WINDOWS: define weak_odr dso_local i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z"(ptr %0, i32 %1, i32 %2, i32 %3)
-// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3)
 // WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"(ptr %0, i32 %1, i32 %2, i32 %3)
+// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3)
 // WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"(ptr %0, i32 %1, i32 %2, i32 %3)
 // WINDOWS: call void @llvm.trap
 // WINDOWS: define linkonce_odr dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"

>From c50fb217772edc274882518e56eeb017b9cf175a Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 17 Jan 2024 13:45:47 +0800
Subject: [PATCH 4/4] update key feature after sapphirerapids to be AVX512FP16

---
 llvm/lib/TargetParser/X86TargetParser.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index d46ff07ec7340a..518fb9d8921647 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -347,7 +347,7 @@ constexpr ProcInfo Processors[] = {
   // Tigerlake microarchitecture based processors.
   { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake, 'l', false },
   // Sapphire Rapids microarchitecture based processors.
-  { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
+  { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
   // Alderlake microarchitecture based processors.
   { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
   // Raptorlake microarchitecture based processors.
@@ -369,12 +369,12 @@ constexpr ProcInfo Processors[] = {
   // Grandridge microarchitecture based processors.
   { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
   // Granite Rapids microarchitecture based processors.
-  { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false },
+  { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512FP16, FeaturesGraniteRapids, 'n', false },
   // Granite Rapids D microarchitecture based processors.
-  { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
-  { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
+  { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
+  { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
   // Emerald Rapids microarchitecture based processors.
-  { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
+  { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
   // Clearwaterforest microarchitecture based processors.
   { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
   // Knights Landing processor.



More information about the cfe-commits mailing list