[clang] allow prefer 256 bit attribute target (PR #117092)
Matthias Braun via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 20 17:34:53 PST 2024
https://github.com/MatzeB created https://github.com/llvm/llvm-project/pull/117092
- Rework attr-target-x86 test
- Allow prefer-256-bit for __attribute__((target))
From 3bab9f901045426321d687fc36e4ba3034cc0f30 Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Wed, 20 Nov 2024 17:23:56 -0800
Subject: [PATCH 1/2] Rework attr-target-x86 test
Rework the attr-target-x86 test so the CHECK lines for the attributes
are next to their corresponding `__attribute__`.
---
clang/test/CodeGen/attr-target-x86.c | 153 ++++++++++++++++-----------
1 file changed, 89 insertions(+), 64 deletions(-)
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 2033a8b4c335f9..75e6dd18be2090 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -1,80 +1,105 @@
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -tune-cpu i686 -emit-llvm %s -o - | FileCheck %s
-int baz(int a) { return 4; }
+// CHECK: define {{.*}}@f_default({{.*}} [[f_default:#[0-9]+]]
+// CHECK: define {{.*}}@f_avx_sse4_2_ivybridge({{.*}} [[f_avx_sse4_2_ivybridge:#[0-9]+]]
+// CHECK: define {{.*}}@f_fpmath_387({{.*}} [[f_default]]
+// CHECK: define {{.*}}@f_no_sse2({{.*}} [[f_no_sse2:#[0-9]+]]
+// CHECK: define {{.*}}@f_sse4({{.*}} [[f_sse4:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_sse4({{.*}} [[f_no_sse4:#[0-9]+]]
+// CHECK: define {{.*}}@f_default2({{.*}} [[f_default]]
+// CHECK: define {{.*}}@f_avx_sse4_2_ivybridge_2({{.*}} [[f_avx_sse4_2_ivybridge]]
+// CHECK: define {{.*}}@f_no_aes_ivybridge({{.*}} [[f_no_aes_ivybridge:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_mmx({{.*}} [[f_no_mmx:#[0-9]+]]
+// CHECK: define {{.*}}@f_lakemont_mmx({{.*}} [[f_lakemont_mmx:#[0-9]+]]
+// CHECK: define {{.*}}@f_use_before_def({{.*}} [[f_lakemont_mmx]]
+// CHECK: define {{.*}}@f_tune_sandybridge({{.*}} [[f_tune_sandybridge:#[0-9]+]]
+// CHECK: define {{.*}}@f_x86_64_v2({{.*}} [[f_x86_64_v2:#[0-9]+]]
+// CHECK: define {{.*}}@f_x86_64_v3({{.*}} [[f_x86_64_v3:#[0-9]+]]
+// CHECK: define {{.*}}@f_x86_64_v4({{.*}} [[f_x86_64_v4:#[0-9]+]]
+// CHECK: define {{.*}}@f_avx10_1_256{{.*}} [[f_avx10_1_256:#[0-9]+]]
+// CHECK: define {{.*}}@f_avx10_1_512{{.*}} [[f_avx10_1_512:#[0-9]+]]
+
+// CHECK: [[f_default]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
+void f_default(void) {}
+
+// CHECK: [[f_avx_sse4_2_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+__attribute__((target("avx,sse4.2,arch=ivybridge")))
+void f_avx_sse4_2_ivybridge(void) {}
+
+// We're currently ignoring the fpmath attribute, so it is checked above that
+// the attributes are identical to those of f_default.
+__attribute__((target("fpmath=387")))
+void f_fpmath_387(void) {}
-int __attribute__((target("avx,sse4.2,arch=ivybridge"))) foo(int a) { return 4; }
-
-int __attribute__((target("fpmath=387"))) koala(int a) { return 4; }
-
-int __attribute__((target("no-sse2"))) echidna(int a) { return 4; }
-
-int __attribute__((target("sse4"))) panda(int a) { return 4; }
-int __attribute__((target("no-sse4"))) narwhal(int a) { return 4; }
+// CHECK-NOT: tune-cpu
+// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+__attribute__((target("no-sse2")))
+void f_no_sse2(void) {}
+
+// CHECK: [[f_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
+__attribute__((target("sse4")))
+void f_sse4(void) {}
+
+// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+__attribute__((target("no-sse4")))
+void f_no_sse4(void) {}
+
+// Checked above to have same attributes as f_default
+void f_default2(void) {
+ f_avx_sse4_2_ivybridge();
+ return f_default();
+}
-int bar(int a) { return baz(a) + foo(a); }
+// Checked above to have same attributes as f_avx_sse4_2_ivybridge
+__attribute__((target("avx, sse4.2, arch= ivybridge")))
+void f_avx_sse4_2_ivybridge_2(void) {}
-int __attribute__((target("avx, sse4.2, arch= ivybridge"))) qux(int a) { return 4; }
-int __attribute__((target("no-aes, arch=ivybridge"))) qax(int a) { return 4; }
+// CHECK: [[f_no_aes_ivybridge]] = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
+__attribute__((target("no-aes, arch=ivybridge")))
+void f_no_aes_ivybridge(void) {}
-int __attribute__((target("no-mmx"))) qq(int a) { return 40; }
+// CHECK-NOT: tune-cpu
+// CHECK: [[f_no_mmx]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
+__attribute__((target("no-mmx")))
+void f_no_mmx(void) {}
-int __attribute__((target("arch=lakemont,mmx"))) lake(int a) { return 4; }
+// CHECK: [[f_lakemont_mmx]] = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
+// Adding the attribute to a definition does update it in IR.
+__attribute__((target("arch=lakemont,mmx")))
+void f_lakemont_mmx(void) {}
-int use_before_def(void);
-int useage(void){
- return use_before_def();
+void f_use_before_def(void);
+void usage(void){
+ f_use_before_def();
}
-// Adding the attribute to a definition does update it in IR.
-int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
- return 5;
-}
+// Checked above to have same attributes as f_lakemont_mmx
+__attribute__((target("arch=lakemont,mmx")))
+void f_use_before_def(void) {}
-int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
-
-void __attribute__((target("arch=x86-64-v2"))) x86_64_v2(void) {}
-void __attribute__((target("arch=x86-64-v3"))) x86_64_v3(void) {}
-void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
-
-void __attribute__((target("avx10.1-256"))) avx10_1_256(void) {}
-void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
-
-// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
-// CHECK: baz{{.*}} #0
-// CHECK: foo{{.*}} #1
-// We're currently ignoring the fpmath attribute so koala should be identical to baz and bar.
-// CHECK: koala{{.*}} #0
-// CHECK: echidna{{.*}} #2
-// CHECK: panda{{.*}} #3
-// CHECK: narwhal{{.*}} #4
-// CHECK: bar{{.*}} #0
-// CHECK: qux{{.*}} #1
-// CHECK: qax{{.*}} #5
-// CHECK: qq{{.*}} #6
-// CHECK: lake{{.*}} #7
-// CHECK: use_before_def{{.*}} #7
-// CHECK: walrus{{.*}} #8
-// CHECK: avx10_1_256{{.*}} #12
-// CHECK: avx10_1_512{{.*}} #13
-// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
-// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
-// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
-// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-amx-avx512,-avx10.1-256,-avx10.1-512,-avx10.2-256,-avx10.2-512,-vaes"
-// CHECK-NOT: tune-cpu
-// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-mmx"
-// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
-// CHECK-NOT: tune-cpu
-// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge"
+// CHECK: [[f_tune_sandybridge]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="sandybridge"
+__attribute__((target("tune=sandybridge")))
+void f_tune_sandybridge(void) {}
-// CHECK: "target-cpu"="x86-64-v2"
+// CHECK: [[f_x86_64_v2]] ={{.*}}"target-cpu"="x86-64-v2"
// CHECK-SAME: "target-features"="+cmov,+crc32,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
-// CHECK: "target-cpu"="x86-64-v3"
+__attribute__((target("arch=x86-64-v2")))
+void f_x86_64_v2(void) {}
+
+// CHECK: [[f_x86_64_v3]] = {{.*}}"target-cpu"="x86-64-v3"
// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
-// CHECK: "target-cpu"="x86-64-v4"
+__attribute__((target("arch=x86-64-v3")))
+void f_x86_64_v3(void) {}
+
+// CHECK: [[f_x86_64_v4]] = {{.*}}"target-cpu"="x86-64-v4"
// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+__attribute__((target("arch=x86-64-v4")))
+void f_x86_64_v4(void) {}
+
+// CHECK: [[f_avx10_1_256]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
+__attribute__((target("avx10.1-256")))
+void f_avx10_1_256(void) {}
-// CHECK: #12 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave,-amx-avx512,-avx10.1-512,-avx10.2-512,-evex512"
-// CHECK: #13 = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
+// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
+__attribute__((target("avx10.1-512")))
+void f_avx10_1_512(void) {}
From 3528a2dabca2fa4e3dad70906893829f9a098717 Mon Sep 17 00:00:00 2001
From: Matthias Braun <matze at braunis.de>
Date: Wed, 20 Nov 2024 17:34:11 -0800
Subject: [PATCH 2/2] Allow prefer-256-bit for __attribute__((target))
---
clang/lib/Basic/Targets/X86.cpp | 1 +
clang/test/CodeGen/attr-target-x86.c | 10 ++++++++++
2 files changed, 11 insertions(+)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5993257e27d5a9..e903e16032bf02 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1162,6 +1162,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pconfig", true)
.Case("pku", true)
.Case("popcnt", true)
+ .Case("prefer-256-bit", true)
.Case("prefetchi", true)
.Case("prfchw", true)
.Case("ptwrite", true)
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 75e6dd18be2090..e9264efaa85c4f 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -18,6 +18,8 @@
// CHECK: define {{.*}}@f_x86_64_v4({{.*}} [[f_x86_64_v4:#[0-9]+]]
// CHECK: define {{.*}}@f_avx10_1_256{{.*}} [[f_avx10_1_256:#[0-9]+]]
// CHECK: define {{.*}}@f_avx10_1_512{{.*}} [[f_avx10_1_512:#[0-9]+]]
+// CHECK: define {{.*}}@f_prefer_256_bit({{.*}} [[f_prefer_256_bit:#[0-9]+]]
+// CHECK: define {{.*}}@f_no_prefer_256_bit({{.*}} [[f_no_prefer_256_bit:#[0-9]+]]
// CHECK: [[f_default]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
void f_default(void) {}
@@ -103,3 +105,11 @@ void f_avx10_1_256(void) {}
// CHECK: [[f_avx10_1_512]] = {{.*}}"target-cpu"="i686" "target-features"="+aes,+avx,+avx10.1-256,+avx10.1-512,+avx2,+avx512bf16,+avx512bitalg,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512fp16,+avx512ifma,+avx512vbmi,+avx512vbmi2,+avx512vl,+avx512vnni,+avx512vpopcntdq,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+mmx,+pclmul,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+vaes,+vpclmulqdq,+x87,+xsave"
__attribute__((target("avx10.1-512")))
void f_avx10_1_512(void) {}
+
+// CHECK: [[f_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}+prefer-256-bit
+__attribute__((target("prefer-256-bit")))
+void f_prefer_256_bit(void) {}
+
+// CHECK: [[f_no_prefer_256_bit]] = {{.*}}"target-features"="{{.*}}-prefer-256-bit
+__attribute__((target("no-prefer-256-bit")))
+void f_no_prefer_256_bit(void) {}
More information about the cfe-commits mailing list