[llvm] 8dd27d4 - [VectorCombine] Add test coverage for shuffle(cmp,cmp) fold patterns
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 04:53:07 PST 2024
Author: Simon Pilgrim
Date: 2024-12-16T12:52:38Z
New Revision: 8dd27d4569555c181a92f2c3914d3ea16aa1a741
URL: https://github.com/llvm/llvm-project/commit/8dd27d4569555c181a92f2c3914d3ea16aa1a741
DIFF: https://github.com/llvm/llvm-project/commit/8dd27d4569555c181a92f2c3914d3ea16aa1a741.diff
LOG: [VectorCombine] Add test coverage for shuffle(cmp,cmp) fold patterns
Added:
llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
new file mode 100644
index 00000000000000..008c1e7e694b96
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-cmps.ll
@@ -0,0 +1,189 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx512vl | FileCheck %s --check-prefixes=CHECK
+
+declare void @use(<4 x i1>) ; body-less external callee; @shuf_icmp_ugt_v4i32_use calls it to give a compare result an extra use
+
+; icmp - eq v4i32 is cheap. Two icmp eq results feed a shuffle that takes lanes 1 and 3 of each; the CHECK lines currently expect the instructions unchanged.
+
+define <4 x i32> @shuf_icmp_eq_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
+; CHECK-LABEL: define <4 x i32> @shuf_icmp_eq_v4i32(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[C0:%.*]] = icmp eq <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[C1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %c0 = icmp eq <4 x i32> %x, %y
+ %c1 = icmp eq <4 x i32> %z, %w
+ %s = shufflevector <4 x i1> %c0, <4 x i1> %c1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; icmp - eq v2i64 is only cheap on SSE4+ targets with PCMPEQQ. All four RUN lines share the single CHECK prefix, so one expectation (instructions unchanged) currently covers every -mattr level.
+
+define <2 x i64> @shuf_icmp_eq_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z, <2 x i64> %w) {
+; CHECK-LABEL: define <2 x i64> @shuf_icmp_eq_v2i64(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]], <2 x i64> [[W:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[C0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
+; CHECK-NEXT: [[C1:%.*]] = icmp eq <2 x i64> [[Z]], [[W]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[C0]], <2 x i1> [[C1]], <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %c0 = icmp eq <2 x i64> %x, %y
+ %c1 = icmp eq <2 x i64> %z, %w
+ %s = shufflevector <2 x i1> %c0, <2 x i1> %c1, <2 x i32> <i32 1, i32 3>
+ %r = sext <2 x i1> %s to <2 x i64>
+ ret <2 x i64> %r
+}
+
+; icmp - ugt v4i32 is expensive. Two icmp ugt results with no shared operand feed a shuffle of lanes 1 and 3 of each; the CHECK lines currently expect the instructions unchanged.
+
+define <4 x i32> @shuf_icmp_ugt_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
+; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %c0 = icmp ugt <4 x i32> %x, %y
+ %c1 = icmp ugt <4 x i32> %z, %w
+ %s = shufflevector <4 x i1> %c0, <4 x i1> %c1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; Common operand (%x) is op0 of both fcmps; the shuffle mask has two poison lanes. NOTE(review): the name says v4i32 but the compared operands are <4 x float>.
+
+define <4 x i32> @shuf_fcmp_oeq_v4i32(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_fcmp_oeq_v4i32(
+; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = fcmp oeq <4 x float> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = fcmp oeq <4 x float> [[X]], [[Z]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = fcmp oeq <4 x float> %x, %y
+ %b1 = fcmp oeq <4 x float> %x, %z
+ %s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; For commutative predicates (fcmp one here), the common operand may be swapped: %x is op0 of %b0 but op1 of %b1.
+
+define <4 x i32> @shuf_fcmp_one_v4f32_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_fcmp_one_v4f32_swap(
+; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = fcmp one <4 x float> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = fcmp one <4 x float> [[Z]], [[X]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 0, i32 3, i32 4, i32 7>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = fcmp one <4 x float> %x, %y
+ %b1 = fcmp one <4 x float> %z, %x
+ %s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 0, i32 3, i32 4, i32 7>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; non-commutative pred (sgt), but common op0 (%x). NOTE(review): despite the _swap suffix, %x is op0 of both icmps, so nothing is swapped here.
+
+define <4 x i32> @shuf_icmp_sgt_v4i32_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
+; CHECK-LABEL: define <4 x i32> @shuf_icmp_sgt_v4i32_swap(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = icmp sgt <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = icmp sgt <4 x i32> [[X]], [[Z]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = icmp sgt <4 x i32> %x, %y
+ %b1 = icmp sgt <4 x i32> %x, %z
+ %s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 6>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; negative test - mismatched opcodes (an icmp eq result shuffled with an fcmp oeq result)
+
+define <2 x i64> @shuf_icmp_fcmp_v2i64_mismatch_opcode(<2 x i64> %x, <2 x i64> %y, <2 x double> %z, <2 x double> %w) {
+; CHECK-LABEL: define <2 x i64> @shuf_icmp_fcmp_v2i64_mismatch_opcode(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x double> [[Z:%.*]], <2 x double> [[W:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = fcmp oeq <2 x double> [[Z]], [[W]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[B0]], <2 x i1> [[B1]], <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %b0 = icmp eq <2 x i64> %x, %y
+ %b1 = fcmp oeq <2 x double> %z, %w
+ %s = shufflevector <2 x i1> %b0, <2 x i1> %b1, <2 x i32> <i32 3, i32 0>
+ %r = sext <2 x i1> %s to <2 x i64>
+ ret <2 x i64> %r
+}
+
+; negative test - mismatched predicates (icmp eq vs icmp sgt; %y is op1 of both compares)
+
+define <2 x i64> @shuf_icmp_eq_sgt_v2i64_mismatch_predicate(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) {
+; CHECK-LABEL: define <2 x i64> @shuf_icmp_eq_sgt_v2i64_mismatch_predicate(
+; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = icmp eq <2 x i64> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = icmp sgt <2 x i64> [[Z]], [[Y]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i1> [[B0]], <2 x i1> [[B1]], <2 x i32> <i32 3, i32 0>
+; CHECK-NEXT: [[R:%.*]] = sext <2 x i1> [[S]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[R]]
+;
+ %b0 = icmp eq <2 x i64> %x, %y
+ %b1 = icmp sgt <2 x i64> %z, %y
+ %s = shufflevector <2 x i1> %b0, <2 x i1> %b1, <2 x i32> <i32 3, i32 0>
+ %r = sext <2 x i1> %s to <2 x i64>
+ ret <2 x i64> %r
+}
+
+; negative test - mismatched types (icmp eq on <4 x i64> vs <4 x i32> operands; both results are <4 x i1>)
+
+define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(<4 x i64> %x, <4 x i64> %y, <4 x i32> %z, <4 x i32> %w) {
+; CHECK-LABEL: define <4 x i32> @shuf_icmp_eq_v4i64_v4i32_mismatch_type(
+; CHECK-SAME: <4 x i64> [[X:%.*]], <4 x i64> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[B0:%.*]] = icmp eq <4 x i64> [[X]], [[Y]]
+; CHECK-NEXT: [[B1:%.*]] = icmp eq <4 x i32> [[Z]], [[W]]
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[B0]], <4 x i1> [[B1]], <4 x i32> <i32 3, i32 1, i32 1, i32 6>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %b0 = icmp eq <4 x i64> %x, %y
+ %b1 = icmp eq <4 x i32> %z, %w
+ %s = shufflevector <4 x i1> %b0, <4 x i1> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 6>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
+
+; negative test - uses (%c0 is also passed to @use, so it has a use besides the shuffle)
+
+define <4 x i32> @shuf_icmp_ugt_v4i32_use(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
+; CHECK-LABEL: define <4 x i32> @shuf_icmp_ugt_v4i32_use(
+; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> [[W:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[C0:%.*]] = icmp ugt <4 x i32> [[X]], [[Y]]
+; CHECK-NEXT: [[C1:%.*]] = icmp ugt <4 x i32> [[Z]], [[W]]
+; CHECK-NEXT: call void @use(<4 x i1> [[C0]])
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i1> [[C0]], <4 x i1> [[C1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[R:%.*]] = sext <4 x i1> [[S]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[R]]
+;
+ %c0 = icmp ugt <4 x i32> %x, %y
+ %c1 = icmp ugt <4 x i32> %z, %w
+ call void @use(<4 x i1> %c0)
+ %s = shufflevector <4 x i1> %c0, <4 x i1> %c1, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %r = sext <4 x i1> %s to <4 x i32>
+ ret <4 x i32> %r
+}
More information about the llvm-commits
mailing list