[llvm] [VectorCombine] foldShuffleToIdentity - peek through bitcasts to see if they come from the same value to form identity sequence (PR #98334)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 08:04:48 PDT 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/98334
Workaround until I can get #96884 fixed properly - when trying to find identity sequences, peek through any bitcasts to see if the values all came from the same source. We don't run CSE frequently enough to merge all the bitcasts that we end up with.
>From 829ea31f971fcf98afbdd578b35f96558ad1a90b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 10 Jul 2024 16:02:51 +0100
Subject: [PATCH] [VectorCombine] foldShuffleToIdentity - peek through bitcasts
to see if they come from the same value to form identity sequence
Workaround until I can get #96884 fixed properly - when trying to find identity sequences, peek through any bitcasts to see if the values all came from the same source. We don't run CSE frequently enough to merge all the bitcasts that we end up with.
---
.../Transforms/Vectorize/VectorCombine.cpp | 10 ++-
.../PhaseOrdering/X86/blendv-select.ll | 72 +++++--------------
2 files changed, 24 insertions(+), 58 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 4896b8ed2595b..3a49f95d3f117 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1859,13 +1859,19 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
if (!FrontU)
return false;
+ // Helper to peek through bitcasts to the same value.
+ auto IsEquiv = [&](Value *X, Value *Y) {
+ return X->getType() == Y->getType() &&
+ peekThroughBitcasts(X) == peekThroughBitcasts(Y);
+ };
+
// Look for an identity value.
if (FrontLane == 0 &&
cast<FixedVectorType>(FrontU->get()->getType())->getNumElements() ==
Ty->getNumElements() &&
- all_of(drop_begin(enumerate(Item)), [Item](const auto &E) {
+ all_of(drop_begin(enumerate(Item)), [IsEquiv, Item](const auto &E) {
Value *FrontV = Item.front().first->get();
- return !E.value().first || (E.value().first->get() == FrontV &&
+ return !E.value().first || (IsEquiv(E.value().first->get(), FrontV) &&
E.value().second == (int)E.index());
})) {
IdentityLeafs.insert(FrontU);
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
index 011d51600b51f..c2ed7b9c84523 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
@@ -88,20 +88,10 @@ define <4 x i64> @x86_pblendvb_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b, <4 x i64>
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <8 x i32>
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <8 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP5]], <4 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i1> [[CMP]], <8 x i1> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP8]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP10]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP11]], <4 x i32> [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP13]] to <4 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[CMP]], <8 x i32> [[TMP2]], <8 x i32> [[TMP1]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <8 x i32> [[TMP3]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
@@ -129,20 +119,10 @@ define <4 x i64> @x86_pblendvb_v16i16_v8i16(<4 x i64> %a, <4 x i64> %b, <4 x i64
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <4 x i64> [[C:%.*]] to <16 x i16>
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <4 x i64> [[D:%.*]] to <16 x i16>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP5]], <8 x i16> [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[A]] to <16 x i16>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i64> [[B]] to <16 x i16>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i16> [[TMP10]], <16 x i16> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i16> [[TMP11]], <8 x i16> [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP13]] to <4 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[A:%.*]] to <16 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[B:%.*]] to <16 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i16> [[TMP2]], <16 x i16> [[TMP1]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i16> [[TMP3]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[RES]]
;
%a.bc = bitcast <4 x i64> %a to <32 x i8>
@@ -276,20 +256,10 @@ define <8 x i64> @x86_pblendvb_v16i32_v8i32(<8 x i64> %a, <8 x i64> %b, <8 x i64
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <16 x i32>
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <16 x i32>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i32> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[TMP5]], <8 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i1> [[CMP]], <16 x i1> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i32> [[TMP8]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <16 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i32> [[TMP11]], <8 x i32> [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP13]] to <8 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[CMP]], <16 x i32> [[TMP2]], <16 x i32> [[TMP1]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <16 x i32> [[TMP3]] to <8 x i64>
; CHECK-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
@@ -317,20 +287,10 @@ define <8 x i64> @x86_pblendvb_v32i16_v16i16(<8 x i64> %a, <8 x i64> %b, <8 x i6
; CHECK-NEXT: [[C_BC:%.*]] = bitcast <8 x i64> [[C:%.*]] to <32 x i16>
; CHECK-NEXT: [[D_BC:%.*]] = bitcast <8 x i64> [[D:%.*]] to <32 x i16>
; CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i16> [[C_BC]], [[D_BC]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <32 x i1> [[CMP]], <32 x i1> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[A]] to <32 x i16>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i16> [[TMP8]], <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[B]] to <32 x i16>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <32 x i16> [[TMP10]], <32 x i16> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i16> [[TMP11]], <16 x i16> [[TMP9]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i16> [[TMP6]], <16 x i16> [[TMP12]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP13]] to <8 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i64> [[A:%.*]] to <32 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[B:%.*]] to <32 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[CMP]], <32 x i16> [[TMP2]], <32 x i16> [[TMP1]]
+; CHECK-NEXT: [[RES:%.*]] = bitcast <32 x i16> [[TMP3]] to <8 x i64>
; CHECK-NEXT: ret <8 x i64> [[RES]]
;
%a.bc = bitcast <8 x i64> %a to <64 x i8>
More information about the llvm-commits
mailing list