[llvm] ac01ae7 - [SLP]Use ShuffleInstructionBuilder for vector shrinking.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 28 06:11:15 PST 2022
Author: Alexey Bataev
Date: 2022-12-28T06:09:04-08:00
New Revision: ac01ae71f0c4a67194f92da3d6e6733b689ec087
URL: https://github.com/llvm/llvm-project/commit/ac01ae71f0c4a67194f92da3d6e6733b689ec087
DIFF: https://github.com/llvm/llvm-project/commit/ac01ae71f0c4a67194f92da3d6e6733b689ec087.diff
LOG: [SLP]Use ShuffleInstructionBuilder for vector shrinking.
We can now use ShuffleInstructionBuilder to emit the shrinking shuffle.
This allows removing the extra shuffle from the emitted code and reusing
the original vector.
Part of D110978
Differential Revision: https://reviews.llvm.org/D140499
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 122caaccd5d6..65b9f5731af9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8663,6 +8663,11 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
return TE->isOperandGatherNode({E, NodeIdx}) &&
VE->isSame(TE->Scalars);
}))) {
+ auto FinalShuffle = [&](Value *V, ArrayRef<int> Mask) {
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
+ ShuffleBuilder.add(V, Mask);
+ return ShuffleBuilder.finalize(std::nullopt);
+ };
Value *V = vectorizeTree(VE);
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!VE->ReuseShuffleIndices.empty()) {
@@ -8696,18 +8701,14 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
"less than original vector size.");
UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
- V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
+ V = FinalShuffle(V, UniqueIdxs);
} else {
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
"Expected vectorization factor less "
"than original vector size.");
SmallVector<int> UniformMask(VF, 0);
std::iota(UniformMask.begin(), UniformMask.end(), 0);
- V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
- }
- if (auto *I = dyn_cast<Instruction>(V)) {
- GatherShuffleExtractSeq.insert(I);
- CSEBlocks.insert(I->getParent());
+ V = FinalShuffle(V, UniformMask);
}
}
return V;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
index 9637e40d2b3e..a009b1eaf65f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
@@ -5,15 +5,15 @@ define void @wombat(ptr %ptr, ptr %ptr1) {
; CHECK-LABEL: @wombat(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i32 3
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
-; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef
-; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]]
-; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]]
-; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP27]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP1]], undef
+; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> undef, <4 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP6]]
+; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr [[TMP27]], align 8
; CHECK-NEXT: ret void
;
bb:
@@ -58,12 +58,11 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ]
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[TMP0]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
-; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], ptr undef, align 2
-; CHECK-NEXT: [[SHRINK_SHUFFLE]] = shufflevector <8 x i16> [[SHUFFLE]], <8 x i16> poison, <2 x i32> <i32 0, i32 4>
+; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr undef, align 2
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
index d70d54183a91..8d1d257820f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder2.ll
@@ -9,23 +9,22 @@ define void @foo(ptr %this, ptr %p, i32 %add7) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[ADD7:%.*]], i32 0
; CHECK-NEXT: [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], <i32 2, i32 2>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
; CHECK-NEXT: switch i32 undef, label [[SW_EPILOG:%.*]] [
; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
; CHECK-NEXT: i32 2, label [[SW_BB]]
; CHECK-NEXT: ]
; CHECK: sw.bb:
-; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 2, i32 0>
-; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], <i32 -1, i32 -1>
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]]
; CHECK-NEXT: br label [[SW_EPILOG]]
; CHECK: sw.epilog:
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ undef, [[ENTRY:%.*]] ], [ [[TMP5]], [[SW_BB]] ]
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
-; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i32> undef, [[SHUFFLE]]
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[SHUFFLE1]]
-; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> undef, [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[P:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits mailing list