[llvm] a054e94 - [SLP]Merge reorder and reuse shuffles.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 2 06:41:09 PST 2021
Author: Alexey Bataev
Date: 2021-03-02T06:39:47-08:00
New Revision: a054e94e9ea26b2aab85de02177e06b90ad8eef0
URL: https://github.com/llvm/llvm-project/commit/a054e94e9ea26b2aab85de02177e06b90ad8eef0
DIFF: https://github.com/llvm/llvm-project/commit/a054e94e9ea26b2aab85de02177e06b90ad8eef0.diff
LOG: [SLP]Merge reorder and reuse shuffles.
It is possible to merge reuse and reorder shuffles and reduce the total
cost of the vectorization tree/number of final instructions.
Differential Revision: https://reviews.llvm.org/D94992
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0e3b6e107553..065ddca81789 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3513,6 +3513,9 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
+ // The common cost of removal ExtractElement/ExtractValue instructions +
+ // the cost of shuffles, if required to resuffle the original vector.
+ InstructionCost CommonCost = 0;
if (NeedToShuffleReuses) {
unsigned Idx = 0;
for (unsigned I : E->ReuseShuffleIndices) {
@@ -3540,11 +3543,9 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
ReuseShuffleCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx);
}
- }
- InstructionCost DeadCost = ReuseShuffleCost;
- if (!E->ReorderIndices.empty()) {
- // TODO: Merge this shuffle with the ReuseShuffleCost.
- DeadCost += TTI->getShuffleCost(
+ CommonCost = ReuseShuffleCost;
+ } else if (!E->ReorderIndices.empty()) {
+ CommonCost = TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
}
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
@@ -3561,20 +3562,20 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
[](User *U) { return isa<GetElementPtrInst>(U); })) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
- DeadCost -= TTI->getExtractWithExtendCost(
+ CommonCost -= TTI->getExtractWithExtendCost(
Ext->getOpcode(), Ext->getType(), VecTy, I);
// Add back the cost of s|zext which is subtracted separately.
- DeadCost += TTI->getCastInstrCost(
+ CommonCost += TTI->getCastInstrCost(
Ext->getOpcode(), Ext->getType(), EI->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
- DeadCost -=
+ CommonCost -=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
}
}
- return DeadCost;
+ return CommonCost;
}
case Instruction::ZExt:
case Instruction::SExt:
@@ -3769,11 +3770,9 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, alignment, CostKind, VL0);
}
- if (!E->ReorderIndices.empty()) {
- // TODO: Merge this shuffle with the ReuseShuffleCost.
+ if (!NeedToShuffleReuses && !E->ReorderIndices.empty())
VecLdCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
- }
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecLdCost, ScalarLdCost));
return ReuseShuffleCost + VecLdCost - ScalarLdCost;
}
@@ -3785,18 +3784,14 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
Align Alignment = SI->getAlign();
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
- if (NeedToShuffleReuses)
- ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
- if (IsReorder) {
- // TODO: Merge this shuffle with the ReuseShuffleCost.
+ if (IsReorder)
VecStCost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
- }
LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecStCost, ScalarStCost));
- return ReuseShuffleCost + VecStCost - ScalarStCost;
+ return VecStCost - ScalarStCost;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
@@ -4294,18 +4289,31 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
if (E->isSame(VL)) {
Value *V = vectorizeTree(E);
if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) {
- // We need to get the vectorized value but without shuffle.
- if (auto *SV = dyn_cast<ShuffleVectorInst>(V)) {
- V = SV->getOperand(0);
- } else {
- // Reshuffle to get only unique values.
- SmallVector<int, 4> UniqueIdxs;
- SmallSet<int, 4> UsedIdxs;
- for (int Idx : E->ReuseShuffleIndices)
- if (UsedIdxs.insert(Idx).second)
- UniqueIdxs.emplace_back(Idx);
- V = Builder.CreateShuffleVector(V, UniqueIdxs);
+ // Reshuffle to get only unique values.
+ // If some of the scalars are duplicated in the vectorization tree
+ // entry, we do not vectorize them but instead generate a mask for the
+ // reuses. But if there are several users of the same entry, they may
+ // have different vectorization factors. This is especially important
+ // for PHI nodes. In this case, we need to adapt the resulting
+ // instruction for the user vectorization factor and have to reshuffle
+ // it again to take only unique elements of the vector. Without this
+ // code the function incorrectly returns reduced vector instruction
+ // with the same elements, not with the unique ones.
+ // block:
+ // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
+ // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
+ // ... (use %2)
+ // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
+ // br %block
+ SmallVector<int, 4> UniqueIdxs;
+ SmallSet<int, 4> UsedIdxs;
+ int Pos = 0;
+ for (int Idx : E->ReuseShuffleIndices) {
+ if (UsedIdxs.insert(Idx).second)
+ UniqueIdxs.emplace_back(Pos);
+ ++Pos;
}
+ V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
}
return V;
}
@@ -4343,6 +4351,64 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
return Vec;
}
+namespace {
+/// Merges shuffle masks and emits final shuffle instruction, if required.
+class ShuffleInstructionBuilder {
+ IRBuilderBase &Builder;
+ bool IsFinalized = false;
+ SmallVector<int, 4> Mask;
+
+public:
+ ShuffleInstructionBuilder(IRBuilderBase &Builder) : Builder(Builder) {}
+
+ /// Adds a mask, inverting it before applying.
+ void addInversedMask(ArrayRef<unsigned> SubMask) {
+ if (SubMask.empty())
+ return;
+ SmallVector<int, 4> NewMask;
+ inversePermutation(SubMask, NewMask);
+ addMask(NewMask);
+ }
+
+ /// Functions adds masks, merging them into single one.
+ void addMask(ArrayRef<unsigned> SubMask) {
+ SmallVector<int, 4> NewMask(SubMask.begin(), SubMask.end());
+ addMask(NewMask);
+ }
+
+ void addMask(ArrayRef<int> SubMask) {
+ if (SubMask.empty())
+ return;
+ if (Mask.empty()) {
+ Mask.append(SubMask.begin(), SubMask.end());
+ return;
+ }
+ SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
+ int TermValue = std::min(Mask.size(), SubMask.size());
+ for (int I = 0, E = SubMask.size(); I < E; ++I) {
+ if (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue) {
+ NewMask[I] = E;
+ continue;
+ }
+ NewMask[I] = Mask[SubMask[I]];
+ }
+ Mask.swap(NewMask);
+ }
+
+ Value *finalize(Value *V) {
+ IsFinalized = true;
+ if (Mask.empty())
+ return V;
+ return Builder.CreateShuffleVector(V, Mask, "shuffle");
+ }
+
+ ~ShuffleInstructionBuilder() {
+ assert((IsFinalized || Mask.empty()) &&
+ "Shuffle construction must be finalized.");
+ }
+};
+} // namespace
+
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IRBuilder<>::InsertPointGuard Guard(Builder);
@@ -4351,12 +4417,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return E->VectorizedValue;
}
+ ShuffleInstructionBuilder ShuffleBuilder(Builder);
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->State == TreeEntry::NeedToGather) {
setInsertPointAfterBundle(E);
Value *Vec = gather(E->Scalars);
if (NeedToShuffleReuses) {
- Vec = Builder.CreateShuffleVector(Vec, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ Vec = ShuffleBuilder.finalize(Vec);
if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherSeq.insert(I);
CSEBlocks.insert(I->getParent());
@@ -4414,18 +4482,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
- if (!E->ReorderIndices.empty()) {
- SmallVector<int, 4> Mask;
- inversePermutation(E->ReorderIndices, Mask);
- Builder.SetInsertPoint(VL0);
- V = Builder.CreateShuffleVector(V, Mask, "reorder_shuffle");
- }
- if (NeedToShuffleReuses) {
- // TODO: Merge this shuffle with the ReorderShuffleMask.
- if (E->ReorderIndices.empty())
- Builder.SetInsertPoint(VL0);
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
- }
+ Builder.SetInsertPoint(VL0);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
return V;
}
@@ -4436,16 +4496,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
- if (!E->ReorderIndices.empty()) {
- SmallVector<int, 4> Mask;
- inversePermutation(E->ReorderIndices, Mask);
- NewV = Builder.CreateShuffleVector(NewV, Mask, "reorder_shuffle");
- }
- if (NeedToShuffleReuses) {
- // TODO: Merge this shuffle with the ReorderShuffleMask.
- NewV = Builder.CreateShuffleVector(NewV, E->ReuseShuffleIndices,
- "shuffle");
- }
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ NewV = ShuffleBuilder.finalize(NewV);
E->VectorizedValue = NewV;
return NewV;
}
@@ -4472,8 +4525,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4494,8 +4547,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4514,8 +4567,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateSelect(Cond, True, False);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4537,8 +4590,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4580,8 +4633,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4623,15 +4676,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = propagateMetadata(NewLI, E->Scalars);
- if (IsReorder) {
- SmallVector<int, 4> Mask;
- inversePermutation(E->ReorderIndices, Mask);
- V = Builder.CreateShuffleVector(V, Mask, "reorder_shuffle");
- }
- if (NeedToShuffleReuses) {
- // TODO: Merge this shuffle with the ReorderShuffleMask.
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
- }
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
@@ -4645,11 +4692,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E);
Value *VecValue = vectorizeTree(E->getOperand(0));
- if (IsReorder) {
- SmallVector<int, 4> Mask(E->ReorderIndices.begin(),
- E->ReorderIndices.end());
- VecValue = Builder.CreateShuffleVector(VecValue, Mask, "reorder_shuf");
- }
+ ShuffleBuilder.addMask(E->ReorderIndices);
+ VecValue = ShuffleBuilder.finalize(VecValue);
+
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(
ScalarPtr, VecValue->getType()->getPointerTo(AS));
@@ -4663,8 +4708,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
Value *V = propagateMetadata(ST, E->Scalars);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4702,8 +4745,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4765,8 +4808,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
propagateIRFlags(V, E->Scalars, VL0);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
@@ -4832,8 +4875,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
index 1c8ddba98882..d1754c0bbc54 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/PR38339.ll
@@ -82,8 +82,7 @@ define void @f3(<2 x i16> %x, i16* %a) {
; CHECK: cont:
; CHECK-NEXT: [[XX:%.*]] = phi <2 x i16> [ [[X:%.*]], [[ENTRY:%.*]] ], [ undef, [[CONT]] ]
; CHECK-NEXT: [[AA:%.*]] = phi i16* [ [[A:%.*]], [[ENTRY]] ], [ undef, [[CONT]] ]
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[REORDER_SHUFFLE]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[XX]], <2 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[PTR0:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 0
; CHECK-NEXT: [[PTR1:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 1
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* undef, i16 0, i16 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
index f9e38eaebc3f..741dbcec392e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR32086.ll
@@ -35,8 +35,7 @@ define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[LD:%.*]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[LD]] to <2 x i64>*
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[REORDER_SHUFFLE]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[ST:%.*]], i64 1
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i64, i64* [[ST]], i64 3
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
index e7b2ce8ecbb2..ad6bbb3a42b1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
@@ -8,10 +8,10 @@ define void @wombat(i32* %ptr, i32* %ptr1) {
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 0
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 8
-; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[REORDER_SHUFFLE]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[PTR1:%.*]], i32 3
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[REORDER_SHUFFLE]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[SHRINK_SHUFFLE:%.*]] = shufflevector <4 x i32> [[SHUFFLE]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[SHRINK_SHUFFLE]], <i32 -1, i32 -1>
; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5
@@ -66,7 +66,7 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[TMP0]], [[IF_END:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
@@ -78,6 +78,7 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() {
; CHECK-NEXT: [[ARRAYIDX11_6:%.*]] = getelementptr inbounds i16, i16* undef, i32 6
; CHECK-NEXT: [[ARRAYIDX11_7:%.*]] = getelementptr inbounds i16, i16* undef, i32 7
; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], <8 x i16>* undef, align 2
+; CHECK-NEXT: [[SHRINK_SHUFFLE]] = shufflevector <8 x i16> [[SHUFFLE]], <8 x i16> poison, <2 x i32> <i32 0, i32 4>
; CHECK-NEXT: br label [[FOR_BODY]]
;
entry:
More information about the llvm-commits mailing list