[llvm] 4212ef8 - Revert "[SLP]Further improvement of the cost model for scalars used in buildvectors."
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Mon May 9 13:59:01 PDT 2022
Author: Alexey Bataev
Date: 2022-05-09T13:46:06-07:00
New Revision: 4212ef8a0e5ccdcba41e132501ee6f7dbbf226fb
URL: https://github.com/llvm/llvm-project/commit/4212ef8a0e5ccdcba41e132501ee6f7dbbf226fb
DIFF: https://github.com/llvm/llvm-project/commit/4212ef8a0e5ccdcba41e132501ee6f7dbbf226fb.diff
LOG: Revert "[SLP]Further improvement of the cost model for scalars used in buildvectors."
This reverts commit 99f31acfce338417fea3c14983d6f8fedc8ed043 and several
others to fix detected crashes, reported in https://reviews.llvm.org/D115750
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
Removed:
llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8abe0bddbc62..4319af0df046 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6383,122 +6383,6 @@ static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
return false;
}
-/// Checks if the \p IE1 instructions is followed by \p IE2 instruction in the
-/// buildvector sequence.
-static bool isFirstInsertElement(const InsertElementInst *IE1,
- const InsertElementInst *IE2) {
- const auto *I1 = IE1;
- const auto *I2 = IE2;
- do {
- if (I2 == IE1)
- return true;
- if (I1 == IE2)
- return false;
- if (I1)
- I1 = dyn_cast<InsertElementInst>(I1->getOperand(0));
- if (I2)
- I2 = dyn_cast<InsertElementInst>(I2->getOperand(0));
- } while (I1 || I2);
- llvm_unreachable("Two different buildvectors not expected.");
-}
-
-/// Does the analysis of the provided shuffle masks and performs the requested
-/// actions on the vectors with the given shuffle masks. It tries to do it in
-/// several steps.
-/// 1. If the Base vector is not undef vector, resizing the very first mask to
-/// have common VF and perform action for 2 input vectors (including non-undef
-/// Base). Other shuffle masks are combined with the resulting after the 1 stage
-/// and processed as a shuffle of 2 elements.
-/// 2. If the Base is undef vector and have only 1 shuffle mask, perform the
-/// action only for 1 vector with the given mask, if it is not the identity
-/// mask.
-/// 3. If > 2 masks are used, perform the remaining shuffle actions for 2
-/// vectors, combing the masks properly between the steps.
-template <typename T>
-static T *performExtractsShuffleAction(
- MutableArrayRef<std::pair<T *, SmallVector<int>>> ShuffleMask, Value *Base,
- function_ref<unsigned(T *)> GetVF,
- function_ref<std::pair<T *, bool>(T *, ArrayRef<int>)> ResizeAction,
- function_ref<T *(ArrayRef<int>, ArrayRef<T *>)> Action) {
- assert(!ShuffleMask.empty() && "Empty list of shuffles for inserts.");
- SmallVector<int> Mask(ShuffleMask.begin()->second);
- auto VMIt = std::next(ShuffleMask.begin());
- T *Prev = nullptr;
- bool IsBaseNotUndef = !isUndefVector(Base);
- if (IsBaseNotUndef) {
- // Base is not undef, need to combine it with the next subvectors.
- std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
- for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
- if (Mask[Idx] == UndefMaskElem)
- Mask[Idx] = Idx;
- else
- Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
- }
- Prev = Action(Mask, {nullptr, Res.first});
- } else if (ShuffleMask.size() == 1) {
- // Base is undef and only 1 vector is shuffled - perform the action only for
- // single vector, if the mask is not the identity mask.
- std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
- if (Res.second)
- // Identity mask is found.
- Prev = Res.first;
- else
- Prev = Action(Mask, {ShuffleMask.begin()->first});
- } else {
- // Base is undef and at least 2 input vectors shuffled - perform 2 vectors
- // shuffles step by step, combining shuffle between the steps.
- unsigned Vec1VF = GetVF(ShuffleMask.begin()->first);
- unsigned Vec2VF = GetVF(VMIt->first);
- if (Vec1VF == Vec2VF) {
- // No need to resize the input vectors since they are of the same size, we
- // can shuffle them directly.
- ArrayRef<int> SecMask = VMIt->second;
- for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
- if (SecMask[I] != UndefMaskElem) {
- assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
- Mask[I] = SecMask[I] + Vec1VF;
- }
- }
- Prev = Action(Mask, {ShuffleMask.begin()->first, VMIt->first});
- } else {
- // Vectors of different sizes - resize and reshuffle.
- std::pair<T *, bool> Res1 =
- ResizeAction(ShuffleMask.begin()->first, Mask);
- std::pair<T *, bool> Res2 = ResizeAction(VMIt->first, VMIt->second);
- ArrayRef<int> SecMask = VMIt->second;
- for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
- if (Mask[I] != UndefMaskElem) {
- assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars.");
- if (Res1.second)
- Mask[I] = I;
- } else if (SecMask[I] != UndefMaskElem) {
- assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
- Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
- }
- }
- Prev = Action(Mask, {Res1.first, Res2.first});
- }
- VMIt = std::next(VMIt);
- }
- // Perform requested actions for the remaining masks/vectors.
- for (auto E = ShuffleMask.end(); VMIt != E; ++VMIt) {
- // Shuffle other input vectors, if any.
- std::pair<T *, bool> Res = ResizeAction(VMIt->first, VMIt->second);
- ArrayRef<int> SecMask = VMIt->second;
- for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
- if (SecMask[I] != UndefMaskElem) {
- assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) &&
- "Multiple uses of scalars.");
- Mask[I] = (Res.second ? I : SecMask[I]) + VF;
- } else if (Mask[I] != UndefMaskElem) {
- Mask[I] = I;
- }
- }
- Prev = Action(Mask, {Prev, Res.first});
- }
- return Prev;
-}
-
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -6519,8 +6403,9 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallPtrSet<Value *, 16> ExtractCostCalculated;
InstructionCost ExtractCost = 0;
- SmallVector<MapVector<const TreeEntry *, SmallVector<int>>> ShuffleMasks;
- SmallVector<std::pair<Value *, const TreeEntry *>> FirstUsers;
+ SmallVector<unsigned> VF;
+ SmallVector<SmallVector<int>> ShuffleMask;
+ SmallVector<Value *> FirstUsers;
SmallVector<APInt> DemandedElts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
@@ -6549,19 +6434,14 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
Optional<unsigned> InsertIdx = getInsertIndex(VU);
if (InsertIdx) {
- const TreeEntry *ScalarTE = getTreeEntry(EU.Scalar);
- auto *It =
- find_if(FirstUsers,
- [VU](const std::pair<Value *, const TreeEntry *> &Pair) {
- return areTwoInsertFromSameBuildVector(
- VU, cast<InsertElementInst>(Pair.first));
- });
+ auto *It = find_if(FirstUsers, [VU](Value *V) {
+ return areTwoInsertFromSameBuildVector(VU,
+ cast<InsertElementInst>(V));
+ });
int VecId = -1;
if (It == FirstUsers.end()) {
- (void)ShuffleMasks.emplace_back();
- SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
- if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ VF.push_back(FTy->getNumElements());
+ ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
// Find the insertvector, vectorized in tree, if any.
Value *Base = VU;
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
@@ -6569,41 +6449,22 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
if (const TreeEntry *E = getTreeEntry(IEBase)) {
VU = IEBase;
do {
- IEBase = cast<InsertElementInst>(Base);
- int Idx = *getInsertIndex(IEBase);
- assert(Mask[Idx] == UndefMaskElem &&
- "InsertElementInstruction used already.");
- Mask[Idx] = Idx;
- Base = IEBase->getOperand(0);
+ int Idx = E->findLaneForValue(Base);
+ ShuffleMask.back()[Idx] = Idx;
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
} while (E == getTreeEntry(Base));
break;
}
Base = cast<InsertElementInst>(Base)->getOperand(0);
}
- FirstUsers.emplace_back(VU, ScalarTE);
- DemandedElts.push_back(APInt::getZero(FTy->getNumElements()));
+ FirstUsers.push_back(VU);
+ DemandedElts.push_back(APInt::getZero(VF.back()));
VecId = FirstUsers.size() - 1;
} else {
- if (isFirstInsertElement(VU, cast<InsertElementInst>(It->first)))
- It->first = VU;
VecId = std::distance(FirstUsers.begin(), It);
}
int InIdx = *InsertIdx;
- SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
- if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
- // InsertElement should not be used already or the scalar is part of
- // TreeEntry, which is operand of the root insertelement instructions.
- assert((Mask[InIdx] == UndefMaskElem ||
- any_of(ScalarTE->UserTreeIndices,
- [](const EdgeInfo &EI) {
- return EI.EdgeIdx == 1 &&
- EI.UserTE->getOpcode() ==
- Instruction::InsertElement &&
- !EI.UserTE->isAltShuffle();
- })) &&
- "InsertElementInstruction used already.");
- Mask[InIdx] = EU.Lane;
+ ShuffleMask[VecId][InIdx] = EU.Lane;
DemandedElts[VecId].setBit(InIdx);
continue;
}
@@ -6630,75 +6491,89 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- auto &&ResizeToVF = [this, &Cost](const TreeEntry *TE, ArrayRef<int> Mask) {
- InstructionCost C = 0;
- unsigned VF = Mask.size();
- unsigned VecVF = TE->getVectorFactor();
- if (VF != VecVF &&
- (any_of(Mask, [VF](int Idx) { return Idx >= static_cast<int>(VF); }) ||
- (all_of(Mask,
- [VF](int Idx) { return Idx < 2 * static_cast<int>(VF); }) &&
- !ShuffleVectorInst::isIdentityMask(Mask)))) {
- SmallVector<int> OrigMask(VecVF, UndefMaskElem);
- std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
- OrigMask.begin());
- C = TTI->getShuffleCost(
+ if (FirstUsers.size() == 1) {
+ int Limit = ShuffleMask.front().size() * 2;
+ if (!all_of(ShuffleMask.front(),
+ [Limit](int Idx) { return Idx < Limit; }) ||
+ !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
+ InstructionCost C = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc,
- FixedVectorType::get(TE->getMainOp()->getType(), VecVF), OrigMask);
- LLVM_DEBUG(
- dbgs() << "SLP: Adding cost " << C
- << " for final shuffle of insertelement external users.\n";
- TE->dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n");
+ cast<FixedVectorType>(FirstUsers.front()->getType()),
+ ShuffleMask.front());
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of insertelement external users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
Cost += C;
- return std::make_pair(TE, true);
}
- return std::make_pair(TE, false);
- };
- // Calculate the cost of the reshuffled vectors, if any.
- for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
- Value *Base = cast<Instruction>(FirstUsers[I].first)->getOperand(0);
- unsigned VF = ShuffleMasks[I].begin()->second.size();
- auto *FTy = FixedVectorType::get(
- cast<VectorType>(FirstUsers[I].first->getType())->getElementType(), VF);
- auto Vector = ShuffleMasks[I].takeVector();
- auto &&EstimateShufflesCost = [this, FTy,
- &Cost](ArrayRef<int> Mask,
- ArrayRef<const TreeEntry *> TEs) {
- assert((TEs.size() == 1 || TEs.size() == 2) &&
- "Expected exactly 1 or 2 tree entries.");
- if (TEs.size() == 1) {
- int Limit = 2 * Mask.size();
- if (!all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) ||
- !ShuffleVectorInst::isIdentityMask(Mask)) {
- InstructionCost C =
- TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FTy, Mask);
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
- << " for final shuffle of insertelement "
- "external users.\n";
- TEs.front()->dump();
- dbgs() << "SLP: Current total cost = " << Cost << "\n");
- Cost += C;
- }
- } else {
- InstructionCost C =
- TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
- LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
- << " for final shuffle of vector node and external "
- "insertelement users.\n";
- if (TEs.front()) { TEs.front()->dump(); } TEs.back()->dump();
- dbgs() << "SLP: Current total cost = " << Cost << "\n");
- Cost += C;
- }
- return TEs.back();
- };
- (void)performExtractsShuffleAction<const TreeEntry>(
- makeMutableArrayRef(Vector.data(), Vector.size()), Base,
- [](const TreeEntry *E) { return E->getVectorFactor(); }, ResizeToVF,
- EstimateShufflesCost);
InstructionCost InsertCost = TTI->getScalarizationOverhead(
- cast<FixedVectorType>(FirstUsers[I].first->getType()), DemandedElts[I],
- /*Insert*/ true, /*Extract*/ false);
+ cast<FixedVectorType>(FirstUsers.front()->getType()),
+ DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost -= InsertCost;
+ } else if (FirstUsers.size() >= 2) {
+ unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
+ // Combined masks of the first 2 vectors.
+ SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
+ copy(ShuffleMask.front(), CombinedMask.begin());
+ APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
+ auto *VecTy = FixedVectorType::get(
+ cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
+ MaxVF);
+ for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
+ if (ShuffleMask[1][I] != UndefMaskElem) {
+ CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
+ CombinedDemandedElts.setBit(I);
+ }
+ }
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ InstructionCost InsertCost = TTI->getScalarizationOverhead(
+ VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
Cost -= InsertCost;
+ for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
+ if (ShuffleMask[I].empty())
+ continue;
+ // Other elements - permutation of 2 vectors (the initial one and the
+ // next Ith incoming vector).
+ unsigned VF = ShuffleMask[I].size();
+ for (unsigned Idx = 0; Idx < VF; ++Idx) {
+ int Mask = ShuffleMask[I][Idx];
+ if (Mask != UndefMaskElem)
+ CombinedMask[Idx] = MaxVF + Mask;
+ else if (CombinedMask[Idx] != UndefMaskElem)
+ CombinedMask[Idx] = Idx;
+ }
+ for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
+ if (CombinedMask[Idx] != UndefMaskElem)
+ CombinedMask[Idx] = Idx;
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ InstructionCost InsertCost = TTI->getScalarizationOverhead(
+ cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
+ /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost -= InsertCost;
+ }
}
#ifndef NDEBUG
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
deleted file mode 100644
index fa4fd9d75bdb..000000000000
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-shuffle.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-
-define void @b() {
-; CHECK-LABEL: @b(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> poison, float 0x7FF8000000000000, i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0x7FF8000000000000, i32 3
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP2]], <4 x float> zeroinitializer, <4 x float> zeroinitializer)
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[TMP3]], <float undef, float undef, float undef, float 2.000000e+00>
-; CHECK-NEXT: [[TMP5:%.*]] = fdiv <4 x float> [[TMP4]], zeroinitializer
-; CHECK-NEXT: store <4 x float> [[TMP5]], ptr undef, align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %mul = fmul float undef, 2.000000e+00
- %i = tail call float @llvm.fmuladd.f32(float %mul, float 0.000000e+00, float 0.000000e+00)
- %mul2 = fmul float undef, %i
- %add = fadd float undef, 1.000000e+00
- %neg = fneg float %add
- %i1 = tail call float @llvm.fmuladd.f32(float %neg, float 0.000000e+00, float 0.000000e+00)
- %mul4 = fmul float undef, %i1
- %neg7 = fneg float %mul
- %i2 = tail call float @llvm.fmuladd.f32(float %neg7, float 0.000000e+00, float 0.000000e+00)
- %mul8 = fmul float undef, %i2
- %i3 = tail call float @llvm.fmuladd.f32(float %add, float 0.000000e+00, float 0.000000e+00)
- %mul11 = fmul float %i3, 2.000000e+00
- %div = fdiv float %mul2, 0.000000e+00
- store float %div, ptr undef, align 4
- %div12 = fdiv float %mul4, 0.000000e+00
- %arrayidx13 = getelementptr inbounds float, ptr undef, i64 1
- store float %div12, ptr %arrayidx13, align 4
- %div14 = fdiv float %mul8, 0.000000e+00
- %arrayidx15 = getelementptr inbounds float, ptr undef, i64 2
- store float %div14, ptr %arrayidx15, align 4
- %div16 = fdiv float %mul11, 0.000000e+00
- %arrayidx17 = getelementptr inbounds float, ptr undef, i64 3
- store float %div16, ptr %arrayidx17, align 4
- ret void
-}
-
-declare float @llvm.fmuladd.f32(float, float, float)
-
-define void @test(float %a) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float [[A]], i32 1
-; CHECK-NEXT: br label [[LOOP:%.*]]
-; CHECK: loop:
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
-; CHECK-NEXT: [[AGG:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP3]], i64 1
-; CHECK-NEXT: br label [[LOOP]]
-;
-entry:
- br label %loop
-
-loop:
- %add.i157 = fadd float 0.000000e+00, %a
- %add23.i = fadd float 0.000000e+00, %a
- %insert = insertelement <2 x float> zeroinitializer, float %add.i157, i64 0
- %insert.i = insertelement <2 x float> %insert, float %add23.i, i64 1
- %agg = insertelement <2 x float> %insert.i, float %add.i157, i64 1
- br label %loop
-}
-
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index bbd71825f96c..f529d0fc4733 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -11,27 +11,25 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
+; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
+; CHECK-NEXT: [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; CHECK-NEXT: [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[TMP6]], i32 0
+; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
+; CHECK-NEXT: [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index cd05b940be22..9265ca1731a0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -11,27 +11,25 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
; CHECK-NEXT: [[TAB2:%.*]] = alloca [256 x i32], align 16
; CHECK-NEXT: br label [[BB1:%.*]]
; CHECK: bb1:
+; CHECK-NEXT: [[MUL19:%.*]] = fmul double [[P1:%.*]], 1.638400e+04
; CHECK-NEXT: [[MUL20:%.*]] = fmul double [[P3:%.*]], 1.638400e+04
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P1:%.*]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P2:%.*]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> <double 0.000000e+00, double poison>, double [[ADD]], i32 1
+; CHECK-NEXT: [[MUL21:%.*]] = fmul double [[P2:%.*]], 1.638400e+04
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
-; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0
+; CHECK-NEXT: [[T_0259:%.*]] = phi double [ 0.000000e+00, [[BB1]] ], [ [[ADD27:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[P3_ADDR_0258:%.*]] = phi double [ [[ADD]], [[BB1]] ], [ [[ADD28:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[T_0259]], i32 0
; CHECK-NEXT: [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
-; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[TMP6]], i32 0
+; CHECK-NEXT: [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[P3_ADDR_0258]], i32 0
; CHECK-NEXT: [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
; CHECK-NEXT: store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
+; CHECK-NEXT: [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
; CHECK-NEXT: [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
; CHECK-NEXT: br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
index a0254043a91e..1589efe6553c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extracts-with-undefs.ll
@@ -6,26 +6,28 @@ define void @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
-; CHECK-NEXT: [[PHI1:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ 0.000000e+00, [[BODY]] ]
-; CHECK-NEXT: [[PHI2:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ 0.000000e+00, [[BODY]] ]
-; CHECK-NEXT: [[MUL_I478_I:%.*]] = fmul fast double [[PHI1]], 0.000000e+00
-; CHECK-NEXT: [[MUL7_I485_I:%.*]] = fmul fast double undef, 0.000000e+00
-; CHECK-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[MUL_I478_I]], [[MUL7_I485_I]]
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x double> [ zeroinitializer, [[ENTRY:%.*]] ], [ zeroinitializer, [[BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT: [[ADD8_I_I:%.*]] = fadd fast double [[TMP5]], [[TMP4]]
; CHECK-NEXT: [[CMP42_I:%.*]] = fcmp fast ole double [[ADD8_I_I]], 0.000000e+00
; CHECK-NEXT: br i1 false, label [[BODY]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: br i1 false, label [[IF_THEN135_I:%.*]], label [[IF_END209_I:%.*]]
; CHECK: if.then135.i:
-; CHECK-NEXT: [[CMP145_I:%.*]] = fcmp fast olt double [[PHI1]], 0.000000e+00
-; CHECK-NEXT: [[CMP152_I:%.*]] = fcmp fast olt double [[PHI2]], 0.000000e+00
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i1> <i1 poison, i1 false>, i1 [[CMP152_I]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[TMP0]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = fcmp fast olt <2 x double> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i1> <i1 poison, i1 false>, i1 [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x double> zeroinitializer, <2 x double> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <2 x double> zeroinitializer, [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[TMP11]], zeroinitializer
; CHECK-NEXT: br label [[IF_END209_I]]
; CHECK: if.end209.i:
-; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x double> [ [[TMP4]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
+; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x double> [ [[TMP12]], [[IF_THEN135_I]] ], [ zeroinitializer, [[EXIT]] ]
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits
mailing list