[llvm] 9b12975 - Revert "[SLP]Improve/fix reordering of the gathered graph nodes."
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 27 07:32:38 PDT 2021
Author: Alexey Bataev
Date: 2021-10-27T07:31:36-07:00
New Revision: 9b12975cbf32d88a811a81cec3e7394679b62cf5
URL: https://github.com/llvm/llvm-project/commit/9b12975cbf32d88a811a81cec3e7394679b62cf5
DIFF: https://github.com/llvm/llvm-project/commit/9b12975cbf32d88a811a81cec3e7394679b62cf5.diff
LOG: Revert "[SLP]Improve/fix reordering of the gathered graph nodes."
This reverts commit f719b794bcaa1df8fa82659d6d4e754c77d2f94e to fix
instability in tests.
Added:
Modified:
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 08d0d059d9937..63bc1faed0dee 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -766,12 +766,6 @@ class BoUpSLP {
/// Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// Checks if the specified gather tree entry \p TE can be represented as a
- /// shuffled vector entry + (possibly) permutation with other gathers. It
- /// implements the checks only for possibly ordered scalars (Loads,
- /// ExtractElement, ExtractValue), which can be part of the graph.
- Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
-
/// Reorders the current graph to the most profitable order starting from the
/// root node to the leaf nodes. The best order is chosen only from the nodes
/// of the same size (vectorization factor). Smaller nodes are considered
@@ -2676,64 +2670,6 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
fixupOrderingIndices(Order);
}
-Optional<BoUpSLP::OrdersType>
-BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
- assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
- unsigned NumScalars = TE.Scalars.size();
- OrdersType CurrentOrder(NumScalars, NumScalars);
- SmallVector<int> Positions;
- SmallBitVector UsedPositions(NumScalars);
- const TreeEntry *STE = nullptr;
- bool IsIdentity = false;
- // Try to find all gathered scalars that are gets vectorized in other
- // vectorize node. Here we can have only one single tree vector node to
- // correctly identify order of the gathered scalars.
- for (unsigned I = 0; I < NumScalars; ++I) {
- Value *V = TE.Scalars[I];
- if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
- continue;
- if (const auto *LocalSTE = getTreeEntry(V)) {
- if (!STE)
- STE = LocalSTE;
- else if (STE != LocalSTE)
- // Take the order only from the single vector node.
- return None;
- unsigned Lane =
- std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
- if (Lane >= NumScalars)
- return None;
- // The partial identity (where only some elements of the gather node are
- // in the identity order) is good.
- IsIdentity |= Lane == I;
- if (CurrentOrder[Lane] != NumScalars)
- continue;
- CurrentOrder[Lane] = I;
- UsedPositions.set(I);
- }
- }
- // Need to keep the order if we have a vector entry and at least 2 scalars.
- if (STE && UsedPositions.count() > 1) {
- if (IsIdentity) {
- CurrentOrder.clear();
- return CurrentOrder;
- }
- auto *It = CurrentOrder.begin();
- for (unsigned I = 0; I < NumScalars;) {
- if (UsedPositions.test(I)) {
- ++I;
- continue;
- }
- if (*It == NumScalars) {
- *It = I;
- ++I;
- }
- ++It;
- }
- return CurrentOrder;
- }
- return None;
-}
-
void BoUpSLP::reorderTopToBottom() {
// Maps VF to the graph nodes.
DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
@@ -2753,29 +2689,19 @@ void BoUpSLP::reorderTopToBottom() {
InsertElementInst>(TE->getMainOp()) &&
!TE->isAltShuffle()) {
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
- return;
- }
- if (TE->State == TreeEntry::NeedToGather) {
- if (TE->getOpcode() == Instruction::ExtractElement &&
- !TE->isAltShuffle() &&
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
- ->getVectorOperandType()) &&
- allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
- // Check that gather of extractelements can be represented as
- // just a shuffle of a single vector.
- OrdersType CurrentOrder;
- bool Reuse =
- canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
- if (Reuse || !CurrentOrder.empty()) {
- VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
- GathersToOrders.try_emplace(TE.get(), CurrentOrder);
- return;
- }
- }
- if (Optional<OrdersType> CurrentOrder =
- findReusedOrderedScalars(*TE.get())) {
+ } else if (TE->State == TreeEntry::NeedToGather &&
+ TE->getOpcode() == Instruction::ExtractElement &&
+ !TE->isAltShuffle() &&
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+ ->getVectorOperandType()) &&
+ allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector.
+ OrdersType CurrentOrder;
+ bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+ if (Reuse || !CurrentOrder.empty()) {
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
- GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ GathersToOrders.try_emplace(TE.get(), CurrentOrder);
}
}
});
@@ -2904,8 +2830,6 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
&NonVectorized](
const std::unique_ptr<TreeEntry> &TE) {
- if (TE->State != TreeEntry::Vectorize)
- NonVectorized.push_back(TE.get());
// No need to reorder if need to shuffle reuses, still need to shuffle the
// node.
if (!TE->ReuseShuffleIndices.empty())
@@ -2914,37 +2838,28 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
!TE->isAltShuffle()) {
OrderedEntries.insert(TE.get());
- return;
- }
- if (TE->State == TreeEntry::NeedToGather) {
- if (TE->getOpcode() == Instruction::ExtractElement &&
- !TE->isAltShuffle() &&
- isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
- ->getVectorOperandType()) &&
- allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
- // Check that gather of extractelements can be represented as
- // just a shuffle of a single vector with a single user only.
- OrdersType CurrentOrder;
- bool Reuse =
- canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
- if ((Reuse || !CurrentOrder.empty()) &&
- !any_of(VectorizableTree,
- [&TE](const std::unique_ptr<TreeEntry> &Entry) {
- return Entry->State == TreeEntry::NeedToGather &&
- Entry.get() != TE.get() &&
- Entry->isSame(TE->Scalars);
- })) {
- OrderedEntries.insert(TE.get());
- GathersToOrders.try_emplace(TE.get(), CurrentOrder);
- return;
- }
- }
- if (Optional<OrdersType> CurrentOrder =
- findReusedOrderedScalars(*TE.get())) {
+ } else if (TE->State == TreeEntry::NeedToGather &&
+ TE->getOpcode() == Instruction::ExtractElement &&
+ !TE->isAltShuffle() &&
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+ ->getVectorOperandType()) &&
+ allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector with a single user only.
+ OrdersType CurrentOrder;
+ bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+ if ((Reuse || !CurrentOrder.empty()) &&
+ !any_of(
+ VectorizableTree, [&TE](const std::unique_ptr<TreeEntry> &Entry) {
+ return Entry->State == TreeEntry::NeedToGather &&
+ Entry.get() != TE.get() && Entry->isSame(TE->Scalars);
+ })) {
OrderedEntries.insert(TE.get());
- GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ GathersToOrders.try_emplace(TE.get(), CurrentOrder);
}
}
+ if (TE->State != TreeEntry::Vectorize)
+ NonVectorized.push_back(TE.get());
});
// Checks if the operands of the users are reordarable and have only single
@@ -2996,7 +2911,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
for (TreeEntry *TE : OrderedEntries) {
if (!(TE->State == TreeEntry::Vectorize ||
(TE->State == TreeEntry::NeedToGather &&
- GathersToOrders.count(TE))) ||
+ TE->getOpcode() == Instruction::ExtractElement)) ||
TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
!all_of(drop_begin(TE->UserTreeIndices),
[TE](const EdgeInfo &EI) {
@@ -3117,13 +3032,10 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
}
// For gathers just need to reorder its scalars.
for (TreeEntry *Gather : GatherOps) {
+ if (!Gather->ReuseShuffleIndices.empty())
+ continue;
assert(Gather->ReorderIndices.empty() &&
"Unexpected reordering of gathers.");
- if (!Gather->ReuseShuffleIndices.empty()) {
- // Just reorder reuses indices.
- reorderReuses(Gather->ReuseShuffleIndices, Mask);
- continue;
- }
reorderScalars(Gather->Scalars, Mask);
OrderedEntries.remove(Gather);
}
@@ -7457,7 +7369,9 @@ struct SLPVectorizer : public FunctionPass {
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
- bool doInitialization(Module &M) override { return false; }
+ bool doInitialization(Module &M) override {
+ return false;
+ }
bool runOnFunction(Function &F) override {
if (skipFunction(F))
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index 16fd83f4b2ec2..96b143cfc4527 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -32,19 +32,21 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
+; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
+; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
+; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
+; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
+; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
+; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
+; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
+; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
+; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
+; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
+; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
; CHECK-NEXT: ret void
;
%a.0 = getelementptr i64, i64* %a, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index b4ed8604e2f85..34b32f2265521 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -32,19 +32,21 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
-; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT: [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
-; CHECK-NEXT: store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
+; CHECK-NEXT: [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
+; CHECK-NEXT: [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
+; CHECK-NEXT: [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
+; CHECK-NEXT: [[V0_0:%.*]] = load i64, i64* [[A]], align 8
+; CHECK-NEXT: [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
+; CHECK-NEXT: [[V1_0:%.*]] = load i64, i64* [[B]], align 8
+; CHECK-NEXT: [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
+; CHECK-NEXT: [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
+; CHECK-NEXT: [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
+; CHECK-NEXT: [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
+; CHECK-NEXT: [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
+; CHECK-NEXT: store i64 [[TMP2_0]], i64* [[C]], align 8
+; CHECK-NEXT: store i64 [[TMP2_1]], i64* [[C_1]], align 8
; CHECK-NEXT: ret void
;
%a.0 = getelementptr i64, i64* %a, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
index e1c38e398150a..11e313bdbe6fb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load.ll
@@ -69,23 +69,22 @@ define i32 @jumbled-load-multiuses(i32* noalias nocapture %in, i32* noalias noca
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[IN_ADDR]], i64 2
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IN_ADDR]] to <4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP2]], i32 2
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP7]], i32 2
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP9]], i32 3
-; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = mul <4 x i32> [[TMP2]], [[TMP10]]
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 0
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 1
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 2
; CHECK-NEXT: [[GEP_10:%.*]] = getelementptr inbounds i32, i32* [[OUT]], i64 3
-; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[GEP_7]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP12]], align 4
+; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP12]], align 4
; CHECK-NEXT: ret i32 undef
;
%in.addr = getelementptr inbounds i32, i32* %in, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index 96502d44acee4..623fb602279d7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -600,18 +600,21 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
; CHECK-NEXT: [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
; CHECK-NEXT: [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
; CHECK-NEXT: [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]
; CHECK-NEXT: [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
; CHECK-NEXT: [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
; CHECK-NEXT: ret void
;
%idx0 = getelementptr inbounds double, double* %array, i64 0
More information about the llvm-commits
mailing list