[llvm] 0ad87ff - [SLP]Introduce shuffle of the nodes + gather/vectorbuild of the remaining scalars.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 17 11:21:50 PDT 2023


Author: Alexey Bataev
Date: 2023-03-17T11:18:36-07:00
New Revision: 0ad87ffdcc2379f1156651d1953a162379e2de8c

URL: https://github.com/llvm/llvm-project/commit/0ad87ffdcc2379f1156651d1953a162379e2de8c
DIFF: https://github.com/llvm/llvm-project/commit/0ad87ffdcc2379f1156651d1953a162379e2de8c.diff

LOG: [SLP]Introduce shuffle of the nodes + gather/vectorbuild  of the remaining scalars.

Currently compiler does not support mixing of shuffled nodes
+ gather/buildvector of the remaining scalar values. It may reduce total
  number of instructions and improve performance of the
  gather/buildvector sequences.

Part of D110978

Differential Revision: https://reviews.llvm.org/D146167

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
    llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
    llvm/test/Transforms/SLPVectorizer/X86/cse.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
    llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
    llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
    llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
    llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
    llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
    llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7bc9a4ff548d3..9da2fd4bbabe5 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2489,8 +2489,9 @@ class BoUpSLP {
   /// the bundle
   void setInsertPointAfterBundle(const TreeEntry *E);
 
-  /// \returns a vector from a collection of scalars in \p VL.
-  Value *gather(ArrayRef<Value *> VL);
+  /// \returns a vector from a collection of scalars in \p VL. if \p Root is not
+  /// specified, the starting vector value is poison.
+  Value *gather(ArrayRef<Value *> VL, Value *Root = nullptr);
 
   /// \returns whether the VectorizableTree is fully vectorizable and will
   /// be beneficial even the tree height is tiny.
@@ -6596,9 +6597,10 @@ class BaseShuffleAnalysis {
         LocalVF = SVOpTy->getNumElements();
       SmallVector<int> ExtMask(Mask.size(), UndefMaskElem);
       for (auto [Idx, I] : enumerate(Mask)) {
-         if (I == UndefMaskElem)
-           continue;
-         ExtMask[Idx] = SV->getMaskValue(I);
+        if (I == UndefMaskElem ||
+            static_cast<unsigned>(I) >= SV->getShuffleMask().size())
+          continue;
+        ExtMask[Idx] = SV->getMaskValue(I);
       }
       bool IsOp1Undef =
           isUndefVector(SV->getOperand(0),
@@ -8922,7 +8924,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
   Builder.SetCurrentDebugLocation(Front->getDebugLoc());
 }
 
-Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
+Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
   // List of instructions/lanes from current block and/or the blocks which are
   // part of the current loop. These instructions will be inserted at the end to
   // make it possible to optimize loops and hoist invariant instructions out of
@@ -8939,7 +8941,8 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
   for (int I = 0, E = VL.size(); I < E; ++I) {
     if (auto *Inst = dyn_cast<Instruction>(VL[I]))
       if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
-           getTreeEntry(Inst) || (L && (L->contains(Inst)))) &&
+           getTreeEntry(Inst) ||
+           (L && (!Root || L->isLoopInvariant(Root)) && L->contains(Inst))) &&
           PostponedIndices.insert(I).second)
         PostponedInsts.emplace_back(Inst, I);
   }
@@ -8962,7 +8965,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
   Value *Val0 =
       isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
   FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
-  Value *Vec = PoisonValue::get(VecTy);
+  Value *Vec = Root ? Root : PoisonValue::get(VecTy);
   SmallVector<int> NonConsts;
   // Insert constant values at first.
   for (int I = 0, E = VL.size(); I < E; ++I) {
@@ -8972,6 +8975,18 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
       NonConsts.push_back(I);
       continue;
     }
+    if (Root) {
+      if (!isa<UndefValue>(VL[I])) {
+        NonConsts.push_back(I);
+        continue;
+      }
+      if (isa<PoisonValue>(VL[I]))
+        continue;
+      if (auto *SV = dyn_cast<ShuffleVectorInst>(Root)) {
+        if (SV->getMaskValue(I) == UndefMaskElem)
+          continue;
+      }
+    }
     Vec = CreateInsertElement(Vec, VL[I], I);
   }
   // Insert non-constant values.
@@ -9210,9 +9225,34 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
     add(V1, NewMask);
   }
   /// Finalize emission of the shuffles.
+  /// \param Action the action (if any) to be performed before final applying of
+  /// the \p ExtMask mask.
   Value *
-  finalize(ArrayRef<int> ExtMask = std::nullopt) {
+  finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
+           function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
     IsFinalized = true;
+    if (Action) {
+      Value *Vec = InVectors.front();
+      if (InVectors.size() == 2) {
+        Vec = createShuffle(Vec, InVectors.back(), CommonMask);
+        InVectors.pop_back();
+      } else {
+        Vec = createShuffle(Vec, nullptr, CommonMask);
+      }
+      for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
+        if (CommonMask[Idx] != UndefMaskElem)
+          CommonMask[Idx] = Idx;
+      assert(VF > 0 &&
+             "Expected vector length for the final value before action.");
+      unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
+      if (VecVF < VF) {
+        SmallVector<int> ResizeMask(VF, UndefMaskElem);
+        std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), VecVF), 0);
+        Vec = createShuffle(Vec, nullptr, ResizeMask);
+      }
+      Action(Vec, CommonMask);
+      InVectors.front() = Vec;
+    }
     if (!ExtMask.empty()) {
       if (CommonMask.empty()) {
         CommonMask.assign(ExtMask.begin(), ExtMask.end());
@@ -9378,7 +9418,33 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
   inversePermutation(E->ReorderIndices, ReorderMask);
   if (!ReorderMask.empty())
     reorderScalars(GatheredScalars, ReorderMask);
-
+  auto FindReusedSplat = [&](SmallVectorImpl<int> &Mask) {
+    if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
+          return isa<UndefValue>(V) && !isa<PoisonValue>(V);
+        }))
+      return false;
+    TreeEntry *UserTE = E->UserTreeIndices.back().UserTE;
+    unsigned EdgeIdx = E->UserTreeIndices.back().EdgeIdx;
+    if (UserTE->getNumOperands() != 2)
+      return false;
+    auto *It =
+        find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
+          return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
+                   return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
+                 }) != TE->UserTreeIndices.end();
+        });
+    if (It == VectorizableTree.end())
+      return false;
+    unsigned I =
+        *find_if_not(Mask, [](int Idx) { return Idx == UndefMaskElem; });
+    int Sz = Mask.size();
+    if (all_of(Mask, [Sz](int Idx) { return Idx < 2 * Sz; }) &&
+        ShuffleVectorInst::isIdentityMask(Mask))
+      std::iota(Mask.begin(), Mask.end(), 0);
+    else
+      std::fill(Mask.begin(), Mask.end(), I);
+    return true;
+  };
   ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
   Value *Vec = nullptr;
   SmallVector<int> Mask;
@@ -9434,8 +9500,9 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
       }
     }
   }
-  if ((ExtractShuffle || GatherShuffle) &&
-      all_of(GatheredScalars, PoisonValue::classof)) {
+  if (ExtractShuffle || GatherShuffle) {
+    bool IsNonPoisoned = true;
+    bool IsUsedInExpr = false;
     Value *Vec1 = nullptr;
     if (ExtractShuffle) {
       // Gather of extractelements can be represented as just a shuffle of
@@ -9459,22 +9526,97 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
           Vec2 = EI->getVectorOperand();
         }
       }
-      if (Vec2)
+      if (Vec2) {
         ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
-      else if (Vec1)
+      } else if (Vec1) {
         ShuffleBuilder.add(Vec1, ExtractMask);
-      else
+        IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
+        IsUsedInExpr = FindReusedSplat(ExtractMask);
+      } else {
         ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
                                ScalarTy, GatheredScalars.size())),
                            ExtractMask);
+      }
     }
     if (GatherShuffle) {
-      if (Entries.size() == 1)
+      if (Entries.size() == 1) {
         ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
-      else
+        IsNonPoisoned &=
+            isGuaranteedNotToBePoison(Entries.front()->VectorizedValue);
+        IsUsedInExpr = FindReusedSplat(Mask);
+      } else {
         ShuffleBuilder.add(Entries.front()->VectorizedValue,
                            Entries.back()->VectorizedValue, Mask);
+      }
     }
+    // Try to figure out best way to combine values: build a shuffle and insert
+    // elements or just build several shuffles.
+    // Insert non-constant scalars.
+    SmallVector<Value *> NonConstants(GatheredScalars);
+    int EMSz = ExtractMask.size();
+    int MSz = Mask.size();
+    // Try to build constant vector and shuffle with it only if currently we
+    // have a single permutation and more than 1 scalar constants.
+    bool IsSingleShuffle = !ExtractShuffle || !GatherShuffle;
+    bool IsIdentityShuffle =
+        (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
+             TTI::SK_PermuteSingleSrc &&
+         none_of(ExtractMask, [&](int I) { return I >= EMSz; }) &&
+         ShuffleVectorInst::isIdentityMask(ExtractMask)) ||
+        (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
+             TTI::SK_PermuteSingleSrc &&
+         none_of(Mask, [&](int I) { return I >= MSz; }) &&
+         ShuffleVectorInst::isIdentityMask(Mask));
+    bool EnoughConstsForShuffle =
+        IsSingleShuffle &&
+        (none_of(GatheredScalars,
+                 [](Value *V) {
+                   return isa<UndefValue>(V) && !isa<PoisonValue>(V);
+                 }) ||
+         any_of(GatheredScalars,
+                [](Value *V) {
+                  return isa<Constant>(V) && !isa<UndefValue>(V);
+                })) &&
+        (!IsIdentityShuffle ||
+         (GatheredScalars.size() == 2 &&
+          any_of(GatheredScalars,
+                 [](Value *V) { return !isa<UndefValue>(V); })) ||
+         count_if(GatheredScalars, [](Value *V) {
+           return isa<Constant>(V) && !isa<PoisonValue>(V);
+         }) > 1);
+    // NonConstants array contains just non-constant values, GatheredScalars
+    // contains only constant to build final vector and then shuffle.
+    for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) {
+      if (EnoughConstsForShuffle && isa<Constant>(GatheredScalars[I]))
+        NonConstants[I] = PoisonValue::get(ScalarTy);
+      else
+        GatheredScalars[I] = PoisonValue::get(ScalarTy);
+    }
+    // Generate constants for final shuffle and build a mask for them.
+    if (!all_of(GatheredScalars, PoisonValue::classof)) {
+      SmallVector<int> BVMask(GatheredScalars.size(), UndefMaskElem);
+      Value *BV = gather(GatheredScalars);
+      for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) {
+        if (!isa<PoisonValue>(GatheredScalars[I]))
+          BVMask[I] = I;
+      }
+      ShuffleBuilder.add(BV, BVMask);
+    }
+    if (all_of(NonConstants, [=](Value *V) {
+          return isa<PoisonValue>(V) ||
+                 (IsSingleShuffle && ((IsIdentityShuffle &&
+                  IsNonPoisoned) || IsUsedInExpr) && isa<UndefValue>(V));
+        }))
+      Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+    else
+      Vec = ShuffleBuilder.finalize(
+          E->ReuseShuffleIndices, E->Scalars.size(),
+          [&](Value *&Vec, SmallVectorImpl<int> &Mask) {
+            Vec = gather(NonConstants, Vec);
+            for (unsigned I = 0, Sz = Mask.size(); I < Sz; ++I)
+              if (!isa<PoisonValue>(NonConstants[I]))
+                Mask[I] = I;
+          });
   } else if (!allConstant(E->Scalars)) {
     // TODO: remove this code once able to combine shuffled vectors and build
     // vector elements.
@@ -9570,13 +9712,14 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
     // Gather unique scalars and all constants.
     Vec = gather(GatheredScalars);
     ShuffleBuilder.add(Vec, ReuseMask);
+    Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
   } else {
     // Gather all constants.
     Vec = gather(E->Scalars);
     ShuffleBuilder.add(Vec, ReuseMask);
+    Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
   }
 
-  Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
   if (NeedFreeze)
     Vec = Builder.CreateFreeze(Vec);
   return Vec;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
index 6b104f33e0c6a..b7f161b056fe2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
@@ -67,23 +67,23 @@ define i32 @ray_sphere(ptr nocapture noundef readonly %sph, ptr nocapture nounde
 ; CHECK-NEXT:    [[MUL88:%.*]] = fmul double [[TMP4]], 2.000000e+00
 ; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x double> poison, double [[FNEG87]], i32 0
 ; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <2 x double> [[TMP26]], double [[CALL]], i32 1
-; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <2 x double> poison, double [[CALL]], i32 0
+; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <2 x double> [[TMP27]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <2 x double> [[TMP28]], double [[TMP12]], i32 1
 ; CHECK-NEXT:    [[TMP30:%.*]] = fsub <2 x double> [[TMP27]], [[TMP29]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <2 x double> poison, double [[MUL88]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP32:%.*]] = fdiv <2 x double> [[TMP30]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <2 x double> [[TMP32]], i32 1
-; CHECK-NEXT:    [[CMP93:%.*]] = fcmp olt double [[TMP33]], 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[TMP32]], i32 0
-; CHECK-NEXT:    [[CMP94:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D
+; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <2 x double> [[TMP31]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP33:%.*]] = fdiv <2 x double> [[TMP30]], [[TMP32]]
+; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[TMP33]], i32 1
+; CHECK-NEXT:    [[CMP93:%.*]] = fcmp olt double [[TMP34]], 0x3EB0C6F7A0B5ED8D
+; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i32 0
+; CHECK-NEXT:    [[CMP94:%.*]] = fcmp olt double [[TMP35]], 0x3EB0C6F7A0B5ED8D
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK:       lor.lhs.false:
-; CHECK-NEXT:    [[TMP35:%.*]] = fcmp ule <2 x double> [[TMP32]], <double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <2 x i1> [[TMP35]], i32 0
-; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x i1> [[TMP35]], i32 1
-; CHECK-NEXT:    [[OR_COND106:%.*]] = select i1 [[TMP37]], i1 true, i1 [[TMP36]]
+; CHECK-NEXT:    [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0
+; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1
+; CHECK-NEXT:    [[OR_COND106:%.*]] = select i1 [[TMP38]], i1 true, i1 [[TMP37]]
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = zext i1 [[OR_COND106]] to i32
 ; CHECK-NEXT:    br label [[CLEANUP]]
 ; CHECK:       cleanup:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
index 8f592783d8d76..4ebb78017db1c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -95,11 +95,10 @@ define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
 ; AVX-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
 ; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; AVX-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
-; AVX-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[B:%.*]], i32 1
-; AVX-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[C]], i32 2
-; AVX-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
-; AVX-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP5]], [[TMP8]]
-; AVX-NEXT:    store <4 x i32> [[TMP9]], ptr @cle32, align 16
+; AVX-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 undef, i32 4, i32 0>
+; AVX-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[B:%.*]], i32 1
+; AVX-NEXT:    [[TMP8:%.*]] = xor <4 x i32> [[TMP5]], [[TMP7]]
+; AVX-NEXT:    store <4 x i32> [[TMP8]], ptr @cle32, align 16
 ; AVX-NEXT:    ret void
 ;
   %add1 = add i32 %c, %a

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
index f3c9847f55b14..4eb63cbd44d5a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_clear_undefs.ll
@@ -5,23 +5,19 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 define i1 @foo() {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr null, align 4
-; CHECK-NEXT:    br i1 false, label [[TMP15:%.*]], label [[TMP2:%.*]]
+; CHECK-NEXT:    br i1 false, label [[TMP11:%.*]], label [[TMP2:%.*]]
 ; CHECK:       2:
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> zeroinitializer, i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x float> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> zeroinitializer, <4 x float> [[TMP7]], <4 x float> [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fsub <4 x float> [[TMP11]], zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd <4 x float> [[TMP11]], zeroinitializer
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    br label [[TMP15]]
-; CHECK:       15:
-; CHECK-NEXT:    [[TMP16:%.*]] = phi <4 x float> [ [[TMP14]], [[TMP2]] ], [ zeroinitializer, [[TMP0:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> zeroinitializer, <4 x float> [[TMP5]], <4 x float> [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = fsub <4 x float> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <4 x float> [[TMP7]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    br label [[TMP11]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi <4 x float> [ [[TMP10]], [[TMP2]] ], [ zeroinitializer, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    ret i1 false
 ;
   %1 = load float, ptr null, align 4

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
index f110f2e760393..12b227cbeb3d3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -23,7 +23,7 @@ define i32 @fn1() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr @c, align 4
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[TMP2]], 7
 ; CHECK-NEXT:    store i32 [[AND]], ptr @a, align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> <i32 poison, i32 0>, <2 x i32> [[TMP0]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    switch i32 [[AND]], label [[IF_END:%.*]] [
 ; CHECK-NEXT:    i32 7, label [[SAVE_STATE_AND_RETURN]]
 ; CHECK-NEXT:    i32 0, label [[SAVE_STATE_AND_RETURN]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index f2acfddb659c6..837b4ffa68260 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -99,15 +99,7 @@ define void @test() {
 ; CHECK-NEXT:    br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
 ; CHECK:       if.then38:
 ; CHECK-NEXT:    [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> <double 6.000000e-01, double poison>, double 6.000000e-02, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> <double 5.000000e-01, double 8.000000e-01>, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> <double 2.400000e-02, double 0.000000e+00>, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> <double 9.000000e-01, double 9.100000e-01>, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 9.200000e-01, double 9.300000e-01>, [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> <double 0x3FEE147AE147AE14, double 0x3FEE666666666666>, [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> <double 0x3FEEB851EB851EB8, double 0x3FEF0A3D70A3D70A>, [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> <double 0x3FEF5C28F5C28F5C, double 0x3FEFAE147AE147AE>, [[TMP6]]
-; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8
+; CHECK-NEXT:    store <2 x double> <double 0x3FFA356C1D8A7F76, double 0x3FFDC4F38B38BEF4>, ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8
 ; CHECK-NEXT:    br label [[IF_THEN78]]
 ; CHECK:       if.then78:
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
index a520ddd679dc8..b5602c795f42b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -19,14 +19,12 @@ define i32 @test(ptr nocapture %G) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], <double 4.000000e+00, double 3.000000e+00>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 1.000000e+00, double 6.000000e+00>
 ; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[G]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds double, ptr [[G]], i64 2
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
-; CHECK-NEXT:    [[MUL11:%.*]] = fmul double [[TMP4]], 4.000000e+00
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[MUL11]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 7.000000e+00, double 8.000000e+00>
-; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr [[ARRAYIDX9]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP0]], i32 1
+; CHECK-NEXT:    [[MUL11:%.*]] = fmul double [[TMP3]], 4.000000e+00
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP1]], double [[MUL11]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 7.000000e+00, double 8.000000e+00>
+; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[ARRAYIDX9]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index a616fd5f7c197..6adcf21a4bc63 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -9,11 +9,10 @@ define i32 @fn1() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (ptr @fn1 to i32), i32 poison>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], ptr @a, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 8, i32 poison, i32 ptrtoint (ptr @fn1 to i32), i32 ptrtoint (ptr @fn1 to i32)>, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr @a, align 4
 ; CHECK-NEXT:    ret i32 0
 ;
   entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
index adbeb638c1a46..52a84e6248f79 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll
@@ -527,11 +527,10 @@ define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
 ; SSE-LABEL: @foo(
 ; SSE-NEXT:    [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
 ; SSE-NEXT:    [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
-; SSE-NEXT:    [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
 ; SSE-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
 ; SSE-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1
-; SSE-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> poison, float [[SUB14_I167]], i32 0
-; SSE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[VECEXT_I276_I169]], i32 1
+; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> <i32 undef, i32 1>
+; SSE-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[SUB14_I167]], i32 0
 ; SSE-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
 ; SSE-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+01>, float [[B:%.*]], i32 0
 ; SSE-NEXT:    [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
index 118372d2d5898..a117fdc75bfdc 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -15,23 +15,20 @@ define i32 @bar() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 6
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 7
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 9
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 undef, i32 undef, i32 poison>, i32 [[SUB86_1]], i32 4
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 5
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 6
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 7
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 12
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12>
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <16 x i32> [[TMP12]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP14:%.*]] = and <16 x i32> [[TMP13]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP15:%.*]] = mul nuw <16 x i32> [[TMP14]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP16:%.*]] = add <16 x i32> [[TMP15]], [[TMP12]]
-; CHECK-NEXT:    [[TMP17:%.*]] = xor <16 x i32> [[TMP16]], [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP17]])
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP18]], 16
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 6, i32 5, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[SUB102_3]], i32 12
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_3]], i32 15
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP13:%.*]] = and <16 x i32> [[TMP12]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
+; CHECK-NEXT:    [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP15:%.*]] = add <16 x i32> [[TMP14]], [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i32> [[TMP15]], [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP16]])
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP17]], 16
 ; CHECK-NEXT:    [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]]
 ; CHECK-NEXT:    [[SHR120:%.*]] = lshr i32 [[ADD119]], 1
 ; CHECK-NEXT:    ret i32 [[SHR120]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
index d417dd275d62f..e5947dcdd0825 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
@@ -187,9 +187,9 @@ define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr
 ; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[BB16:%.*]], label [[BB6:%.*]]
 ; CHECK:       bb6:
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[ARG1:%.*]], i32 3
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float [[ARG2:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x float> [[ARG:%.*]], <2 x float> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[ARG2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    ret void
@@ -226,9 +226,9 @@ define void @gather_sequence_crash(<2 x float> %arg, ptr %arg1, float %arg2, ptr
 ; CHECK-NEXT:    [[TMP38:%.*]] = fadd float 0.000000e+00, [[TMP37]]
 ; CHECK-NEXT:    store float [[TMP38]], ptr [[TMP35]], align 4
 ; CHECK-NEXT:    [[TMP39:%.*]] = getelementptr float, ptr [[ARG4]], i64 1
-; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x float>, ptr [[TMP39]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x float> zeroinitializer, [[TMP7]]
-; CHECK-NEXT:    store <2 x float> [[TMP8]], ptr [[TMP39]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[TMP39]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x float> zeroinitializer, [[TMP4]]
+; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[TMP39]], align 4
 ; CHECK-NEXT:    [[TMP44:%.*]] = load float, ptr [[ARG3:%.*]], align 4
 ; CHECK-NEXT:    [[TMP45:%.*]] = load float, ptr [[ARG4]], align 4
 ; CHECK-NEXT:    [[TMP46:%.*]] = fadd float 0.000000e+00, [[TMP45]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
index 5bac13d87b48f..359c074d6a211 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -9,19 +9,19 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 
 define void @shuffle_operands1(ptr noalias %from, ptr noalias %to, double %v1, double %v2) {
 ; CHECK-LABEL: @shuffle_operands1(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[TO:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2:%.*]], i64 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @shuffle_operands1(
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
-; SSE2-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i64 1
-; SSE2-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    store <2 x double> [[TMP5]], ptr [[TO:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[V1:%.*]], i64 0
+; SSE2-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[V2:%.*]], i64 1
+; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
   %from_1 = getelementptr double, ptr %from, i64 1
@@ -41,11 +41,11 @@ define void @vecload_vs_broadcast(ptr noalias %from, ptr noalias %to, double %v1
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -55,11 +55,11 @@ define void @vecload_vs_broadcast(ptr noalias %from, ptr noalias %to, double %v1
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
-; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
-; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
-; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
+; SSE2-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP0]], [[TMP2]]
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -89,11 +89,11 @@ define void @vecload_vs_broadcast2(ptr noalias %from, ptr noalias %to, double %v
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -103,11 +103,11 @@ define void @vecload_vs_broadcast2(ptr noalias %from, ptr noalias %to, double %v
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
-; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
-; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
+; SSE2-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -137,11 +137,11 @@ define void @vecload_vs_broadcast3(ptr noalias %from, ptr noalias %to, double %v
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
 ; CHECK-NEXT:    ret void
@@ -151,11 +151,11 @@ define void @vecload_vs_broadcast3(ptr noalias %from, ptr noalias %to, double %v
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[P]], i64 0
-; SSE2-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> [[TMP1]], <2 x i32> <i32 0, i32 2>
-; SSE2-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], [[TMP1]]
-; SSE2-NEXT:    store <2 x double> [[TMP4]], ptr [[TO:%.*]], align 4
+; SSE2-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 0
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP0]]
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
 ; SSE2-NEXT:    ret void
@@ -185,10 +185,10 @@ define void @shuffle_nodes_match1(ptr noalias %from, ptr noalias %to, double %v1
 ; CHECK-NEXT:    br label [[LP:%.*]]
 ; CHECK:       lp:
 ; CHECK-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP1]]
 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; CHECK-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; CHECK:       ext:
@@ -199,10 +199,10 @@ define void @shuffle_nodes_match1(ptr noalias %from, ptr noalias %to, double %v1
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
-; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
+; SSE2-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP1]]
 ; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
@@ -251,10 +251,10 @@ define void @vecload_vs_broadcast4(ptr noalias %from, ptr noalias %to, double %v
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
-; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[SHUFFLE]]
+; SSE2-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], [[TMP1]]
 ; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
@@ -304,10 +304,10 @@ define void @shuffle_nodes_match2(ptr noalias %from, ptr noalias %to, double %v1
 ; SSE2-NEXT:    br label [[LP:%.*]]
 ; SSE2:       lp:
 ; SSE2-NEXT:    [[P:%.*]] = phi double [ 1.000000e+00, [[LP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
-; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
-; SSE2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
-; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[P]], i64 1
-; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[SHUFFLE]], [[TMP2]]
+; SSE2-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[FROM:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> [[TMP0]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; SSE2-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP0]], double [[P]], i64 1
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
 ; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[TO:%.*]], align 4
 ; SSE2-NEXT:    br i1 undef, label [[LP]], label [[EXT:%.*]]
 ; SSE2:       ext:
@@ -345,7 +345,7 @@ define void @good_load_order() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr @a, align 16
 ; CHECK-NEXT:    br label [[FOR_BODY3:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP14:%.*]], [[FOR_BODY3]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP12:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
@@ -355,20 +355,20 @@ define void @good_load_order() {
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 4
 ; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[TMP8]], [[TMP10]]
-; CHECK-NEXT:    store <4 x float> [[TMP11]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i64 0
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul <4 x float> [[TMP7]], [[TMP9]]
+; CHECK-NEXT:    store <4 x float> [[TMP10]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
-; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP13]]
-; CHECK-NEXT:    [[TMP14]] = load float, ptr [[ARRAYIDX41]], align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP8]], i64 3
-; CHECK-NEXT:    [[MUL45:%.*]] = fmul float [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP11]]
+; CHECK-NEXT:    [[TMP12]] = load float, ptr [[ARRAYIDX41]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP7]], i64 3
+; CHECK-NEXT:    [[MUL45:%.*]] = fmul float [[TMP12]], [[TMP13]]
 ; CHECK-NEXT:    store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
-; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP16]], 31995
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP14]], 31995
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -380,7 +380,7 @@ define void @good_load_order() {
 ; SSE2-NEXT:    [[TMP0:%.*]] = load float, ptr @a, align 16
 ; SSE2-NEXT:    br label [[FOR_BODY3:%.*]]
 ; SSE2:       for.body3:
-; SSE2-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP14:%.*]], [[FOR_BODY3]] ]
+; SSE2-NEXT:    [[TMP1:%.*]] = phi float [ [[TMP0]], [[FOR_COND1_PREHEADER]] ], [ [[TMP12:%.*]], [[FOR_BODY3]] ]
 ; SSE2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY3]] ]
 ; SSE2-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; SSE2-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
@@ -390,20 +390,20 @@ define void @good_load_order() {
 ; SSE2-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDVARS_IV]] to i32
 ; SSE2-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 4
 ; SSE2-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP6]]
-; SSE2-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
-; SSE2-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
-; SSE2-NEXT:    [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
-; SSE2-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[TMP8]], [[TMP10]]
-; SSE2-NEXT:    store <4 x float> [[TMP11]], ptr [[ARRAYIDX5]], align 4
+; SSE2-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
+; SSE2-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
+; SSE2-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP1]], i64 0
+; SSE2-NEXT:    [[TMP10:%.*]] = fmul <4 x float> [[TMP7]], [[TMP9]]
+; SSE2-NEXT:    store <4 x float> [[TMP10]], ptr [[ARRAYIDX5]], align 4
 ; SSE2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 5
-; SSE2-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; SSE2-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP13]]
-; SSE2-NEXT:    [[TMP14]] = load float, ptr [[ARRAYIDX41]], align 4
-; SSE2-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP8]], i64 3
-; SSE2-NEXT:    [[MUL45:%.*]] = fmul float [[TMP14]], [[TMP15]]
+; SSE2-NEXT:    [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; SSE2-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [32000 x float], ptr @a, i32 0, i32 [[TMP11]]
+; SSE2-NEXT:    [[TMP12]] = load float, ptr [[ARRAYIDX41]], align 4
+; SSE2-NEXT:    [[TMP13:%.*]] = extractelement <4 x float> [[TMP7]], i64 3
+; SSE2-NEXT:    [[MUL45:%.*]] = fmul float [[TMP12]], [[TMP13]]
 ; SSE2-NEXT:    store float [[MUL45]], ptr [[ARRAYIDX31]], align 4
-; SSE2-NEXT:    [[TMP16:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; SSE2-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP16]], 31995
+; SSE2-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; SSE2-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP14]], 31995
 ; SSE2-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3]], label [[FOR_END:%.*]]
 ; SSE2:       for.end:
 ; SSE2-NEXT:    ret void
@@ -458,17 +458,17 @@ for.end:
 
 define void @load_reorder_double(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b){
 ; CHECK-LABEL: @load_reorder_double(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @load_reorder_double(
-; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
-; SSE2-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
-; SSE2-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    store <2 x double> [[TMP5]], ptr [[C:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[B:%.*]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[A:%.*]], align 4
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; SSE2-NEXT:    store <2 x double> [[TMP3]], ptr [[C:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
   %1 = load double, ptr %a
@@ -493,17 +493,17 @@ define void @load_reorder_double(ptr nocapture %c, ptr noalias nocapture readonl
 
 define void @load_reorder_float(ptr nocapture %c, ptr noalias nocapture readonly %a, ptr noalias nocapture readonly %b){
 ; CHECK-LABEL: @load_reorder_float(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @load_reorder_float(
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
-; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
-; SSE2-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    store <4 x float> [[TMP5]], ptr [[C:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; SSE2-NEXT:    store <4 x float> [[TMP3]], ptr [[C:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
   %1 = load float, ptr %a
@@ -542,21 +542,21 @@ define void @load_reorder_float(ptr nocapture %c, ptr noalias nocapture readonly
 
 define void @opcode_reorder(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c,ptr noalias nocapture readonly %d) {
 ; CHECK-LABEL: @opcode_reorder(
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], [[TMP5]]
-; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; SSE2-LABEL: @opcode_reorder(
-; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
-; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
-; SSE2-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]]
-; SSE2-NEXT:    [[TMP7:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
-; SSE2-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], [[TMP5]]
-; SSE2-NEXT:    store <4 x float> [[TMP8]], ptr [[A:%.*]], align 4
+; SSE2-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4
+; SSE2-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4
+; SSE2-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
+; SSE2-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[D:%.*]], align 4
+; SSE2-NEXT:    [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], [[TMP3]]
+; SSE2-NEXT:    store <4 x float> [[TMP5]], ptr [[A:%.*]], align 4
 ; SSE2-NEXT:    ret void
 ;
   %1 = load float, ptr %b

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index 4f2c95645a753..57a92385fb125 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -26,11 +26,11 @@ define i32 @foo(ptr nocapture %A, i32 %k) {
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.else:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 10
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[IF_ELSE]] ], [ <double 3.000000e+00, double 5.000000e+00>, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    store <2 x double> [[TMP2]], ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x double> [ [[TMP0]], [[IF_ELSE]] ], [ <double 3.000000e+00, double 5.000000e+00>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[A]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
@@ -73,19 +73,19 @@ if.end:                                           ; preds = %entry, %if.else
 define i32 @foo2(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32 %m) #0 {
 ; CHECK-LABEL: @foo2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 1.000000e+01, double 1.000000e+01>
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
-; CHECK-NEXT:    [[TMP5]] = fadd <2 x double> [[TMP4]], <double 4.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x double> [ [[TMP0]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 1.000000e+01, double 1.000000e+01>
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 4.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    [[TMP4]] = fadd <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_019]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[B:%.*]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -138,41 +138,40 @@ define float @foo3(ptr nocapture readonly %A) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[ARRAYIDX1]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX1]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP0]], i32 0
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi <4 x float> [ [[TMP2]], [[ENTRY]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = phi <2 x float> [ [[TMP5]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
-; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP8]], 7.000000e+00
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x float> [ [[TMP3]], [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
+; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[TMP6]], 7.000000e+00
 ; CHECK-NEXT:    [[ADD6]] = fadd float [[R_052]], [[MUL]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX14]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX14]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP12]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> [[TMP12]], <4 x i32> <i32 1, i32 undef, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP10]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = fmul <4 x float> [[TMP14]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
-; CHECK-NEXT:    [[TMP16]] = fadd <4 x float> [[TMP6]], [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP17]], 121
+; CHECK-NEXT:    [[TMP9]] = load <2 x float>, ptr [[ARRAYIDX19]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> [[TMP9]], <4 x i32> <i32 1, i32 undef, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = fmul <4 x float> [[TMP11]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+01, float 1.100000e+01>
+; CHECK-NEXT:    [[TMP13]] = fadd <4 x float> [[TMP4]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP14]], 121
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x float> [[TMP16]], i32 0
-; CHECK-NEXT:    [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP18]]
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x float> [[TMP16]], i32 1
-; CHECK-NEXT:    [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP19]]
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x float> [[TMP16]], i32 2
-; CHECK-NEXT:    [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP20]]
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x float> [[TMP16]], i32 3
-; CHECK-NEXT:    [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP21]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP13]], i32 0
+; CHECK-NEXT:    [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP15]]
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x float> [[TMP13]], i32 1
+; CHECK-NEXT:    [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x float> [[TMP13]], i32 2
+; CHECK-NEXT:    [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP17]]
+; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x float> [[TMP13]], i32 3
+; CHECK-NEXT:    [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP18]]
 ; CHECK-NEXT:    ret float [[ADD31]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
index 98df52a85248b..9c7e8f66c6c6c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
@@ -68,14 +68,13 @@ define void @pr35497() local_unnamed_addr #0 {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
 ; SSE-NEXT:    [[TMP5:%.*]] = add nuw nsw <2 x i64> [[TMP4]], zeroinitializer
 ; SSE-NEXT:    store <2 x i64> [[TMP5]], ptr undef, align 1
-; SSE-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[ADD]], i32 0
-; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> [[TMP5]], <2 x i32> <i32 0, i32 3>
-; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 2, i64 2>
-; SSE-NEXT:    [[TMP9:%.*]] = and <2 x i64> [[TMP8]], <i64 20, i64 20>
-; SSE-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP9]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
-; SSE-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP5]], <i64 6, i64 6>
-; SSE-NEXT:    [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP10]], [[TMP11]]
-; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[ARRAYIDX2_2]], align 1
+; SSE-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[ADD]], i32 0
+; SSE-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 2, i64 2>
+; SSE-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP7]], <i64 20, i64 20>
+; SSE-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
+; SSE-NEXT:    [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], <i64 6, i64 6>
+; SSE-NEXT:    [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP10]]
+; SSE-NEXT:    store <2 x i64> [[TMP11]], ptr [[ARRAYIDX2_2]], align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @pr35497(
@@ -89,14 +88,13 @@ define void @pr35497() local_unnamed_addr #0 {
 ; AVX-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 20, i64 20>
 ; AVX-NEXT:    [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer
 ; AVX-NEXT:    store <2 x i64> [[TMP4]], ptr undef, align 1
-; AVX-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[ADD]], i32 0
-; AVX-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
-; AVX-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 2, i64 2>
-; AVX-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP7]], <i64 20, i64 20>
-; AVX-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
-; AVX-NEXT:    [[TMP10:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6>
-; AVX-NEXT:    [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP10]]
-; AVX-NEXT:    store <2 x i64> [[TMP11]], ptr [[ARRAYIDX2_2]], align 1
+; AVX-NEXT:    [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[ADD]], i32 0
+; AVX-NEXT:    [[TMP6:%.*]] = shl <2 x i64> [[TMP5]], <i64 2, i64 2>
+; AVX-NEXT:    [[TMP7:%.*]] = and <2 x i64> [[TMP6]], <i64 20, i64 20>
+; AVX-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
+; AVX-NEXT:    [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6>
+; AVX-NEXT:    [[TMP10:%.*]] = add nuw nsw <2 x i64> [[TMP8]], [[TMP9]]
+; AVX-NEXT:    store <2 x i64> [[TMP10]], ptr [[ARRAYIDX2_2]], align 1
 ; AVX-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 0be5e2d9d536b..97fb5686c8710 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -329,19 +329,12 @@ define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
 
 define i1 @logical_and_icmp_clamp_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; CHECK-LABEL: @logical_and_icmp_clamp_v8i32(
-; CHECK-NEXT:    [[Y0:%.*]] = extractelement <8 x i32> [[Y:%.*]], i32 0
-; CHECK-NEXT:    [[Y1:%.*]] = extractelement <8 x i32> [[Y]], i32 1
-; CHECK-NEXT:    [[Y2:%.*]] = extractelement <8 x i32> [[Y]], i32 2
-; CHECK-NEXT:    [[Y3:%.*]] = extractelement <8 x i32> [[Y]], i32 3
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[X:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[Y0]], i32 4
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[Y1]], i32 5
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[Y2]], i32 6
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[Y3]], i32 7
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = freeze <8 x i1> [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP7]])
-; CHECK-NEXT:    ret i1 [[TMP8]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[Y:%.*]], <8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 poison, i32 poison, i32 poison, i32 poison>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = freeze <8 x i1> [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP4]])
+; CHECK-NEXT:    ret i1 [[TMP5]]
 ;
   %x0 = extractelement <8 x i32> %x, i32 0
   %x1 = extractelement <8 x i32> %x, i32 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
index ffa052f7cfddd..458f4f98d815f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -89,7 +89,7 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
@@ -138,7 +138,7 @@ define i1 @fcmp_lt(double %a, double %b, double %c) {
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[FNEG]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x double> [[TMP2]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll
index 72faaa93df02e..16578504de084 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-clustered-node.ll
@@ -14,17 +14,16 @@ define i1 @test(ptr %arg, ptr %i233, i64 %i241, ptr %i235, ptr %i237, ptr %i227)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x ptr> <ptr poison, ptr null, ptr poison, ptr null, ptr null, ptr null, ptr null, ptr null>, ptr [[I242]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[I250]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult <8 x ptr> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x ptr> poison, ptr [[I250]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[I242]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[I245]], i32 2
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x ptr> [[TMP7]], ptr [[I248]], i32 3
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x ptr> [[TMP8]], <8 x ptr> poison, <8 x i32> <i32 2, i32 0, i32 1, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x ptr> <ptr poison, ptr poison, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null>, <8 x ptr> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <8 x ptr> [[TMP9]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i1> [[TMP4]], [[TMP12]]
-; CHECK-NEXT:    [[TMP14:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP13]])
-; CHECK-NEXT:    [[OP_RDX:%.*]] = and i1 [[TMP14]], false
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x ptr> [[TMP3]], <8 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x ptr> [[TMP5]], ptr [[I245]], i32 2
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x ptr> [[TMP6]], ptr [[I248]], i32 3
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x ptr> [[TMP7]], <4 x ptr> poison, <8 x i32> <i32 2, i32 0, i32 1, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x ptr> [[TMP1]], <8 x ptr> <ptr poison, ptr poison, ptr null, ptr null, ptr null, ptr null, ptr null, ptr null>, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult <8 x ptr> [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i1> [[TMP4]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP12]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = and i1 [[TMP13]], false
 ; CHECK-NEXT:    ret i1 [[OP_RDX]]
 ;
 bb:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
index b182df599ce16..035bc3ecc09d4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather.ll
@@ -10,7 +10,7 @@ define void @test(ptr noalias %0, ptr %p) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP4]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> <i32 4, i32 3, i32 0, i32 1, i32 2, i32 0, i32 1, i32 2, i32 0, i32 2, i32 5, i32 6, i32 7, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> <float poison, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <16 x float> [[TMP8]], <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x float> [[TMP8]], <16 x float> <float poison, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd reassoc nsz arcp contract afn <16 x float> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x float> [[TMP10]], <16 x float> poison, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 1, i32 9, i32 0, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    store <16 x float> [[TMP11]], ptr [[TMP5]], align 4

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
index f996040f4d517..ccde383afa3ed 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/root-trunc-extract-reuse.ll
@@ -17,9 +17,8 @@ define i1 @test() {
 ; CHECK-NEXT:    [[T13:%.*]] = and <2 x i32> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    br label [[ELSE1:%.*]]
 ; CHECK:       else1:
-; CHECK-NEXT:    [[T20:%.*]] = extractelement <2 x i32> [[T13]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[BF_CAST162]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[T20]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[T13]], <2 x i32> poison, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[BF_CAST162]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt <2 x i32> [[TMP6]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
 ; CHECK-NEXT:    ret i1 [[TMP8]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll
index 284737b23a6c7..c79e9b94278cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll
@@ -12,10 +12,10 @@ define void @test() {
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr undef, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> <float 0.000000e+00, float poison>, <2 x float> [[TMP0]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> <float poison, float 0.000000e+00>, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]])
 ; CHECK-NEXT:    br i1 false, label [[BB2:%.*]], label [[BB3:%.*]]
 ; CHECK:       bb2:
@@ -23,12 +23,12 @@ define void @test() {
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x float> [ [[TMP9]], [[BB2]] ], [ zeroinitializer, [[BB1]] ]
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x float> [[TMP1]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = fsub <2 x float> [[TMP12]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x float> [[TMP1]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x float> [[TMP12]], zeroinitializer
 ; CHECK-NEXT:    [[TMP14:%.*]] = fsub <2 x float> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    store <2 x float> [[TMP14]], ptr [[ARRAYIDX21_I]], align 16
+; CHECK-NEXT:    [[TMP15:%.*]] = fsub <2 x float> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    store <2 x float> [[TMP15]], ptr [[ARRAYIDX21_I]], align 16
 ; CHECK-NEXT:    ret void
 ;
 entry:


        


More information about the llvm-commits mailing list