[llvm] a0086ad - [SLP]Improve gathering of scalar elements.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 9 05:24:01 PDT 2021


Author: Alexey Bataev
Date: 2021-06-09T05:23:21-07:00
New Revision: a0086add2e52a82dd83114f458c10e2e4bdd15ac

URL: https://github.com/llvm/llvm-project/commit/a0086add2e52a82dd83114f458c10e2e4bdd15ac
DIFF: https://github.com/llvm/llvm-project/commit/a0086add2e52a82dd83114f458c10e2e4bdd15ac.diff

LOG: [SLP]Improve gathering of scalar elements.

1. Better sorting of scalars to be gathered: constants, arguments, and
   instructions outside the loop are inserted first, and only then the
   instructions inside the loop. This improves hoisting of invariant
   insertelement instructions.
2. Better detection of shuffle candidates in gathering function.
3. The cost of insertelement for constants is 0.

Part of D57059.

Differential Revision: https://reviews.llvm.org/D103458

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
    llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
    llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
    llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
    llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
    llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
    llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
    llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
    llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
    llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
    llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
    llvm/test/Transforms/SLPVectorizer/X86/partail.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
    llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
    llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
    llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
    llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
    llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
    llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
    llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
    llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
    llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
    llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
    llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index b73a2377f1b03..ffa91575fb9e2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -218,15 +218,18 @@ static bool allSameBlock(ArrayRef<Value *> VL) {
   return true;
 }
 
+/// \returns True if the value is a constant (but not globals/constant
+/// expressions).
+static bool isConstant(Value *V) {
+  return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
+}
+
 /// \returns True if all of the values in \p VL are constants (but not
 /// globals/constant expressions).
 static bool allConstant(ArrayRef<Value *> VL) {
   // Constant expressions and globals can't be vectorized like normal integer/FP
   // constants.
-  for (Value *i : VL)
-    if (!isa<Constant>(i) || isa<ConstantExpr>(i) || isa<GlobalValue>(i))
-      return false;
-  return true;
+  return all_of(VL, isConstant);
 }
 
 /// \returns True if all of the values in \p VL are identical.
@@ -4725,6 +4728,8 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
   // Iterate in reverse order to consider insert elements with the high cost.
   for (unsigned I = VL.size(); I > 0; --I) {
     unsigned Idx = I - 1;
+    if (isConstant(VL[Idx]))
+      continue;
     if (!UniqueElements.insert(VL[Idx]).second)
       ShuffledElements.insert(Idx);
   }
@@ -4810,96 +4815,65 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
 }
 
 Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
-  Value *Val0 =
-      isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
-  FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
-  Value *Vec = PoisonValue::get(VecTy);
-  unsigned InsIndex = 0;
-  for (Value *Val : VL) {
-    Vec = Builder.CreateInsertElement(Vec, Val, Builder.getInt32(InsIndex++));
+  // List of instructions/lanes from current block and/or the blocks which are
+  // part of the current loop. These instructions will be inserted at the end to
+  // make it possible to optimize loops and hoist invariant instructions out of
+  // the loops body with better chances for success.
+  SmallVector<std::pair<Value *, unsigned>, 4> PostponedInsts;
+  SmallSet<int, 4> PostponedIndices;
+  Loop *L = LI->getLoopFor(Builder.GetInsertBlock());
+  auto &&CheckPredecessor = [](BasicBlock *InstBB, BasicBlock *InsertBB) {
+    SmallPtrSet<BasicBlock *, 4> Visited;
+    while (InsertBB && InsertBB != InstBB && Visited.insert(InsertBB).second)
+      InsertBB = InsertBB->getSinglePredecessor();
+    return InsertBB && InsertBB == InstBB;
+  };
+  for (int I = 0, E = VL.size(); I < E; ++I) {
+    if (auto *Inst = dyn_cast<Instruction>(VL[I]))
+      if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
+           getTreeEntry(Inst) || (L && (L->contains(Inst)))) &&
+          PostponedIndices.insert(I).second)
+        PostponedInsts.emplace_back(Inst, I);
+  }
+
+  auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) {
+    // No need to insert undefs elements - exit.
+    if (isa<UndefValue>(V))
+      return Vec;
+    Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos));
     auto *InsElt = dyn_cast<InsertElementInst>(Vec);
     if (!InsElt)
-      continue;
+      return Vec;
     GatherSeq.insert(InsElt);
     CSEBlocks.insert(InsElt->getParent());
     // Add to our 'need-to-extract' list.
-    if (TreeEntry *Entry = getTreeEntry(Val)) {
+    if (TreeEntry *Entry = getTreeEntry(V)) {
       // Find which lane we need to extract.
-      int FoundLane =
-          findLaneForValue(Entry->Scalars, Entry->ReuseShuffleIndices, Val);
-      ExternalUses.push_back(ExternalUser(Val, InsElt, FoundLane));
-    }
-  }
-
-  return Vec;
-}
-
-Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
-  InstructionsState S = getSameOpcode(VL);
-  if (S.getOpcode()) {
-    if (TreeEntry *E = getTreeEntry(S.OpValue)) {
-      if (E->isSame(VL)) {
-        Value *V = vectorizeTree(E);
-        if (VL.size() == E->Scalars.size() && !E->ReuseShuffleIndices.empty()) {
-          // Reshuffle to get only unique values.
-          // If some of the scalars are duplicated in the vectorization tree
-          // entry, we do not vectorize them but instead generate a mask for the
-          // reuses. But if there are several users of the same entry, they may
-          // have different vectorization factors. This is especially important
-          // for PHI nodes. In this case, we need to adapt the resulting
-          // instruction for the user vectorization factor and have to reshuffle
-          // it again to take only unique elements of the vector. Without this
-          // code the function incorrectly returns reduced vector instruction
-          // with the same elements, not with the unique ones.
-          // block:
-          // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
-          // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
-          // ... (use %2)
-          // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
-          // br %block
-          SmallVector<int, 4> UniqueIdxs;
-          SmallSet<int, 4> UsedIdxs;
-          int Pos = 0;
-          for (int Idx : E->ReuseShuffleIndices) {
-            if (UsedIdxs.insert(Idx).second)
-              UniqueIdxs.emplace_back(Pos);
-            ++Pos;
-          }
-          V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
-        }
-        return V;
+      unsigned FoundLane =
+          std::distance(Entry->Scalars.begin(), find(Entry->Scalars, V));
+      assert(FoundLane < Entry->Scalars.size() && "Couldn't find extract lane");
+      if (!Entry->ReuseShuffleIndices.empty()) {
+        FoundLane = std::distance(Entry->ReuseShuffleIndices.begin(),
+                                  find(Entry->ReuseShuffleIndices, FoundLane));
       }
+      ExternalUses.emplace_back(V, InsElt, FoundLane);
     }
+    return Vec;
+  };
+  Value *Val0 =
+      isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
+  FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
+  Value *Vec = PoisonValue::get(VecTy);
+  for (int I = 0, E = VL.size(); I < E; ++I) {
+    if (PostponedIndices.contains(I))
+      continue;
+    Vec = CreateInsertElement(Vec, VL[I], I);
   }
+  // Append instructions, which are/may be part of the loop, in the end to make
+  // it possible to hoist non-loop-based instructions.
+  for (const std::pair<Value *, unsigned> &Pair : PostponedInsts)
+    Vec = CreateInsertElement(Vec, Pair.first, Pair.second);
 
-  // Check that every instruction appears once in this bundle.
-  SmallVector<int, 4> ReuseShuffleIndicies;
-  SmallVector<Value *, 4> UniqueValues;
-  if (VL.size() > 2) {
-    DenseMap<Value *, unsigned> UniquePositions;
-    for (Value *V : VL) {
-      auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
-      ReuseShuffleIndicies.emplace_back(Res.first->second);
-      if (Res.second || isa<Constant>(V))
-        UniqueValues.emplace_back(V);
-    }
-    // Do not shuffle single element or if number of unique values is not power
-    // of 2.
-    if (UniqueValues.size() == VL.size() || UniqueValues.size() <= 1 ||
-        !llvm::isPowerOf2_32(UniqueValues.size()))
-      ReuseShuffleIndicies.clear();
-    else
-      VL = UniqueValues;
-  }
-
-  Value *Vec = gather(VL);
-  if (!ReuseShuffleIndicies.empty()) {
-    Vec = Builder.CreateShuffleVector(Vec, ReuseShuffleIndicies, "shuffle");
-    if (auto *I = dyn_cast<Instruction>(Vec)) {
-      GatherSeq.insert(I);
-      CSEBlocks.insert(I->getParent());
-    }
-  }
   return Vec;
 }
 
@@ -4907,11 +4881,13 @@ namespace {
 /// Merges shuffle masks and emits final shuffle instruction, if required.
 class ShuffleInstructionBuilder {
   IRBuilderBase &Builder;
+  const unsigned VF = 0;
   bool IsFinalized = false;
   SmallVector<int, 4> Mask;
 
 public:
-  ShuffleInstructionBuilder(IRBuilderBase &Builder) : Builder(Builder) {}
+  ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF)
+      : Builder(Builder), VF(VF) {}
 
   /// Adds a mask, inverting it before applying.
   void addInversedMask(ArrayRef<unsigned> SubMask) {
@@ -4938,8 +4914,9 @@ class ShuffleInstructionBuilder {
     SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
     int TermValue = std::min(Mask.size(), SubMask.size());
     for (int I = 0, E = SubMask.size(); I < E; ++I) {
-      if (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue) {
-        NewMask[I] = E;
+      if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
+          Mask[SubMask[I]] >= TermValue) {
+        NewMask[I] = UndefMaskElem;
         continue;
       }
       NewMask[I] = Mask[SubMask[I]];
@@ -4949,7 +4926,14 @@ class ShuffleInstructionBuilder {
 
   Value *finalize(Value *V) {
     IsFinalized = true;
-    if (Mask.empty())
+    unsigned ValueVF = cast<FixedVectorType>(V->getType())->getNumElements();
+    if (VF == ValueVF && Mask.empty())
+      return V;
+    SmallVector<int, 4> NormalizedMask(VF, UndefMaskElem);
+    std::iota(NormalizedMask.begin(), NormalizedMask.end(), 0);
+    addMask(NormalizedMask);
+
+    if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask))
       return V;
     return Builder.CreateShuffleVector(V, Mask, "shuffle");
   }
@@ -4961,6 +4945,120 @@ class ShuffleInstructionBuilder {
 };
 } // namespace
 
+Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
+  unsigned VF = VL.size();
+  InstructionsState S = getSameOpcode(VL);
+  if (S.getOpcode()) {
+    if (TreeEntry *E = getTreeEntry(S.OpValue))
+      if (E->isSame(VL)) {
+        Value *V = vectorizeTree(E);
+        if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
+          if (!E->ReuseShuffleIndices.empty()) {
+            // Reshuffle to get only unique values.
+            // If some of the scalars are duplicated in the vectorization tree
+            // entry, we do not vectorize them but instead generate a mask for
+            // the reuses. But if there are several users of the same entry,
+            // they may have different vectorization factors. This is especially
+            // important for PHI nodes. In this case, we need to adapt the
+            // resulting instruction for the user vectorization factor and have
+            // to reshuffle it again to take only unique elements of the vector.
+            // Without this code the function incorrectly returns reduced vector
+            // instruction with the same elements, not with the unique ones.
+
+            // block:
+            // %phi = phi <2 x > { .., %entry} {%shuffle, %block}
+            // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
+            // ... (use %2)
+            // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
+            // br %block
+            SmallVector<int> UniqueIdxs;
+            SmallSet<int, 4> UsedIdxs;
+            int Pos = 0;
+            int Sz = VL.size();
+            for (int Idx : E->ReuseShuffleIndices) {
+              if (Idx != Sz && UsedIdxs.insert(Idx).second)
+                UniqueIdxs.emplace_back(Pos);
+              ++Pos;
+            }
+            assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
+                                            "less than original vector size.");
+            UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
+            V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
+          } else {
+            assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
+                   "Expected vectorization factor less "
+                   "than original vector size.");
+            SmallVector<int> UniformMask(VF, 0);
+            std::iota(UniformMask.begin(), UniformMask.end(), 0);
+            V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
+          }
+        }
+        return V;
+      }
+  }
+
+  // Check that every instruction appears once in this bundle.
+  SmallVector<int> ReuseShuffleIndicies;
+  SmallVector<Value *> UniqueValues;
+  if (VL.size() > 2) {
+    DenseMap<Value *, unsigned> UniquePositions;
+    unsigned NumValues =
+        std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) {
+                                    return !isa<UndefValue>(V);
+                                  }).base());
+    VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
+    int UniqueVals = 0;
+    bool HasUndefs = false;
+    for (Value *V : VL.drop_back(VL.size() - VF)) {
+      if (isa<UndefValue>(V)) {
+        ReuseShuffleIndicies.emplace_back(UndefMaskElem);
+        HasUndefs = true;
+        continue;
+      }
+      if (isConstant(V)) {
+        ReuseShuffleIndicies.emplace_back(UniqueValues.size());
+        UniqueValues.emplace_back(V);
+        continue;
+      }
+      auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
+      ReuseShuffleIndicies.emplace_back(Res.first->second);
+      if (Res.second) {
+        UniqueValues.emplace_back(V);
+        ++UniqueVals;
+      }
+    }
+    if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) {
+      // Emit pure splat vector.
+      // FIXME: why it is not identified as an identity.
+      unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem);
+      if (NumUndefs == ReuseShuffleIndicies.size() - 1)
+        ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
+                                    UndefMaskElem);
+      else
+        ReuseShuffleIndicies.assign(VF, 0);
+    } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
+      ReuseShuffleIndicies.clear();
+      UniqueValues.clear();
+      UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
+    }
+    UniqueValues.append(VF - UniqueValues.size(),
+                        UndefValue::get(VL[0]->getType()));
+    VL = UniqueValues;
+  }
+
+  ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
+  Value *Vec = gather(VL);
+  if (!ReuseShuffleIndicies.empty()) {
+    ShuffleBuilder.addMask(ReuseShuffleIndicies);
+    Vec = ShuffleBuilder.finalize(Vec);
+    if (auto *I = dyn_cast<Instruction>(Vec)) {
+      GatherSeq.insert(I);
+      CSEBlocks.insert(I->getParent());
+    }
+  }
+  return Vec;
+}
+
 Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
   IRBuilder<>::InsertPointGuard Guard(Builder);
 
@@ -4969,8 +5067,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
     return E->VectorizedValue;
   }
 
-  ShuffleInstructionBuilder ShuffleBuilder(Builder);
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
+  unsigned VF = E->Scalars.size();
+  if (NeedToShuffleReuses)
+    VF = E->ReuseShuffleIndices.size();
+  ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
   if (E->State == TreeEntry::NeedToGather) {
     setInsertPointAfterBundle(E);
     Value *Vec;

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll
index c6ceb1cc7e793..8f386d672487a 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement-inseltpoison.ll
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define <2 x float> @insertelement-fixed-vector() {
 ; CHECK-LABEL: @insertelement-fixed-vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> poison)
 ; CHECK-NEXT:    ret <2 x float> [[TMP1]]
 ;
   %f0 = tail call fast float @llvm.fabs.f32(float undef)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
index 12031a679e831..ffe3ab4382d12 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/insertelement.ll
@@ -6,7 +6,7 @@ target triple = "aarch64-unknown-linux-gnu"
 
 define <2 x float> @insertelement-fixed-vector() {
 ; CHECK-LABEL: @insertelement-fixed-vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> undef)
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x float> @llvm.fabs.v2f32(<2 x float> poison)
 ; CHECK-NEXT:    ret <2 x float> [[TMP1]]
 ;
   %f0 = tail call fast float @llvm.fabs.f32(float undef)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
index f6ab38bb3935e..184630c47bcac 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -8,21 +8,21 @@ define dso_local void @l() local_unnamed_addr {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ undef, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ poison, [[BB:%.*]] ], [ [[TMP11:%.*]], [[BB25:%.*]] ]
 ; CHECK-NEXT:    br i1 undef, label [[BB3:%.*]], label [[BB11:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[I4:%.*]] = zext i1 undef to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], poison
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], <i16 8, i16 8>
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    br label [[BB25]]
 ; CHECK:       bb11:
 ; CHECK-NEXT:    [[I12:%.*]] = zext i1 undef to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], undef
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i16> [[TMP0]], poison
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64>
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule <2 x i64> undef, [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ule <2 x i64> poison, [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult <2 x i32> undef, [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult <2 x i32> poison, [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext <2 x i1> [[TMP8]] to <2 x i32>
 ; CHECK-NEXT:    br label [[BB25]]
 ; CHECK:       bb25:

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
index cfd6f59cb774a..ba1338d0a0f14 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -232,9 +232,9 @@ define void @noop_extracts_existing_vector_4_lanes(<9 x double>* %ptr.1, <4 x do
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double [[V1_LANE_3]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[V1_LANE_0]], i32 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_1]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[V2_LANE_0]], i32 1
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_0]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[A_INS_31:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP7]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -358,36 +358,28 @@ define void @noop_extracts_9_lanes(<9 x double>* %ptr.1, <4 x double>* %ptr.2) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_0]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_2]], i32 4
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_0]], i32 5
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_2]], i32 6
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_1]], i32 7
-; CHECK-NEXT:    [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[TMP15]]
+; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 1, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE2]]
 ; CHECK-NEXT:    [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
-; CHECK-NEXT:    [[A_INS_72:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP17]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
-; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_7]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_0]], i32 3
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_1]], i32 4
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_2]], i32 5
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_3]], i32 6
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_4]], i32 7
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <8 x double> [[TMP26]], double [[V2_LANE_1]], i32 1
-; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <8 x double> [[TMP27]], double [[V2_LANE_0]], i32 2
-; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <8 x double> [[TMP28]], double [[V2_LANE_2]], i32 3
-; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <8 x double> [[TMP29]], double [[V2_LANE_1]], i32 4
-; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <8 x double> [[TMP30]], double [[V2_LANE_0]], i32 5
-; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <8 x double> [[TMP31]], double [[V2_LANE_2]], i32 6
-; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <8 x double> [[TMP32]], double [[V2_LANE_1]], i32 7
-; CHECK-NEXT:    [[TMP34:%.*]] = fmul <8 x double> [[TMP25]], [[TMP33]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
+; CHECK-NEXT:    [[A_INS_73:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP12]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
+; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_73]], double [[A_LANE_8]], i32 8
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 1, i32 2, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]]
-; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <8 x double> [[TMP34]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
-; CHECK-NEXT:    [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP35]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
+; CHECK-NEXT:    [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP25]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
 ; CHECK-NEXT:    [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8
 ; CHECK-NEXT:    [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]]
 ; CHECK-NEXT:    store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8
@@ -485,28 +477,25 @@ define void @first_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x double>* %ptr.2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_1]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_0]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_2]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_0]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_2]], i32 4
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_1]], i32 5
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_0]], i32 6
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_2]], i32 7
-; CHECK-NEXT:    [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[TMP15]]
+; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 2, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE2]]
 ; CHECK-NEXT:    [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
-; CHECK-NEXT:    [[A_INS_72:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP17]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
-; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_7]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_0]], i32 3
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_1]], i32 4
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_2]], i32 5
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_3]], i32 6
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_4]], i32 7
-; CHECK-NEXT:    [[TMP26:%.*]] = fmul <8 x double> [[TMP25]], [[TMP15]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
+; CHECK-NEXT:    [[A_INS_73:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP12]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
+; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_73]], double [[A_LANE_8]], i32 8
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_6]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_7]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_0]], i32 3
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_1]], i32 4
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_2]], i32 5
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_3]], i32 6
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_4]], i32 7
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 2, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP21:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]]
-; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
-; CHECK-NEXT:    [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP27]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
+; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x double> [[TMP21]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
+; CHECK-NEXT:    [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP22]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
 ; CHECK-NEXT:    [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8
 ; CHECK-NEXT:    [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]]
 ; CHECK-NEXT:    store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8
@@ -604,36 +593,28 @@ define void @first_and_second_mul_chain_jumbled(<9 x double>* %ptr.1, <4 x doubl
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_0]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <8 x double> [[TMP8]], double [[V2_LANE_2]], i32 1
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <8 x double> [[TMP9]], double [[V2_LANE_1]], i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <8 x double> [[TMP10]], double [[V2_LANE_2]], i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x double> [[TMP11]], double [[V2_LANE_1]], i32 4
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x double> [[TMP12]], double [[V2_LANE_0]], i32 5
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V2_LANE_2]], i32 6
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V2_LANE_1]], i32 7
-; CHECK-NEXT:    [[TMP16:%.*]] = fmul <8 x double> [[TMP7]], [[TMP15]]
+; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x double> [[TMP10]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 2, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <8 x double> [[TMP7]], [[SHUFFLE2]]
 ; CHECK-NEXT:    [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
-; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
-; CHECK-NEXT:    [[A_INS_72:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP17]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
-; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_72]], double [[A_LANE_8]], i32 8
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_6]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_8]], i32 2
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <8 x double> [[TMP20]], double [[V1_LANE_1]], i32 3
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V1_LANE_0]], i32 4
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V1_LANE_3]], i32 5
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <8 x double> [[TMP23]], double [[V1_LANE_2]], i32 6
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <8 x double> [[TMP24]], double [[V1_LANE_5]], i32 7
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0
-; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <8 x double> [[TMP26]], double [[V2_LANE_1]], i32 1
-; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <8 x double> [[TMP27]], double [[V2_LANE_0]], i32 2
-; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <8 x double> [[TMP28]], double [[V2_LANE_2]], i32 3
-; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <8 x double> [[TMP29]], double [[V2_LANE_0]], i32 4
-; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <8 x double> [[TMP30]], double [[V2_LANE_2]], i32 5
-; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <8 x double> [[TMP31]], double [[V2_LANE_1]], i32 6
-; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <8 x double> [[TMP32]], double [[V2_LANE_0]], i32 7
-; CHECK-NEXT:    [[TMP34:%.*]] = fmul <8 x double> [[TMP25]], [[TMP33]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
+; CHECK-NEXT:    [[A_INS_73:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP12]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
+; CHECK-NEXT:    [[A_INS_8:%.*]] = insertelement <9 x double> [[A_INS_73]], double [[A_LANE_8]], i32 8
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x double> poison, double [[V1_LANE_7]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x double> [[TMP13]], double [[V1_LANE_6]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <8 x double> [[TMP14]], double [[V1_LANE_8]], i32 2
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x double> [[TMP15]], double [[V1_LANE_1]], i32 3
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x double> [[TMP16]], double [[V1_LANE_0]], i32 4
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <8 x double> [[TMP17]], double [[V1_LANE_3]], i32 5
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <8 x double> [[TMP18]], double [[V1_LANE_2]], i32 6
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <8 x double> [[TMP19]], double [[V1_LANE_5]], i32 7
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <8 x double> poison, double [[V2_LANE_2]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <8 x double> [[TMP21]], double [[V2_LANE_1]], i32 1
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <8 x double> [[TMP22]], double [[V2_LANE_0]], i32 2
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 2, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP24:%.*]] = fmul <8 x double> [[TMP20]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]]
-; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <8 x double> [[TMP34]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
-; CHECK-NEXT:    [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP35]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
+; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> undef, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef>
+; CHECK-NEXT:    [[B_INS_71:%.*]] = shufflevector <9 x double> undef, <9 x double> [[TMP25]], <9 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 8>
 ; CHECK-NEXT:    [[B_INS_8:%.*]] = insertelement <9 x double> [[B_INS_71]], double [[B_LANE_8]], i32 8
 ; CHECK-NEXT:    [[RES:%.*]] = fsub <9 x double> [[A_INS_8]], [[B_INS_8]]
 ; CHECK-NEXT:    store <9 x double> [[RES]], <9 x double>* [[PTR_1]], align 8

diff  --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
index acd02b342a5a0..66fc799bbbf67 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -19,7 +19,7 @@ define void @foo() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[ADD277]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP2]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> undef, [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> poison, [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[TMP7]], <i32 6, i32 6, i32 6, i32 6>
 ; CHECK-NEXT:    [[ARRAYIDX372_3:%.*]] = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @dct_luma, i64 0, i64 3, i64 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX372]] to <4 x i32>*

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
index 6109767563f05..edc27a635bd25 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR39774.ll
@@ -7,7 +7,7 @@ define void @Test(i32) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP10:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240, i32 1496, i32 8555, i32 12529, i32 13685>
@@ -39,22 +39,21 @@ define void @Test(i32) {
 ; CHECK-NEXT:    [[OP_EXTRA24:%.*]] = and i32 [[OP_EXTRA23]], [[TMP0]]
 ; CHECK-NEXT:    [[OP_EXTRA25:%.*]] = and i32 [[OP_EXTRA24]], [[TMP0]]
 ; CHECK-NEXT:    [[OP_EXTRA26:%.*]] = and i32 [[OP_EXTRA25]], [[TMP0]]
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[OP_EXTRA26]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TMP11]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i32> [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP11]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[OP_EXTRA26]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = and <2 x i32> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP5]], [[TMP7]]
+; CHECK-NEXT:    [[TMP10]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 ; FORCE_REDUCTION-LABEL: @Test(
 ; FORCE_REDUCTION-NEXT:  entry:
 ; FORCE_REDUCTION-NEXT:    br label [[LOOP:%.*]]
 ; FORCE_REDUCTION:       loop:
-; FORCE_REDUCTION-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
+; FORCE_REDUCTION-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP12:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
 ; FORCE_REDUCTION-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; FORCE_REDUCTION-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1
 ; FORCE_REDUCTION-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], <i32 0, i32 55, i32 285, i32 1240>
@@ -96,11 +95,10 @@ define void @Test(i32) {
 ; FORCE_REDUCTION-NEXT:    [[VAL_41:%.*]] = add i32 [[TMP2]], 13685
 ; FORCE_REDUCTION-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_40]], i32 0
 ; FORCE_REDUCTION-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
-; FORCE_REDUCTION-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[VAL_41]], i32 0
-; FORCE_REDUCTION-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 14910, i32 1
-; FORCE_REDUCTION-NEXT:    [[TMP11:%.*]] = and <2 x i32> [[TMP8]], [[TMP10]]
-; FORCE_REDUCTION-NEXT:    [[TMP12:%.*]] = add <2 x i32> [[TMP8]], [[TMP10]]
-; FORCE_REDUCTION-NEXT:    [[TMP13]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> <i32 0, i32 3>
+; FORCE_REDUCTION-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 14910>, i32 [[VAL_41]], i32 0
+; FORCE_REDUCTION-NEXT:    [[TMP10:%.*]] = and <2 x i32> [[TMP8]], [[TMP9]]
+; FORCE_REDUCTION-NEXT:    [[TMP11:%.*]] = add <2 x i32> [[TMP8]], [[TMP9]]
+; FORCE_REDUCTION-NEXT:    [[TMP12]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> <i32 0, i32 3>
 ; FORCE_REDUCTION-NEXT:    br label [[LOOP]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
index b1244a6d75a93..a8e19984499ad 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -67,9 +67,9 @@ define void @splat(i8 %a, i8 %b, i8 %c) {
 ; AVX-NEXT:    [[TMP14:%.*]] = insertelement <16 x i8> [[TMP13]], i8 [[C]], i32 13
 ; AVX-NEXT:    [[TMP15:%.*]] = insertelement <16 x i8> [[TMP14]], i8 [[C]], i32 14
 ; AVX-NEXT:    [[TMP16:%.*]] = insertelement <16 x i8> [[TMP15]], i8 [[C]], i32 15
-; AVX-NEXT:    [[TMP17:%.*]] = insertelement <2 x i8> poison, i8 [[A:%.*]], i32 0
-; AVX-NEXT:    [[TMP18:%.*]] = insertelement <2 x i8> [[TMP17]], i8 [[B:%.*]], i32 1
-; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i8> [[TMP18]], <2 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; AVX-NEXT:    [[TMP17:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
+; AVX-NEXT:    [[TMP18:%.*]] = insertelement <16 x i8> [[TMP17]], i8 [[B:%.*]], i32 1
+; AVX-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[TMP18]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 ; AVX-NEXT:    [[TMP19:%.*]] = xor <16 x i8> [[TMP16]], [[SHUFFLE]]
 ; AVX-NEXT:    store <16 x i8> [[TMP19]], <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16
 ; AVX-NEXT:    ret void

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
index 14d6920424e7c..8d66bc9565a03 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_exceed_scheduling.ll
@@ -35,9 +35,8 @@ define void @exceed(double %0, double %1) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul fast <2 x double> [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[IXX101:%.*]] = fsub double undef, undef
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x double> poison, double [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double undef, i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP1]], i32 1
-; CHECK-NEXT:    [[TMP16:%.*]] = fmul fast <2 x double> [[TMP14]], [[TMP15]]
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP15:%.*]] = fmul fast <2 x double> [[TMP13]], [[TMP14]]
 ; CHECK-NEXT:    switch i32 undef, label [[BB1:%.*]] [
 ; CHECK-NEXT:    i32 0, label [[BB2:%.*]]
 ; CHECK-NEXT:    ]
@@ -46,7 +45,7 @@ define void @exceed(double %0, double %1) {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    br label [[LABEL]]
 ; CHECK:       label:
-; CHECK-NEXT:    [[TMP17:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP16]], [[BB2]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = phi <2 x double> [ [[TMP12]], [[BB1]] ], [ [[TMP15]], [[BB2]] ]
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
index f932c0c3098e5..d4be8dc00581c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -131,10 +131,9 @@ define fastcc void @dct36(double* %inbuf) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double undef, i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP1]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
index e3ff057355537..da96683570501 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
@@ -97,10 +97,10 @@ define void @zot(%struct.hoge* %arg) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load double, double* undef, align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], poison
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[BB11:%.*]], label [[BB12:%.*]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index fbe784211b339..d75d9cbfcf56c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -31,15 +31,14 @@ define void @main() #0 {
 ; CHECK:       cond.false66.us:
 ; CHECK-NEXT:    [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[ADD_I276_US]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double undef, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 0.000000e+00, double 0xBFA5CC2D1960285F>
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 1.400000e+02, double 1.400000e+02>
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], <double 5.000000e+01, double 5.200000e+01>
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> undef, [[TMP2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[TMP0]], <double 0.000000e+00, double 0xBFA5CC2D1960285F>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.400000e+02, double 1.400000e+02>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> poison, [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast double* [[AGG_TMP101211_SROA_0_0_IDX]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[AGG_TMP101211_SROA_0_0_IDX]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP7]], align 8
 ; CHECK-NEXT:    unreachable
 ; CHECK:       cond.true63.us:
 ; CHECK-NEXT:    unreachable
@@ -111,14 +110,14 @@ define void @_Z8radianceRK3RayiPt() #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 undef, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
 ; CHECK:       if.then38:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> <double undef, double poison>, double undef, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> undef, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> undef, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> undef, [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> undef, [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> undef, [[TMP6]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double poison, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> poison, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> poison, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> poison, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> poison, [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd <2 x double> poison, [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <2 x double> poison, [[TMP6]]
 ; CHECK-NEXT:    [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY_5_11_53_95_137_191_197_203_239_257_263_269_275_281_287_293_383_437_443_455_461_599_601:%.*]], %struct.Ray.5.11.53.95.137.191.197.203.239.257.263.269.275.281.287.293.383.437.443.455.461.599.601* undef, i64 0, i32 1, i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[AGG_TMP74663_SROA_0_0_IDX]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
index 596543880d365..1ecda901cf1de 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
@@ -12,7 +12,7 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT19]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
 ; CHECK-NEXT:    [[C_022:%.*]] = phi i32* [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32*> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ poison, [[ENTRY]] ]
 ; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[C_022]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i32* [[C_022]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
index 50ad61a10a855..fb7178e0d76ef 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -16,9 +16,9 @@ target triple = "i386-apple-macosx10.9.0"
 define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[K:%.*]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_024:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD10:%.*]], [[FOR_BODY]] ]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
index 0f515ee146e09..9cef06325f414 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll
@@ -1334,8 +1334,8 @@ define i8 @umin_intrinsic_rdx_v16i8(i8* %p0) {
 
 define void @PR49730() {
 ; CHECK-LABEL: @PR49730(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 1, i32 1>)
-; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <4 x i32> poison, [[TMP1]]
 ; CHECK-NEXT:    [[T12:%.*]] = sub nsw i32 undef, undef
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP3]], i32 [[T12]])

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
index 7369570413c5f..17b6bd2384ae4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -11,12 +11,9 @@ define i32 @fn1() {
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 8, i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP6]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> <i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32), i32 8>, i32 [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
 ; CHECK-NEXT:    ret i32 0
 ;
   entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
index 02716b198187f..2069395d45e29 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-used-in-phi.ll
@@ -52,7 +52,7 @@ define void @phiUsingLoads(i32* noalias nocapture readonly %A, i32* noalias noca
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x i32> [ undef, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x i32> [ poison, [[ENTRY]] ], [ [[TMP26]], [[FOR_INC]] ]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
index c57f83ecb1869..2dbdad91764d6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -11,45 +11,29 @@ define i32 @bar() local_unnamed_addr {
 ; CHECK-NEXT:    [[ADD78_2:%.*]] = add nsw i32 undef, undef
 ; CHECK-NEXT:    [[SUB102_3:%.*]] = sub nsw i32 undef, undef
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_3]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 undef, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[SUB102_1]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 undef, i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 undef, i32 4
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 undef, i32 5
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 undef, i32 6
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 7
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD78_1]], i32 8
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB86_1]], i32 9
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 undef, i32 10
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[ADD78_2]], i32 11
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 undef, i32 12
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 undef, i32 13
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 undef, i32 14
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 undef, i32 15
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[SUB86_1]], i32 2
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 undef, i32 3
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 undef, i32 4
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 undef, i32 5
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 undef, i32 6
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[ADD78_1]], i32 7
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[ADD94_1]], i32 8
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SUB102_1]], i32 9
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[ADD78_2]], i32 10
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <16 x i32> [[TMP24]], i32 undef, i32 11
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <16 x i32> [[TMP25]], i32 undef, i32 12
-; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <16 x i32> [[TMP26]], i32 undef, i32 13
-; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <16 x i32> [[TMP27]], i32 undef, i32 14
-; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <16 x i32> [[TMP28]], i32 [[SUB102_3]], i32 15
-; CHECK-NEXT:    [[TMP30:%.*]] = add nsw <16 x i32> [[TMP15]], [[TMP29]]
-; CHECK-NEXT:    [[TMP31:%.*]] = sub nsw <16 x i32> [[TMP15]], [[TMP29]]
-; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i32> [[TMP30]], <16 x i32> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP33:%.*]] = lshr <16 x i32> [[TMP32]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP34:%.*]] = and <16 x i32> [[TMP33]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP35:%.*]] = mul nuw <16 x i32> [[TMP34]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
-; CHECK-NEXT:    [[TMP36:%.*]] = add <16 x i32> [[TMP35]], [[TMP32]]
-; CHECK-NEXT:    [[TMP37:%.*]] = xor <16 x i32> [[TMP36]], [[TMP35]]
-; CHECK-NEXT:    [[TMP38:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP37]])
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP38]], 16
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[SUB102_1]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD94_1]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[ADD78_2]], i32 4
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SUB102_3]], i32 5
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 5>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[TMP13:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31>
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr <16 x i32> [[TMP14]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP16:%.*]] = and <16 x i32> [[TMP15]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
+; CHECK-NEXT:    [[TMP17:%.*]] = mul nuw <16 x i32> [[TMP16]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP18:%.*]] = add <16 x i32> [[TMP17]], [[TMP14]]
+; CHECK-NEXT:    [[TMP19:%.*]] = xor <16 x i32> [[TMP18]], [[TMP17]]
+; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP19]])
+; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP20]], 16
 ; CHECK-NEXT:    [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]]
 ; CHECK-NEXT:    [[SHR120:%.*]] = lshr i32 [[ADD119]], 1
 ; CHECK-NEXT:    ret i32 [[SHR120]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
index bc8adeda08f4f..ce195e8d75242 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
@@ -13,31 +13,27 @@ define void @get_block(i32 %y_pos) local_unnamed_addr #0 {
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[SUB14:%.*]] = sub nsw i32 [[Y_POS:%.*]], undef
 ; CHECK-NEXT:    [[SHR15:%.*]] = ashr i32 [[SUB14]], 2
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[SHR15]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[SUB14]], i32 1
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], <i32 0, i32 -1, i32 -5, i32 -9>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[SHR15]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 undef, i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 undef, i32 2
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 undef, i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP6]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], undef
-; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> undef
-; CHECK-NEXT:    [[TMP10:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
-; CHECK-NEXT:    [[TMP11:%.*]] = trunc <4 x i64> [[TMP10]] to <4 x i32>
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], poison
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> poison
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64>
+; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
+; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
+; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2
 ; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP11]], i32 1
+; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
 ; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[TMP11]], i32 2
-; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3
-; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP19]]
+; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]]
 ; CHECK-NEXT:    unreachable
 ;
 entry:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
index b8e5c3e89f6f7..88825a7cf40f3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
@@ -14,12 +14,12 @@ define void @Rf_GReset() {
 ; CHECK-LABEL: @Rf_GReset(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @d, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
 ; CHECK-NEXT:    br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]]
@@ -55,12 +55,12 @@ define void @Rf_GReset_unary_fneg() {
 ; CHECK-LABEL: @Rf_GReset_unary_fneg(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, double* @d, align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[TMP0]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = fneg <2 x double> [[TMP1]]
 ; CHECK-NEXT:    br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], poison
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt double [[TMP5]], [[TMP6]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
index 0a752889065f0..a94ba522eaa92 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_landingpad.ll
@@ -12,12 +12,12 @@ define void @test_phi_in_landingpad() personality i8*
 ; CHECK-NEXT:    invoke void @foo()
 ; CHECK-NEXT:    to label [[DONE:%.*]] unwind label [[LPAD]]
 ; CHECK:       lpad:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ undef, [[ENTRY:%.*]] ], [ undef, [[INNER]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x double> [ poison, [[ENTRY:%.*]] ], [ poison, [[INNER]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT:    catch i8* null
 ; CHECK-NEXT:    br label [[DONE]]
 ; CHECK:       done:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ undef, [[INNER]] ], [ [[TMP0]], [[LPAD]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ poison, [[INNER]] ], [ [[TMP0]], [[LPAD]] ]
 ; CHECK-NEXT:    ret void
 ;
   bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
index e283628949107..8871133cdf876 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
@@ -103,11 +103,11 @@ define void @pr35497() local_unnamed_addr #0 {
 ; AVX-NEXT:    [[ADD:%.*]] = add i64 undef, undef
 ; AVX-NEXT:    store i64 [[ADD]], i64* undef, align 1
 ; AVX-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5
-; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> <i64 undef, i64 poison>, i64 [[TMP0]], i32 1
+; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i32 1
 ; AVX-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], <i64 2, i64 2>
 ; AVX-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 20, i64 20>
 ; AVX-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4
-; AVX-NEXT:    [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer
+; AVX-NEXT:    [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], poison
 ; AVX-NEXT:    [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1
 ; AVX-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
 ; AVX-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i32 0

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
index a39a00b1f019d..6307fd1a0543f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -88,8 +88,8 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[B:%.*]]
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[FNEG]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[C:%.*]], i32 1
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[FNEG]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B]], i32 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP1]], [[TMP3]]
@@ -137,8 +137,8 @@ define i1 @fcmp_lt(double %a, double %b, double %c) {
 ; CHECK-LABEL: @fcmp_lt(
 ; CHECK-NEXT:    [[FNEG:%.*]] = fneg double [[B:%.*]]
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[A:%.*]], 2.000000e+00
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[FNEG]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C:%.*]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[C:%.*]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[FNEG]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP2]], [[TMP4]]

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
index f1f0e2e9bb0f2..7bfee28d0310a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -32,12 +32,10 @@ define void @fextr(i16* %ptr) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[V0]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i16> [[TMP8]], i16 undef, i32 1
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP10:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
-; CHECK-NEXT:    store <8 x i16> [[TMP10]], <8 x i16>* [[TMP11]], align 2
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
+; CHECK-NEXT:    store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2
 ; CHECK-NEXT:    ret void
 ;
 ; YAML:      Pass:            slp-vectorizer

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
index 85cb70550d138..3cdb56d4545b2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -12,22 +12,21 @@ define void @hoge() {
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[T:%.*]] = select i1 undef, i16 undef, i16 15
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i16> poison, i16 [[T]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i16> [[TMP0]], i16 undef, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32>
-; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <2 x i32> <i32 undef, i32 63>, [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
-; CHECK-NEXT:    [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 15, i32 31, i32 47, i32 undef>
-; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
-; CHECK-NEXT:    [[T19:%.*]] = select i1 undef, i32 [[TMP6]], i32 undef
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i16> [[TMP0]] to <2 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> <i32 poison, i32 63>, [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], poison
+; CHECK-NEXT:    [[SHUFFLE10:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[SHUFFLE10]], <i32 15, i32 31, i32 47, i32 poison>
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
+; CHECK-NEXT:    [[T19:%.*]] = select i1 undef, i32 [[TMP5]], i32 undef
 ; CHECK-NEXT:    [[T20:%.*]] = icmp sgt i32 [[T19]], 63
-; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
-; CHECK-NEXT:    [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49, i32 -33, i32 -33, i32 -17>
-; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP10]], undef
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP10]], i32 undef
+; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <2 x i32> poison, [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sub <2 x i32> [[TMP6]], poison
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[SHUFFLE]], <i32 -49, i32 -33, i32 -33, i32 -17>
+; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]])
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP9]], undef
+; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP9]], i32 undef
 ; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = icmp slt i32 [[OP_EXTRA1]], undef
 ; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef
 ; CHECK-NEXT:    [[OP_EXTRA4:%.*]] = icmp slt i32 [[OP_EXTRA3]], undef

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
index ad6bbb3a42b15..9adc9fbdbd7de 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
@@ -15,9 +15,9 @@ define void @wombat(i32* %ptr, i32* %ptr1) {
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 4
 ; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 5
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> undef, <4 x i32> [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> poison, <4 x i32> [[SHUFFLE1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> poison, <4 x i32> zeroinitializer, <4 x i32> [[TMP4]]
 ; CHECK-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[PTR1]], i32 6
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP27]] to <4 x i32>*
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* [[TMP6]], align 8
@@ -66,7 +66,7 @@ define internal i32 @ipvideo_decode_block_opcode_0xD_16() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ undef, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i16> [ poison, [[ENTRY:%.*]] ], [ [[SHRINK_SHUFFLE:%.*]], [[IF_END:%.*]] ]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
index 5548a828b778a..3b3609826c3cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -279,9 +279,9 @@ define void @tiny_vector_gather(i32 *%a, i32 *%v1, i32 *%v2) {
 ; CHECK-NEXT:    [[PTR5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 5
 ; CHECK-NEXT:    [[PTR6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 6
 ; CHECK-NEXT:    [[PTR7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 7
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP2]], i32 1
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> poison, i32 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[TMP2]], i32 1
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[PTR0]] to <8 x i32>*
 ; CHECK-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP5]], align 16
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
index fc41ec9e93869..583a896374db4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug-inseltpoison.ll
@@ -13,27 +13,27 @@ define void @test() {
 ; CHECK-NEXT:  bb279:
 ; CHECK-NEXT:    br label [[BB283:%.*]]
 ; CHECK:       bb283:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ poison, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ poison, [[BB279]] ], [ poison, [[EXIT]] ]
 ; CHECK-NEXT:    br label [[BB284:%.*]]
 ; CHECK:       bb284:
 ; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison
 ; CHECK-NEXT:    br label [[BB21_I:%.*]]
 ; CHECK:       bb21.i:
 ; CHECK-NEXT:    br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
 ; CHECK:       bb22.i:
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]]
 ; CHECK-NEXT:    br label [[BB32_I:%.*]]
 ; CHECK:       bb32.i:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
 ; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double undef, double 0.000000e+00>
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double poison, double 0.000000e+00>
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> poison, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], poison
 ; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
 ; CHECK-NEXT:    br label [[BB283]]
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
index cdde0971b6df6..24d97c32b636d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/value-bug.ll
@@ -13,27 +13,27 @@ define void @test() {
 ; CHECK-NEXT:  bb279:
 ; CHECK-NEXT:    br label [[BB283:%.*]]
 ; CHECK:       bb283:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ undef, [[EXIT]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ poison, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ poison, [[BB279]] ], [ poison, [[EXIT]] ]
 ; CHECK-NEXT:    br label [[BB284:%.*]]
 ; CHECK:       bb284:
 ; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], poison
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], poison
 ; CHECK-NEXT:    br label [[BB21_I:%.*]]
 ; CHECK:       bb21.i:
 ; CHECK-NEXT:    br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
 ; CHECK:       bb22.i:
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> poison, [[TMP4]]
 ; CHECK-NEXT:    br label [[BB32_I:%.*]]
 ; CHECK:       bb32.i:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
 ; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double undef, double 0.000000e+00>
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], undef
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], <double poison, double 0.000000e+00>
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> poison, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], poison
 ; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
 ; CHECK-NEXT:    br label [[BB283]]
 ;

diff  --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
index 01f4e9cb23ed2..22d6892a0f25a 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
@@ -166,48 +166,43 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX256-NEXT:    [[TMP30:%.*]] = fadd <8 x float> zeroinitializer, [[TMP29]]
 ; MAX256-NEXT:    [[TMP31:%.*]] = fmul <8 x float> [[TMP26]], [[TMP12]]
 ; MAX256-NEXT:    [[TMP32:%.*]] = fadd <8 x float> zeroinitializer, [[TMP31]]
-; MAX256-NEXT:    [[TMP33:%.*]] = extractelement <8 x float> [[TMP14]], i32 0
-; MAX256-NEXT:    [[TMP34:%.*]] = insertelement <8 x float> poison, float [[TMP33]], i32 0
-; MAX256-NEXT:    [[TMP35:%.*]] = extractelement <8 x float> [[TMP14]], i32 1
-; MAX256-NEXT:    [[TMP36:%.*]] = insertelement <8 x float> [[TMP34]], float [[TMP35]], i32 1
-; MAX256-NEXT:    [[TMP37:%.*]] = insertelement <8 x float> [[TMP36]], float [[FVAL]], i32 2
-; MAX256-NEXT:    [[TMP38:%.*]] = insertelement <8 x float> [[TMP37]], float [[FVAL]], i32 3
-; MAX256-NEXT:    [[TMP39:%.*]] = extractelement <8 x float> [[TMP14]], i32 4
-; MAX256-NEXT:    [[TMP40:%.*]] = insertelement <8 x float> [[TMP38]], float [[TMP39]], i32 4
-; MAX256-NEXT:    [[TMP41:%.*]] = extractelement <8 x float> [[TMP14]], i32 5
-; MAX256-NEXT:    [[TMP42:%.*]] = insertelement <8 x float> [[TMP40]], float [[TMP41]], i32 5
-; MAX256-NEXT:    [[TMP43:%.*]] = insertelement <8 x float> [[TMP42]], float [[FVAL]], i32 6
-; MAX256-NEXT:    [[TMP44:%.*]] = insertelement <8 x float> [[TMP43]], float [[FVAL]], i32 7
-; MAX256-NEXT:    [[TMP45:%.*]] = extractelement <8 x float> [[TMP28]], i32 2
-; MAX256-NEXT:    [[TMP46:%.*]] = insertelement <8 x float> [[TMP6]], float [[TMP45]], i32 2
-; MAX256-NEXT:    [[TMP47:%.*]] = extractelement <8 x float> [[TMP28]], i32 3
-; MAX256-NEXT:    [[TMP48:%.*]] = insertelement <8 x float> [[TMP46]], float [[TMP47]], i32 3
-; MAX256-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP48]], float [[FVAL]], i32 4
-; MAX256-NEXT:    [[TMP50:%.*]] = insertelement <8 x float> [[TMP49]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP51:%.*]] = extractelement <8 x float> [[TMP28]], i32 6
-; MAX256-NEXT:    [[TMP52:%.*]] = insertelement <8 x float> [[TMP50]], float [[TMP51]], i32 6
-; MAX256-NEXT:    [[TMP53:%.*]] = extractelement <8 x float> [[TMP28]], i32 7
-; MAX256-NEXT:    [[TMP54:%.*]] = insertelement <8 x float> [[TMP52]], float [[TMP53]], i32 7
-; MAX256-NEXT:    [[TMP55:%.*]] = extractelement <8 x float> [[TMP30]], i32 2
-; MAX256-NEXT:    [[TMP56:%.*]] = insertelement <8 x float> [[TMP6]], float [[TMP55]], i32 2
-; MAX256-NEXT:    [[TMP57:%.*]] = extractelement <8 x float> [[TMP30]], i32 3
-; MAX256-NEXT:    [[TMP58:%.*]] = insertelement <8 x float> [[TMP56]], float [[TMP57]], i32 3
-; MAX256-NEXT:    [[TMP59:%.*]] = insertelement <8 x float> [[TMP58]], float [[FVAL]], i32 4
-; MAX256-NEXT:    [[TMP60:%.*]] = insertelement <8 x float> [[TMP59]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP61:%.*]] = extractelement <8 x float> [[TMP30]], i32 6
-; MAX256-NEXT:    [[TMP62:%.*]] = insertelement <8 x float> [[TMP60]], float [[TMP61]], i32 6
-; MAX256-NEXT:    [[TMP63:%.*]] = extractelement <8 x float> [[TMP30]], i32 7
-; MAX256-NEXT:    [[TMP64:%.*]] = insertelement <8 x float> [[TMP62]], float [[TMP63]], i32 7
-; MAX256-NEXT:    [[TMP65:%.*]] = extractelement <8 x float> [[TMP32]], i32 2
-; MAX256-NEXT:    [[TMP66:%.*]] = insertelement <8 x float> [[TMP6]], float [[TMP65]], i32 2
-; MAX256-NEXT:    [[TMP67:%.*]] = extractelement <8 x float> [[TMP32]], i32 3
-; MAX256-NEXT:    [[TMP68:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP67]], i32 3
-; MAX256-NEXT:    [[TMP69:%.*]] = insertelement <8 x float> [[TMP68]], float [[FVAL]], i32 4
-; MAX256-NEXT:    [[TMP70:%.*]] = insertelement <8 x float> [[TMP69]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP71:%.*]] = extractelement <8 x float> [[TMP32]], i32 6
-; MAX256-NEXT:    [[TMP72:%.*]] = insertelement <8 x float> [[TMP70]], float [[TMP71]], i32 6
-; MAX256-NEXT:    [[TMP73:%.*]] = extractelement <8 x float> [[TMP32]], i32 7
-; MAX256-NEXT:    [[TMP74:%.*]] = insertelement <8 x float> [[TMP72]], float [[TMP73]], i32 7
+; MAX256-NEXT:    [[TMP33:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 2
+; MAX256-NEXT:    [[TMP34:%.*]] = extractelement <8 x float> [[TMP14]], i32 0
+; MAX256-NEXT:    [[TMP35:%.*]] = insertelement <8 x float> [[TMP33]], float [[TMP34]], i32 0
+; MAX256-NEXT:    [[TMP36:%.*]] = extractelement <8 x float> [[TMP14]], i32 1
+; MAX256-NEXT:    [[TMP37:%.*]] = insertelement <8 x float> [[TMP35]], float [[TMP36]], i32 1
+; MAX256-NEXT:    [[TMP38:%.*]] = extractelement <8 x float> [[TMP14]], i32 4
+; MAX256-NEXT:    [[TMP39:%.*]] = insertelement <8 x float> [[TMP37]], float [[TMP38]], i32 3
+; MAX256-NEXT:    [[TMP40:%.*]] = extractelement <8 x float> [[TMP14]], i32 5
+; MAX256-NEXT:    [[TMP41:%.*]] = insertelement <8 x float> [[TMP39]], float [[TMP40]], i32 4
+; MAX256-NEXT:    [[SHUFFLE3:%.*]] = shufflevector <8 x float> [[TMP41]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 3, i32 4, i32 2, i32 2>
+; MAX256-NEXT:    [[TMP42:%.*]] = extractelement <8 x float> [[TMP28]], i32 2
+; MAX256-NEXT:    [[TMP43:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP42]], i32 1
+; MAX256-NEXT:    [[TMP44:%.*]] = extractelement <8 x float> [[TMP28]], i32 3
+; MAX256-NEXT:    [[TMP45:%.*]] = insertelement <8 x float> [[TMP43]], float [[TMP44]], i32 2
+; MAX256-NEXT:    [[TMP46:%.*]] = extractelement <8 x float> [[TMP28]], i32 6
+; MAX256-NEXT:    [[TMP47:%.*]] = insertelement <8 x float> [[TMP45]], float [[TMP46]], i32 3
+; MAX256-NEXT:    [[TMP48:%.*]] = extractelement <8 x float> [[TMP28]], i32 7
+; MAX256-NEXT:    [[TMP49:%.*]] = insertelement <8 x float> [[TMP47]], float [[TMP48]], i32 4
+; MAX256-NEXT:    [[SHUFFLE6:%.*]] = shufflevector <8 x float> [[TMP49]], <8 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 0, i32 0, i32 3, i32 4>
+; MAX256-NEXT:    [[TMP50:%.*]] = extractelement <8 x float> [[TMP30]], i32 2
+; MAX256-NEXT:    [[TMP51:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP50]], i32 1
+; MAX256-NEXT:    [[TMP52:%.*]] = extractelement <8 x float> [[TMP30]], i32 3
+; MAX256-NEXT:    [[TMP53:%.*]] = insertelement <8 x float> [[TMP51]], float [[TMP52]], i32 2
+; MAX256-NEXT:    [[TMP54:%.*]] = extractelement <8 x float> [[TMP30]], i32 6
+; MAX256-NEXT:    [[TMP55:%.*]] = insertelement <8 x float> [[TMP53]], float [[TMP54]], i32 3
+; MAX256-NEXT:    [[TMP56:%.*]] = extractelement <8 x float> [[TMP30]], i32 7
+; MAX256-NEXT:    [[TMP57:%.*]] = insertelement <8 x float> [[TMP55]], float [[TMP56]], i32 4
+; MAX256-NEXT:    [[SHUFFLE9:%.*]] = shufflevector <8 x float> [[TMP57]], <8 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 0, i32 0, i32 3, i32 4>
+; MAX256-NEXT:    [[TMP58:%.*]] = extractelement <8 x float> [[TMP32]], i32 2
+; MAX256-NEXT:    [[TMP59:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP58]], i32 1
+; MAX256-NEXT:    [[TMP60:%.*]] = extractelement <8 x float> [[TMP32]], i32 3
+; MAX256-NEXT:    [[TMP61:%.*]] = insertelement <8 x float> [[TMP59]], float [[TMP60]], i32 2
+; MAX256-NEXT:    [[TMP62:%.*]] = extractelement <8 x float> [[TMP32]], i32 6
+; MAX256-NEXT:    [[TMP63:%.*]] = insertelement <8 x float> [[TMP61]], float [[TMP62]], i32 3
+; MAX256-NEXT:    [[TMP64:%.*]] = extractelement <8 x float> [[TMP32]], i32 7
+; MAX256-NEXT:    [[TMP65:%.*]] = insertelement <8 x float> [[TMP63]], float [[TMP64]], i32 4
+; MAX256-NEXT:    [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP65]], <8 x float> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 2, i32 0, i32 0, i32 3, i32 4>
 ; MAX256-NEXT:    switch i32 undef, label [[BB5:%.*]] [
 ; MAX256-NEXT:    i32 0, label [[BB2:%.*]]
 ; MAX256-NEXT:    i32 1, label [[BB3:%.*]]
@@ -216,94 +211,74 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX256:       bb3:
 ; MAX256-NEXT:    br label [[BB2]]
 ; MAX256:       bb4:
-; MAX256-NEXT:    [[TMP75:%.*]] = insertelement <8 x float> [[TMP34]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP76:%.*]] = insertelement <8 x float> [[TMP75]], float [[FVAL]], i32 2
-; MAX256-NEXT:    [[TMP77:%.*]] = extractelement <8 x float> [[TMP14]], i32 3
-; MAX256-NEXT:    [[TMP78:%.*]] = insertelement <8 x float> [[TMP76]], float [[TMP77]], i32 3
-; MAX256-NEXT:    [[TMP79:%.*]] = insertelement <8 x float> [[TMP78]], float [[TMP39]], i32 4
-; MAX256-NEXT:    [[TMP80:%.*]] = insertelement <8 x float> [[TMP79]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP81:%.*]] = insertelement <8 x float> [[TMP80]], float [[FVAL]], i32 6
-; MAX256-NEXT:    [[TMP82:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
-; MAX256-NEXT:    [[TMP83:%.*]] = insertelement <8 x float> [[TMP81]], float [[TMP82]], i32 7
-; MAX256-NEXT:    [[TMP84:%.*]] = extractelement <8 x float> [[TMP28]], i32 0
-; MAX256-NEXT:    [[TMP85:%.*]] = insertelement <8 x float> poison, float [[TMP84]], i32 0
-; MAX256-NEXT:    [[TMP86:%.*]] = insertelement <8 x float> [[TMP85]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP87:%.*]] = insertelement <8 x float> [[TMP86]], float [[FVAL]], i32 2
-; MAX256-NEXT:    [[TMP88:%.*]] = insertelement <8 x float> [[TMP87]], float [[TMP47]], i32 3
-; MAX256-NEXT:    [[TMP89:%.*]] = extractelement <8 x float> [[TMP28]], i32 4
-; MAX256-NEXT:    [[TMP90:%.*]] = insertelement <8 x float> [[TMP88]], float [[TMP89]], i32 4
-; MAX256-NEXT:    [[TMP91:%.*]] = insertelement <8 x float> [[TMP90]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP92:%.*]] = insertelement <8 x float> [[TMP91]], float [[FVAL]], i32 6
-; MAX256-NEXT:    [[TMP93:%.*]] = insertelement <8 x float> [[TMP92]], float [[TMP53]], i32 7
-; MAX256-NEXT:    [[TMP94:%.*]] = extractelement <8 x float> [[TMP30]], i32 0
-; MAX256-NEXT:    [[TMP95:%.*]] = insertelement <8 x float> poison, float [[TMP94]], i32 0
-; MAX256-NEXT:    [[TMP96:%.*]] = insertelement <8 x float> [[TMP95]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP97:%.*]] = insertelement <8 x float> [[TMP96]], float [[FVAL]], i32 2
-; MAX256-NEXT:    [[TMP98:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP57]], i32 3
-; MAX256-NEXT:    [[TMP99:%.*]] = extractelement <8 x float> [[TMP30]], i32 4
-; MAX256-NEXT:    [[TMP100:%.*]] = insertelement <8 x float> [[TMP98]], float [[TMP99]], i32 4
-; MAX256-NEXT:    [[TMP101:%.*]] = insertelement <8 x float> [[TMP100]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP102:%.*]] = insertelement <8 x float> [[TMP101]], float [[FVAL]], i32 6
-; MAX256-NEXT:    [[TMP103:%.*]] = insertelement <8 x float> [[TMP102]], float [[TMP63]], i32 7
-; MAX256-NEXT:    [[TMP104:%.*]] = extractelement <8 x float> [[TMP32]], i32 0
-; MAX256-NEXT:    [[TMP105:%.*]] = insertelement <8 x float> poison, float [[TMP104]], i32 0
-; MAX256-NEXT:    [[TMP106:%.*]] = insertelement <8 x float> [[TMP105]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP107:%.*]] = insertelement <8 x float> [[TMP106]], float [[FVAL]], i32 2
-; MAX256-NEXT:    [[TMP108:%.*]] = insertelement <8 x float> [[TMP107]], float [[TMP67]], i32 3
-; MAX256-NEXT:    [[TMP109:%.*]] = extractelement <8 x float> [[TMP32]], i32 4
-; MAX256-NEXT:    [[TMP110:%.*]] = insertelement <8 x float> [[TMP108]], float [[TMP109]], i32 4
-; MAX256-NEXT:    [[TMP111:%.*]] = insertelement <8 x float> [[TMP110]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP112:%.*]] = insertelement <8 x float> [[TMP111]], float [[FVAL]], i32 6
-; MAX256-NEXT:    [[TMP113:%.*]] = insertelement <8 x float> [[TMP112]], float [[TMP73]], i32 7
+; MAX256-NEXT:    [[TMP66:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 1
+; MAX256-NEXT:    [[TMP67:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP34]], i32 0
+; MAX256-NEXT:    [[TMP68:%.*]] = extractelement <8 x float> [[TMP14]], i32 3
+; MAX256-NEXT:    [[TMP69:%.*]] = insertelement <8 x float> [[TMP67]], float [[TMP68]], i32 2
+; MAX256-NEXT:    [[TMP70:%.*]] = insertelement <8 x float> [[TMP69]], float [[TMP38]], i32 3
+; MAX256-NEXT:    [[TMP71:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
+; MAX256-NEXT:    [[TMP72:%.*]] = insertelement <8 x float> [[TMP70]], float [[TMP71]], i32 4
+; MAX256-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <8 x float> [[TMP72]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 4>
+; MAX256-NEXT:    [[TMP73:%.*]] = extractelement <8 x float> [[TMP28]], i32 0
+; MAX256-NEXT:    [[TMP74:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP73]], i32 0
+; MAX256-NEXT:    [[TMP75:%.*]] = insertelement <8 x float> [[TMP74]], float [[TMP44]], i32 2
+; MAX256-NEXT:    [[TMP76:%.*]] = extractelement <8 x float> [[TMP28]], i32 4
+; MAX256-NEXT:    [[TMP77:%.*]] = insertelement <8 x float> [[TMP75]], float [[TMP76]], i32 3
+; MAX256-NEXT:    [[TMP78:%.*]] = insertelement <8 x float> [[TMP77]], float [[TMP48]], i32 4
+; MAX256-NEXT:    [[SHUFFLE4:%.*]] = shufflevector <8 x float> [[TMP78]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 4>
+; MAX256-NEXT:    [[TMP79:%.*]] = extractelement <8 x float> [[TMP30]], i32 0
+; MAX256-NEXT:    [[TMP80:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP79]], i32 0
+; MAX256-NEXT:    [[TMP81:%.*]] = insertelement <8 x float> [[TMP80]], float [[TMP52]], i32 2
+; MAX256-NEXT:    [[TMP82:%.*]] = extractelement <8 x float> [[TMP30]], i32 4
+; MAX256-NEXT:    [[TMP83:%.*]] = insertelement <8 x float> [[TMP81]], float [[TMP82]], i32 3
+; MAX256-NEXT:    [[TMP84:%.*]] = insertelement <8 x float> [[TMP83]], float [[TMP56]], i32 4
+; MAX256-NEXT:    [[SHUFFLE7:%.*]] = shufflevector <8 x float> [[TMP84]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 4>
+; MAX256-NEXT:    [[TMP85:%.*]] = extractelement <8 x float> [[TMP32]], i32 0
+; MAX256-NEXT:    [[TMP86:%.*]] = insertelement <8 x float> [[TMP66]], float [[TMP85]], i32 0
+; MAX256-NEXT:    [[TMP87:%.*]] = insertelement <8 x float> [[TMP86]], float [[TMP60]], i32 2
+; MAX256-NEXT:    [[TMP88:%.*]] = extractelement <8 x float> [[TMP32]], i32 4
+; MAX256-NEXT:    [[TMP89:%.*]] = insertelement <8 x float> [[TMP87]], float [[TMP88]], i32 3
+; MAX256-NEXT:    [[TMP90:%.*]] = insertelement <8 x float> [[TMP89]], float [[TMP64]], i32 4
+; MAX256-NEXT:    [[SHUFFLE10:%.*]] = shufflevector <8 x float> [[TMP90]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 4>
 ; MAX256-NEXT:    br label [[BB2]]
 ; MAX256:       bb5:
-; MAX256-NEXT:    [[TMP114:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP35]], i32 1
-; MAX256-NEXT:    [[TMP115:%.*]] = insertelement <8 x float> [[TMP114]], float [[FVAL]], i32 2
-; MAX256-NEXT:    [[TMP116:%.*]] = extractelement <8 x float> [[TMP14]], i32 3
-; MAX256-NEXT:    [[TMP117:%.*]] = insertelement <8 x float> [[TMP115]], float [[TMP116]], i32 3
-; MAX256-NEXT:    [[TMP118:%.*]] = insertelement <8 x float> [[TMP117]], float [[FVAL]], i32 4
-; MAX256-NEXT:    [[TMP119:%.*]] = insertelement <8 x float> [[TMP118]], float [[TMP41]], i32 5
-; MAX256-NEXT:    [[TMP120:%.*]] = insertelement <8 x float> [[TMP119]], float [[FVAL]], i32 6
-; MAX256-NEXT:    [[TMP121:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
-; MAX256-NEXT:    [[TMP122:%.*]] = insertelement <8 x float> [[TMP120]], float [[TMP121]], i32 7
-; MAX256-NEXT:    [[TMP123:%.*]] = extractelement <8 x float> [[TMP28]], i32 0
-; MAX256-NEXT:    [[TMP124:%.*]] = insertelement <8 x float> poison, float [[TMP123]], i32 0
-; MAX256-NEXT:    [[TMP125:%.*]] = insertelement <8 x float> [[TMP124]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP126:%.*]] = insertelement <8 x float> [[TMP125]], float [[TMP45]], i32 2
-; MAX256-NEXT:    [[TMP127:%.*]] = insertelement <8 x float> [[TMP126]], float [[FVAL]], i32 3
-; MAX256-NEXT:    [[TMP128:%.*]] = extractelement <8 x float> [[TMP28]], i32 4
-; MAX256-NEXT:    [[TMP129:%.*]] = insertelement <8 x float> [[TMP127]], float [[TMP128]], i32 4
-; MAX256-NEXT:    [[TMP130:%.*]] = insertelement <8 x float> [[TMP129]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP131:%.*]] = insertelement <8 x float> [[TMP130]], float [[TMP51]], i32 6
-; MAX256-NEXT:    [[TMP132:%.*]] = insertelement <8 x float> [[TMP131]], float [[FVAL]], i32 7
-; MAX256-NEXT:    [[TMP133:%.*]] = extractelement <8 x float> [[TMP30]], i32 0
-; MAX256-NEXT:    [[TMP134:%.*]] = insertelement <8 x float> poison, float [[TMP133]], i32 0
-; MAX256-NEXT:    [[TMP135:%.*]] = insertelement <8 x float> [[TMP134]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP136:%.*]] = insertelement <8 x float> [[TMP135]], float [[TMP55]], i32 2
-; MAX256-NEXT:    [[TMP137:%.*]] = insertelement <8 x float> [[TMP136]], float [[FVAL]], i32 3
-; MAX256-NEXT:    [[TMP138:%.*]] = extractelement <8 x float> [[TMP30]], i32 4
-; MAX256-NEXT:    [[TMP139:%.*]] = insertelement <8 x float> [[TMP137]], float [[TMP138]], i32 4
-; MAX256-NEXT:    [[TMP140:%.*]] = insertelement <8 x float> [[TMP139]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP141:%.*]] = insertelement <8 x float> [[TMP140]], float [[TMP61]], i32 6
-; MAX256-NEXT:    [[TMP142:%.*]] = insertelement <8 x float> [[TMP141]], float [[FVAL]], i32 7
-; MAX256-NEXT:    [[TMP143:%.*]] = extractelement <8 x float> [[TMP32]], i32 0
-; MAX256-NEXT:    [[TMP144:%.*]] = insertelement <8 x float> poison, float [[TMP143]], i32 0
-; MAX256-NEXT:    [[TMP145:%.*]] = insertelement <8 x float> [[TMP144]], float [[FVAL]], i32 1
-; MAX256-NEXT:    [[TMP146:%.*]] = insertelement <8 x float> [[TMP145]], float [[TMP65]], i32 2
-; MAX256-NEXT:    [[TMP147:%.*]] = insertelement <8 x float> [[TMP146]], float [[FVAL]], i32 3
-; MAX256-NEXT:    [[TMP148:%.*]] = extractelement <8 x float> [[TMP32]], i32 4
-; MAX256-NEXT:    [[TMP149:%.*]] = insertelement <8 x float> [[TMP147]], float [[TMP148]], i32 4
-; MAX256-NEXT:    [[TMP150:%.*]] = insertelement <8 x float> [[TMP149]], float [[FVAL]], i32 5
-; MAX256-NEXT:    [[TMP151:%.*]] = insertelement <8 x float> [[TMP150]], float [[TMP71]], i32 6
-; MAX256-NEXT:    [[TMP152:%.*]] = insertelement <8 x float> [[TMP151]], float [[FVAL]], i32 7
+; MAX256-NEXT:    [[TMP91:%.*]] = insertelement <8 x float> [[TMP5]], float [[TMP36]], i32 1
+; MAX256-NEXT:    [[TMP92:%.*]] = extractelement <8 x float> [[TMP14]], i32 3
+; MAX256-NEXT:    [[TMP93:%.*]] = insertelement <8 x float> [[TMP91]], float [[TMP92]], i32 2
+; MAX256-NEXT:    [[TMP94:%.*]] = insertelement <8 x float> [[TMP93]], float [[TMP40]], i32 3
+; MAX256-NEXT:    [[TMP95:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
+; MAX256-NEXT:    [[TMP96:%.*]] = insertelement <8 x float> [[TMP94]], float [[TMP95]], i32 4
+; MAX256-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <8 x float> [[TMP96]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 2, i32 0, i32 3, i32 0, i32 4>
+; MAX256-NEXT:    [[TMP97:%.*]] = insertelement <8 x float> poison, float [[FVAL]], i32 1
+; MAX256-NEXT:    [[TMP98:%.*]] = extractelement <8 x float> [[TMP28]], i32 0
+; MAX256-NEXT:    [[TMP99:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP98]], i32 0
+; MAX256-NEXT:    [[TMP100:%.*]] = insertelement <8 x float> [[TMP99]], float [[TMP42]], i32 2
+; MAX256-NEXT:    [[TMP101:%.*]] = extractelement <8 x float> [[TMP28]], i32 4
+; MAX256-NEXT:    [[TMP102:%.*]] = insertelement <8 x float> [[TMP100]], float [[TMP101]], i32 3
+; MAX256-NEXT:    [[TMP103:%.*]] = insertelement <8 x float> [[TMP102]], float [[TMP46]], i32 4
+; MAX256-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP103]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 3, i32 1, i32 4, i32 1>
+; MAX256-NEXT:    [[TMP104:%.*]] = extractelement <8 x float> [[TMP30]], i32 0
+; MAX256-NEXT:    [[TMP105:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP104]], i32 0
+; MAX256-NEXT:    [[TMP106:%.*]] = insertelement <8 x float> [[TMP105]], float [[TMP50]], i32 2
+; MAX256-NEXT:    [[TMP107:%.*]] = extractelement <8 x float> [[TMP30]], i32 4
+; MAX256-NEXT:    [[TMP108:%.*]] = insertelement <8 x float> [[TMP106]], float [[TMP107]], i32 3
+; MAX256-NEXT:    [[TMP109:%.*]] = insertelement <8 x float> [[TMP108]], float [[TMP54]], i32 4
+; MAX256-NEXT:    [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP109]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 3, i32 1, i32 4, i32 1>
+; MAX256-NEXT:    [[TMP110:%.*]] = extractelement <8 x float> [[TMP32]], i32 0
+; MAX256-NEXT:    [[TMP111:%.*]] = insertelement <8 x float> [[TMP97]], float [[TMP110]], i32 0
+; MAX256-NEXT:    [[TMP112:%.*]] = insertelement <8 x float> [[TMP111]], float [[TMP58]], i32 2
+; MAX256-NEXT:    [[TMP113:%.*]] = extractelement <8 x float> [[TMP32]], i32 4
+; MAX256-NEXT:    [[TMP114:%.*]] = insertelement <8 x float> [[TMP112]], float [[TMP113]], i32 3
+; MAX256-NEXT:    [[TMP115:%.*]] = insertelement <8 x float> [[TMP114]], float [[TMP62]], i32 4
+; MAX256-NEXT:    [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP115]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 1, i32 3, i32 1, i32 4, i32 1>
 ; MAX256-NEXT:    br label [[BB2]]
 ; MAX256:       bb2:
-; MAX256-NEXT:    [[TMP153:%.*]] = phi <8 x float> [ [[TMP14]], [[BB3]] ], [ [[TMP83]], [[BB4]] ], [ [[TMP122]], [[BB5]] ], [ [[TMP44]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP154:%.*]] = phi <8 x float> [ [[TMP28]], [[BB3]] ], [ [[TMP93]], [[BB4]] ], [ [[TMP132]], [[BB5]] ], [ [[TMP54]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP155:%.*]] = phi <8 x float> [ [[TMP30]], [[BB3]] ], [ [[TMP103]], [[BB4]] ], [ [[TMP142]], [[BB5]] ], [ [[TMP64]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP156:%.*]] = phi <8 x float> [ [[TMP32]], [[BB3]] ], [ [[TMP113]], [[BB4]] ], [ [[TMP152]], [[BB5]] ], [ [[TMP74]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP157:%.*]] = extractelement <8 x float> [[TMP156]], i32 6
-; MAX256-NEXT:    store float [[TMP157]], float* undef, align 4
+; MAX256-NEXT:    [[TMP116:%.*]] = phi <8 x float> [ [[TMP14]], [[BB3]] ], [ [[SHUFFLE1]], [[BB4]] ], [ [[SHUFFLE2]], [[BB5]] ], [ [[SHUFFLE3]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP117:%.*]] = phi <8 x float> [ [[TMP28]], [[BB3]] ], [ [[SHUFFLE4]], [[BB4]] ], [ [[SHUFFLE5]], [[BB5]] ], [ [[SHUFFLE6]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP118:%.*]] = phi <8 x float> [ [[TMP30]], [[BB3]] ], [ [[SHUFFLE7]], [[BB4]] ], [ [[SHUFFLE8]], [[BB5]] ], [ [[SHUFFLE9]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP119:%.*]] = phi <8 x float> [ [[TMP32]], [[BB3]] ], [ [[SHUFFLE10]], [[BB4]] ], [ [[SHUFFLE11]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP120:%.*]] = extractelement <8 x float> [[TMP119]], i32 6
+; MAX256-NEXT:    store float [[TMP120]], float* undef, align 4
 ; MAX256-NEXT:    ret void
 ;
 ; MAX1024-LABEL: @phi_float32(
@@ -350,54 +325,40 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX1024-NEXT:    [[TMP36:%.*]] = insertelement <32 x float> [[TMP35]], float [[FVAL]], i32 31
 ; MAX1024-NEXT:    [[TMP37:%.*]] = fmul <32 x float> [[SHUFFLE]], [[TMP36]]
 ; MAX1024-NEXT:    [[TMP38:%.*]] = fadd <32 x float> zeroinitializer, [[TMP37]]
-; MAX1024-NEXT:    [[TMP39:%.*]] = extractelement <32 x float> [[TMP38]], i32 0
-; MAX1024-NEXT:    [[TMP40:%.*]] = insertelement <32 x float> poison, float [[TMP39]], i32 0
-; MAX1024-NEXT:    [[TMP41:%.*]] = extractelement <32 x float> [[TMP38]], i32 1
-; MAX1024-NEXT:    [[TMP42:%.*]] = insertelement <32 x float> [[TMP40]], float [[TMP41]], i32 1
-; MAX1024-NEXT:    [[TMP43:%.*]] = insertelement <32 x float> [[TMP42]], float [[FVAL]], i32 2
-; MAX1024-NEXT:    [[TMP44:%.*]] = insertelement <32 x float> [[TMP43]], float [[FVAL]], i32 3
-; MAX1024-NEXT:    [[TMP45:%.*]] = extractelement <32 x float> [[TMP38]], i32 4
-; MAX1024-NEXT:    [[TMP46:%.*]] = insertelement <32 x float> [[TMP44]], float [[TMP45]], i32 4
-; MAX1024-NEXT:    [[TMP47:%.*]] = extractelement <32 x float> [[TMP38]], i32 5
-; MAX1024-NEXT:    [[TMP48:%.*]] = insertelement <32 x float> [[TMP46]], float [[TMP47]], i32 5
-; MAX1024-NEXT:    [[TMP49:%.*]] = insertelement <32 x float> [[TMP48]], float [[FVAL]], i32 6
-; MAX1024-NEXT:    [[TMP50:%.*]] = insertelement <32 x float> [[TMP49]], float [[FVAL]], i32 7
-; MAX1024-NEXT:    [[TMP51:%.*]] = insertelement <32 x float> [[TMP50]], float [[FVAL]], i32 8
-; MAX1024-NEXT:    [[TMP52:%.*]] = insertelement <32 x float> [[TMP51]], float [[FVAL]], i32 9
-; MAX1024-NEXT:    [[TMP53:%.*]] = extractelement <32 x float> [[TMP38]], i32 10
-; MAX1024-NEXT:    [[TMP54:%.*]] = insertelement <32 x float> [[TMP52]], float [[TMP53]], i32 10
-; MAX1024-NEXT:    [[TMP55:%.*]] = extractelement <32 x float> [[TMP38]], i32 11
-; MAX1024-NEXT:    [[TMP56:%.*]] = insertelement <32 x float> [[TMP54]], float [[TMP55]], i32 11
-; MAX1024-NEXT:    [[TMP57:%.*]] = insertelement <32 x float> [[TMP56]], float [[FVAL]], i32 12
-; MAX1024-NEXT:    [[TMP58:%.*]] = insertelement <32 x float> [[TMP57]], float [[FVAL]], i32 13
-; MAX1024-NEXT:    [[TMP59:%.*]] = extractelement <32 x float> [[TMP38]], i32 14
-; MAX1024-NEXT:    [[TMP60:%.*]] = insertelement <32 x float> [[TMP58]], float [[TMP59]], i32 14
-; MAX1024-NEXT:    [[TMP61:%.*]] = extractelement <32 x float> [[TMP38]], i32 15
-; MAX1024-NEXT:    [[TMP62:%.*]] = insertelement <32 x float> [[TMP60]], float [[TMP61]], i32 15
-; MAX1024-NEXT:    [[TMP63:%.*]] = insertelement <32 x float> [[TMP62]], float [[FVAL]], i32 16
-; MAX1024-NEXT:    [[TMP64:%.*]] = insertelement <32 x float> [[TMP63]], float [[FVAL]], i32 17
-; MAX1024-NEXT:    [[TMP65:%.*]] = extractelement <32 x float> [[TMP38]], i32 18
-; MAX1024-NEXT:    [[TMP66:%.*]] = insertelement <32 x float> [[TMP64]], float [[TMP65]], i32 18
-; MAX1024-NEXT:    [[TMP67:%.*]] = extractelement <32 x float> [[TMP38]], i32 19
-; MAX1024-NEXT:    [[TMP68:%.*]] = insertelement <32 x float> [[TMP66]], float [[TMP67]], i32 19
-; MAX1024-NEXT:    [[TMP69:%.*]] = insertelement <32 x float> [[TMP68]], float [[FVAL]], i32 20
-; MAX1024-NEXT:    [[TMP70:%.*]] = insertelement <32 x float> [[TMP69]], float [[FVAL]], i32 21
-; MAX1024-NEXT:    [[TMP71:%.*]] = extractelement <32 x float> [[TMP38]], i32 22
-; MAX1024-NEXT:    [[TMP72:%.*]] = insertelement <32 x float> [[TMP70]], float [[TMP71]], i32 22
-; MAX1024-NEXT:    [[TMP73:%.*]] = extractelement <32 x float> [[TMP38]], i32 23
-; MAX1024-NEXT:    [[TMP74:%.*]] = insertelement <32 x float> [[TMP72]], float [[TMP73]], i32 23
-; MAX1024-NEXT:    [[TMP75:%.*]] = insertelement <32 x float> [[TMP74]], float [[FVAL]], i32 24
-; MAX1024-NEXT:    [[TMP76:%.*]] = insertelement <32 x float> [[TMP75]], float [[FVAL]], i32 25
-; MAX1024-NEXT:    [[TMP77:%.*]] = extractelement <32 x float> [[TMP38]], i32 26
-; MAX1024-NEXT:    [[TMP78:%.*]] = insertelement <32 x float> [[TMP76]], float [[TMP77]], i32 26
-; MAX1024-NEXT:    [[TMP79:%.*]] = extractelement <32 x float> [[TMP38]], i32 27
-; MAX1024-NEXT:    [[TMP80:%.*]] = insertelement <32 x float> [[TMP78]], float [[TMP79]], i32 27
-; MAX1024-NEXT:    [[TMP81:%.*]] = insertelement <32 x float> [[TMP80]], float [[FVAL]], i32 28
-; MAX1024-NEXT:    [[TMP82:%.*]] = insertelement <32 x float> [[TMP81]], float [[FVAL]], i32 29
-; MAX1024-NEXT:    [[TMP83:%.*]] = extractelement <32 x float> [[TMP38]], i32 30
-; MAX1024-NEXT:    [[TMP84:%.*]] = insertelement <32 x float> [[TMP82]], float [[TMP83]], i32 30
-; MAX1024-NEXT:    [[TMP85:%.*]] = extractelement <32 x float> [[TMP38]], i32 31
-; MAX1024-NEXT:    [[TMP86:%.*]] = insertelement <32 x float> [[TMP84]], float [[TMP85]], i32 31
+; MAX1024-NEXT:    [[TMP39:%.*]] = insertelement <32 x float> poison, float [[FVAL]], i32 2
+; MAX1024-NEXT:    [[TMP40:%.*]] = extractelement <32 x float> [[TMP38]], i32 0
+; MAX1024-NEXT:    [[TMP41:%.*]] = insertelement <32 x float> [[TMP39]], float [[TMP40]], i32 0
+; MAX1024-NEXT:    [[TMP42:%.*]] = extractelement <32 x float> [[TMP38]], i32 1
+; MAX1024-NEXT:    [[TMP43:%.*]] = insertelement <32 x float> [[TMP41]], float [[TMP42]], i32 1
+; MAX1024-NEXT:    [[TMP44:%.*]] = extractelement <32 x float> [[TMP38]], i32 4
+; MAX1024-NEXT:    [[TMP45:%.*]] = insertelement <32 x float> [[TMP43]], float [[TMP44]], i32 3
+; MAX1024-NEXT:    [[TMP46:%.*]] = extractelement <32 x float> [[TMP38]], i32 5
+; MAX1024-NEXT:    [[TMP47:%.*]] = insertelement <32 x float> [[TMP45]], float [[TMP46]], i32 4
+; MAX1024-NEXT:    [[TMP48:%.*]] = extractelement <32 x float> [[TMP38]], i32 10
+; MAX1024-NEXT:    [[TMP49:%.*]] = insertelement <32 x float> [[TMP47]], float [[TMP48]], i32 5
+; MAX1024-NEXT:    [[TMP50:%.*]] = extractelement <32 x float> [[TMP38]], i32 11
+; MAX1024-NEXT:    [[TMP51:%.*]] = insertelement <32 x float> [[TMP49]], float [[TMP50]], i32 6
+; MAX1024-NEXT:    [[TMP52:%.*]] = extractelement <32 x float> [[TMP38]], i32 14
+; MAX1024-NEXT:    [[TMP53:%.*]] = insertelement <32 x float> [[TMP51]], float [[TMP52]], i32 7
+; MAX1024-NEXT:    [[TMP54:%.*]] = extractelement <32 x float> [[TMP38]], i32 15
+; MAX1024-NEXT:    [[TMP55:%.*]] = insertelement <32 x float> [[TMP53]], float [[TMP54]], i32 8
+; MAX1024-NEXT:    [[TMP56:%.*]] = extractelement <32 x float> [[TMP38]], i32 18
+; MAX1024-NEXT:    [[TMP57:%.*]] = insertelement <32 x float> [[TMP55]], float [[TMP56]], i32 9
+; MAX1024-NEXT:    [[TMP58:%.*]] = extractelement <32 x float> [[TMP38]], i32 19
+; MAX1024-NEXT:    [[TMP59:%.*]] = insertelement <32 x float> [[TMP57]], float [[TMP58]], i32 10
+; MAX1024-NEXT:    [[TMP60:%.*]] = extractelement <32 x float> [[TMP38]], i32 22
+; MAX1024-NEXT:    [[TMP61:%.*]] = insertelement <32 x float> [[TMP59]], float [[TMP60]], i32 11
+; MAX1024-NEXT:    [[TMP62:%.*]] = extractelement <32 x float> [[TMP38]], i32 23
+; MAX1024-NEXT:    [[TMP63:%.*]] = insertelement <32 x float> [[TMP61]], float [[TMP62]], i32 12
+; MAX1024-NEXT:    [[TMP64:%.*]] = extractelement <32 x float> [[TMP38]], i32 26
+; MAX1024-NEXT:    [[TMP65:%.*]] = insertelement <32 x float> [[TMP63]], float [[TMP64]], i32 13
+; MAX1024-NEXT:    [[TMP66:%.*]] = extractelement <32 x float> [[TMP38]], i32 27
+; MAX1024-NEXT:    [[TMP67:%.*]] = insertelement <32 x float> [[TMP65]], float [[TMP66]], i32 14
+; MAX1024-NEXT:    [[TMP68:%.*]] = extractelement <32 x float> [[TMP38]], i32 30
+; MAX1024-NEXT:    [[TMP69:%.*]] = insertelement <32 x float> [[TMP67]], float [[TMP68]], i32 15
+; MAX1024-NEXT:    [[TMP70:%.*]] = extractelement <32 x float> [[TMP38]], i32 31
+; MAX1024-NEXT:    [[TMP71:%.*]] = insertelement <32 x float> [[TMP69]], float [[TMP70]], i32 16
+; MAX1024-NEXT:    [[SHUFFLE3:%.*]] = shufflevector <32 x float> [[TMP71]], <32 x float> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 2, i32 3, i32 4, i32 2, i32 2, i32 2, i32 2, i32 5, i32 6, i32 2, i32 2, i32 7, i32 8, i32 2, i32 2, i32 9, i32 10, i32 2, i32 2, i32 11, i32 12, i32 2, i32 2, i32 13, i32 14, i32 2, i32 2, i32 15, i32 16>
 ; MAX1024-NEXT:    switch i32 undef, label [[BB5:%.*]] [
 ; MAX1024-NEXT:    i32 0, label [[BB2:%.*]]
 ; MAX1024-NEXT:    i32 1, label [[BB3:%.*]]
@@ -406,91 +367,64 @@ define void @phi_float32(half %hval, float %fval) {
 ; MAX1024:       bb3:
 ; MAX1024-NEXT:    br label [[BB2]]
 ; MAX1024:       bb4:
-; MAX1024-NEXT:    [[TMP87:%.*]] = insertelement <32 x float> [[TMP40]], float [[FVAL]], i32 1
-; MAX1024-NEXT:    [[TMP88:%.*]] = insertelement <32 x float> [[TMP87]], float [[FVAL]], i32 2
-; MAX1024-NEXT:    [[TMP89:%.*]] = extractelement <32 x float> [[TMP38]], i32 3
-; MAX1024-NEXT:    [[TMP90:%.*]] = insertelement <32 x float> [[TMP88]], float [[TMP89]], i32 3
-; MAX1024-NEXT:    [[TMP91:%.*]] = insertelement <32 x float> [[TMP90]], float [[TMP45]], i32 4
-; MAX1024-NEXT:    [[TMP92:%.*]] = insertelement <32 x float> [[TMP91]], float [[FVAL]], i32 5
-; MAX1024-NEXT:    [[TMP93:%.*]] = insertelement <32 x float> [[TMP92]], float [[FVAL]], i32 6
-; MAX1024-NEXT:    [[TMP94:%.*]] = extractelement <32 x float> [[TMP38]], i32 7
-; MAX1024-NEXT:    [[TMP95:%.*]] = insertelement <32 x float> [[TMP93]], float [[TMP94]], i32 7
-; MAX1024-NEXT:    [[TMP96:%.*]] = extractelement <32 x float> [[TMP38]], i32 8
-; MAX1024-NEXT:    [[TMP97:%.*]] = insertelement <32 x float> [[TMP95]], float [[TMP96]], i32 8
-; MAX1024-NEXT:    [[TMP98:%.*]] = insertelement <32 x float> [[TMP97]], float [[FVAL]], i32 9
-; MAX1024-NEXT:    [[TMP99:%.*]] = insertelement <32 x float> [[TMP98]], float [[FVAL]], i32 10
-; MAX1024-NEXT:    [[TMP100:%.*]] = insertelement <32 x float> [[TMP99]], float [[TMP55]], i32 11
-; MAX1024-NEXT:    [[TMP101:%.*]] = extractelement <32 x float> [[TMP38]], i32 12
-; MAX1024-NEXT:    [[TMP102:%.*]] = insertelement <32 x float> [[TMP100]], float [[TMP101]], i32 12
-; MAX1024-NEXT:    [[TMP103:%.*]] = insertelement <32 x float> [[TMP102]], float [[FVAL]], i32 13
-; MAX1024-NEXT:    [[TMP104:%.*]] = insertelement <32 x float> [[TMP103]], float [[FVAL]], i32 14
-; MAX1024-NEXT:    [[TMP105:%.*]] = insertelement <32 x float> [[TMP104]], float [[TMP61]], i32 15
-; MAX1024-NEXT:    [[TMP106:%.*]] = extractelement <32 x float> [[TMP38]], i32 16
-; MAX1024-NEXT:    [[TMP107:%.*]] = insertelement <32 x float> [[TMP105]], float [[TMP106]], i32 16
-; MAX1024-NEXT:    [[TMP108:%.*]] = insertelement <32 x float> [[TMP107]], float [[FVAL]], i32 17
-; MAX1024-NEXT:    [[TMP109:%.*]] = insertelement <32 x float> [[TMP108]], float [[FVAL]], i32 18
-; MAX1024-NEXT:    [[TMP110:%.*]] = insertelement <32 x float> [[TMP109]], float [[TMP67]], i32 19
-; MAX1024-NEXT:    [[TMP111:%.*]] = extractelement <32 x float> [[TMP38]], i32 20
-; MAX1024-NEXT:    [[TMP112:%.*]] = insertelement <32 x float> [[TMP110]], float [[TMP111]], i32 20
-; MAX1024-NEXT:    [[TMP113:%.*]] = insertelement <32 x float> [[TMP112]], float [[FVAL]], i32 21
-; MAX1024-NEXT:    [[TMP114:%.*]] = insertelement <32 x float> [[TMP113]], float [[FVAL]], i32 22
-; MAX1024-NEXT:    [[TMP115:%.*]] = insertelement <32 x float> [[TMP114]], float [[TMP73]], i32 23
-; MAX1024-NEXT:    [[TMP116:%.*]] = extractelement <32 x float> [[TMP38]], i32 24
-; MAX1024-NEXT:    [[TMP117:%.*]] = insertelement <32 x float> [[TMP115]], float [[TMP116]], i32 24
-; MAX1024-NEXT:    [[TMP118:%.*]] = insertelement <32 x float> [[TMP117]], float [[FVAL]], i32 25
-; MAX1024-NEXT:    [[TMP119:%.*]] = insertelement <32 x float> [[TMP118]], float [[FVAL]], i32 26
-; MAX1024-NEXT:    [[TMP120:%.*]] = insertelement <32 x float> [[TMP119]], float [[TMP79]], i32 27
-; MAX1024-NEXT:    [[TMP121:%.*]] = extractelement <32 x float> [[TMP38]], i32 28
-; MAX1024-NEXT:    [[TMP122:%.*]] = insertelement <32 x float> [[TMP120]], float [[TMP121]], i32 28
-; MAX1024-NEXT:    [[TMP123:%.*]] = insertelement <32 x float> [[TMP122]], float [[FVAL]], i32 29
-; MAX1024-NEXT:    [[TMP124:%.*]] = insertelement <32 x float> [[TMP123]], float [[FVAL]], i32 30
-; MAX1024-NEXT:    [[TMP125:%.*]] = insertelement <32 x float> [[TMP124]], float [[TMP85]], i32 31
+; MAX1024-NEXT:    [[TMP72:%.*]] = insertelement <32 x float> poison, float [[FVAL]], i32 1
+; MAX1024-NEXT:    [[TMP73:%.*]] = insertelement <32 x float> [[TMP72]], float [[TMP40]], i32 0
+; MAX1024-NEXT:    [[TMP74:%.*]] = extractelement <32 x float> [[TMP38]], i32 3
+; MAX1024-NEXT:    [[TMP75:%.*]] = insertelement <32 x float> [[TMP73]], float [[TMP74]], i32 2
+; MAX1024-NEXT:    [[TMP76:%.*]] = insertelement <32 x float> [[TMP75]], float [[TMP44]], i32 3
+; MAX1024-NEXT:    [[TMP77:%.*]] = extractelement <32 x float> [[TMP38]], i32 7
+; MAX1024-NEXT:    [[TMP78:%.*]] = insertelement <32 x float> [[TMP76]], float [[TMP77]], i32 4
+; MAX1024-NEXT:    [[TMP79:%.*]] = extractelement <32 x float> [[TMP38]], i32 8
+; MAX1024-NEXT:    [[TMP80:%.*]] = insertelement <32 x float> [[TMP78]], float [[TMP79]], i32 5
+; MAX1024-NEXT:    [[TMP81:%.*]] = insertelement <32 x float> [[TMP80]], float [[TMP50]], i32 6
+; MAX1024-NEXT:    [[TMP82:%.*]] = extractelement <32 x float> [[TMP38]], i32 12
+; MAX1024-NEXT:    [[TMP83:%.*]] = insertelement <32 x float> [[TMP81]], float [[TMP82]], i32 7
+; MAX1024-NEXT:    [[TMP84:%.*]] = insertelement <32 x float> [[TMP83]], float [[TMP54]], i32 8
+; MAX1024-NEXT:    [[TMP85:%.*]] = extractelement <32 x float> [[TMP38]], i32 16
+; MAX1024-NEXT:    [[TMP86:%.*]] = insertelement <32 x float> [[TMP84]], float [[TMP85]], i32 9
+; MAX1024-NEXT:    [[TMP87:%.*]] = insertelement <32 x float> [[TMP86]], float [[TMP58]], i32 10
+; MAX1024-NEXT:    [[TMP88:%.*]] = extractelement <32 x float> [[TMP38]], i32 20
+; MAX1024-NEXT:    [[TMP89:%.*]] = insertelement <32 x float> [[TMP87]], float [[TMP88]], i32 11
+; MAX1024-NEXT:    [[TMP90:%.*]] = insertelement <32 x float> [[TMP89]], float [[TMP62]], i32 12
+; MAX1024-NEXT:    [[TMP91:%.*]] = extractelement <32 x float> [[TMP38]], i32 24
+; MAX1024-NEXT:    [[TMP92:%.*]] = insertelement <32 x float> [[TMP90]], float [[TMP91]], i32 13
+; MAX1024-NEXT:    [[TMP93:%.*]] = insertelement <32 x float> [[TMP92]], float [[TMP66]], i32 14
+; MAX1024-NEXT:    [[TMP94:%.*]] = extractelement <32 x float> [[TMP38]], i32 28
+; MAX1024-NEXT:    [[TMP95:%.*]] = insertelement <32 x float> [[TMP93]], float [[TMP94]], i32 15
+; MAX1024-NEXT:    [[TMP96:%.*]] = insertelement <32 x float> [[TMP95]], float [[TMP70]], i32 16
+; MAX1024-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <32 x float> [[TMP96]], <32 x float> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 4, i32 5, i32 1, i32 1, i32 6, i32 7, i32 1, i32 1, i32 8, i32 9, i32 1, i32 1, i32 10, i32 11, i32 1, i32 1, i32 12, i32 13, i32 1, i32 1, i32 14, i32 15, i32 1, i32 1, i32 16>
 ; MAX1024-NEXT:    br label [[BB2]]
 ; MAX1024:       bb5:
-; MAX1024-NEXT:    [[TMP126:%.*]] = insertelement <32 x float> [[TMP5]], float [[TMP41]], i32 1
-; MAX1024-NEXT:    [[TMP127:%.*]] = insertelement <32 x float> [[TMP126]], float [[FVAL]], i32 2
-; MAX1024-NEXT:    [[TMP128:%.*]] = extractelement <32 x float> [[TMP38]], i32 3
-; MAX1024-NEXT:    [[TMP129:%.*]] = insertelement <32 x float> [[TMP127]], float [[TMP128]], i32 3
-; MAX1024-NEXT:    [[TMP130:%.*]] = insertelement <32 x float> [[TMP129]], float [[FVAL]], i32 4
-; MAX1024-NEXT:    [[TMP131:%.*]] = insertelement <32 x float> [[TMP130]], float [[TMP47]], i32 5
-; MAX1024-NEXT:    [[TMP132:%.*]] = insertelement <32 x float> [[TMP131]], float [[FVAL]], i32 6
-; MAX1024-NEXT:    [[TMP133:%.*]] = extractelement <32 x float> [[TMP38]], i32 7
-; MAX1024-NEXT:    [[TMP134:%.*]] = insertelement <32 x float> [[TMP132]], float [[TMP133]], i32 7
-; MAX1024-NEXT:    [[TMP135:%.*]] = extractelement <32 x float> [[TMP38]], i32 8
-; MAX1024-NEXT:    [[TMP136:%.*]] = insertelement <32 x float> [[TMP134]], float [[TMP135]], i32 8
-; MAX1024-NEXT:    [[TMP137:%.*]] = insertelement <32 x float> [[TMP136]], float [[FVAL]], i32 9
-; MAX1024-NEXT:    [[TMP138:%.*]] = insertelement <32 x float> [[TMP137]], float [[TMP53]], i32 10
-; MAX1024-NEXT:    [[TMP139:%.*]] = insertelement <32 x float> [[TMP138]], float [[FVAL]], i32 11
-; MAX1024-NEXT:    [[TMP140:%.*]] = extractelement <32 x float> [[TMP38]], i32 12
-; MAX1024-NEXT:    [[TMP141:%.*]] = insertelement <32 x float> [[TMP139]], float [[TMP140]], i32 12
-; MAX1024-NEXT:    [[TMP142:%.*]] = insertelement <32 x float> [[TMP141]], float [[FVAL]], i32 13
-; MAX1024-NEXT:    [[TMP143:%.*]] = insertelement <32 x float> [[TMP142]], float [[TMP59]], i32 14
-; MAX1024-NEXT:    [[TMP144:%.*]] = insertelement <32 x float> [[TMP143]], float [[FVAL]], i32 15
-; MAX1024-NEXT:    [[TMP145:%.*]] = extractelement <32 x float> [[TMP38]], i32 16
-; MAX1024-NEXT:    [[TMP146:%.*]] = insertelement <32 x float> [[TMP144]], float [[TMP145]], i32 16
-; MAX1024-NEXT:    [[TMP147:%.*]] = insertelement <32 x float> [[TMP146]], float [[FVAL]], i32 17
-; MAX1024-NEXT:    [[TMP148:%.*]] = insertelement <32 x float> [[TMP147]], float [[TMP65]], i32 18
-; MAX1024-NEXT:    [[TMP149:%.*]] = insertelement <32 x float> [[TMP148]], float [[FVAL]], i32 19
-; MAX1024-NEXT:    [[TMP150:%.*]] = extractelement <32 x float> [[TMP38]], i32 20
-; MAX1024-NEXT:    [[TMP151:%.*]] = insertelement <32 x float> [[TMP149]], float [[TMP150]], i32 20
-; MAX1024-NEXT:    [[TMP152:%.*]] = insertelement <32 x float> [[TMP151]], float [[FVAL]], i32 21
-; MAX1024-NEXT:    [[TMP153:%.*]] = insertelement <32 x float> [[TMP152]], float [[TMP71]], i32 22
-; MAX1024-NEXT:    [[TMP154:%.*]] = insertelement <32 x float> [[TMP153]], float [[FVAL]], i32 23
-; MAX1024-NEXT:    [[TMP155:%.*]] = extractelement <32 x float> [[TMP38]], i32 24
-; MAX1024-NEXT:    [[TMP156:%.*]] = insertelement <32 x float> [[TMP154]], float [[TMP155]], i32 24
-; MAX1024-NEXT:    [[TMP157:%.*]] = insertelement <32 x float> [[TMP156]], float [[FVAL]], i32 25
-; MAX1024-NEXT:    [[TMP158:%.*]] = insertelement <32 x float> [[TMP157]], float [[TMP77]], i32 26
-; MAX1024-NEXT:    [[TMP159:%.*]] = insertelement <32 x float> [[TMP158]], float [[FVAL]], i32 27
-; MAX1024-NEXT:    [[TMP160:%.*]] = extractelement <32 x float> [[TMP38]], i32 28
-; MAX1024-NEXT:    [[TMP161:%.*]] = insertelement <32 x float> [[TMP159]], float [[TMP160]], i32 28
-; MAX1024-NEXT:    [[TMP162:%.*]] = insertelement <32 x float> [[TMP161]], float [[FVAL]], i32 29
-; MAX1024-NEXT:    [[TMP163:%.*]] = insertelement <32 x float> [[TMP162]], float [[TMP83]], i32 30
-; MAX1024-NEXT:    [[TMP164:%.*]] = insertelement <32 x float> [[TMP163]], float [[FVAL]], i32 31
+; MAX1024-NEXT:    [[TMP97:%.*]] = insertelement <32 x float> [[TMP5]], float [[TMP42]], i32 1
+; MAX1024-NEXT:    [[TMP98:%.*]] = extractelement <32 x float> [[TMP38]], i32 3
+; MAX1024-NEXT:    [[TMP99:%.*]] = insertelement <32 x float> [[TMP97]], float [[TMP98]], i32 2
+; MAX1024-NEXT:    [[TMP100:%.*]] = insertelement <32 x float> [[TMP99]], float [[TMP46]], i32 3
+; MAX1024-NEXT:    [[TMP101:%.*]] = extractelement <32 x float> [[TMP38]], i32 7
+; MAX1024-NEXT:    [[TMP102:%.*]] = insertelement <32 x float> [[TMP100]], float [[TMP101]], i32 4
+; MAX1024-NEXT:    [[TMP103:%.*]] = extractelement <32 x float> [[TMP38]], i32 8
+; MAX1024-NEXT:    [[TMP104:%.*]] = insertelement <32 x float> [[TMP102]], float [[TMP103]], i32 5
+; MAX1024-NEXT:    [[TMP105:%.*]] = insertelement <32 x float> [[TMP104]], float [[TMP48]], i32 6
+; MAX1024-NEXT:    [[TMP106:%.*]] = extractelement <32 x float> [[TMP38]], i32 12
+; MAX1024-NEXT:    [[TMP107:%.*]] = insertelement <32 x float> [[TMP105]], float [[TMP106]], i32 7
+; MAX1024-NEXT:    [[TMP108:%.*]] = insertelement <32 x float> [[TMP107]], float [[TMP52]], i32 8
+; MAX1024-NEXT:    [[TMP109:%.*]] = extractelement <32 x float> [[TMP38]], i32 16
+; MAX1024-NEXT:    [[TMP110:%.*]] = insertelement <32 x float> [[TMP108]], float [[TMP109]], i32 9
+; MAX1024-NEXT:    [[TMP111:%.*]] = insertelement <32 x float> [[TMP110]], float [[TMP56]], i32 10
+; MAX1024-NEXT:    [[TMP112:%.*]] = extractelement <32 x float> [[TMP38]], i32 20
+; MAX1024-NEXT:    [[TMP113:%.*]] = insertelement <32 x float> [[TMP111]], float [[TMP112]], i32 11
+; MAX1024-NEXT:    [[TMP114:%.*]] = insertelement <32 x float> [[TMP113]], float [[TMP60]], i32 12
+; MAX1024-NEXT:    [[TMP115:%.*]] = extractelement <32 x float> [[TMP38]], i32 24
+; MAX1024-NEXT:    [[TMP116:%.*]] = insertelement <32 x float> [[TMP114]], float [[TMP115]], i32 13
+; MAX1024-NEXT:    [[TMP117:%.*]] = insertelement <32 x float> [[TMP116]], float [[TMP64]], i32 14
+; MAX1024-NEXT:    [[TMP118:%.*]] = extractelement <32 x float> [[TMP38]], i32 28
+; MAX1024-NEXT:    [[TMP119:%.*]] = insertelement <32 x float> [[TMP117]], float [[TMP118]], i32 15
+; MAX1024-NEXT:    [[TMP120:%.*]] = insertelement <32 x float> [[TMP119]], float [[TMP68]], i32 16
+; MAX1024-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <32 x float> [[TMP120]], <32 x float> poison, <32 x i32> <i32 0, i32 1, i32 0, i32 2, i32 0, i32 3, i32 0, i32 4, i32 5, i32 0, i32 6, i32 0, i32 7, i32 0, i32 8, i32 0, i32 9, i32 0, i32 10, i32 0, i32 11, i32 0, i32 12, i32 0, i32 13, i32 0, i32 14, i32 0, i32 15, i32 0, i32 16, i32 0>
 ; MAX1024-NEXT:    br label [[BB2]]
 ; MAX1024:       bb2:
-; MAX1024-NEXT:    [[TMP165:%.*]] = phi <32 x float> [ [[TMP38]], [[BB3]] ], [ [[TMP125]], [[BB4]] ], [ [[TMP164]], [[BB5]] ], [ [[TMP86]], [[BB1]] ]
-; MAX1024-NEXT:    [[TMP166:%.*]] = extractelement <32 x float> [[TMP165]], i32 30
-; MAX1024-NEXT:    store float [[TMP166]], float* undef, align 4
+; MAX1024-NEXT:    [[TMP121:%.*]] = phi <32 x float> [ [[TMP38]], [[BB3]] ], [ [[SHUFFLE1]], [[BB4]] ], [ [[SHUFFLE2]], [[BB5]] ], [ [[SHUFFLE3]], [[BB1]] ]
+; MAX1024-NEXT:    [[TMP122:%.*]] = extractelement <32 x float> [[TMP121]], i32 30
+; MAX1024-NEXT:    store float [[TMP122]], float* undef, align 4
 ; MAX1024-NEXT:    ret void
 ;
 bb:

diff  --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
index 3e86611857908..f48d5e27e8aa6 100644
--- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll
@@ -43,8 +43,8 @@ declare i32 @llvm.umin.i32(i32, i32)
 define void @test2() {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
-; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> poison, [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77)
 ; CHECK-NEXT:    [[E:%.*]] = icmp ugt i32 [[TMP3]], 1


        


More information about the llvm-commits mailing list