[llvm] r289043 - [SLP] Fix for PR6246: vectorization for scalar ops on vector elements.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 8 03:57:52 PST 2016


Author: abataev
Date: Thu Dec  8 05:57:51 2016
New Revision: 289043

URL: http://llvm.org/viewvc/llvm-project?rev=289043&view=rev
Log:
[SLP] Fix for PR6246: vectorization for scalar ops on vector elements.

When trying to vectorize trees that start at insertelement instructions,
the function tryToVectorizeList() uses a vectorization factor calculated
as MinVecRegSize/ScalarTypeSize. Sometimes this does not work, because
the tree cost for this fixed vectorization factor is too high.
This patch tries to improve the situation. It tries different
vectorization factors, from max(PowerOf2Floor(NumberOfVectorizedValues),
MinVecRegSize/ScalarTypeSize) down to MinVecRegSize/ScalarTypeSize,
halving the factor at each step, and tries to choose the best one.

Differential Revision: https://reviews.llvm.org/D27215

Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/test/Transforms/SLPVectorizer/X86/arith-fp.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=289043&r1=289042&r2=289043&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Thu Dec  8 05:57:51 2016
@@ -3870,10 +3870,11 @@ bool SLPVectorizerPass::tryToVectorizeLi
 
   unsigned Opcode0 = I0->getOpcode();
 
-  // FIXME: Register size should be a parameter to this function, so we can
-  // try different vectorization factors.
   unsigned Sz = R.getVectorElementSize(I0);
-  unsigned VF = R.getMinVecRegSize() / Sz;
+  unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+  unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+  if (MaxVF < 2)
+    return false;
 
   for (Value *V : VL) {
     Type *Ty = V->getType();
@@ -3889,76 +3890,89 @@ bool SLPVectorizerPass::tryToVectorizeLi
   // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
 
-  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
-    unsigned OpsWidth = 0;
-
-    if (i + VF > e)
-      OpsWidth = e - i;
-    else
-      OpsWidth = VF;
-
-    if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
-      break;
-
-    // Check that a previous iteration of this loop did not delete the Value.
-    if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
+  unsigned NextInst = 0, MaxInst = VL.size();
+  for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
+       VF /= 2) {
+    // No actual vectorization should happen, if number of parts is the same as
+    // provided vectorization factor (i.e. the scalar type is used for vector
+    // code during codegen).
+    auto *VecTy = VectorType::get(VL[0]->getType(), VF);
+    if (TTI->getNumberOfParts(VecTy) == VF)
       continue;
+    for (unsigned I = NextInst; I < MaxInst; ++I) {
+      unsigned OpsWidth = 0;
 
-    DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
-                 << "\n");
-    ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
-
-    ArrayRef<Value *> BuildVectorSlice;
-    if (!BuildVector.empty())
-      BuildVectorSlice = BuildVector.slice(i, OpsWidth);
-
-    R.buildTree(Ops, BuildVectorSlice);
-    // TODO: check if we can allow reordering for more cases.
-    if (AllowReorder && R.shouldReorder()) {
-      // Conceptually, there is nothing actually preventing us from trying to
-      // reorder a larger list. In fact, we do exactly this when vectorizing
-      // reductions. However, at this point, we only expect to get here from
-      // tryToVectorizePair().
-      assert(Ops.size() == 2);
-      assert(BuildVectorSlice.empty());
-      Value *ReorderedOps[] = { Ops[1], Ops[0] };
-      R.buildTree(ReorderedOps, None);
-    }
-    if (R.isTreeTinyAndNotFullyVectorizable())
-      continue;
+      if (I + VF > MaxInst)
+        OpsWidth = MaxInst - I;
+      else
+        OpsWidth = VF;
+
+      if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+        break;
+
+      // Check that a previous iteration of this loop did not delete the Value.
+      if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
+        continue;
+
+      DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+                   << "\n");
+      ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
+
+      ArrayRef<Value *> BuildVectorSlice;
+      if (!BuildVector.empty())
+        BuildVectorSlice = BuildVector.slice(I, OpsWidth);
+
+      R.buildTree(Ops, BuildVectorSlice);
+      // TODO: check if we can allow reordering for more cases.
+      if (AllowReorder && R.shouldReorder()) {
+        // Conceptually, there is nothing actually preventing us from trying to
+        // reorder a larger list. In fact, we do exactly this when vectorizing
+        // reductions. However, at this point, we only expect to get here from
+        // tryToVectorizePair().
+        assert(Ops.size() == 2);
+        assert(BuildVectorSlice.empty());
+        Value *ReorderedOps[] = {Ops[1], Ops[0]};
+        R.buildTree(ReorderedOps, None);
+      }
+      if (R.isTreeTinyAndNotFullyVectorizable())
+        continue;
 
-    R.computeMinimumValueSizes();
-    int Cost = R.getTreeCost();
+      R.computeMinimumValueSizes();
+      int Cost = R.getTreeCost();
 
-    if (Cost < -SLPCostThreshold) {
-      DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
-      Value *VectorizedRoot = R.vectorizeTree();
-
-      // Reconstruct the build vector by extracting the vectorized root. This
-      // way we handle the case where some elements of the vector are undefined.
-      //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
-      if (!BuildVectorSlice.empty()) {
-        // The insert point is the last build vector instruction. The vectorized
-        // root will precede it. This guarantees that we get an instruction. The
-        // vectorized tree could have been constant folded.
-        Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
-        unsigned VecIdx = 0;
-        for (auto &V : BuildVectorSlice) {
-          IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
-                                      ++BasicBlock::iterator(InsertAfter));
-          Instruction *I = cast<Instruction>(V);
-          assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
-          Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
-              VectorizedRoot, Builder.getInt32(VecIdx++)));
-          I->setOperand(1, Extract);
-          I->removeFromParent();
-          I->insertAfter(Extract);
-          InsertAfter = I;
+      if (Cost < -SLPCostThreshold) {
+        DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+        Value *VectorizedRoot = R.vectorizeTree();
+
+        // Reconstruct the build vector by extracting the vectorized root. This
+        // way we handle the case where some elements of the vector are
+        // undefined.
+        //  (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
+        if (!BuildVectorSlice.empty()) {
+          // The insert point is the last build vector instruction. The
+          // vectorized root will precede it. This guarantees that we get an
+          // instruction. The vectorized tree could have been constant folded.
+          Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+          unsigned VecIdx = 0;
+          for (auto &V : BuildVectorSlice) {
+            IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+                                        ++BasicBlock::iterator(InsertAfter));
+            Instruction *I = cast<Instruction>(V);
+            assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
+            Instruction *Extract =
+                cast<Instruction>(Builder.CreateExtractElement(
+                    VectorizedRoot, Builder.getInt32(VecIdx++)));
+            I->setOperand(1, Extract);
+            I->removeFromParent();
+            I->insertAfter(Extract);
+            InsertAfter = I;
+          }
         }
+        // Move to the next bundle.
+        I += VF - 1;
+        NextInst = I + 1;
+        Changed = true;
       }
-      // Move to the next bundle.
-      i += VF - 1;
-      Changed = true;
     }
   }
 

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/arith-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/arith-fp.ll?rev=289043&r1=289042&r2=289043&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/arith-fp.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/arith-fp.ll Thu Dec  8 05:57:51 2016
@@ -222,22 +222,15 @@ define <4 x float> @buildvector_div_4f32
 
 define <4 x double> @buildvector_add_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_add_4f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x double> %a, i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x double> %b, i32 3
-; CHECK-NEXT:    [[C0:%.*]] = fadd double [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fadd double [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fadd double [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fadd double [[A3]], [[B3]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x double> [[R3]]
 ;
   %a0 = extractelement <4 x double> %a, i32 0
@@ -261,22 +254,15 @@ define <4 x double> @buildvector_add_4f6
 
 define <4 x double> @buildvector_sub_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_sub_4f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x double> %a, i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x double> %b, i32 3
-; CHECK-NEXT:    [[C0:%.*]] = fsub double [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fsub double [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fsub double [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fsub double [[A3]], [[B3]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x double> [[R3]]
 ;
   %a0 = extractelement <4 x double> %a, i32 0
@@ -300,22 +286,15 @@ define <4 x double> @buildvector_sub_4f6
 
 define <4 x double> @buildvector_mul_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_mul_4f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x double> %a, i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x double> %b, i32 3
-; CHECK-NEXT:    [[C0:%.*]] = fmul double [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fmul double [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fmul double [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fmul double [[A3]], [[B3]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[C3]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x double> [[R3]]
 ;
   %a0 = extractelement <4 x double> %a, i32 0
@@ -339,32 +318,15 @@ define <4 x double> @buildvector_mul_4f6
 
 define <4 x double> @buildvector_div_4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: @buildvector_div_4f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x double> %a, i32 3
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x double> %b, i32 3
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> undef, double [[A2]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP12]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP13]], i32 2
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP14]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <4 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <4 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <4 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <4 x double> [[R2]], double [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x double> [[R3]]
 ;
   %a0 = extractelement <4 x double> %a, i32 0
@@ -388,38 +350,23 @@ define <4 x double> @buildvector_div_4f6
 
 define <8 x float> @buildvector_add_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_add_8f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x float> %b, i32 7
-; CHECK-NEXT:    [[C0:%.*]] = fadd float [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fadd float [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fadd float [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fadd float [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fadd float [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fadd float [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fadd float [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fadd float [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -459,38 +406,23 @@ define <8 x float> @buildvector_add_8f32
 
 define <8 x float> @buildvector_sub_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_sub_8f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x float> %b, i32 7
-; CHECK-NEXT:    [[C0:%.*]] = fsub float [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fsub float [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fsub float [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fsub float [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fsub float [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fsub float [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fsub float [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fsub float [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -530,38 +462,23 @@ define <8 x float> @buildvector_sub_8f32
 
 define <8 x float> @buildvector_mul_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_mul_8f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x float> %b, i32 7
-; CHECK-NEXT:    [[C0:%.*]] = fmul float [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fmul float [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fmul float [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fmul float [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fmul float [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fmul float [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fmul float [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fmul float [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[C7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -601,56 +518,23 @@ define <8 x float> @buildvector_mul_8f32
 
 define <8 x float> @buildvector_div_8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @buildvector_div_8f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x float> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x float> %b, i32 7
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float [[B0]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[B1]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[B2]], i32 2
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[B3]], i32 3
-; CHECK-NEXT:    [[TMP9:%.*]] = fdiv <4 x float> [[TMP4]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> undef, float [[A4]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[A5]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[A6]], i32 2
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[A7]], i32 3
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> undef, float [[B4]], i32 0
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[B5]], i32 1
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[B6]], i32 2
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float [[B7]], i32 3
-; CHECK-NEXT:    [[TMP18:%.*]] = fdiv <4 x float> [[TMP13]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP19]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP20]], i32 1
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP21]], i32 2
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP22]], i32 3
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x float> [[TMP18]], i32 0
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP23]], i32 4
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x float> [[TMP18]], i32 1
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP24]], i32 5
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x float> [[TMP18]], i32 2
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP25]], i32 6
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x float> [[TMP18]], i32 3
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP26]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x float> [[R6]], float [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -694,38 +578,23 @@ define <8 x float> @buildvector_div_8f32
 
 define <8 x double> @buildvector_add_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_add_8f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x double> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x double> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x double> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x double> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x double> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x double> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x double> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x double> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x double> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x double> %b, i32 7
-; CHECK-NEXT:    [[C0:%.*]] = fadd double [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fadd double [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fadd double [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fadd double [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fadd double [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fadd double [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fadd double [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fadd double [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x double> [[R7]]
 ;
   %a0 = extractelement <8 x double> %a, i32 0
@@ -765,38 +634,23 @@ define <8 x double> @buildvector_add_8f6
 
 define <8 x double> @buildvector_sub_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_sub_8f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x double> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x double> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x double> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x double> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x double> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x double> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x double> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x double> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x double> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x double> %b, i32 7
-; CHECK-NEXT:    [[C0:%.*]] = fsub double [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fsub double [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fsub double [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fsub double [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fsub double [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fsub double [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fsub double [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fsub double [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x double> [[R7]]
 ;
   %a0 = extractelement <8 x double> %a, i32 0
@@ -836,38 +690,23 @@ define <8 x double> @buildvector_sub_8f6
 
 define <8 x double> @buildvector_mul_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_mul_8f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x double> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x double> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x double> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x double> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x double> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x double> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x double> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x double> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x double> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x double> %b, i32 7
-; CHECK-NEXT:    [[C0:%.*]] = fmul double [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fmul double [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fmul double [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fmul double [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fmul double [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fmul double [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fmul double [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fmul double [[A7]], [[B7]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[C7]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x double> [[R7]]
 ;
   %a0 = extractelement <8 x double> %a, i32 0
@@ -907,58 +746,23 @@ define <8 x double> @buildvector_mul_8f6
 
 define <8 x double> @buildvector_div_8f64(<8 x double> %a, <8 x double> %b) {
 ; CHECK-LABEL: @buildvector_div_8f64(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <8 x double> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <8 x double> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <8 x double> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <8 x double> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <8 x double> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <8 x double> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <8 x double> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <8 x double> %a, i32 7
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <8 x double> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <8 x double> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <8 x double> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <8 x double> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <8 x double> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <8 x double> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <8 x double> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <8 x double> %b, i32 7
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[B1]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fdiv <2 x double> [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> undef, double [[A2]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A3]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> undef, double [[B2]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x double> [[TMP8]], double [[B3]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = fdiv <2 x double> [[TMP7]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> undef, double [[A4]], i32 0
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[A5]], i32 1
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x double> undef, double [[B4]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x double> [[TMP13]], double [[B5]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = fdiv <2 x double> [[TMP12]], [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x double> undef, double [[A6]], i32 0
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[A7]], i32 1
-; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x double> undef, double [[B6]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x double> [[TMP18]], double [[B7]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP19]]
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP21]], i32 0
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP22]], i32 1
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP23]], i32 2
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP24]], i32 3
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x double> [[TMP15]], i32 0
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP25]], i32 4
-; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <2 x double> [[TMP15]], i32 1
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP26]], i32 5
-; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP27]], i32 6
-; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[TMP20]], i32 1
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP28]], i32 7
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x double> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <8 x double> undef, double [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <8 x double> [[R0]], double [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x double> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <8 x double> [[R1]], double [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x double> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <8 x double> [[R2]], double [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x double> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <8 x double> [[R3]], double [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x double> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <8 x double> [[R4]], double [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x double> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <8 x double> [[R5]], double [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <8 x double> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <8 x double> [[R6]], double [[TMP9]], i32 7
 ; CHECK-NEXT:    ret <8 x double> [[R7]]
 ;
   %a0 = extractelement <8 x double> %a, i32 0
@@ -998,70 +802,39 @@ define <8 x double> @buildvector_div_8f6
 
 define <16 x float> @buildvector_add_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_add_16f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <16 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <16 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <16 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <16 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <16 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <16 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <16 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <16 x float> %a, i32 7
-; CHECK-NEXT:    [[A8:%.*]] = extractelement <16 x float> %a, i32 8
-; CHECK-NEXT:    [[A9:%.*]] = extractelement <16 x float> %a, i32 9
-; CHECK-NEXT:    [[A10:%.*]] = extractelement <16 x float> %a, i32 10
-; CHECK-NEXT:    [[A11:%.*]] = extractelement <16 x float> %a, i32 11
-; CHECK-NEXT:    [[A12:%.*]] = extractelement <16 x float> %a, i32 12
-; CHECK-NEXT:    [[A13:%.*]] = extractelement <16 x float> %a, i32 13
-; CHECK-NEXT:    [[A14:%.*]] = extractelement <16 x float> %a, i32 14
-; CHECK-NEXT:    [[A15:%.*]] = extractelement <16 x float> %a, i32 15
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <16 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <16 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <16 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <16 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <16 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <16 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <16 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <16 x float> %b, i32 7
-; CHECK-NEXT:    [[B8:%.*]] = extractelement <16 x float> %b, i32 8
-; CHECK-NEXT:    [[B9:%.*]] = extractelement <16 x float> %b, i32 9
-; CHECK-NEXT:    [[B10:%.*]] = extractelement <16 x float> %b, i32 10
-; CHECK-NEXT:    [[B11:%.*]] = extractelement <16 x float> %b, i32 11
-; CHECK-NEXT:    [[B12:%.*]] = extractelement <16 x float> %b, i32 12
-; CHECK-NEXT:    [[B13:%.*]] = extractelement <16 x float> %b, i32 13
-; CHECK-NEXT:    [[B14:%.*]] = extractelement <16 x float> %b, i32 14
-; CHECK-NEXT:    [[B15:%.*]] = extractelement <16 x float> %b, i32 15
-; CHECK-NEXT:    [[C0:%.*]] = fadd float [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fadd float [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fadd float [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fadd float [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fadd float [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fadd float [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fadd float [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fadd float [[A7]], [[B7]]
-; CHECK-NEXT:    [[C8:%.*]] = fadd float [[A8]], [[B8]]
-; CHECK-NEXT:    [[C9:%.*]] = fadd float [[A9]], [[B9]]
-; CHECK-NEXT:    [[C10:%.*]] = fadd float [[A10]], [[B10]]
-; CHECK-NEXT:    [[C11:%.*]] = fadd float [[A11]], [[B11]]
-; CHECK-NEXT:    [[C12:%.*]] = fadd float [[A12]], [[B12]]
-; CHECK-NEXT:    [[C13:%.*]] = fadd float [[A13]], [[B13]]
-; CHECK-NEXT:    [[C14:%.*]] = fadd float [[A14]], [[B14]]
-; CHECK-NEXT:    [[C15:%.*]] = fadd float [[A15]], [[B15]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7
-; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8
-; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9
-; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10
-; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11
-; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12
-; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13
-; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14
-; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8
+; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9
+; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10
+; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11
+; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12
+; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13
+; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15
+; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15
 ; CHECK-NEXT:    ret <16 x float> [[R15]]
 ;
   %a0  = extractelement <16 x float> %a, i32 0
@@ -1133,70 +906,39 @@ define <16 x float> @buildvector_add_16f
 
 define <16 x float> @buildvector_sub_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_sub_16f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <16 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <16 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <16 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <16 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <16 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <16 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <16 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <16 x float> %a, i32 7
-; CHECK-NEXT:    [[A8:%.*]] = extractelement <16 x float> %a, i32 8
-; CHECK-NEXT:    [[A9:%.*]] = extractelement <16 x float> %a, i32 9
-; CHECK-NEXT:    [[A10:%.*]] = extractelement <16 x float> %a, i32 10
-; CHECK-NEXT:    [[A11:%.*]] = extractelement <16 x float> %a, i32 11
-; CHECK-NEXT:    [[A12:%.*]] = extractelement <16 x float> %a, i32 12
-; CHECK-NEXT:    [[A13:%.*]] = extractelement <16 x float> %a, i32 13
-; CHECK-NEXT:    [[A14:%.*]] = extractelement <16 x float> %a, i32 14
-; CHECK-NEXT:    [[A15:%.*]] = extractelement <16 x float> %a, i32 15
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <16 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <16 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <16 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <16 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <16 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <16 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <16 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <16 x float> %b, i32 7
-; CHECK-NEXT:    [[B8:%.*]] = extractelement <16 x float> %b, i32 8
-; CHECK-NEXT:    [[B9:%.*]] = extractelement <16 x float> %b, i32 9
-; CHECK-NEXT:    [[B10:%.*]] = extractelement <16 x float> %b, i32 10
-; CHECK-NEXT:    [[B11:%.*]] = extractelement <16 x float> %b, i32 11
-; CHECK-NEXT:    [[B12:%.*]] = extractelement <16 x float> %b, i32 12
-; CHECK-NEXT:    [[B13:%.*]] = extractelement <16 x float> %b, i32 13
-; CHECK-NEXT:    [[B14:%.*]] = extractelement <16 x float> %b, i32 14
-; CHECK-NEXT:    [[B15:%.*]] = extractelement <16 x float> %b, i32 15
-; CHECK-NEXT:    [[C0:%.*]] = fsub float [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fsub float [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fsub float [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fsub float [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fsub float [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fsub float [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fsub float [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fsub float [[A7]], [[B7]]
-; CHECK-NEXT:    [[C8:%.*]] = fsub float [[A8]], [[B8]]
-; CHECK-NEXT:    [[C9:%.*]] = fsub float [[A9]], [[B9]]
-; CHECK-NEXT:    [[C10:%.*]] = fsub float [[A10]], [[B10]]
-; CHECK-NEXT:    [[C11:%.*]] = fsub float [[A11]], [[B11]]
-; CHECK-NEXT:    [[C12:%.*]] = fsub float [[A12]], [[B12]]
-; CHECK-NEXT:    [[C13:%.*]] = fsub float [[A13]], [[B13]]
-; CHECK-NEXT:    [[C14:%.*]] = fsub float [[A14]], [[B14]]
-; CHECK-NEXT:    [[C15:%.*]] = fsub float [[A15]], [[B15]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7
-; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8
-; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9
-; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10
-; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11
-; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12
-; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13
-; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14
-; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8
+; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9
+; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10
+; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11
+; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12
+; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13
+; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15
+; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15
 ; CHECK-NEXT:    ret <16 x float> [[R15]]
 ;
   %a0  = extractelement <16 x float> %a, i32 0
@@ -1268,70 +1010,39 @@ define <16 x float> @buildvector_sub_16f
 
 define <16 x float> @buildvector_mul_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_mul_16f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <16 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <16 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <16 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <16 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <16 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <16 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <16 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <16 x float> %a, i32 7
-; CHECK-NEXT:    [[A8:%.*]] = extractelement <16 x float> %a, i32 8
-; CHECK-NEXT:    [[A9:%.*]] = extractelement <16 x float> %a, i32 9
-; CHECK-NEXT:    [[A10:%.*]] = extractelement <16 x float> %a, i32 10
-; CHECK-NEXT:    [[A11:%.*]] = extractelement <16 x float> %a, i32 11
-; CHECK-NEXT:    [[A12:%.*]] = extractelement <16 x float> %a, i32 12
-; CHECK-NEXT:    [[A13:%.*]] = extractelement <16 x float> %a, i32 13
-; CHECK-NEXT:    [[A14:%.*]] = extractelement <16 x float> %a, i32 14
-; CHECK-NEXT:    [[A15:%.*]] = extractelement <16 x float> %a, i32 15
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <16 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <16 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <16 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <16 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <16 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <16 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <16 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <16 x float> %b, i32 7
-; CHECK-NEXT:    [[B8:%.*]] = extractelement <16 x float> %b, i32 8
-; CHECK-NEXT:    [[B9:%.*]] = extractelement <16 x float> %b, i32 9
-; CHECK-NEXT:    [[B10:%.*]] = extractelement <16 x float> %b, i32 10
-; CHECK-NEXT:    [[B11:%.*]] = extractelement <16 x float> %b, i32 11
-; CHECK-NEXT:    [[B12:%.*]] = extractelement <16 x float> %b, i32 12
-; CHECK-NEXT:    [[B13:%.*]] = extractelement <16 x float> %b, i32 13
-; CHECK-NEXT:    [[B14:%.*]] = extractelement <16 x float> %b, i32 14
-; CHECK-NEXT:    [[B15:%.*]] = extractelement <16 x float> %b, i32 15
-; CHECK-NEXT:    [[C0:%.*]] = fmul float [[A0]], [[B0]]
-; CHECK-NEXT:    [[C1:%.*]] = fmul float [[A1]], [[B1]]
-; CHECK-NEXT:    [[C2:%.*]] = fmul float [[A2]], [[B2]]
-; CHECK-NEXT:    [[C3:%.*]] = fmul float [[A3]], [[B3]]
-; CHECK-NEXT:    [[C4:%.*]] = fmul float [[A4]], [[B4]]
-; CHECK-NEXT:    [[C5:%.*]] = fmul float [[A5]], [[B5]]
-; CHECK-NEXT:    [[C6:%.*]] = fmul float [[A6]], [[B6]]
-; CHECK-NEXT:    [[C7:%.*]] = fmul float [[A7]], [[B7]]
-; CHECK-NEXT:    [[C8:%.*]] = fmul float [[A8]], [[B8]]
-; CHECK-NEXT:    [[C9:%.*]] = fmul float [[A9]], [[B9]]
-; CHECK-NEXT:    [[C10:%.*]] = fmul float [[A10]], [[B10]]
-; CHECK-NEXT:    [[C11:%.*]] = fmul float [[A11]], [[B11]]
-; CHECK-NEXT:    [[C12:%.*]] = fmul float [[A12]], [[B12]]
-; CHECK-NEXT:    [[C13:%.*]] = fmul float [[A13]], [[B13]]
-; CHECK-NEXT:    [[C14:%.*]] = fmul float [[A14]], [[B14]]
-; CHECK-NEXT:    [[C15:%.*]] = fmul float [[A15]], [[B15]]
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[C0]], i32 0
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[C1]], i32 1
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[C2]], i32 2
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[C3]], i32 3
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[C4]], i32 4
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[C5]], i32 5
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[C6]], i32 6
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[C7]], i32 7
-; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[C8]], i32 8
-; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[C9]], i32 9
-; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[C10]], i32 10
-; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[C11]], i32 11
-; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[C12]], i32 12
-; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[C13]], i32 13
-; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[C14]], i32 14
-; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[C15]], i32 15
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8
+; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9
+; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10
+; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11
+; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12
+; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13
+; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15
+; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15
 ; CHECK-NEXT:    ret <16 x float> [[R15]]
 ;
   %a0  = extractelement <16 x float> %a, i32 0
@@ -1403,106 +1114,39 @@ define <16 x float> @buildvector_mul_16f
 
 define <16 x float> @buildvector_div_16f32(<16 x float> %a, <16 x float> %b) {
 ; CHECK-LABEL: @buildvector_div_16f32(
-; CHECK-NEXT:    [[A0:%.*]] = extractelement <16 x float> %a, i32 0
-; CHECK-NEXT:    [[A1:%.*]] = extractelement <16 x float> %a, i32 1
-; CHECK-NEXT:    [[A2:%.*]] = extractelement <16 x float> %a, i32 2
-; CHECK-NEXT:    [[A3:%.*]] = extractelement <16 x float> %a, i32 3
-; CHECK-NEXT:    [[A4:%.*]] = extractelement <16 x float> %a, i32 4
-; CHECK-NEXT:    [[A5:%.*]] = extractelement <16 x float> %a, i32 5
-; CHECK-NEXT:    [[A6:%.*]] = extractelement <16 x float> %a, i32 6
-; CHECK-NEXT:    [[A7:%.*]] = extractelement <16 x float> %a, i32 7
-; CHECK-NEXT:    [[A8:%.*]] = extractelement <16 x float> %a, i32 8
-; CHECK-NEXT:    [[A9:%.*]] = extractelement <16 x float> %a, i32 9
-; CHECK-NEXT:    [[A10:%.*]] = extractelement <16 x float> %a, i32 10
-; CHECK-NEXT:    [[A11:%.*]] = extractelement <16 x float> %a, i32 11
-; CHECK-NEXT:    [[A12:%.*]] = extractelement <16 x float> %a, i32 12
-; CHECK-NEXT:    [[A13:%.*]] = extractelement <16 x float> %a, i32 13
-; CHECK-NEXT:    [[A14:%.*]] = extractelement <16 x float> %a, i32 14
-; CHECK-NEXT:    [[A15:%.*]] = extractelement <16 x float> %a, i32 15
-; CHECK-NEXT:    [[B0:%.*]] = extractelement <16 x float> %b, i32 0
-; CHECK-NEXT:    [[B1:%.*]] = extractelement <16 x float> %b, i32 1
-; CHECK-NEXT:    [[B2:%.*]] = extractelement <16 x float> %b, i32 2
-; CHECK-NEXT:    [[B3:%.*]] = extractelement <16 x float> %b, i32 3
-; CHECK-NEXT:    [[B4:%.*]] = extractelement <16 x float> %b, i32 4
-; CHECK-NEXT:    [[B5:%.*]] = extractelement <16 x float> %b, i32 5
-; CHECK-NEXT:    [[B6:%.*]] = extractelement <16 x float> %b, i32 6
-; CHECK-NEXT:    [[B7:%.*]] = extractelement <16 x float> %b, i32 7
-; CHECK-NEXT:    [[B8:%.*]] = extractelement <16 x float> %b, i32 8
-; CHECK-NEXT:    [[B9:%.*]] = extractelement <16 x float> %b, i32 9
-; CHECK-NEXT:    [[B10:%.*]] = extractelement <16 x float> %b, i32 10
-; CHECK-NEXT:    [[B11:%.*]] = extractelement <16 x float> %b, i32 11
-; CHECK-NEXT:    [[B12:%.*]] = extractelement <16 x float> %b, i32 12
-; CHECK-NEXT:    [[B13:%.*]] = extractelement <16 x float> %b, i32 13
-; CHECK-NEXT:    [[B14:%.*]] = extractelement <16 x float> %b, i32 14
-; CHECK-NEXT:    [[B15:%.*]] = extractelement <16 x float> %b, i32 15
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[A1]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[A2]], i32 2
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[TMP3]], float [[A3]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> undef, float [[B0]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[B1]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[B2]], i32 2
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[B3]], i32 3
-; CHECK-NEXT:    [[TMP9:%.*]] = fdiv <4 x float> [[TMP4]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> undef, float [[A4]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[A5]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[A6]], i32 2
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[A7]], i32 3
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> undef, float [[B4]], i32 0
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[B5]], i32 1
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[B6]], i32 2
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float [[B7]], i32 3
-; CHECK-NEXT:    [[TMP18:%.*]] = fdiv <4 x float> [[TMP13]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x float> undef, float [[A8]], i32 0
-; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x float> [[TMP19]], float [[A9]], i32 1
-; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[A10]], i32 2
-; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[A11]], i32 3
-; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x float> undef, float [[B8]], i32 0
-; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[B9]], i32 1
-; CHECK-NEXT:    [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[B10]], i32 2
-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[TMP25]], float [[B11]], i32 3
-; CHECK-NEXT:    [[TMP27:%.*]] = fdiv <4 x float> [[TMP22]], [[TMP26]]
-; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <4 x float> undef, float [[A12]], i32 0
-; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[A13]], i32 1
-; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[A14]], i32 2
-; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x float> [[TMP30]], float [[A15]], i32 3
-; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x float> undef, float [[B12]], i32 0
-; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[B13]], i32 1
-; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[B14]], i32 2
-; CHECK-NEXT:    [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[B15]], i32 3
-; CHECK-NEXT:    [[TMP36:%.*]] = fdiv <4 x float> [[TMP31]], [[TMP35]]
-; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x float> [[TMP9]], i32 0
-; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP37]], i32 0
-; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <4 x float> [[TMP9]], i32 1
-; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP38]], i32 1
-; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x float> [[TMP9]], i32 2
-; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP39]], i32 2
-; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <4 x float> [[TMP9]], i32 3
-; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP40]], i32 3
-; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x float> [[TMP18]], i32 0
-; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP41]], i32 4
-; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <4 x float> [[TMP18]], i32 1
-; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP42]], i32 5
-; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <4 x float> [[TMP18]], i32 2
-; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP43]], i32 6
-; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <4 x float> [[TMP18]], i32 3
-; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP44]], i32 7
-; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <4 x float> [[TMP27]], i32 0
-; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP45]], i32 8
-; CHECK-NEXT:    [[TMP46:%.*]] = extractelement <4 x float> [[TMP27]], i32 1
-; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP46]], i32 9
-; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <4 x float> [[TMP27]], i32 2
-; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP47]], i32 10
-; CHECK-NEXT:    [[TMP48:%.*]] = extractelement <4 x float> [[TMP27]], i32 3
-; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP48]], i32 11
-; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <4 x float> [[TMP36]], i32 0
-; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP49]], i32 12
-; CHECK-NEXT:    [[TMP50:%.*]] = extractelement <4 x float> [[TMP36]], i32 1
-; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP50]], i32 13
-; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <4 x float> [[TMP36]], i32 2
-; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP51]], i32 14
-; CHECK-NEXT:    [[TMP52:%.*]] = extractelement <4 x float> [[TMP36]], i32 3
-; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP52]], i32 15
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> %a, %b
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[R0:%.*]] = insertelement <16 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <16 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[R1:%.*]] = insertelement <16 x float> [[R0]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <16 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[R2:%.*]] = insertelement <16 x float> [[R1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <16 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[R3:%.*]] = insertelement <16 x float> [[R2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <16 x float> [[TMP1]], i32 4
+; CHECK-NEXT:    [[R4:%.*]] = insertelement <16 x float> [[R3]], float [[TMP6]], i32 4
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <16 x float> [[TMP1]], i32 5
+; CHECK-NEXT:    [[R5:%.*]] = insertelement <16 x float> [[R4]], float [[TMP7]], i32 5
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x float> [[TMP1]], i32 6
+; CHECK-NEXT:    [[R6:%.*]] = insertelement <16 x float> [[R5]], float [[TMP8]], i32 6
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x float> [[TMP1]], i32 7
+; CHECK-NEXT:    [[R7:%.*]] = insertelement <16 x float> [[R6]], float [[TMP9]], i32 7
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x float> [[TMP1]], i32 8
+; CHECK-NEXT:    [[R8:%.*]] = insertelement <16 x float> [[R7]], float [[TMP10]], i32 8
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <16 x float> [[TMP1]], i32 9
+; CHECK-NEXT:    [[R9:%.*]] = insertelement <16 x float> [[R8]], float [[TMP11]], i32 9
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <16 x float> [[TMP1]], i32 10
+; CHECK-NEXT:    [[R10:%.*]] = insertelement <16 x float> [[R9]], float [[TMP12]], i32 10
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <16 x float> [[TMP1]], i32 11
+; CHECK-NEXT:    [[R11:%.*]] = insertelement <16 x float> [[R10]], float [[TMP13]], i32 11
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <16 x float> [[TMP1]], i32 12
+; CHECK-NEXT:    [[R12:%.*]] = insertelement <16 x float> [[R11]], float [[TMP14]], i32 12
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <16 x float> [[TMP1]], i32 13
+; CHECK-NEXT:    [[R13:%.*]] = insertelement <16 x float> [[R12]], float [[TMP15]], i32 13
+; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
+; CHECK-NEXT:    [[R14:%.*]] = insertelement <16 x float> [[R13]], float [[TMP16]], i32 14
+; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <16 x float> [[TMP1]], i32 15
+; CHECK-NEXT:    [[R15:%.*]] = insertelement <16 x float> [[R14]], float [[TMP17]], i32 15
 ; CHECK-NEXT:    ret <16 x float> [[R15]]
 ;
   %a0  = extractelement <16 x float> %a, i32 0

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll?rev=289043&r1=289042&r2=289043&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll Thu Dec  8 05:57:51 2016
@@ -616,42 +616,38 @@ define <4 x float> @take_credit(<4 x flo
 define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
 ; CHECK-LABEL: @multi_tree(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double %w, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double %x, i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 0.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %y, i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double %z, i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 2.000000e+00, double 3.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP2]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP5]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
-; CHECK-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP10]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
-; CHECK-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
+; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1
+; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2
+; CHECK-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3
+; CHECK-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0
 ; CHECK-NEXT:    ret <4 x double> [[I4]]
 ;
 ; ZEROTHRESH-LABEL: @multi_tree(
 ; ZEROTHRESH-NEXT:  entry:
-; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> undef, double %w, i32 0
-; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double %x, i32 1
-; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 0.000000e+00, double 1.000000e+00>
-; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> undef, double %y, i32 0
-; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double %z, i32 1
-; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 2.000000e+00, double 3.000000e+00>
-; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = fmul <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP2]]
-; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
-; ZEROTHRESH-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP7]], i32 3
-; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP6]], i32 1
-; ZEROTHRESH-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP8]], i32 2
-; ZEROTHRESH-NEXT:    [[TMP9:%.*]] = fmul <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP5]]
-; ZEROTHRESH-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[TMP9]], i32 0
-; ZEROTHRESH-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP10]], i32 1
-; ZEROTHRESH-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP9]], i32 1
-; ZEROTHRESH-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP11]], i32 0
+; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
+; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
+; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
+; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
+; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
+; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
+; ZEROTHRESH-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3
+; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1
+; ZEROTHRESH-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2
+; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2
+; ZEROTHRESH-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1
+; ZEROTHRESH-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3
+; ZEROTHRESH-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0
 ; ZEROTHRESH-NEXT:    ret <4 x double> [[I4]]
 ;
 entry:
@@ -673,92 +669,44 @@ entry:
 define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: @_vadd256(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <8 x float> %a, i32 0
-; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <8 x float> %b, i32 0
-; CHECK-NEXT:    [[VECEXT2:%.*]] = extractelement <8 x float> %a, i32 1
-; CHECK-NEXT:    [[VECEXT3:%.*]] = extractelement <8 x float> %b, i32 1
-; CHECK-NEXT:    [[VECEXT5:%.*]] = extractelement <8 x float> %a, i32 2
-; CHECK-NEXT:    [[VECEXT6:%.*]] = extractelement <8 x float> %b, i32 2
-; CHECK-NEXT:    [[VECEXT8:%.*]] = extractelement <8 x float> %a, i32 3
-; CHECK-NEXT:    [[VECEXT9:%.*]] = extractelement <8 x float> %b, i32 3
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> undef, float [[VECEXT]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[VECEXT2]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[VECEXT5]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[VECEXT8]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> undef, float [[VECEXT1]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[VECEXT3]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[VECEXT6]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[VECEXT9]], i32 3
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP3]], [[TMP7]]
-; CHECK-NEXT:    [[VECEXT11:%.*]] = extractelement <8 x float> %a, i32 4
-; CHECK-NEXT:    [[VECEXT12:%.*]] = extractelement <8 x float> %b, i32 4
-; CHECK-NEXT:    [[VECEXT14:%.*]] = extractelement <8 x float> %a, i32 5
-; CHECK-NEXT:    [[VECEXT15:%.*]] = extractelement <8 x float> %b, i32 5
-; CHECK-NEXT:    [[VECEXT17:%.*]] = extractelement <8 x float> %a, i32 6
-; CHECK-NEXT:    [[VECEXT18:%.*]] = extractelement <8 x float> %b, i32 6
-; CHECK-NEXT:    [[VECEXT20:%.*]] = extractelement <8 x float> %a, i32 7
-; CHECK-NEXT:    [[VECEXT21:%.*]] = extractelement <8 x float> %b, i32 7
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> undef, float [[VECEXT11]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[VECEXT14]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[VECEXT17]], i32 2
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP11]], float [[VECEXT20]], i32 3
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> undef, float [[VECEXT12]], i32 0
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[VECEXT15]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[VECEXT18]], i32 2
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[VECEXT21]], i32 3
-; CHECK-NEXT:    [[TMP17:%.*]] = fadd <4 x float> [[TMP12]], [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x float> [[TMP8]], i32 0
-; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP18]], i32 0
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x float> [[TMP8]], i32 1
-; CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP19]], i32 1
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x float> [[TMP8]], i32 2
-; CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP20]], i32 2
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x float> [[TMP8]], i32 3
-; CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP21]], i32 3
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x float> [[TMP17]], i32 0
-; CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP22]], i32 4
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x float> [[TMP17]], i32 1
-; CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP23]], i32 5
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x float> [[TMP17]], i32 2
-; CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP24]], i32 6
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x float> [[TMP17]], i32 3
-; CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP25]], i32 7
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
+; CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
+; CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
+; CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
+; CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7
 ; CHECK-NEXT:    ret <8 x float> [[VECINIT7_I]]
 ;
 ; ZEROTHRESH-LABEL: @_vadd256(
 ; ZEROTHRESH-NEXT:  entry:
-; ZEROTHRESH-NEXT:    [[VECEXT:%.*]] = extractelement <8 x float> %a, i32 0
-; ZEROTHRESH-NEXT:    [[VECEXT1:%.*]] = extractelement <8 x float> %b, i32 0
-; ZEROTHRESH-NEXT:    [[ADD:%.*]] = fadd float [[VECEXT]], [[VECEXT1]]
-; ZEROTHRESH-NEXT:    [[VECEXT2:%.*]] = extractelement <8 x float> %a, i32 1
-; ZEROTHRESH-NEXT:    [[VECEXT3:%.*]] = extractelement <8 x float> %b, i32 1
-; ZEROTHRESH-NEXT:    [[ADD4:%.*]] = fadd float [[VECEXT2]], [[VECEXT3]]
-; ZEROTHRESH-NEXT:    [[VECEXT5:%.*]] = extractelement <8 x float> %a, i32 2
-; ZEROTHRESH-NEXT:    [[VECEXT6:%.*]] = extractelement <8 x float> %b, i32 2
-; ZEROTHRESH-NEXT:    [[ADD7:%.*]] = fadd float [[VECEXT5]], [[VECEXT6]]
-; ZEROTHRESH-NEXT:    [[VECEXT8:%.*]] = extractelement <8 x float> %a, i32 3
-; ZEROTHRESH-NEXT:    [[VECEXT9:%.*]] = extractelement <8 x float> %b, i32 3
-; ZEROTHRESH-NEXT:    [[ADD10:%.*]] = fadd float [[VECEXT8]], [[VECEXT9]]
-; ZEROTHRESH-NEXT:    [[VECEXT11:%.*]] = extractelement <8 x float> %a, i32 4
-; ZEROTHRESH-NEXT:    [[VECEXT12:%.*]] = extractelement <8 x float> %b, i32 4
-; ZEROTHRESH-NEXT:    [[ADD13:%.*]] = fadd float [[VECEXT11]], [[VECEXT12]]
-; ZEROTHRESH-NEXT:    [[VECEXT14:%.*]] = extractelement <8 x float> %a, i32 5
-; ZEROTHRESH-NEXT:    [[VECEXT15:%.*]] = extractelement <8 x float> %b, i32 5
-; ZEROTHRESH-NEXT:    [[ADD16:%.*]] = fadd float [[VECEXT14]], [[VECEXT15]]
-; ZEROTHRESH-NEXT:    [[VECEXT17:%.*]] = extractelement <8 x float> %a, i32 6
-; ZEROTHRESH-NEXT:    [[VECEXT18:%.*]] = extractelement <8 x float> %b, i32 6
-; ZEROTHRESH-NEXT:    [[ADD19:%.*]] = fadd float [[VECEXT17]], [[VECEXT18]]
-; ZEROTHRESH-NEXT:    [[VECEXT20:%.*]] = extractelement <8 x float> %a, i32 7
-; ZEROTHRESH-NEXT:    [[VECEXT21:%.*]] = extractelement <8 x float> %b, i32 7
-; ZEROTHRESH-NEXT:    [[ADD22:%.*]] = fadd float [[VECEXT20]], [[VECEXT21]]
-; ZEROTHRESH-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[ADD]], i32 0
-; ZEROTHRESH-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[ADD4]], i32 1
-; ZEROTHRESH-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[ADD7]], i32 2
-; ZEROTHRESH-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[ADD10]], i32 3
-; ZEROTHRESH-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[ADD13]], i32 4
-; ZEROTHRESH-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[ADD16]], i32 5
-; ZEROTHRESH-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[ADD19]], i32 6
-; ZEROTHRESH-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[ADD22]], i32 7
+; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
+; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
+; ZEROTHRESH-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
+; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
+; ZEROTHRESH-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1
+; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
+; ZEROTHRESH-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2
+; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
+; ZEROTHRESH-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3
+; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
+; ZEROTHRESH-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4
+; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
+; ZEROTHRESH-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5
+; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
+; ZEROTHRESH-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6
+; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
+; ZEROTHRESH-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7
 ; ZEROTHRESH-NEXT:    ret <8 x float> [[VECINIT7_I]]
 ;
   entry:




More information about the llvm-commits mailing list