[llvm] r324893 - [SLP] Take user instructions cost into consideration in insertelement vectorization.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 12 06:54:48 PST 2018


Author: abataev
Date: Mon Feb 12 06:54:48 2018
New Revision: 324893

URL: http://llvm.org/viewvc/llvm-project?rev=324893&view=rev
Log:
[SLP] Take user instructions cost into consideration in insertelement vectorization.

Summary:
For better vectorization result we should take into consideration the
cost of the user insertelement instructions when we try to
vectorize sequences that build the whole vector. I.e. if we have the
following scalar code:
```
<Scalar code>
insertelement <ScalarCode>, ...
```
we should consider the cost of the last `insertelement ` instructions as
the cost of the scalar code.

Reviewers: RKSimon, spatel, hfinkel, mkuper

Subscribers: javed.absar, llvm-commits

Differential Revision: https://reviews.llvm.org/D42657

Modified:
    llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/sign-extend.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll

Modified: llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h (original)
+++ llvm/trunk/include/llvm/Transforms/Vectorize/SLPVectorizer.h Mon Feb 12 06:54:48 2018
@@ -95,9 +95,11 @@ private:
   bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
 
   /// \brief Try to vectorize a list of operands.
+  /// \param UserCost Cost of the user operations of \p VL if they may affect
+  /// the cost of the vectorization.
   /// \returns true if a value was vectorized.
   bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
-                          bool AllowReorder = false);
+                          int UserCost = 0, bool AllowReorder = false);
 
   /// \brief Try to vectorize a chain that may start at the operands of \p I.
   bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Mon Feb 12 06:54:48 2018
@@ -4702,11 +4702,11 @@ bool SLPVectorizerPass::tryToVectorizePa
   if (!A || !B)
     return false;
   Value *VL[] = { A, B };
-  return tryToVectorizeList(VL, R, true);
+  return tryToVectorizeList(VL, R, /*UserCost=*/0, true);
 }
 
 bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
-                                           bool AllowReorder) {
+                                           int UserCost, bool AllowReorder) {
   if (VL.size() < 2)
     return false;
 
@@ -4815,7 +4815,7 @@ bool SLPVectorizerPass::tryToVectorizeLi
         continue;
 
       R.computeMinimumValueSizes();
-      int Cost = R.getTreeCost();
+      int Cost = R.getTreeCost() - UserCost;
       CandidateFound = true;
       MinCost = std::min(MinCost, Cost);
 
@@ -5748,9 +5748,17 @@ private:
 ///
 /// Returns true if it matches
 static bool findBuildVector(InsertElementInst *LastInsertElem,
-                            SmallVectorImpl<Value *> &BuildVectorOpds) {
+                            TargetTransformInfo *TTI,
+                            SmallVectorImpl<Value *> &BuildVectorOpds,
+                            int &UserCost) {
+  UserCost = 0;
   Value *V = nullptr;
   do {
+    if (auto *CI = dyn_cast<ConstantInt>(LastInsertElem->getOperand(2))) {
+      UserCost += TTI->getVectorInstrCost(Instruction::InsertElement,
+                                          LastInsertElem->getType(),
+                                          CI->getZExtValue());
+    }
     BuildVectorOpds.push_back(LastInsertElem->getOperand(1));
     V = LastInsertElem->getOperand(0);
     if (isa<UndefValue>(V))
@@ -5965,13 +5973,17 @@ bool SLPVectorizerPass::vectorizeInsertV
 
 bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
                                                    BasicBlock *BB, BoUpSLP &R) {
+  int UserCost;
   SmallVector<Value *, 16> BuildVectorOpds;
-  if (!findBuildVector(IEI, BuildVectorOpds))
+  if (!findBuildVector(IEI, TTI, BuildVectorOpds, UserCost) ||
+      (llvm::all_of(BuildVectorOpds,
+                    [](Value *V) { return isa<ExtractElementInst>(V); }) &&
+       isShuffle(BuildVectorOpds)))
     return false;
 
   // Vectorize starting with the build vector operands ignoring the BuildVector
   // instructions for the purpose of scheduling and user extraction.
-  return tryToVectorizeList(BuildVectorOpds, R);
+  return tryToVectorizeList(BuildVectorOpds, R, UserCost);
 }
 
 bool SLPVectorizerPass::vectorizeCmpInst(CmpInst *CI, BasicBlock *BB,
@@ -6049,8 +6061,8 @@ bool SLPVectorizerPass::vectorizeChainsI
       // is done when there are exactly two elements since tryToVectorizeList
       // asserts that there are only two values when AllowReorder is true.
       bool AllowReorder = NumElts == 2;
-      if (NumElts > 1 &&
-          tryToVectorizeList(makeArrayRef(IncIt, NumElts), R, AllowReorder)) {
+      if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
+                                            /*UserCost=*/0, AllowReorder)) {
         // Success start over because instructions might have been changed.
         HaveVectorizedPhiNodes = true;
         Changed = true;

Modified: llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/AArch64/gather-root.ll Mon Feb 12 06:54:48 2018
@@ -31,50 +31,54 @@ define void @PR28330(i32 %n) {
 ;
 ; GATHER-LABEL: @PR28330(
 ; GATHER-NEXT:  entry:
-; GATHER-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
-; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
-; GATHER-NEXT:    [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
-; GATHER-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
-; GATHER-NEXT:    [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
-; GATHER-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
-; GATHER-NEXT:    [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
-; GATHER-NEXT:    [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
-; GATHER-NEXT:    [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
-; GATHER-NEXT:    [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
-; GATHER-NEXT:    [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
-; GATHER-NEXT:    [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
-; GATHER-NEXT:    [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
-; GATHER-NEXT:    [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
+; GATHER-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
-; GATHER-NEXT:    [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
-; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
-; GATHER-NEXT:    [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP3]]
-; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-; GATHER-NEXT:    [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
-; GATHER-NEXT:    [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
-; GATHER-NEXT:    [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
-; GATHER-NEXT:    [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
-; GATHER-NEXT:    [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
-; GATHER-NEXT:    [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
-; GATHER-NEXT:    [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
-; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
-; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
-; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
-; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
-; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
-; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
-; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
-; GATHER-NEXT:    [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
-; GATHER-NEXT:    [[BIN_EXTRA]] = add i32 [[TMP13]], [[TMP17]]
-; GATHER-NEXT:    [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
+; GATHER-NEXT:    [[TMPP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
+; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0
+; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
+; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1
+; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
+; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2
+; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
+; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3
+; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
+; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4
+; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
+; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5
+; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
+; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6
+; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
+; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7
+; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
+; GATHER-NEXT:    [[TMPP20:%.*]] = add i32 [[TMPP17]], [[TMP19]]
+; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
+; GATHER-NEXT:    [[TMPP22:%.*]] = add i32 [[TMPP20]], [[TMP20]]
+; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
+; GATHER-NEXT:    [[TMPP24:%.*]] = add i32 [[TMPP22]], [[TMP21]]
+; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
+; GATHER-NEXT:    [[TMPP26:%.*]] = add i32 [[TMPP24]], [[TMP22]]
+; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
+; GATHER-NEXT:    [[TMPP28:%.*]] = add i32 [[TMPP26]], [[TMP23]]
+; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
+; GATHER-NEXT:    [[TMPP30:%.*]] = add i32 [[TMPP28]], [[TMP24]]
+; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
+; GATHER-NEXT:    [[TMPP32:%.*]] = add i32 [[TMPP30]], [[TMP25]]
+; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
+; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
+; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
+; GATHER-NEXT:    [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3
+; GATHER-NEXT:    [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP23]], i32 4
+; GATHER-NEXT:    [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP24]], i32 5
+; GATHER-NEXT:    [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
+; GATHER-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
+; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
+; GATHER-NEXT:    [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]])
+; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], [[TMPP17]]
+; GATHER-NEXT:    [[TMP34:%.*]] = add i32 [[TMPP32]], [[TMP33]]
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
 ; MAX-COST-LABEL: @PR28330(
@@ -179,50 +183,54 @@ define void @PR32038(i32 %n) {
 ;
 ; GATHER-LABEL: @PR32038(
 ; GATHER-NEXT:  entry:
-; GATHER-NEXT:    [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1
-; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer
-; GATHER-NEXT:    [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1
-; GATHER-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0
-; GATHER-NEXT:    [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4
-; GATHER-NEXT:    [[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0
-; GATHER-NEXT:    [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1
-; GATHER-NEXT:    [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
-; GATHER-NEXT:    [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2
-; GATHER-NEXT:    [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
-; GATHER-NEXT:    [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1
-; GATHER-NEXT:    [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0
-; GATHER-NEXT:    [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8
-; GATHER-NEXT:    [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0
+; GATHER-NEXT:    [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <8 x i8>*), align 1
+; GATHER-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
-; GATHER-NEXT:    [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -720, i32 -720>, <2 x i32> <i32 -80, i32 -80>
-; GATHER-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
-; GATHER-NEXT:    [[TMP20:%.*]] = add i32 -5, [[TMP3]]
-; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
-; GATHER-NEXT:    [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP4]]
-; GATHER-NEXT:    [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]]
-; GATHER-NEXT:    [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]]
-; GATHER-NEXT:    [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]]
-; GATHER-NEXT:    [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]]
-; GATHER-NEXT:    [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]]
-; GATHER-NEXT:    [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80
-; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i32> undef, i32 [[TMP3]], i32 0
-; GATHER-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[TMP4]], i32 1
-; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[TMP23]], i32 2
-; GATHER-NEXT:    [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[TMP25]], i32 3
-; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP27]], i32 4
-; GATHER-NEXT:    [[TMP10:%.*]] = insertelement <8 x i32> [[TMP9]], i32 [[TMP29]], i32 5
-; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i32> [[TMP10]], i32 [[TMP31]], i32 6
-; GATHER-NEXT:    [[TMP12:%.*]] = insertelement <8 x i32> [[TMP11]], i32 [[TMP33]], i32 7
-; GATHER-NEXT:    [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP12]])
-; GATHER-NEXT:    [[BIN_EXTRA]] = add i32 [[TMP13]], -5
-; GATHER-NEXT:    [[TMP34:%.*]] = add i32 [[TMP32]], [[TMP33]]
+; GATHER-NEXT:    [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; GATHER-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
+; GATHER-NEXT:    [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0
+; GATHER-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
+; GATHER-NEXT:    [[TMP5:%.*]] = insertelement <8 x i1> [[TMP3]], i1 [[TMP4]], i32 1
+; GATHER-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
+; GATHER-NEXT:    [[TMP7:%.*]] = insertelement <8 x i1> [[TMP5]], i1 [[TMP6]], i32 2
+; GATHER-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
+; GATHER-NEXT:    [[TMP9:%.*]] = insertelement <8 x i1> [[TMP7]], i1 [[TMP8]], i32 3
+; GATHER-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
+; GATHER-NEXT:    [[TMP11:%.*]] = insertelement <8 x i1> [[TMP9]], i1 [[TMP10]], i32 4
+; GATHER-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
+; GATHER-NEXT:    [[TMP13:%.*]] = insertelement <8 x i1> [[TMP11]], i1 [[TMP12]], i32 5
+; GATHER-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
+; GATHER-NEXT:    [[TMP15:%.*]] = insertelement <8 x i1> [[TMP13]], i1 [[TMP14]], i32 6
+; GATHER-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
+; GATHER-NEXT:    [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7
+; GATHER-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; GATHER-NEXT:    [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0
+; GATHER-NEXT:    [[TMPP20:%.*]] = add i32 -5, [[TMP19]]
+; GATHER-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1
+; GATHER-NEXT:    [[TMPP22:%.*]] = add i32 [[TMPP20]], [[TMP20]]
+; GATHER-NEXT:    [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2
+; GATHER-NEXT:    [[TMPP24:%.*]] = add i32 [[TMPP22]], [[TMP21]]
+; GATHER-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3
+; GATHER-NEXT:    [[TMPP26:%.*]] = add i32 [[TMPP24]], [[TMP22]]
+; GATHER-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4
+; GATHER-NEXT:    [[TMPP28:%.*]] = add i32 [[TMPP26]], [[TMP23]]
+; GATHER-NEXT:    [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5
+; GATHER-NEXT:    [[TMPP30:%.*]] = add i32 [[TMPP28]], [[TMP24]]
+; GATHER-NEXT:    [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6
+; GATHER-NEXT:    [[TMPP32:%.*]] = add i32 [[TMPP30]], [[TMP25]]
+; GATHER-NEXT:    [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
+; GATHER-NEXT:    [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1
+; GATHER-NEXT:    [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2
+; GATHER-NEXT:    [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP22]], i32 3
+; GATHER-NEXT:    [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP23]], i32 4
+; GATHER-NEXT:    [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP24]], i32 5
+; GATHER-NEXT:    [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP25]], i32 6
+; GATHER-NEXT:    [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7
+; GATHER-NEXT:    [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7
+; GATHER-NEXT:    [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]])
+; GATHER-NEXT:    [[OP_EXTRA]] = add i32 [[TMP35]], -5
+; GATHER-NEXT:    [[TMP34:%.*]] = add i32 [[TMPP32]], [[TMP33]]
 ; GATHER-NEXT:    br label [[FOR_BODY]]
 ;
 ; MAX-COST-LABEL: @PR32038(

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll Mon Feb 12 06:54:48 2018
@@ -387,14 +387,14 @@ define <4 x float> @simple_select_no_use
 ; to do this backwards this backwards
 define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
 ; CHECK-LABEL: @reconstruct(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[C]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[C]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[C]], i32 0
-; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP4]], i32 0
-; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP3]], i32 1
-; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP2]], i32 2
-; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP1]], i32 3
+; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
+; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
+; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
+; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
+; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
+; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
+; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
+; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
 ; CHECK-NEXT:    ret <4 x i32> [[RD]]
 ;
 ; ZEROTHRESH-LABEL: @reconstruct(

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll Mon Feb 12 06:54:48 2018
@@ -12,19 +12,20 @@
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1), align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 2), align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP3]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 8, i32 3
-; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP11]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* bitcast (i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 1) to <2 x i32>*), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([4 x i32], [4 x i32]* @b, i64 0, i32 3), align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP2]], i32 2
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP4]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 ptrtoint (i32 ()* @fn1 to i32), i32 2
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 8, i32 3
+; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP12]], <4 x i32> <i32 6, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
 ; CHECK-NEXT:    ret i32 0
 ;
   entry:

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/sign-extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/sign-extend.ll?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/sign-extend.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/sign-extend.ll Mon Feb 12 06:54:48 2018
@@ -34,18 +34,15 @@ entry:
 define <4 x i16> @truncate_v_v(<4 x i32> %lhs) {
 ; CHECK-LABEL: @truncate_v_v(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[LHS:%.*]], i32 0
-; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[VECEXT]] to i16
-; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i16> undef, i16 [[CONV]], i32 0
-; CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <4 x i32> [[LHS]], i32 1
-; CHECK-NEXT:    [[CONV2:%.*]] = trunc i32 [[VECEXT1]] to i16
-; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x i16> [[VECINIT]], i16 [[CONV2]], i32 1
-; CHECK-NEXT:    [[VECEXT4:%.*]] = extractelement <4 x i32> [[LHS]], i32 2
-; CHECK-NEXT:    [[CONV5:%.*]] = trunc i32 [[VECEXT4]] to i16
-; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x i16> [[VECINIT3]], i16 [[CONV5]], i32 2
-; CHECK-NEXT:    [[VECEXT7:%.*]] = extractelement <4 x i32> [[LHS]], i32 3
-; CHECK-NEXT:    [[CONV8:%.*]] = trunc i32 [[VECEXT7]] to i16
-; CHECK-NEXT:    [[VECINIT9:%.*]] = insertelement <4 x i16> [[VECINIT6]], i16 [[CONV8]], i32 3
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc <4 x i32> [[LHS:%.*]] to <4 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i16> [[TMP0]], i32 0
+; CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x i16> undef, i16 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i16> [[TMP0]], i32 1
+; CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x i16> [[VECINIT]], i16 [[TMP2]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i16> [[TMP0]], i32 2
+; CHECK-NEXT:    [[VECINIT6:%.*]] = insertelement <4 x i16> [[VECINIT3]], i16 [[TMP3]], i32 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i16> [[TMP0]], i32 3
+; CHECK-NEXT:    [[VECINIT9:%.*]] = insertelement <4 x i16> [[VECINIT6]], i16 [[TMP4]], i32 3
 ; CHECK-NEXT:    ret <4 x i16> [[VECINIT9]]
 ;
 entry:

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll?rev=324893&r1=324892&r2=324893&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/value-bug.ll Mon Feb 12 06:54:48 2018
@@ -11,34 +11,32 @@ target datalayout = "e-m:e-i64:64-f80:12
 define void @test() {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  bb279:
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x float> undef, float undef, i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> [[TMP0]], float undef, i32 1
 ; CHECK-NEXT:    br label [[BB283:%.*]]
 ; CHECK:       bb283:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP11:%.*]], [[EXIT:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP13:%.*]], [[EXIT]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x float> [ undef, [[BB279:%.*]] ], [ [[TMP13:%.*]], [[EXIT:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x float> [ undef, [[BB279]] ], [ [[TMP1]], [[EXIT]] ]
 ; CHECK-NEXT:    br label [[BB284:%.*]]
 ; CHECK:       bb284:
-; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP0]] to <2 x double>
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef
+; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x double> [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP5]], undef
 ; CHECK-NEXT:    br label [[BB21_I:%.*]]
 ; CHECK:       bb21.i:
 ; CHECK-NEXT:    br i1 undef, label [[BB22_I:%.*]], label [[EXIT]]
 ; CHECK:       bb22.i:
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> undef, [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> undef, [[TMP6]]
 ; CHECK-NEXT:    br label [[BB32_I:%.*]]
 ; CHECK:       bb32.i:
-; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x double> [ [[TMP5]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x double> [ [[TMP7]], [[BB22_I]] ], [ zeroinitializer, [[BB32_I]] ]
 ; CHECK-NEXT:    br i1 undef, label [[BB32_I]], label [[BB21_I]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP7:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> undef, [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <2 x double> undef, [[TMP9]]
-; CHECK-NEXT:    [[TMP11]] = fptrunc <2 x double> [[TMP10]] to <2 x float>
-; CHECK-NEXT:    [[TMP317:%.*]] = fptrunc double undef to float
-; CHECK-NEXT:    [[TMP319:%.*]] = fptrunc double undef to float
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP317]], i32 0
-; CHECK-NEXT:    [[TMP13]] = insertelement <2 x float> [[TMP12]], float [[TMP319]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x double> <double undef, double 0.000000e+00>, [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = fadd <2 x double> undef, [[TMP11]]
+; CHECK-NEXT:    [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
 ; CHECK-NEXT:    br label [[BB283]]
 ;
 bb279:




More information about the llvm-commits mailing list