[llvm] 19c5cf4 - [SLP]Fix comparator for cmp instruction vectorization.

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 9 10:59:30 PST 2021


Author: Alexey Bataev
Date: 2021-12-09T10:57:57-08:00
New Revision: 19c5cf4167f645ac4f612a32abcc4d5a469cb214

URL: https://github.com/llvm/llvm-project/commit/19c5cf4167f645ac4f612a32abcc4d5a469cb214
DIFF: https://github.com/llvm/llvm-project/commit/19c5cf4167f645ac4f612a32abcc4d5a469cb214.diff

LOG: [SLP]Fix comparator for cmp instruction vectorization.

The comparator for the sort functions should provide a strict weak
ordering relation between its parameters. The current solution causes a
compiler crash with some standard C++ library implementations, because it
does not meet this criterion. Tried to fix it; the change also improves the
overall vectorization result.

Differential Revision: https://reviews.llvm.org/D115268

Added: 
    llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1ad1a7744af09..e1401171924b9 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9483,6 +9483,59 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
   return Changed;
 }
 
+/// Compare two cmp instructions. If IsCompatibility is true, function returns
+/// true if 2 cmps have same/swapped predicates and most compatible corresponding
+/// operands. If IsCompatibility is false, function implements strict weak
+/// ordering relation between two cmp instructions, returning true if the first
+/// instruction is "less" than the second, i.e. its predicate is less than the
+/// predicate of the second or the operand IDs are less than the operand IDs
+/// of the second cmp instruction.
+template <bool IsCompatibility>
+static bool compareCmp(Value *V, Value *V2,
+                       function_ref<bool(Instruction *)> IsDeleted) {
+  auto *CI1 = cast<CmpInst>(V);
+  auto *CI2 = cast<CmpInst>(V2);
+  if (IsDeleted(CI2) || !isValidElementType(CI2->getType()))
+    return false;
+  if (CI1->getOperand(0)->getType()->getTypeID() <
+      CI2->getOperand(0)->getType()->getTypeID())
+    return !IsCompatibility;
+  if (CI1->getOperand(0)->getType()->getTypeID() >
+      CI2->getOperand(0)->getType()->getTypeID())
+    return false;
+  CmpInst::Predicate Pred1 = CI1->getPredicate();
+  CmpInst::Predicate Pred2 = CI2->getPredicate();
+  CmpInst::Predicate SwapPred1 = CmpInst::getSwappedPredicate(Pred1);
+  CmpInst::Predicate SwapPred2 = CmpInst::getSwappedPredicate(Pred2);
+  CmpInst::Predicate BasePred1 = std::min(Pred1, SwapPred1);
+  CmpInst::Predicate BasePred2 = std::min(Pred2, SwapPred2);
+  if (BasePred1 < BasePred2)
+    return !IsCompatibility;
+  if (BasePred1 > BasePred2)
+    return false;
+  // Compare operands.
+  bool LEPreds = Pred1 <= Pred2;
+  bool GEPreds = Pred1 >= Pred2;
+  for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
+    auto *Op1 = CI1->getOperand(LEPreds ? I : E - I - 1);
+    auto *Op2 = CI2->getOperand(GEPreds ? I : E - I - 1);
+    if (Op1->getValueID() < Op2->getValueID())
+      return !IsCompatibility;
+    if (Op1->getValueID() > Op2->getValueID())
+      return false;
+    if (auto *I1 = dyn_cast<Instruction>(Op1))
+      if (auto *I2 = dyn_cast<Instruction>(Op2)) {
+        if (I1->getParent() != I2->getParent())
+          return false;
+        InstructionsState S = getSameOpcode({I1, I2});
+        if (S.getOpcode())
+          continue;
+        return false;
+      }
+  }
+  return IsCompatibility;
+}
+
 bool SLPVectorizerPass::vectorizeSimpleInstructions(
     SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
     bool AtTerminator) {
@@ -9514,37 +9567,16 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
     }
     // Try to vectorize list of compares.
     // Sort by type, compare predicate, etc.
-    // TODO: Add analysis on the operand opcodes (profitable to vectorize
-    // instructions with same/alternate opcodes/const values).
     auto &&CompareSorter = [&R](Value *V, Value *V2) {
-      auto *CI1 = cast<CmpInst>(V);
-      auto *CI2 = cast<CmpInst>(V2);
-      if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
-        return false;
-      if (CI1->getOperand(0)->getType()->getTypeID() <
-          CI2->getOperand(0)->getType()->getTypeID())
-        return true;
-      if (CI1->getOperand(0)->getType()->getTypeID() >
-          CI2->getOperand(0)->getType()->getTypeID())
-        return false;
-      return CI1->getPredicate() < CI2->getPredicate() ||
-             (CI1->getPredicate() > CI2->getPredicate() &&
-              CI1->getPredicate() <
-                  CmpInst::getSwappedPredicate(CI2->getPredicate()));
+      return compareCmp<false>(V, V2,
+                               [&R](Instruction *I) { return R.isDeleted(I); });
     };
 
     auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
       if (V1 == V2)
         return true;
-      auto *CI1 = cast<CmpInst>(V1);
-      auto *CI2 = cast<CmpInst>(V2);
-      if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
-        return false;
-      if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType())
-        return false;
-      return CI1->getPredicate() == CI2->getPredicate() ||
-             CI1->getPredicate() ==
-                 CmpInst::getSwappedPredicate(CI2->getPredicate());
+      return compareCmp<true>(V1, V2,
+                              [&R](Instruction *I) { return R.isDeleted(I); });
     };
     auto Limit = [&R](Value *V) {
       unsigned EltSize = R.getVectorElementSize(V);

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
index 24aded3bea515..12d88ffea828b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/used-reduced-op.ll
@@ -21,233 +21,222 @@ define void @n() local_unnamed_addr #0 {
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 1), align 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 2), align 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 3, i64 3), align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0), align 16
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 1), align 4
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 2), align 8
-; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 3), align 4
-; CHECK-NEXT:    [[TMP17:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 0), align 16
-; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 1), align 4
-; CHECK-NEXT:    [[TMP19:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 2), align 8
-; CHECK-NEXT:    [[TMP20:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 5, i64 3), align 4
-; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 0), align 16
-; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 1), align 4
-; CHECK-NEXT:    [[TMP23:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 2), align 8
-; CHECK-NEXT:    [[TMP24:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 6, i64 3), align 4
-; CHECK-NEXT:    [[TMP25:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 0), align 16
-; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 1), align 4
-; CHECK-NEXT:    [[TMP27:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 2), align 8
-; CHECK-NEXT:    [[TMP28:%.*]] = load i32, i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 7, i64 3), align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([8 x [4 x i32]], [8 x [4 x i32]]* @k, i64 0, i64 4, i64 0) to <16 x i32>*), align 16
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_COND]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[B_0:%.*]] = phi i32 [ [[SPEC_SELECT8_3_7:%.*]], [[FOR_COND]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    [[TMP29:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[TMP30:%.*]] = add i32 [[TMP29]], -183
-; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP30]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP31]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP32:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]]
-; CHECK-NEXT:    [[TMP33:%.*]] = icmp slt <4 x i32> [[TMP32]], zeroinitializer
-; CHECK-NEXT:    [[TMP34:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP32]]
-; CHECK-NEXT:    [[TMP35:%.*]] = select <4 x i1> [[TMP33]], <4 x i32> [[TMP34]], <4 x i32> [[TMP32]]
-; CHECK-NEXT:    [[TMP36:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP35]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP36]], [[B_0]]
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP36]], i32 [[B_0]]
-; CHECK-NEXT:    [[SUB_116:%.*]] = sub i32 [[TMP30]], [[TMP1]]
-; CHECK-NEXT:    [[TMP37:%.*]] = icmp slt i32 [[SUB_116]], 0
+; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], -183
+; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP16]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = sub <4 x i32> [[SHUFFLE]], [[TMP0]]
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp slt <4 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT:    [[TMP19:%.*]] = sub nsw <4 x i32> zeroinitializer, [[TMP17]]
+; CHECK-NEXT:    [[TMP20:%.*]] = select <4 x i1> [[TMP18]], <4 x i32> [[TMP19]], <4 x i32> [[TMP17]]
+; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP20]])
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp slt i32 [[TMP21]], [[B_0]]
+; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP21]], i32 [[B_0]]
+; CHECK-NEXT:    [[SUB_116:%.*]] = sub i32 [[TMP15]], [[TMP1]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp slt i32 [[SUB_116]], 0
 ; CHECK-NEXT:    [[NEG_117:%.*]] = sub nsw i32 0, [[SUB_116]]
-; CHECK-NEXT:    [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[NEG_117]], i32 [[SUB_116]]
-; CHECK-NEXT:    [[CMP12_118:%.*]] = icmp slt i32 [[TMP38]], [[OP_EXTRA1]]
-; CHECK-NEXT:    [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP38]], i32 [[OP_EXTRA1]]
-; CHECK-NEXT:    [[SUB_1_1:%.*]] = sub i32 [[TMP30]], [[TMP2]]
-; CHECK-NEXT:    [[TMP39:%.*]] = icmp slt i32 [[SUB_1_1]], 0
+; CHECK-NEXT:    [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[NEG_117]], i32 [[SUB_116]]
+; CHECK-NEXT:    [[CMP12_118:%.*]] = icmp slt i32 [[TMP23]], [[OP_EXTRA1]]
+; CHECK-NEXT:    [[SPEC_SELECT8_120:%.*]] = select i1 [[CMP12_118]], i32 [[TMP23]], i32 [[OP_EXTRA1]]
+; CHECK-NEXT:    [[SUB_1_1:%.*]] = sub i32 [[TMP15]], [[TMP2]]
+; CHECK-NEXT:    [[TMP24:%.*]] = icmp slt i32 [[SUB_1_1]], 0
 ; CHECK-NEXT:    [[NEG_1_1:%.*]] = sub nsw i32 0, [[SUB_1_1]]
-; CHECK-NEXT:    [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
-; CHECK-NEXT:    [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP40]], [[SPEC_SELECT8_120]]
+; CHECK-NEXT:    [[TMP25:%.*]] = select i1 [[TMP24]], i32 [[NEG_1_1]], i32 [[SUB_1_1]]
+; CHECK-NEXT:    [[CMP12_1_1:%.*]] = icmp slt i32 [[TMP25]], [[SPEC_SELECT8_120]]
 ; CHECK-NEXT:    [[NARROW:%.*]] = or i1 [[CMP12_1_1]], [[CMP12_118]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP40]], i32 [[SPEC_SELECT8_120]]
-; CHECK-NEXT:    [[SUB_2_1:%.*]] = sub i32 [[TMP30]], [[TMP3]]
-; CHECK-NEXT:    [[TMP41:%.*]] = icmp slt i32 [[SUB_2_1]], 0
+; CHECK-NEXT:    [[SPEC_SELECT8_1_1:%.*]] = select i1 [[CMP12_1_1]], i32 [[TMP25]], i32 [[SPEC_SELECT8_120]]
+; CHECK-NEXT:    [[SUB_2_1:%.*]] = sub i32 [[TMP15]], [[TMP3]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp slt i32 [[SUB_2_1]], 0
 ; CHECK-NEXT:    [[NEG_2_1:%.*]] = sub nsw i32 0, [[SUB_2_1]]
-; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
-; CHECK-NEXT:    [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP26]], i32 [[NEG_2_1]], i32 [[SUB_2_1]]
+; CHECK-NEXT:    [[CMP12_2_1:%.*]] = icmp slt i32 [[TMP27]], [[SPEC_SELECT8_1_1]]
 ; CHECK-NEXT:    [[NARROW34:%.*]] = or i1 [[CMP12_2_1]], [[NARROW]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP42]], i32 [[SPEC_SELECT8_1_1]]
-; CHECK-NEXT:    [[SUB_3_1:%.*]] = sub i32 [[TMP30]], [[TMP4]]
-; CHECK-NEXT:    [[TMP43:%.*]] = icmp slt i32 [[SUB_3_1]], 0
+; CHECK-NEXT:    [[SPEC_SELECT8_2_1:%.*]] = select i1 [[CMP12_2_1]], i32 [[TMP27]], i32 [[SPEC_SELECT8_1_1]]
+; CHECK-NEXT:    [[SUB_3_1:%.*]] = sub i32 [[TMP15]], [[TMP4]]
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp slt i32 [[SUB_3_1]], 0
 ; CHECK-NEXT:    [[NEG_3_1:%.*]] = sub nsw i32 0, [[SUB_3_1]]
-; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
-; CHECK-NEXT:    [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT:    [[TMP29:%.*]] = select i1 [[TMP28]], i32 [[NEG_3_1]], i32 [[SUB_3_1]]
+; CHECK-NEXT:    [[CMP12_3_1:%.*]] = icmp slt i32 [[TMP29]], [[SPEC_SELECT8_2_1]]
 ; CHECK-NEXT:    [[NARROW35:%.*]] = or i1 [[CMP12_3_1]], [[NARROW34]]
 ; CHECK-NEXT:    [[SPEC_SELECT_3_1:%.*]] = zext i1 [[NARROW35]] to i32
-; CHECK-NEXT:    [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP44]], i32 [[SPEC_SELECT8_2_1]]
-; CHECK-NEXT:    [[SUB_222:%.*]] = sub i32 [[TMP30]], [[TMP5]]
-; CHECK-NEXT:    [[TMP45:%.*]] = icmp slt i32 [[SUB_222]], 0
+; CHECK-NEXT:    [[SPEC_SELECT8_3_1:%.*]] = select i1 [[CMP12_3_1]], i32 [[TMP29]], i32 [[SPEC_SELECT8_2_1]]
+; CHECK-NEXT:    [[SUB_222:%.*]] = sub i32 [[TMP15]], [[TMP5]]
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp slt i32 [[SUB_222]], 0
 ; CHECK-NEXT:    [[NEG_223:%.*]] = sub nsw i32 0, [[SUB_222]]
-; CHECK-NEXT:    [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[NEG_223]], i32 [[SUB_222]]
-; CHECK-NEXT:    [[CMP12_224:%.*]] = icmp slt i32 [[TMP46]], [[SPEC_SELECT8_3_1]]
-; CHECK-NEXT:    [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP46]], i32 [[SPEC_SELECT8_3_1]]
-; CHECK-NEXT:    [[SUB_1_2:%.*]] = sub i32 [[TMP30]], [[TMP6]]
-; CHECK-NEXT:    [[TMP47:%.*]] = icmp slt i32 [[SUB_1_2]], 0
+; CHECK-NEXT:    [[TMP31:%.*]] = select i1 [[TMP30]], i32 [[NEG_223]], i32 [[SUB_222]]
+; CHECK-NEXT:    [[CMP12_224:%.*]] = icmp slt i32 [[TMP31]], [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT:    [[SPEC_SELECT8_226:%.*]] = select i1 [[CMP12_224]], i32 [[TMP31]], i32 [[SPEC_SELECT8_3_1]]
+; CHECK-NEXT:    [[SUB_1_2:%.*]] = sub i32 [[TMP15]], [[TMP6]]
+; CHECK-NEXT:    [[TMP32:%.*]] = icmp slt i32 [[SUB_1_2]], 0
 ; CHECK-NEXT:    [[NEG_1_2:%.*]] = sub nsw i32 0, [[SUB_1_2]]
-; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
-; CHECK-NEXT:    [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP48]], [[SPEC_SELECT8_226]]
-; CHECK-NEXT:    [[TMP49:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP48]], i32 [[SPEC_SELECT8_226]]
-; CHECK-NEXT:    [[SUB_2_2:%.*]] = sub i32 [[TMP30]], [[TMP7]]
-; CHECK-NEXT:    [[TMP50:%.*]] = icmp slt i32 [[SUB_2_2]], 0
+; CHECK-NEXT:    [[TMP33:%.*]] = select i1 [[TMP32]], i32 [[NEG_1_2]], i32 [[SUB_1_2]]
+; CHECK-NEXT:    [[CMP12_1_2:%.*]] = icmp slt i32 [[TMP33]], [[SPEC_SELECT8_226]]
+; CHECK-NEXT:    [[TMP34:%.*]] = or i1 [[CMP12_1_2]], [[CMP12_224]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_2:%.*]] = select i1 [[CMP12_1_2]], i32 [[TMP33]], i32 [[SPEC_SELECT8_226]]
+; CHECK-NEXT:    [[SUB_2_2:%.*]] = sub i32 [[TMP15]], [[TMP7]]
+; CHECK-NEXT:    [[TMP35:%.*]] = icmp slt i32 [[SUB_2_2]], 0
 ; CHECK-NEXT:    [[NEG_2_2:%.*]] = sub nsw i32 0, [[SUB_2_2]]
-; CHECK-NEXT:    [[TMP51:%.*]] = select i1 [[TMP50]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
-; CHECK-NEXT:    [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP51]], [[SPEC_SELECT8_1_2]]
-; CHECK-NEXT:    [[TMP52:%.*]] = or i1 [[CMP12_2_2]], [[TMP49]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP51]], i32 [[SPEC_SELECT8_1_2]]
-; CHECK-NEXT:    [[SUB_3_2:%.*]] = sub i32 [[TMP30]], [[TMP8]]
-; CHECK-NEXT:    [[TMP53:%.*]] = icmp slt i32 [[SUB_3_2]], 0
+; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[NEG_2_2]], i32 [[SUB_2_2]]
+; CHECK-NEXT:    [[CMP12_2_2:%.*]] = icmp slt i32 [[TMP36]], [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT:    [[TMP37:%.*]] = or i1 [[CMP12_2_2]], [[TMP34]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_2:%.*]] = select i1 [[CMP12_2_2]], i32 [[TMP36]], i32 [[SPEC_SELECT8_1_2]]
+; CHECK-NEXT:    [[SUB_3_2:%.*]] = sub i32 [[TMP15]], [[TMP8]]
+; CHECK-NEXT:    [[TMP38:%.*]] = icmp slt i32 [[SUB_3_2]], 0
 ; CHECK-NEXT:    [[NEG_3_2:%.*]] = sub nsw i32 0, [[SUB_3_2]]
-; CHECK-NEXT:    [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
-; CHECK-NEXT:    [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP54]], [[SPEC_SELECT8_2_2]]
-; CHECK-NEXT:    [[TMP55:%.*]] = or i1 [[CMP12_3_2]], [[TMP52]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP55]], i32 2, i32 [[SPEC_SELECT_3_1]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP54]], i32 [[SPEC_SELECT8_2_2]]
-; CHECK-NEXT:    [[SUB_328:%.*]] = sub i32 [[TMP30]], [[TMP9]]
-; CHECK-NEXT:    [[TMP56:%.*]] = icmp slt i32 [[SUB_328]], 0
+; CHECK-NEXT:    [[TMP39:%.*]] = select i1 [[TMP38]], i32 [[NEG_3_2]], i32 [[SUB_3_2]]
+; CHECK-NEXT:    [[CMP12_3_2:%.*]] = icmp slt i32 [[TMP39]], [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT:    [[TMP40:%.*]] = or i1 [[CMP12_3_2]], [[TMP37]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_2:%.*]] = select i1 [[TMP40]], i32 2, i32 [[SPEC_SELECT_3_1]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_2:%.*]] = select i1 [[CMP12_3_2]], i32 [[TMP39]], i32 [[SPEC_SELECT8_2_2]]
+; CHECK-NEXT:    [[SUB_328:%.*]] = sub i32 [[TMP15]], [[TMP9]]
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp slt i32 [[SUB_328]], 0
 ; CHECK-NEXT:    [[NEG_329:%.*]] = sub nsw i32 0, [[SUB_328]]
-; CHECK-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_329]], i32 [[SUB_328]]
-; CHECK-NEXT:    [[CMP12_330:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_2]]
-; CHECK-NEXT:    [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_2]]
-; CHECK-NEXT:    [[SUB_1_3:%.*]] = sub i32 [[TMP30]], [[TMP10]]
-; CHECK-NEXT:    [[TMP58:%.*]] = icmp slt i32 [[SUB_1_3]], 0
+; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[NEG_329]], i32 [[SUB_328]]
+; CHECK-NEXT:    [[CMP12_330:%.*]] = icmp slt i32 [[TMP42]], [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT:    [[SPEC_SELECT8_332:%.*]] = select i1 [[CMP12_330]], i32 [[TMP42]], i32 [[SPEC_SELECT8_3_2]]
+; CHECK-NEXT:    [[SUB_1_3:%.*]] = sub i32 [[TMP15]], [[TMP10]]
+; CHECK-NEXT:    [[TMP43:%.*]] = icmp slt i32 [[SUB_1_3]], 0
 ; CHECK-NEXT:    [[NEG_1_3:%.*]] = sub nsw i32 0, [[SUB_1_3]]
-; CHECK-NEXT:    [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
-; CHECK-NEXT:    [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP59]], [[SPEC_SELECT8_332]]
-; CHECK-NEXT:    [[TMP60:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP59]], i32 [[SPEC_SELECT8_332]]
-; CHECK-NEXT:    [[SUB_2_3:%.*]] = sub i32 [[TMP30]], [[TMP11]]
-; CHECK-NEXT:    [[TMP61:%.*]] = icmp slt i32 [[SUB_2_3]], 0
+; CHECK-NEXT:    [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[NEG_1_3]], i32 [[SUB_1_3]]
+; CHECK-NEXT:    [[CMP12_1_3:%.*]] = icmp slt i32 [[TMP44]], [[SPEC_SELECT8_332]]
+; CHECK-NEXT:    [[TMP45:%.*]] = or i1 [[CMP12_1_3]], [[CMP12_330]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_3:%.*]] = select i1 [[CMP12_1_3]], i32 [[TMP44]], i32 [[SPEC_SELECT8_332]]
+; CHECK-NEXT:    [[SUB_2_3:%.*]] = sub i32 [[TMP15]], [[TMP11]]
+; CHECK-NEXT:    [[TMP46:%.*]] = icmp slt i32 [[SUB_2_3]], 0
 ; CHECK-NEXT:    [[NEG_2_3:%.*]] = sub nsw i32 0, [[SUB_2_3]]
-; CHECK-NEXT:    [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
-; CHECK-NEXT:    [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP62]], [[SPEC_SELECT8_1_3]]
-; CHECK-NEXT:    [[TMP63:%.*]] = or i1 [[CMP12_2_3]], [[TMP60]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP62]], i32 [[SPEC_SELECT8_1_3]]
-; CHECK-NEXT:    [[SUB_3_3:%.*]] = sub i32 [[TMP30]], [[TMP12]]
-; CHECK-NEXT:    [[TMP64:%.*]] = icmp slt i32 [[SUB_3_3]], 0
+; CHECK-NEXT:    [[TMP47:%.*]] = select i1 [[TMP46]], i32 [[NEG_2_3]], i32 [[SUB_2_3]]
+; CHECK-NEXT:    [[CMP12_2_3:%.*]] = icmp slt i32 [[TMP47]], [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT:    [[TMP48:%.*]] = or i1 [[CMP12_2_3]], [[TMP45]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_3:%.*]] = select i1 [[CMP12_2_3]], i32 [[TMP47]], i32 [[SPEC_SELECT8_1_3]]
+; CHECK-NEXT:    [[SUB_3_3:%.*]] = sub i32 [[TMP15]], [[TMP12]]
+; CHECK-NEXT:    [[TMP49:%.*]] = icmp slt i32 [[SUB_3_3]], 0
 ; CHECK-NEXT:    [[NEG_3_3:%.*]] = sub nsw i32 0, [[SUB_3_3]]
-; CHECK-NEXT:    [[TMP65:%.*]] = select i1 [[TMP64]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
-; CHECK-NEXT:    [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP65]], [[SPEC_SELECT8_2_3]]
-; CHECK-NEXT:    [[TMP66:%.*]] = or i1 [[CMP12_3_3]], [[TMP63]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP66]], i32 3, i32 [[SPEC_SELECT_3_2]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP65]], i32 [[SPEC_SELECT8_2_3]]
-; CHECK-NEXT:    [[SUB_4:%.*]] = sub i32 [[TMP30]], [[TMP13]]
-; CHECK-NEXT:    [[TMP67:%.*]] = icmp slt i32 [[SUB_4]], 0
-; CHECK-NEXT:    [[NEG_4:%.*]] = sub nsw i32 0, [[SUB_4]]
-; CHECK-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_4]], i32 [[SUB_4]]
-; CHECK-NEXT:    [[CMP12_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_3_3]]
-; CHECK-NEXT:    [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_3_3]]
-; CHECK-NEXT:    [[SUB_1_4:%.*]] = sub i32 [[TMP30]], [[TMP14]]
-; CHECK-NEXT:    [[TMP69:%.*]] = icmp slt i32 [[SUB_1_4]], 0
-; CHECK-NEXT:    [[NEG_1_4:%.*]] = sub nsw i32 0, [[SUB_1_4]]
-; CHECK-NEXT:    [[TMP70:%.*]] = select i1 [[TMP69]], i32 [[NEG_1_4]], i32 [[SUB_1_4]]
-; CHECK-NEXT:    [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP70]], [[SPEC_SELECT8_4]]
-; CHECK-NEXT:    [[TMP71:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP70]], i32 [[SPEC_SELECT8_4]]
-; CHECK-NEXT:    [[SUB_2_4:%.*]] = sub i32 [[TMP30]], [[TMP15]]
-; CHECK-NEXT:    [[TMP72:%.*]] = icmp slt i32 [[SUB_2_4]], 0
-; CHECK-NEXT:    [[NEG_2_4:%.*]] = sub nsw i32 0, [[SUB_2_4]]
-; CHECK-NEXT:    [[TMP73:%.*]] = select i1 [[TMP72]], i32 [[NEG_2_4]], i32 [[SUB_2_4]]
-; CHECK-NEXT:    [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP73]], [[SPEC_SELECT8_1_4]]
-; CHECK-NEXT:    [[TMP74:%.*]] = or i1 [[CMP12_2_4]], [[TMP71]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP73]], i32 [[SPEC_SELECT8_1_4]]
-; CHECK-NEXT:    [[SUB_3_4:%.*]] = sub i32 [[TMP30]], [[TMP16]]
-; CHECK-NEXT:    [[TMP75:%.*]] = icmp slt i32 [[SUB_3_4]], 0
-; CHECK-NEXT:    [[NEG_3_4:%.*]] = sub nsw i32 0, [[SUB_3_4]]
-; CHECK-NEXT:    [[TMP76:%.*]] = select i1 [[TMP75]], i32 [[NEG_3_4]], i32 [[SUB_3_4]]
-; CHECK-NEXT:    [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP76]], [[SPEC_SELECT8_2_4]]
-; CHECK-NEXT:    [[TMP77:%.*]] = or i1 [[CMP12_3_4]], [[TMP74]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP77]], i32 4, i32 [[SPEC_SELECT_3_3]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP76]], i32 [[SPEC_SELECT8_2_4]]
-; CHECK-NEXT:    [[SUB_5:%.*]] = sub i32 [[TMP30]], [[TMP17]]
-; CHECK-NEXT:    [[TMP78:%.*]] = icmp slt i32 [[SUB_5]], 0
-; CHECK-NEXT:    [[NEG_5:%.*]] = sub nsw i32 0, [[SUB_5]]
-; CHECK-NEXT:    [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_5]], i32 [[SUB_5]]
-; CHECK-NEXT:    [[CMP12_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_3_4]]
-; CHECK-NEXT:    [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_3_4]]
-; CHECK-NEXT:    [[SUB_1_5:%.*]] = sub i32 [[TMP30]], [[TMP18]]
-; CHECK-NEXT:    [[TMP80:%.*]] = icmp slt i32 [[SUB_1_5]], 0
-; CHECK-NEXT:    [[NEG_1_5:%.*]] = sub nsw i32 0, [[SUB_1_5]]
-; CHECK-NEXT:    [[TMP81:%.*]] = select i1 [[TMP80]], i32 [[NEG_1_5]], i32 [[SUB_1_5]]
-; CHECK-NEXT:    [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP81]], [[SPEC_SELECT8_5]]
-; CHECK-NEXT:    [[TMP82:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP81]], i32 [[SPEC_SELECT8_5]]
-; CHECK-NEXT:    [[SUB_2_5:%.*]] = sub i32 [[TMP30]], [[TMP19]]
-; CHECK-NEXT:    [[TMP83:%.*]] = icmp slt i32 [[SUB_2_5]], 0
-; CHECK-NEXT:    [[NEG_2_5:%.*]] = sub nsw i32 0, [[SUB_2_5]]
-; CHECK-NEXT:    [[TMP84:%.*]] = select i1 [[TMP83]], i32 [[NEG_2_5]], i32 [[SUB_2_5]]
-; CHECK-NEXT:    [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP84]], [[SPEC_SELECT8_1_5]]
-; CHECK-NEXT:    [[TMP85:%.*]] = or i1 [[CMP12_2_5]], [[TMP82]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP84]], i32 [[SPEC_SELECT8_1_5]]
-; CHECK-NEXT:    [[SUB_3_5:%.*]] = sub i32 [[TMP30]], [[TMP20]]
-; CHECK-NEXT:    [[TMP86:%.*]] = icmp slt i32 [[SUB_3_5]], 0
-; CHECK-NEXT:    [[NEG_3_5:%.*]] = sub nsw i32 0, [[SUB_3_5]]
-; CHECK-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_3_5]], i32 [[SUB_3_5]]
-; CHECK-NEXT:    [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_2_5]]
-; CHECK-NEXT:    [[TMP88:%.*]] = or i1 [[CMP12_3_5]], [[TMP85]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP88]], i32 5, i32 [[SPEC_SELECT_3_4]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP87]], i32 [[SPEC_SELECT8_2_5]]
-; CHECK-NEXT:    [[SUB_6:%.*]] = sub i32 [[TMP30]], [[TMP21]]
-; CHECK-NEXT:    [[TMP89:%.*]] = icmp slt i32 [[SUB_6]], 0
-; CHECK-NEXT:    [[NEG_6:%.*]] = sub nsw i32 0, [[SUB_6]]
-; CHECK-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_6]], i32 [[SUB_6]]
-; CHECK-NEXT:    [[CMP12_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_3_5]]
-; CHECK-NEXT:    [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_3_5]]
-; CHECK-NEXT:    [[SUB_1_6:%.*]] = sub i32 [[TMP30]], [[TMP22]]
-; CHECK-NEXT:    [[TMP91:%.*]] = icmp slt i32 [[SUB_1_6]], 0
-; CHECK-NEXT:    [[NEG_1_6:%.*]] = sub nsw i32 0, [[SUB_1_6]]
-; CHECK-NEXT:    [[TMP92:%.*]] = select i1 [[TMP91]], i32 [[NEG_1_6]], i32 [[SUB_1_6]]
-; CHECK-NEXT:    [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP92]], [[SPEC_SELECT8_6]]
-; CHECK-NEXT:    [[TMP93:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP92]], i32 [[SPEC_SELECT8_6]]
-; CHECK-NEXT:    [[SUB_2_6:%.*]] = sub i32 [[TMP30]], [[TMP23]]
-; CHECK-NEXT:    [[TMP94:%.*]] = icmp slt i32 [[SUB_2_6]], 0
-; CHECK-NEXT:    [[NEG_2_6:%.*]] = sub nsw i32 0, [[SUB_2_6]]
-; CHECK-NEXT:    [[TMP95:%.*]] = select i1 [[TMP94]], i32 [[NEG_2_6]], i32 [[SUB_2_6]]
-; CHECK-NEXT:    [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP95]], [[SPEC_SELECT8_1_6]]
-; CHECK-NEXT:    [[TMP96:%.*]] = or i1 [[CMP12_2_6]], [[TMP93]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP95]], i32 [[SPEC_SELECT8_1_6]]
-; CHECK-NEXT:    [[SUB_3_6:%.*]] = sub i32 [[TMP30]], [[TMP24]]
-; CHECK-NEXT:    [[TMP97:%.*]] = icmp slt i32 [[SUB_3_6]], 0
-; CHECK-NEXT:    [[NEG_3_6:%.*]] = sub nsw i32 0, [[SUB_3_6]]
-; CHECK-NEXT:    [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[SUB_3_6]]
+; CHECK-NEXT:    [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[NEG_3_3]], i32 [[SUB_3_3]]
+; CHECK-NEXT:    [[CMP12_3_3:%.*]] = icmp slt i32 [[TMP50]], [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT:    [[TMP51:%.*]] = or i1 [[CMP12_3_3]], [[TMP48]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_3:%.*]] = select i1 [[TMP51]], i32 3, i32 [[SPEC_SELECT_3_2]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_3:%.*]] = select i1 [[CMP12_3_3]], i32 [[TMP50]], i32 [[SPEC_SELECT8_2_3]]
+; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <16 x i32> poison, i32 [[TMP15]], i32 0
+; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <16 x i32> [[TMP52]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP53:%.*]] = sub <16 x i32> [[SHUFFLE2]], [[TMP13]]
+; CHECK-NEXT:    [[TMP54:%.*]] = extractelement <16 x i32> [[TMP53]], i32 0
+; CHECK-NEXT:    [[NEG_4:%.*]] = sub nsw i32 0, [[TMP54]]
+; CHECK-NEXT:    [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP53]], zeroinitializer
+; CHECK-NEXT:    [[TMP56:%.*]] = extractelement <16 x i1> [[TMP55]], i32 0
+; CHECK-NEXT:    [[TMP57:%.*]] = select i1 [[TMP56]], i32 [[NEG_4]], i32 [[TMP54]]
+; CHECK-NEXT:    [[CMP12_4:%.*]] = icmp slt i32 [[TMP57]], [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT:    [[SPEC_SELECT8_4:%.*]] = select i1 [[CMP12_4]], i32 [[TMP57]], i32 [[SPEC_SELECT8_3_3]]
+; CHECK-NEXT:    [[TMP58:%.*]] = extractelement <16 x i32> [[TMP53]], i32 1
+; CHECK-NEXT:    [[NEG_1_4:%.*]] = sub nsw i32 0, [[TMP58]]
+; CHECK-NEXT:    [[TMP59:%.*]] = extractelement <16 x i1> [[TMP55]], i32 1
+; CHECK-NEXT:    [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[NEG_1_4]], i32 [[TMP58]]
+; CHECK-NEXT:    [[CMP12_1_4:%.*]] = icmp slt i32 [[TMP60]], [[SPEC_SELECT8_4]]
+; CHECK-NEXT:    [[TMP61:%.*]] = or i1 [[CMP12_1_4]], [[CMP12_4]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_4:%.*]] = select i1 [[CMP12_1_4]], i32 [[TMP60]], i32 [[SPEC_SELECT8_4]]
+; CHECK-NEXT:    [[TMP62:%.*]] = extractelement <16 x i32> [[TMP53]], i32 2
+; CHECK-NEXT:    [[NEG_2_4:%.*]] = sub nsw i32 0, [[TMP62]]
+; CHECK-NEXT:    [[TMP63:%.*]] = extractelement <16 x i1> [[TMP55]], i32 2
+; CHECK-NEXT:    [[TMP64:%.*]] = select i1 [[TMP63]], i32 [[NEG_2_4]], i32 [[TMP62]]
+; CHECK-NEXT:    [[CMP12_2_4:%.*]] = icmp slt i32 [[TMP64]], [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT:    [[TMP65:%.*]] = or i1 [[CMP12_2_4]], [[TMP61]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_4:%.*]] = select i1 [[CMP12_2_4]], i32 [[TMP64]], i32 [[SPEC_SELECT8_1_4]]
+; CHECK-NEXT:    [[TMP66:%.*]] = extractelement <16 x i32> [[TMP53]], i32 3
+; CHECK-NEXT:    [[NEG_3_4:%.*]] = sub nsw i32 0, [[TMP66]]
+; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <16 x i1> [[TMP55]], i32 3
+; CHECK-NEXT:    [[TMP68:%.*]] = select i1 [[TMP67]], i32 [[NEG_3_4]], i32 [[TMP66]]
+; CHECK-NEXT:    [[CMP12_3_4:%.*]] = icmp slt i32 [[TMP68]], [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT:    [[TMP69:%.*]] = or i1 [[CMP12_3_4]], [[TMP65]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_4:%.*]] = select i1 [[TMP69]], i32 4, i32 [[SPEC_SELECT_3_3]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_4:%.*]] = select i1 [[CMP12_3_4]], i32 [[TMP68]], i32 [[SPEC_SELECT8_2_4]]
+; CHECK-NEXT:    [[TMP70:%.*]] = extractelement <16 x i32> [[TMP53]], i32 4
+; CHECK-NEXT:    [[NEG_5:%.*]] = sub nsw i32 0, [[TMP70]]
+; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <16 x i1> [[TMP55]], i32 4
+; CHECK-NEXT:    [[TMP72:%.*]] = select i1 [[TMP71]], i32 [[NEG_5]], i32 [[TMP70]]
+; CHECK-NEXT:    [[CMP12_5:%.*]] = icmp slt i32 [[TMP72]], [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT:    [[SPEC_SELECT8_5:%.*]] = select i1 [[CMP12_5]], i32 [[TMP72]], i32 [[SPEC_SELECT8_3_4]]
+; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <16 x i32> [[TMP53]], i32 5
+; CHECK-NEXT:    [[NEG_1_5:%.*]] = sub nsw i32 0, [[TMP73]]
+; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <16 x i1> [[TMP55]], i32 5
+; CHECK-NEXT:    [[TMP75:%.*]] = select i1 [[TMP74]], i32 [[NEG_1_5]], i32 [[TMP73]]
+; CHECK-NEXT:    [[CMP12_1_5:%.*]] = icmp slt i32 [[TMP75]], [[SPEC_SELECT8_5]]
+; CHECK-NEXT:    [[TMP76:%.*]] = or i1 [[CMP12_1_5]], [[CMP12_5]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_5:%.*]] = select i1 [[CMP12_1_5]], i32 [[TMP75]], i32 [[SPEC_SELECT8_5]]
+; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <16 x i32> [[TMP53]], i32 6
+; CHECK-NEXT:    [[NEG_2_5:%.*]] = sub nsw i32 0, [[TMP77]]
+; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <16 x i1> [[TMP55]], i32 6
+; CHECK-NEXT:    [[TMP79:%.*]] = select i1 [[TMP78]], i32 [[NEG_2_5]], i32 [[TMP77]]
+; CHECK-NEXT:    [[CMP12_2_5:%.*]] = icmp slt i32 [[TMP79]], [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT:    [[TMP80:%.*]] = or i1 [[CMP12_2_5]], [[TMP76]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_5:%.*]] = select i1 [[CMP12_2_5]], i32 [[TMP79]], i32 [[SPEC_SELECT8_1_5]]
+; CHECK-NEXT:    [[TMP81:%.*]] = extractelement <16 x i32> [[TMP53]], i32 7
+; CHECK-NEXT:    [[NEG_3_5:%.*]] = sub nsw i32 0, [[TMP81]]
+; CHECK-NEXT:    [[TMP82:%.*]] = extractelement <16 x i1> [[TMP55]], i32 7
+; CHECK-NEXT:    [[TMP83:%.*]] = select i1 [[TMP82]], i32 [[NEG_3_5]], i32 [[TMP81]]
+; CHECK-NEXT:    [[CMP12_3_5:%.*]] = icmp slt i32 [[TMP83]], [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT:    [[TMP84:%.*]] = or i1 [[CMP12_3_5]], [[TMP80]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_5:%.*]] = select i1 [[TMP84]], i32 5, i32 [[SPEC_SELECT_3_4]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_5:%.*]] = select i1 [[CMP12_3_5]], i32 [[TMP83]], i32 [[SPEC_SELECT8_2_5]]
+; CHECK-NEXT:    [[TMP85:%.*]] = extractelement <16 x i32> [[TMP53]], i32 8
+; CHECK-NEXT:    [[NEG_6:%.*]] = sub nsw i32 0, [[TMP85]]
+; CHECK-NEXT:    [[TMP86:%.*]] = extractelement <16 x i1> [[TMP55]], i32 8
+; CHECK-NEXT:    [[TMP87:%.*]] = select i1 [[TMP86]], i32 [[NEG_6]], i32 [[TMP85]]
+; CHECK-NEXT:    [[CMP12_6:%.*]] = icmp slt i32 [[TMP87]], [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT:    [[SPEC_SELECT8_6:%.*]] = select i1 [[CMP12_6]], i32 [[TMP87]], i32 [[SPEC_SELECT8_3_5]]
+; CHECK-NEXT:    [[TMP88:%.*]] = extractelement <16 x i32> [[TMP53]], i32 9
+; CHECK-NEXT:    [[NEG_1_6:%.*]] = sub nsw i32 0, [[TMP88]]
+; CHECK-NEXT:    [[TMP89:%.*]] = extractelement <16 x i1> [[TMP55]], i32 9
+; CHECK-NEXT:    [[TMP90:%.*]] = select i1 [[TMP89]], i32 [[NEG_1_6]], i32 [[TMP88]]
+; CHECK-NEXT:    [[CMP12_1_6:%.*]] = icmp slt i32 [[TMP90]], [[SPEC_SELECT8_6]]
+; CHECK-NEXT:    [[TMP91:%.*]] = or i1 [[CMP12_1_6]], [[CMP12_6]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_6:%.*]] = select i1 [[CMP12_1_6]], i32 [[TMP90]], i32 [[SPEC_SELECT8_6]]
+; CHECK-NEXT:    [[TMP92:%.*]] = extractelement <16 x i32> [[TMP53]], i32 10
+; CHECK-NEXT:    [[NEG_2_6:%.*]] = sub nsw i32 0, [[TMP92]]
+; CHECK-NEXT:    [[TMP93:%.*]] = extractelement <16 x i1> [[TMP55]], i32 10
+; CHECK-NEXT:    [[TMP94:%.*]] = select i1 [[TMP93]], i32 [[NEG_2_6]], i32 [[TMP92]]
+; CHECK-NEXT:    [[CMP12_2_6:%.*]] = icmp slt i32 [[TMP94]], [[SPEC_SELECT8_1_6]]
+; CHECK-NEXT:    [[TMP95:%.*]] = or i1 [[CMP12_2_6]], [[TMP91]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_6:%.*]] = select i1 [[CMP12_2_6]], i32 [[TMP94]], i32 [[SPEC_SELECT8_1_6]]
+; CHECK-NEXT:    [[TMP96:%.*]] = extractelement <16 x i32> [[TMP53]], i32 11
+; CHECK-NEXT:    [[NEG_3_6:%.*]] = sub nsw i32 0, [[TMP96]]
+; CHECK-NEXT:    [[TMP97:%.*]] = extractelement <16 x i1> [[TMP55]], i32 11
+; CHECK-NEXT:    [[TMP98:%.*]] = select i1 [[TMP97]], i32 [[NEG_3_6]], i32 [[TMP96]]
 ; CHECK-NEXT:    [[CMP12_3_6:%.*]] = icmp slt i32 [[TMP98]], [[SPEC_SELECT8_2_6]]
-; CHECK-NEXT:    [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP96]]
+; CHECK-NEXT:    [[TMP99:%.*]] = or i1 [[CMP12_3_6]], [[TMP95]]
 ; CHECK-NEXT:    [[SPEC_SELECT_3_6:%.*]] = select i1 [[TMP99]], i32 6, i32 [[SPEC_SELECT_3_5]]
 ; CHECK-NEXT:    [[SPEC_SELECT8_3_6:%.*]] = select i1 [[CMP12_3_6]], i32 [[TMP98]], i32 [[SPEC_SELECT8_2_6]]
-; CHECK-NEXT:    [[SUB_7:%.*]] = sub i32 [[TMP30]], [[TMP25]]
-; CHECK-NEXT:    [[TMP100:%.*]] = icmp slt i32 [[SUB_7]], 0
-; CHECK-NEXT:    [[NEG_7:%.*]] = sub nsw i32 0, [[SUB_7]]
-; CHECK-NEXT:    [[TMP101:%.*]] = select i1 [[TMP100]], i32 [[NEG_7]], i32 [[SUB_7]]
-; CHECK-NEXT:    [[CMP12_7:%.*]] = icmp slt i32 [[TMP101]], [[SPEC_SELECT8_3_6]]
-; CHECK-NEXT:    [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP101]], i32 [[SPEC_SELECT8_3_6]]
-; CHECK-NEXT:    [[SUB_1_7:%.*]] = sub i32 [[TMP30]], [[TMP26]]
-; CHECK-NEXT:    [[TMP102:%.*]] = icmp slt i32 [[SUB_1_7]], 0
-; CHECK-NEXT:    [[NEG_1_7:%.*]] = sub nsw i32 0, [[SUB_1_7]]
-; CHECK-NEXT:    [[TMP103:%.*]] = select i1 [[TMP102]], i32 [[NEG_1_7]], i32 [[SUB_1_7]]
-; CHECK-NEXT:    [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP103]], [[SPEC_SELECT8_7]]
-; CHECK-NEXT:    [[TMP104:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
-; CHECK-NEXT:    [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP103]], i32 [[SPEC_SELECT8_7]]
-; CHECK-NEXT:    [[SUB_2_7:%.*]] = sub i32 [[TMP30]], [[TMP27]]
-; CHECK-NEXT:    [[TMP105:%.*]] = icmp slt i32 [[SUB_2_7]], 0
-; CHECK-NEXT:    [[NEG_2_7:%.*]] = sub nsw i32 0, [[SUB_2_7]]
-; CHECK-NEXT:    [[TMP106:%.*]] = select i1 [[TMP105]], i32 [[NEG_2_7]], i32 [[SUB_2_7]]
-; CHECK-NEXT:    [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP106]], [[SPEC_SELECT8_1_7]]
-; CHECK-NEXT:    [[TMP107:%.*]] = or i1 [[CMP12_2_7]], [[TMP104]]
-; CHECK-NEXT:    [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP106]], i32 [[SPEC_SELECT8_1_7]]
-; CHECK-NEXT:    [[SUB_3_7:%.*]] = sub i32 [[TMP30]], [[TMP28]]
-; CHECK-NEXT:    [[TMP108:%.*]] = icmp slt i32 [[SUB_3_7]], 0
-; CHECK-NEXT:    [[NEG_3_7:%.*]] = sub nsw i32 0, [[SUB_3_7]]
-; CHECK-NEXT:    [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_3_7]], i32 [[SUB_3_7]]
-; CHECK-NEXT:    [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_2_7]]
-; CHECK-NEXT:    [[TMP110:%.*]] = or i1 [[CMP12_3_7]], [[TMP107]]
-; CHECK-NEXT:    [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP110]], i32 7, i32 [[SPEC_SELECT_3_6]]
-; CHECK-NEXT:    [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_2_7]]
+; CHECK-NEXT:    [[TMP100:%.*]] = extractelement <16 x i32> [[TMP53]], i32 12
+; CHECK-NEXT:    [[NEG_7:%.*]] = sub nsw i32 0, [[TMP100]]
+; CHECK-NEXT:    [[TMP101:%.*]] = extractelement <16 x i1> [[TMP55]], i32 12
+; CHECK-NEXT:    [[TMP102:%.*]] = select i1 [[TMP101]], i32 [[NEG_7]], i32 [[TMP100]]
+; CHECK-NEXT:    [[CMP12_7:%.*]] = icmp slt i32 [[TMP102]], [[SPEC_SELECT8_3_6]]
+; CHECK-NEXT:    [[SPEC_SELECT8_7:%.*]] = select i1 [[CMP12_7]], i32 [[TMP102]], i32 [[SPEC_SELECT8_3_6]]
+; CHECK-NEXT:    [[TMP103:%.*]] = extractelement <16 x i32> [[TMP53]], i32 13
+; CHECK-NEXT:    [[NEG_1_7:%.*]] = sub nsw i32 0, [[TMP103]]
+; CHECK-NEXT:    [[TMP104:%.*]] = extractelement <16 x i1> [[TMP55]], i32 13
+; CHECK-NEXT:    [[TMP105:%.*]] = select i1 [[TMP104]], i32 [[NEG_1_7]], i32 [[TMP103]]
+; CHECK-NEXT:    [[CMP12_1_7:%.*]] = icmp slt i32 [[TMP105]], [[SPEC_SELECT8_7]]
+; CHECK-NEXT:    [[TMP106:%.*]] = or i1 [[CMP12_1_7]], [[CMP12_7]]
+; CHECK-NEXT:    [[SPEC_SELECT8_1_7:%.*]] = select i1 [[CMP12_1_7]], i32 [[TMP105]], i32 [[SPEC_SELECT8_7]]
+; CHECK-NEXT:    [[TMP107:%.*]] = extractelement <16 x i32> [[TMP53]], i32 14
+; CHECK-NEXT:    [[NEG_2_7:%.*]] = sub nsw i32 0, [[TMP107]]
+; CHECK-NEXT:    [[TMP108:%.*]] = extractelement <16 x i1> [[TMP55]], i32 14
+; CHECK-NEXT:    [[TMP109:%.*]] = select i1 [[TMP108]], i32 [[NEG_2_7]], i32 [[TMP107]]
+; CHECK-NEXT:    [[CMP12_2_7:%.*]] = icmp slt i32 [[TMP109]], [[SPEC_SELECT8_1_7]]
+; CHECK-NEXT:    [[TMP110:%.*]] = or i1 [[CMP12_2_7]], [[TMP106]]
+; CHECK-NEXT:    [[SPEC_SELECT8_2_7:%.*]] = select i1 [[CMP12_2_7]], i32 [[TMP109]], i32 [[SPEC_SELECT8_1_7]]
+; CHECK-NEXT:    [[TMP111:%.*]] = extractelement <16 x i32> [[TMP53]], i32 15
+; CHECK-NEXT:    [[NEG_3_7:%.*]] = sub nsw i32 0, [[TMP111]]
+; CHECK-NEXT:    [[TMP112:%.*]] = extractelement <16 x i1> [[TMP55]], i32 15
+; CHECK-NEXT:    [[TMP113:%.*]] = select i1 [[TMP112]], i32 [[NEG_3_7]], i32 [[TMP111]]
+; CHECK-NEXT:    [[CMP12_3_7:%.*]] = icmp slt i32 [[TMP113]], [[SPEC_SELECT8_2_7]]
+; CHECK-NEXT:    [[TMP114:%.*]] = or i1 [[CMP12_3_7]], [[TMP110]]
+; CHECK-NEXT:    [[SPEC_SELECT_3_7:%.*]] = select i1 [[TMP114]], i32 7, i32 [[SPEC_SELECT_3_6]]
+; CHECK-NEXT:    [[SPEC_SELECT8_3_7]] = select i1 [[CMP12_3_7]], i32 [[TMP113]], i32 [[SPEC_SELECT8_2_7]]
 ; CHECK-NEXT:    [[K:%.*]] = getelementptr inbounds [366 x i32], [366 x i32]* @l, i64 0, i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store i32 [[SPEC_SELECT_3_7]], i32* [[K]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll
new file mode 100644
index 0000000000000..2800fcefe945c
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S --slp-vectorizer -mtriple=x86_64-unknown %s | FileCheck %s
+
+define i32 @test(float* %isec, float %0) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[ISEC:%.*]], i64 0
+; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[ISEC]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[ARRAYIDX5]] to <2 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0:%.*]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[CMP61:%.*]] = fcmp fast oge float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[CMP63:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    br i1 [[CMP63]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br label [[CLEANUP]]
+; CHECK:       cleanup:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %arrayidx5 = getelementptr inbounds float, float* %isec, i64 0
+  %1 = load float, float* %arrayidx5, align 4
+  %arrayidx10 = getelementptr inbounds float, float* %isec, i64 1
+  %2 = load float, float* %arrayidx10, align 4
+  %mul16 = fmul fast float %0, %2
+  %mul55 = fmul fast float 0.000000e+00, %1
+  %cmp61 = fcmp fast oge float 0.000000e+00, 0.000000e+00
+  %cmp63 = fcmp fast ogt float %mul55, %mul16
+  br i1 %cmp63, label %cleanup, label %if.end
+
+if.end:
+  br label %cleanup
+
+cleanup:
+  ret i32 0
+}


        


More information about the llvm-commits mailing list