[llvm] [VPlan] Add support for in-loop AnyOf reductions (PR #131830)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 05:32:14 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/131830
>From bb017c14972fbf627e1b586ff3f493f8158076ef Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Mar 2025 16:53:01 +0800
Subject: [PATCH] [VPlan] Add support for in-loop AnyOf reductions
---
llvm/lib/Analysis/IVDescriptors.cpp | 2 +
.../Transforms/Vectorize/LoopVectorize.cpp | 34 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 +-
...ze-force-tail-with-evl-inloop-reduction.ll | 26 +-
.../LoopVectorize/select-cmp-blend.ll | 190 +++
.../LoopVectorize/select-cmp-multiuse.ll | 431 +++++++
.../Transforms/LoopVectorize/select-cmp.ll | 1100 +++++++++++++++++
7 files changed, 1766 insertions(+), 27 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 45b5b2979a562..c6c2e50ce76a3 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1210,6 +1210,8 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
return SelectPatternResult::isMinOrMax(
matchSelectPattern(Cur, LHS, RHS).Flavor);
}
+ if (isAnyOfRecurrenceKind(getRecurrenceKind()))
+ return isa<SelectInst>(Cur);
// Recognize a call to the llvm.fmuladd intrinsic.
if (isFMulAddIntrinsic(Cur))
return true;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b64bac329e05d..fbc1e4cbcfb58 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5834,6 +5834,14 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
Intrinsic::ID MinMaxID = getMinMaxReductionIntrinsicOp(RK);
BaseCost = TTI.getMinMaxReductionCost(MinMaxID, VectorTy,
RdxDesc.getFastMathFlags(), CostKind);
+ } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+ VectorType *BoolTy = VectorType::get(
+ Type::getInt1Ty(VectorTy->getContext()), VectorTy->getElementCount());
+ BaseCost =
+ TTI.getArithmeticReductionCost(Instruction::Or, BoolTy,
+ RdxDesc.getFastMathFlags(), CostKind) +
+ TTI.getArithmeticInstrCost(Instruction::Or, BoolTy->getScalarType(),
+ CostKind);
} else {
BaseCost = TTI.getArithmeticReductionCost(
RdxDesc.getOpcode(), VectorTy, RdxDesc.getFastMathFlags(), CostKind);
@@ -9666,10 +9674,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind Kind = RdxDesc.getRecurrenceKind();
- assert(
- !RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
- !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
- "AnyOf and FindLast reductions are not allowed for in-loop reductions");
+ assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
+ "FindLast reductions are not allowed for in-loop reductions");
// Collect the chain of "link" recipes for the reduction starting at PhiR.
SetVector<VPSingleDefRecipe *> Worklist;
@@ -9738,6 +9744,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
CurrentLinkI->getFastMathFlags());
LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
VecOp = FMulRecipe;
+ } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) {
+ assert(isa<VPWidenSelectRecipe>(CurrentLink) &&
+ "must be a select recipe");
+ VecOp = CurrentLink->getOperand(0);
+ Kind = RecurKind::Or;
} else {
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
if (isa<VPWidenRecipe>(CurrentLink)) {
@@ -9902,10 +9913,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// selected if the negated condition is true in any iteration.
if (Select->getOperand(1) == PhiR)
Cmp = Builder.createNot(Cmp);
- VPValue *Or = Builder.createOr(PhiR, Cmp);
- Select->getVPSingleValue()->replaceAllUsesWith(Or);
- // Delete Select now that it has invalid types.
- ToDelete.push_back(Select);
+
+ if (PhiR->isInLoop() && MinVF.isVector()) {
+ auto *Reduction = cast<VPReductionRecipe>(
+ *find_if(PhiR->users(), IsaPred<VPReductionRecipe>));
+ Reduction->setOperand(1, Cmp);
+ } else {
+ VPValue *Or = Builder.createOr(PhiR, Cmp);
+ Select->getVPSingleValue()->replaceAllUsesWith(Or);
+ // Delete Select now that it has invalid types.
+ ToDelete.push_back(Select);
+ }
// Convert the reduction phi to operate on bools.
PhiR->setOperand(0, Plan->getOrAddLiveIn(ConstantInt::getFalse(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index cdef7972f3bdc..f9ef2b4f1b593 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -668,10 +668,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
- if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
- !PhiR->isInLoop()) {
+ if (((State.VF.isVector() ||
+ RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) &&
+ !PhiR->isInLoop()) ||
+ RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
@@ -2302,7 +2302,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
RecurKind Kind = getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
- "In-loop AnyOf reductions aren't currently supported");
+ "In-loop AnyOf reduction should use Or reduction recipe");
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(getFastMathFlags());
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
index d3baf0e4dce09..ae9f6de94bcb4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-inloop-reduction.ll
@@ -1924,7 +1924,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
@@ -1932,15 +1932,14 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
-; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
-; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP19]] = or i1 [[TMP15]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; IF-EVL: middle.block:
-; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]])
; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]]
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1978,18 +1977,18 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
-; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]]
+; NO-VP-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
+; NO-VP-NEXT: [[TMP12]] = or i1 [[TMP10]], [[VEC_PHI]]
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; NO-VP: middle.block:
-; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]])
; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]]
; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]]
; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -2051,7 +2050,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL: vector.body:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
@@ -2059,15 +2058,14 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
-; IF-EVL-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
-; IF-EVL-NEXT: [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = call i1 @llvm.vp.reduce.or.nxv4i1(i1 false, <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP19]] = or i1 [[TMP15]], [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; IF-EVL: middle.block:
-; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]])
; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
; IF-EVL-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]]
; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -2105,18 +2103,18 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
; NO-VP: vector.body:
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
-; NO-VP-NEXT: [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]]
+; NO-VP-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
+; NO-VP-NEXT: [[TMP12]] = or i1 [[TMP10]], [[VEC_PHI]]
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; NO-VP: middle.block:
-; NO-VP-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]])
; NO-VP-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]]
; NO-VP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]]
; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll
new file mode 100644
index 0000000000000..6ff54fe6c5753
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-blend.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-OUTLOOP
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefix=CHECK-INLOOP
+
+; Check that a VPBlendRecipe in the chain is handled correctly, i.e. there's
+; another phi in between the recurrence phi and the select.
+
+define i32 @select_icmp_switch(i32 %n, i32 %case, ptr %a, ptr %b, i32 %anyof) {
+; CHECK-OUTLOOP-LABEL: define i32 @select_icmp_switch(
+; CHECK-OUTLOOP-SAME: i32 [[N:%.*]], i32 [[CASE:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], i32 [[ANYOF:%.*]]) {
+; CHECK-OUTLOOP-NEXT: [[ENTRY:.*]]:
+; CHECK-OUTLOOP-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-OUTLOOP-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK-OUTLOOP: [[FOR_BODY_PREHEADER]]:
+; CHECK-OUTLOOP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-OUTLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-OUTLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-OUTLOOP: [[VECTOR_PH]]:
+; CHECK-OUTLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-OUTLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CASE]], i64 0
+; CHECK-OUTLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-OUTLOOP-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
+; CHECK-OUTLOOP-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-OUTLOOP: [[VECTOR_BODY]]:
+; CHECK-OUTLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-OUTLOOP-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-OUTLOOP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-OUTLOOP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
+; CHECK-OUTLOOP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
+; CHECK-OUTLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
+; CHECK-OUTLOOP-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 -1)
+; CHECK-OUTLOOP-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-OUTLOOP-NEXT: [[TMP6:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP5]]
+; CHECK-OUTLOOP-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP0]], <4 x i1> [[VEC_PHI]], <4 x i1> [[TMP6]]
+; CHECK-OUTLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-OUTLOOP-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-OUTLOOP-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-OUTLOOP: [[MIDDLE_BLOCK]]:
+; CHECK-OUTLOOP-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]])
+; CHECK-OUTLOOP-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP8]]
+; CHECK-OUTLOOP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i32 [[ANYOF]], i32 0
+; CHECK-OUTLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-OUTLOOP-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-OUTLOOP: [[SCALAR_PH]]:
+; CHECK-OUTLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OUTLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-OUTLOOP-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-OUTLOOP: [[FOR_BODY]]:
+; CHECK-OUTLOOP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-OUTLOOP-NEXT: [[RDX_PHI:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_PHI_NEXT:%.*]], %[[FOR_INC]] ]
+; CHECK-OUTLOOP-NEXT: [[A_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS]]
+; CHECK-OUTLOOP-NEXT: [[A_VALUE:%.*]] = load i8, ptr [[A_ARRAYIDX]], align 1
+; CHECK-OUTLOOP-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A_VALUE]], -1
+; CHECK-OUTLOOP-NEXT: switch i32 [[CASE]], label %[[SW_BB0:.*]] [
+; CHECK-OUTLOOP-NEXT: i32 0, label %[[SW_BB0]]
+; CHECK-OUTLOOP-NEXT: i32 1, label %[[SW_BB1:.*]]
+; CHECK-OUTLOOP-NEXT: ]
+; CHECK-OUTLOOP: [[SW_BB0]]:
+; CHECK-OUTLOOP-NEXT: [[SELECT_BB0:%.*]] = select i1 [[CMP_A]], i32 [[RDX_PHI]], i32 [[ANYOF]]
+; CHECK-OUTLOOP-NEXT: br label %[[FOR_INC]]
+; CHECK-OUTLOOP: [[SW_BB1]]:
+; CHECK-OUTLOOP-NEXT: br label %[[FOR_INC]]
+; CHECK-OUTLOOP: [[FOR_INC]]:
+; CHECK-OUTLOOP-NEXT: [[RDX_PHI_NEXT]] = phi i32 [ [[SELECT_BB0]], %[[SW_BB0]] ], [ [[RDX_PHI]], %[[SW_BB1]] ]
+; CHECK-OUTLOOP-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
+; CHECK-OUTLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-OUTLOOP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-OUTLOOP: [[FOR_END_LOOPEXIT]]:
+; CHECK-OUTLOOP-NEXT: [[RDX_PHI_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_PHI_NEXT]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-OUTLOOP-NEXT: br label %[[FOR_END]]
+; CHECK-OUTLOOP: [[FOR_END]]:
+; CHECK-OUTLOOP-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_PHI_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-OUTLOOP-NEXT: ret i32 [[SELECT_LCSSA]]
+;
+; CHECK-INLOOP-LABEL: define i32 @select_icmp_switch(
+; CHECK-INLOOP-SAME: i32 [[N:%.*]], i32 [[CASE:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], i32 [[ANYOF:%.*]]) {
+; CHECK-INLOOP-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-INLOOP-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK-INLOOP: [[FOR_BODY_PREHEADER]]:
+; CHECK-INLOOP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
+; CHECK-INLOOP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-INLOOP-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP: [[VECTOR_PH]]:
+; CHECK-INLOOP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
+; CHECK-INLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-INLOOP-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[CASE]], i64 0
+; CHECK-INLOOP-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-INLOOP-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
+; CHECK-INLOOP-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-INLOOP-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP: [[VECTOR_BODY]]:
+; CHECK-INLOOP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP2]]
+; CHECK-INLOOP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
+; CHECK-INLOOP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
+; CHECK-INLOOP-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], splat (i8 -1)
+; CHECK-INLOOP-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
+; CHECK-INLOOP-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
+; CHECK-INLOOP-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-INLOOP-NEXT: [[TMP8]] = or i1 [[TMP7]], [[VEC_PHI]]
+; CHECK-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-INLOOP: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP8]]
+; CHECK-INLOOP-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP10]], i32 [[ANYOF]], i32 0
+; CHECK-INLOOP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-INLOOP-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP: [[SCALAR_PH]]:
+; CHECK-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-INLOOP-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK-INLOOP: [[FOR_BODY]]:
+; CHECK-INLOOP-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-INLOOP-NEXT: [[RDX_PHI:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_PHI_NEXT:%.*]], %[[FOR_INC]] ]
+; CHECK-INLOOP-NEXT: [[A_ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS]]
+; CHECK-INLOOP-NEXT: [[A_VALUE:%.*]] = load i8, ptr [[A_ARRAYIDX]], align 1
+; CHECK-INLOOP-NEXT: [[CMP_A:%.*]] = icmp eq i8 [[A_VALUE]], -1
+; CHECK-INLOOP-NEXT: switch i32 [[CASE]], label %[[SW_BB0:.*]] [
+; CHECK-INLOOP-NEXT: i32 0, label %[[SW_BB0]]
+; CHECK-INLOOP-NEXT: i32 1, label %[[SW_BB1:.*]]
+; CHECK-INLOOP-NEXT: ]
+; CHECK-INLOOP: [[SW_BB0]]:
+; CHECK-INLOOP-NEXT: [[SELECT_BB0:%.*]] = select i1 [[CMP_A]], i32 [[RDX_PHI]], i32 [[ANYOF]]
+; CHECK-INLOOP-NEXT: br label %[[FOR_INC]]
+; CHECK-INLOOP: [[SW_BB1]]:
+; CHECK-INLOOP-NEXT: br label %[[FOR_INC]]
+; CHECK-INLOOP: [[FOR_INC]]:
+; CHECK-INLOOP-NEXT: [[RDX_PHI_NEXT]] = phi i32 [ [[SELECT_BB0]], %[[SW_BB0]] ], [ [[RDX_PHI]], %[[SW_BB1]] ]
+; CHECK-INLOOP-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
+; CHECK-INLOOP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-INLOOP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-INLOOP: [[FOR_END_LOOPEXIT]]:
+; CHECK-INLOOP-NEXT: [[RDX_PHI_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_PHI_NEXT]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-NEXT: br label %[[FOR_END]]
+; CHECK-INLOOP: [[FOR_END]]:
+; CHECK-INLOOP-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_PHI_NEXT_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-INLOOP-NEXT: ret i32 [[SELECT_LCSSA]]
+;
+entry:
+ %cmp.sgt = icmp sgt i32 %n, 0
+ br i1 %cmp.sgt, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+ %wide.trip.count = zext i32 %n to i64
+ br label %for.body
+
+for.body:
+ %indvars = phi i64 [ 0, %for.body.preheader ], [ %indvars.next, %for.inc ]
+ %rdx.phi = phi i32 [ 0, %for.body.preheader ], [ %rdx.phi.next, %for.inc ]
+ %a.arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars
+ %a.value = load i8, ptr %a.arrayidx, align 1
+ %cmp.a = icmp eq i8 %a.value, -1
+ switch i32 %case, label %sw.bb0 [
+ i32 0, label %sw.bb0
+ i32 1, label %sw.bb1
+ ]
+
+sw.bb0:
+ %select.bb0 = select i1 %cmp.a, i32 %rdx.phi, i32 %anyof
+ br label %for.inc
+
+sw.bb1:
+ br label %for.inc
+
+for.inc:
+ %rdx.phi.next = phi i32 [ %select.bb0, %sw.bb0 ], [ %rdx.phi, %sw.bb1 ]
+ %indvars.next = add nuw nsw i64 %indvars, 1
+ %exitcond.not = icmp eq i64 %indvars.next, %wide.trip.count
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ %select.lcssa = phi i32 [ %rdx.phi.next, %for.inc ], [ 0, %entry ]
+ ret i32 %select.lcssa
+}
+;.
+; CHECK-OUTLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-OUTLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-OUTLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-OUTLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
+; CHECK-INLOOP: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-INLOOP: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-INLOOP: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-INLOOP: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index 576a971c5eaa8..c322cc325f606 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -2,6 +2,7 @@
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC1 --check-prefix=CHECK
; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4-IC2 --check-prefix=CHECK
; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1-IC2 --check-prefix=CHECK
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefix=CHECK-INLOOP-VF4-IC1 --check-prefix=CHECK
; int multi_user_cmp(float* a, long long n) {
@@ -206,6 +207,63 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP17]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP18]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7]] = or i1 [[TMP6]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: middle.block:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP7]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT2:%.*]] = select i1 [[TMP10]], i1 true, i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-INLOOP-VF4-IC1: scalar.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP11]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP12]]
+;
entry:
br label %for.body
@@ -430,6 +488,63 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP18:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP17]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP18]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_int(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7]] = or i1 [[TMP6]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: middle.block:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP7]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT2:%.*]] = select i1 [[TMP10]], i1 true, i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-INLOOP-VF4-IC1: scalar.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[LOAD1]], 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP11]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP12]]
+;
entry:
br label %for.body
@@ -839,6 +954,118 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP27:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP26]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP27]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_branch_use(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], ptr [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.memcheck:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = shl i64 [[N]], 2
+; CHECK-INLOOP-VF4-IC1-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE8]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP33:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP33]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6]] = or i1 [[TMP5]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8]] = or i1 [[TMP7]], [[VEC_PHI2]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-INLOOP-VF4-IC1: pred.store.if:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP12]], ptr [[TMP10]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE]]
+; CHECK-INLOOP-VF4-IC1: pred.store.continue:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK-INLOOP-VF4-IC1: pred.store.if3:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP17:%.*]] = add nsw i32 [[TMP16]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP17]], ptr [[TMP15]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK-INLOOP-VF4-IC1: pred.store.continue4:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
+; CHECK-INLOOP-VF4-IC1: pred.store.if5:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP19]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP22:%.*]] = add nsw i32 [[TMP21]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP22]], ptr [[TMP20]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE6]]
+; CHECK-INLOOP-VF4-IC1: pred.store.continue6:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK-INLOOP-VF4-IC1: pred.store.if7:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP24:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP24]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP27:%.*]] = add nsw i32 [[TMP26]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[TMP27]], ptr [[TMP25]], align 4, !alias.scope [[META9]], !noalias [[META6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[PRED_STORE_CONTINUE8]]
+; CHECK-INLOOP-VF4-IC1: pred.store.continue8:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: middle.block:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP29:%.*]] = freeze i1 [[TMP6]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP29]], i1 false, i1 true
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP30:%.*]] = freeze i1 [[TMP8]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP30]], i1 true, i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-INLOOP-VF4-IC1: scalar.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ], [ true, [[VECTOR_MEMCHECK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i1 [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ], [ false, [[VECTOR_MEMCHECK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[IF_END6:%.*]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[IF_END6]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX10]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[IF_END6]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP1]], label [[IF_THEN3:%.*]], label [[IF_END6]]
+; CHECK-INLOOP-VF4-IC1: if.then3:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INC:%.*]] = add nsw i32 [[LOAD2]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX5]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[IF_END6]]
+; CHECK-INLOOP-VF4-IC1: if.end6:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[IF_END6]] ], [ [[RDX_SELECT9]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[IF_END6]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP31:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP32:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP31]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP32]]
+;
entry:
br label %for.body
@@ -1083,6 +1310,66 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
; CHECK-VF1-IC2-NEXT: [[TMP19:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP17]], i32 [[TMP18]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP19]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_branch_use_and_outside_bb_use(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: vector.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4-IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP15]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP7]] = or i1 [[TMP6]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: middle.block:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i1 false, i1 true
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP10:%.*]] = freeze i1 [[TMP7]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[RDX_SELECT2:%.*]] = select i1 [[TMP10]], i1 true, i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-INLOOP-VF4-IC1: scalar.ph:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ true, [[ENTRY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[BC_MERGE_RDX3:%.*]] = phi i1 [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ], [ false, [[ENTRY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ [[BC_MERGE_RDX3]], [[SCALAR_PH]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1_LCSSA:%.*]] = phi i1 [ [[CMP1]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ], [ [[RDX_SELECT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP12:%.*]] = zext i1 [[CMP1_LCSSA]] to i32
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP13:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP14:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 [[TMP12]], i32 [[TMP13]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP14]]
+;
entry:
br label %for.body
@@ -1198,6 +1485,31 @@ define i32 @multi_user_cmp_fmax(ptr readonly %a, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP1]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_fmax(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[MAX_015:%.*]] = phi float [ 0xFFF0000000000000, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp ogt float [[LOAD1]], [[MAX_015]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTMAX_0]] = select i1 [[CMP1]], float [[LOAD1]], float [[MAX_015]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP1]]
+;
entry:
br label %for.body
@@ -1314,6 +1626,31 @@ define i32 @multi_user_cmp_max(ptr readonly %a, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP1]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_max(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[MAX_015:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[DOTMAX_0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[LOAD1]], [[MAX_015]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTMAX_0]] = tail call i32 @llvm.smax.i32(i32 [[LOAD1]], i32 [[MAX_015]])
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP1]]
+;
entry:
br label %for.body
@@ -1445,6 +1782,35 @@ define i32 @multi_user_cmp_use_store_offset(ptr readonly %a, ptr writeonly %b, i
; CHECK-VF1-IC2-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP1]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_use_store_offset(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], ptr writeonly [[B:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CONV4:%.*]] = zext i1 [[CMP1]] to i32
+; CHECK-INLOOP-VF4-IC1-NEXT: [[N32:%.*]] = trunc i64 [[N]] to i32
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CONV4]], [[N32]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[IDXPROM5:%.*]] = zext nneg i32 [[ADD]] to i64
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IDXPROM5]]
+; CHECK-INLOOP-VF4-IC1-NEXT: store i32 [[CONV4]], ptr [[ARRAYIDX6]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP0]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP1]]
+;
entry:
br label %for.body
@@ -1550,6 +1916,31 @@ define i32 @multi_user_cmp_no_vectorise(ptr readonly %a, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP3]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_no_vectorise(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = sext i1 [[CMP1]] to i64
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP3:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP2]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP3]]
+;
entry:
br label %for.body
@@ -1648,6 +2039,30 @@ define i32 @multi_user_cmp_extra_select(ptr readonly %a, i64 noundef %n) {
; CHECK-VF1-IC2-NEXT: [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
; CHECK-VF1-IC2-NEXT: ret i32 [[TMP2]]
;
+; CHECK-INLOOP-VF4-IC1-LABEL: define i32 @multi_user_cmp_extra_select(
+; CHECK-INLOOP-VF4-IC1-SAME: ptr readonly [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-INLOOP-VF4-IC1-NEXT: entry:
+; CHECK-INLOOP-VF4-IC1-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-INLOOP-VF4-IC1: for.body:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF010:%.*]] = phi i1 [ true, [[ENTRY]] ], [ [[ALL_0_OFF0_:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ANY_0_OFF09:%.*]] = phi i1 [ false, [[ENTRY]] ], [ [[DOTANY_0_OFF0:%.*]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[LOAD1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-INLOOP-VF4-IC1-NEXT: [[CMP1:%.*]] = fcmp olt float [[LOAD1]], 0.000000e+00
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0]] = select i1 [[CMP1]], i1 true, i1 [[ANY_0_OFF09]]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0_]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP0:%.*]] = select i1 [[CMP1]], i1 [[ALL_0_OFF010]], i1 false
+; CHECK-INLOOP-VF4-IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-INLOOP-VF4-IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4-IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK-INLOOP-VF4-IC1: exit:
+; CHECK-INLOOP-VF4-IC1-NEXT: [[DOTANY_0_OFF0_LCSSA:%.*]] = phi i1 [ [[DOTANY_0_OFF0]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[ALL_0_OFF0__LCSSA:%.*]] = phi i1 [ [[ALL_0_OFF0_]], [[FOR_BODY]] ]
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP1:%.*]] = select i1 [[DOTANY_0_OFF0_LCSSA]], i32 2, i32 3
+; CHECK-INLOOP-VF4-IC1-NEXT: [[TMP2:%.*]] = select i1 [[ALL_0_OFF0__LCSSA]], i32 1, i32 [[TMP1]]
+; CHECK-INLOOP-VF4-IC1-NEXT: ret i32 [[TMP2]]
+;
entry:
br label %for.body
@@ -1719,5 +2134,21 @@ exit:
; CHECK-VF1-IC2: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
; CHECK-VF1-IC2: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]]}
;.
+; CHECK-INLOOP-VF4-IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-INLOOP-VF4-IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-INLOOP-VF4-IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-INLOOP-VF4-IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4-IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4-IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4-IC1: [[META6]] = !{[[META7:![0-9]+]]}
+; CHECK-INLOOP-VF4-IC1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; CHECK-INLOOP-VF4-IC1: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; CHECK-INLOOP-VF4-IC1: [[META9]] = !{[[META10:![0-9]+]]}
+; CHECK-INLOOP-VF4-IC1: [[META10]] = distinct !{[[META10]], [[META8]]}
+; CHECK-INLOOP-VF4-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]}
+; CHECK-INLOOP-VF4-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META2]], [[META1]]}
+;.
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index 550e52d318230..124f18150d4a0 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -2,6 +2,9 @@
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC1
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4IC4
; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF1IC4
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLOOP-VF4IC1
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLOOP-VF4IC4
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -prefer-inloop-reductions -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLOOP-VF1IC4
define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) {
; CHECK-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp(
@@ -185,6 +188,190 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
+; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp(
+; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC1: [[LOOP]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[EXIT]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp(
+; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 7, i32 3
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC4: [[LOOP]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[EXIT]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp(
+; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 7, i32 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF1IC4: [[LOOP]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 7
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[EXIT]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
entry:
br label %loop
@@ -376,6 +563,181 @@ define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
+; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_icmp2(
+; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC1: [[LOOP]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[EXIT]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_icmp2(
+; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP9]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 7, i32 3
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC4: [[LOOP]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[EXIT]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_icmp2(
+; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16]] = or i1 [[VEC_PHI]], [[TMP12]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17]] = or i1 [[VEC_PHI1]], [[TMP13]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18]] = or i1 [[VEC_PHI2]], [[TMP14]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19]] = or i1 [[VEC_PHI3]], [[TMP15]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP17]], [[TMP16]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP18]], [[BIN_RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP19]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21:%.*]] = freeze i1 [[BIN_RDX5]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP21]], i32 7, i32 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF1IC4: [[LOOP]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 7, i32 [[RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[EXIT]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
entry:
br label %loop
@@ -576,6 +938,190 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
+; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_i32_from_icmp(
+; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC1: [[LOOP]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[EXIT]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_i32_from_icmp(
+; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD4]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD5]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD6]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[B]], i32 [[A]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC4: [[LOOP]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[EXIT]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_i32_from_icmp(
+; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP2]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP3]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP9]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP11]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 [[B]], i32 [[A]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF1IC4: [[LOOP]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load i32, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = icmp eq i32 [[LOAD_V_IV]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 [[B]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[EXIT]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
entry:
br label %loop
@@ -776,6 +1322,190 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
+; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp_fast(
+; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC1: [[LOOP]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[EXIT]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast(
+; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 1, i32 2
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC4: [[LOOP]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[EXIT]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp_fast(
+; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp fast ueq float [[TMP8]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp fast ueq float [[TMP9]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp fast ueq float [[TMP11]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF1IC4: [[LOOP]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_LOAD_IV_3:%.*]] = fcmp fast ueq float [[LOAD_V_IV]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_LOAD_IV_3]], i32 [[RDX]], i32 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[EXIT]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
entry:
br label %loop
@@ -976,6 +1706,190 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) {
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
+; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_const_i32_from_fcmp(
+; CHECK-INLOOP-VF4IC1-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]])
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5]] = or i1 [[TMP4]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP5]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC1: [[LOOP]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[EXIT]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_const_i32_from_fcmp(
+; CHECK-INLOOP-VF4IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD4]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD5]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp ueq <4 x float> [[WIDE_LOAD6]], splat (float 3.000000e+00)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP20:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP21:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP22:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP23:%.*]] = xor <4 x i1> [[TMP9]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP11]] = or i1 [[TMP10]], [[VEC_PHI]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP21]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP13]] = or i1 [[TMP12]], [[VEC_PHI1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP14:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP22]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP15]] = or i1 [[TMP14]], [[VEC_PHI2]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP23]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP17]] = or i1 [[TMP16]], [[VEC_PHI3]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP13]], [[TMP11]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX7:%.*]] = or i1 [[TMP15]], [[BIN_RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX8:%.*]] = or i1 [[TMP17]], [[BIN_RDX7]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[BIN_RDX8]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 1, i32 2
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC4: [[LOOP]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[EXIT]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_const_i32_from_fcmp(
+; CHECK-INLOOP-VF1IC4-SAME: ptr [[V:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP2]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP3]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP4]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP5]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP7]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP12:%.*]] = fcmp ueq float [[TMP8]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP13:%.*]] = fcmp ueq float [[TMP9]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP14:%.*]] = fcmp ueq float [[TMP10]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP11]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP16:%.*]] = xor i1 [[TMP12]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP17:%.*]] = xor i1 [[TMP13]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP18:%.*]] = xor i1 [[TMP14]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP19:%.*]] = xor i1 [[TMP15]], true
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI]], [[TMP16]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP21]] = or i1 [[VEC_PHI1]], [[TMP17]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP22]] = or i1 [[VEC_PHI2]], [[TMP18]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP23]] = or i1 [[VEC_PHI3]], [[TMP19]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP21]], [[TMP20]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP22]], [[BIN_RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP23]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[BIN_RDX5]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP25]], i32 1, i32 2
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF1IC4: [[LOOP]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[GEP_V_IV:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[IV]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[LOAD_V_IV:%.*]] = load float, ptr [[GEP_V_IV]], align 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_V_IV_3:%.*]] = fcmp ueq float [[LOAD_V_IV]], 3.000000e+00
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_V_IV_3]], i32 [[RDX]], i32 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[EXIT]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
entry:
br label %loop
@@ -1136,6 +2050,147 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) {
; CHECK-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
; CHECK-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
;
+; CHECK-INLOOP-VF4IC1-LABEL: define i32 @select_i32_from_icmp_same_inputs(
+; CHECK-INLOOP-VF4IC1-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC1-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+; CHECK-INLOOP-VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3)
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC1: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
+; CHECK-INLOOP-VF4IC1-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP5]], i32 [[B]], i32 [[A]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC1: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC1: [[LOOP]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]]
+; CHECK-INLOOP-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-INLOOP-VF4IC1: [[EXIT]]:
+; CHECK-INLOOP-VF4IC1-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC1-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF4IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs(
+; CHECK-INLOOP-VF4IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF4IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
+; CHECK-INLOOP-VF4IC4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP0:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 3)
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF4IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI]], [[TMP1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI1]], [[TMP1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP4]] = or <4 x i1> [[VEC_PHI2]], [[TMP1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI3]], [[TMP1]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP3]], [[TMP2]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX4:%.*]] = or <4 x i1> [[TMP4]], [[BIN_RDX]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BIN_RDX5:%.*]] = or <4 x i1> [[TMP5]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX5]])
+; CHECK-INLOOP-VF4IC4-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 [[B]], i32 [[A]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF4IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF4IC4: [[LOOP]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]]
+; CHECK-INLOOP-VF4IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF4IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF4IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-INLOOP-VF4IC4: [[EXIT]]:
+; CHECK-INLOOP-VF4IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF4IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
+; CHECK-INLOOP-VF1IC4-LABEL: define i32 @select_i32_from_icmp_same_inputs(
+; CHECK-INLOOP-VF1IC4-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) {
+; CHECK-INLOOP-VF1IC4-NEXT: [[ENTRY:.*]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP0:%.*]] = icmp eq i32 [[A]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP1:%.*]] = xor i1 [[TMP0]], true
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK-INLOOP-VF1IC4: [[VECTOR_BODY]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP2]] = or i1 [[VEC_PHI]], [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP3]] = or i1 [[VEC_PHI1]], [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP4]] = or i1 [[VEC_PHI2]], [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP5]] = or i1 [[VEC_PHI3]], [[TMP1]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[MIDDLE_BLOCK]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP3]], [[TMP2]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX4:%.*]] = or i1 [[TMP4]], [[BIN_RDX]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BIN_RDX5:%.*]] = or i1 [[TMP5]], [[BIN_RDX4]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[TMP7:%.*]] = freeze i1 [[BIN_RDX5]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK-INLOOP-VF1IC4: [[SCALAR_PH]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: br label %[[LOOP:.*]]
+; CHECK-INLOOP-VF1IC4: [[LOOP]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: [[CMP_RDX_3:%.*]] = icmp eq i32 [[RDX]], 3
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL]] = select i1 [[CMP_RDX_3]], i32 [[RDX]], i32 [[B]]
+; CHECK-INLOOP-VF1IC4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-INLOOP-VF1IC4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-INLOOP-VF1IC4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-INLOOP-VF1IC4: [[EXIT]]:
+; CHECK-INLOOP-VF1IC4-NEXT: [[SEL_LCSSA:%.*]] = phi i32 [ [[SEL]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; CHECK-INLOOP-VF1IC4-NEXT: ret i32 [[SEL_LCSSA]]
+;
entry:
br label %loop
@@ -1315,3 +2370,48 @@ exit: ; preds = %loop
; CHECK-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; CHECK-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
;.
+; CHECK-INLOOP-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-INLOOP-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-INLOOP-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-INLOOP-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+;.
+; CHECK-INLOOP-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-INLOOP-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-INLOOP-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-INLOOP-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF4IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+;.
+; CHECK-INLOOP-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK-INLOOP-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-INLOOP-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK-INLOOP-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK-INLOOP-VF1IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
+;.
More information about the llvm-commits
mailing list