[llvm] [LV] Add support for cmp reductions with decreasing IVs. (PR #140451)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sat Jun 28 14:42:05 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/140451
>From 1e8ef2e14f700ce33d7a12e955ef66a6f6fb3c80 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 26 Jun 2025 17:55:18 +0100
Subject: [PATCH 1/3] [LV] Add support for cmp reductions with decreasing IVs
using SMin.
Similar to FindLastIV, add FindFirstIV to support select (icmp(), x, y)
reductions where one of x or y is a decreasing induction. This is done
via a new recurrence kind FindFirstIVSMin, which selects the first
value from the reduction vector using smin instead of the last value
(FindLastIV). It uses signed max as sentinel value.
---
llvm/include/llvm/Analysis/IVDescriptors.h | 42 +-
llvm/lib/Analysis/IVDescriptors.cpp | 68 +-
llvm/lib/Transforms/Utils/LoopUtils.cpp | 13 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 62 +-
.../Transforms/Vectorize/SLPVectorizer.cpp | 3 +
llvm/lib/Transforms/Vectorize/VPlan.h | 2 +-
.../Transforms/Vectorize/VPlanAnalysis.cpp | 2 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 32 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +-
.../LoopVectorize/iv-select-cmp-decreasing.ll | 1060 +++++++++++++++--
.../vplan-printing-reductions.ll | 2 +-
11 files changed, 1140 insertions(+), 148 deletions(-)
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 463249461483f..310d05b6f04af 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -54,6 +54,9 @@ enum class RecurKind {
FMulAdd, ///< Sum of float products with llvm.fmuladd(a * b + sum).
AnyOf, ///< AnyOf reduction with select(cmp(),x,y) where one of (x,y) is
///< loop invariant, and both x and y are integer type.
+ FindFirstIVSMin, ///< FindFirst reduction with select(icmp(),x,y) where one
+ ///< of (x,y) is a decreasing loop induction, and both x and
+ ///< y are integer type, producing a SMin reduction.
FindLastIVSMax, ///< FindLast reduction with select(cmp(),x,y) where one of
///< (x,y) is increasing loop induction, and both x and y
///< are integer type, producing a SMax reduction.
@@ -165,13 +168,13 @@ class RecurrenceDescriptor {
/// Returns a struct describing whether the instruction is either a
/// Select(ICmp(A, B), X, Y), or
/// Select(FCmp(A, B), X, Y)
- /// where one of (X, Y) is an increasing loop induction variable, and the
- /// other is a PHI value.
+ /// where one of (X, Y) is an increasing (FindLast) or decreasing (FindFirst)
+ /// loop induction variable, and the other is a PHI value.
// TODO: Support non-monotonic variable. FindLast does not need be restricted
// to increasing loop induction variables.
- LLVM_ABI static InstDesc isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
- Instruction *I,
- ScalarEvolution &SE);
+ LLVM_ABI static InstDesc isFindIVPattern(RecurKind Kind, Loop *TheLoop,
+ PHINode *OrigPhi, Instruction *I,
+ ScalarEvolution &SE);
/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
@@ -259,6 +262,12 @@ class RecurrenceDescriptor {
return Kind == RecurKind::AnyOf;
}
+ /// Returns true if the recurrence kind is of the form
+ /// select(cmp(),x,y) where one of (x,y) is decreasing loop induction.
+ static bool isFindFirstIVRecurrenceKind(RecurKind Kind) {
+ return Kind == RecurKind::FindFirstIVSMin;
+ }
+
/// Returns true if the recurrence kind is of the form
/// select(cmp(),x,y) where one of (x,y) is increasing loop induction.
static bool isFindLastIVRecurrenceKind(RecurKind Kind) {
@@ -269,22 +278,35 @@ class RecurrenceDescriptor {
/// Returns true if recurrece kind is a signed redux kind.
static bool isSignedRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::SMax || Kind == RecurKind::SMin ||
+ Kind == RecurKind::FindFirstIVSMin ||
Kind == RecurKind::FindLastIVSMax;
}
+ /// Returns true if the recurrence kind is of the form
+ /// select(cmp(),x,y) where one of (x,y) is an increasing or decreasing loop
+ /// induction.
+ static bool isFindIVRecurrenceKind(RecurKind Kind) {
+ return isFindFirstIVRecurrenceKind(Kind) ||
+ isFindLastIVRecurrenceKind(Kind);
+ }
+
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
- /// Returns the sentinel value for FindLastIV recurrences to replace the start
- /// value.
+ /// Returns the sentinel value for FindFirstIV & FindLastIV recurrences to
+ /// replace the start value.
Value *getSentinelValue() const {
- assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
Type *Ty = StartValue->getType();
unsigned BW = Ty->getIntegerBitWidth();
+ if (isFindLastIVRecurrenceKind(Kind)) {
+ return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind)
+ ? APInt::getSignedMinValue(BW)
+ : APInt::getMinValue(BW));
+ }
return ConstantInt::get(Ty, isSignedRecurrenceKind(Kind)
- ? APInt::getSignedMinValue(BW)
- : APInt::getMinValue(BW));
+ ? APInt::getSignedMaxValue(BW)
+ : APInt::getMaxValue(BW));
}
/// Returns a reference to the instructions used for type-promoting the
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index c8e97e5ec0e58..b49258e3b54ef 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -50,6 +50,7 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::AnyOf:
+ case RecurKind::FindFirstIVSMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
return true;
@@ -684,8 +685,9 @@ RecurrenceDescriptor::isAnyOfPattern(Loop *Loop, PHINode *OrigPhi,
// value of the data type or a non-constant value by using mask and multiple
// reduction operations.
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
- Instruction *I, ScalarEvolution &SE) {
+RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
+ PHINode *OrigPhi, Instruction *I,
+ ScalarEvolution &SE) {
// TODO: Support the vectorization of FindLastIV when the reduction phi is
// used by more than one select instruction. This vectorization is only
// performed when the SCEV of each increasing induction variable used by the
@@ -713,25 +715,49 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
return std::nullopt;
const SCEV *Step = AR->getStepRecurrence(SE);
- if (!SE.isKnownPositive(Step))
+
+ if (isFindFirstIVRecurrenceKind(Kind)) {
+ if (!SE.isKnownNegative(Step))
+ return std::nullopt;
+ } else if (!SE.isKnownPositive(Step))
return std::nullopt;
// Keep the minimum value of the recurrence type as the sentinel value.
// The maximum acceptable range for the increasing induction variable,
// called the valid range, will be defined as
+
+ const ConstantRange IVRange = SE.getSignedRange(AR);
+ // Keep the minimum (FindLast) or maximum (FindFirst) value of the
+ // recurrence type as the sentinel value. The maximum acceptable range for
+ // the induction variable, called the valid range, will be defined as
// [<sentinel value> + 1, <sentinel value>)
- // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>)
+ // where <sentinel value> is [Signed|Unsigned]Min(<recurrence type>) for
+ // FindLastIV or [Signed|Unsigned]Max(<recurrence type>) for FindFirstIV.
// TODO: This range restriction can be lifted by adding an additional
// virtual OR reduction.
auto CheckRange = [&](bool IsSigned) {
const ConstantRange IVRange =
IsSigned ? SE.getSignedRange(AR) : SE.getUnsignedRange(AR);
unsigned NumBits = Ty->getIntegerBitWidth();
- const APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
- : APInt::getMinValue(NumBits);
- const ConstantRange ValidRange =
- ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
- LLVM_DEBUG(dbgs() << "LV: FindLastIV valid range is " << ValidRange
+ ConstantRange ValidRange = ConstantRange::getEmpty(NumBits);
+ if (isFindLastIVRecurrenceKind(Kind)) {
+ APInt Sentinel = IsSigned ? APInt::getSignedMinValue(NumBits)
+ : APInt::getMinValue(NumBits);
+ ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
+ } else {
+ assert(isFindFirstIVRecurrenceKind(Kind) &&
+ "Kind must either be a FindLastIV or FindFirstIV");
+ assert(IsSigned &&
+ "only FindFirstIV with SMax is supported at the moment");
+ ValidRange =
+ ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
+ APInt::getSignedMaxValue(NumBits) - 1);
+ }
+
+ LLVM_DEBUG(dbgs() << "LV: "
+ << (isFindLastIVRecurrenceKind(Kind) ? "FindLastIV"
+ : "FindFirstIV")
+ << " valid range is " << ValidRange
<< ", and the range of " << *AR << " is " << IVRange
<< "\n");
@@ -739,10 +765,18 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
// its range is fully contained within the valid range.
return ValidRange.contains(IVRange);
};
+ if (isFindLastIVRecurrenceKind(Kind)) {
+ if (CheckRange(true))
+ return RecurKind::FindLastIVSMax;
+ if (CheckRange(false))
+ return RecurKind::FindLastIVUMax;
+ return std::nullopt;
+ }
+ assert(isFindFirstIVRecurrenceKind(Kind) &&
+ "Kind must either be a FindLastIV or FindFirstIV");
+
if (CheckRange(true))
- return RecurKind::FindLastIVSMax;
- if (CheckRange(false))
- return RecurKind::FindLastIVUMax;
+ return RecurKind::FindFirstIVSMin;
return std::nullopt;
};
@@ -888,8 +922,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
Kind == RecurKind::Add || Kind == RecurKind::Mul)
return isConditionalRdxPattern(I);
- if (isFindLastIVRecurrenceKind(Kind) && SE)
- return isFindLastIVPattern(L, OrigPhi, I, *SE);
+ if (isFindIVRecurrenceKind(Kind) && SE)
+ return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
[[fallthrough]];
case Instruction::FCmp:
case Instruction::ICmp:
@@ -1003,6 +1037,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a FindLastIV reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::FindFirstIVSMin, TheLoop, FMF, RedDes, DB,
+ AC, DT, SE)) {
+ LLVM_DEBUG(dbgs() << "Found a FindFirstIV reduction PHI." << *Phi << "\n");
+ return true;
+ }
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
SE)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
@@ -1150,6 +1189,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::Mul:
return Instruction::Mul;
case RecurKind::AnyOf:
+ case RecurKind::FindFirstIVSMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::Or:
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index c50bb4a497c6a..ac27ccf409d6b 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1227,9 +1227,12 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
RecurKind RdxKind, Value *Start,
Value *Sentinel) {
bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind);
- Value *MaxRdx = Src->getType()->isVectorTy()
- ? Builder.CreateIntMaxReduce(Src, IsSigned)
- : Src;
+ Value *MaxRdx =
+ Src->getType()->isVectorTy()
+ ? (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind)
+ ? Builder.CreateIntMaxReduce(Src, IsSigned)
+ : Builder.CreateIntMinReduce(Src, IsSigned))
+ : Src;
// Correct the final reduction result back to the start value if the maximum
// reduction is sentinel value.
Value *Cmp =
@@ -1324,8 +1327,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
RecurKind Kind, Value *Mask, Value *EVL) {
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
- !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
- "AnyOf or FindLastIV reductions are not supported.");
+ !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
+ "AnyOf, FindFirstIV and FindLastIV reductions are not supported.");
Intrinsic::ID Id = getReductionIntrinsicID(Kind);
auto VPID = VPIntrinsic::getForIntrinsic(Id);
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa16083829625..95479373b4393 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4922,7 +4922,7 @@ LoopVectorizationCostModel::selectInterleaveCount(VPlan &Plan, ElementCount VF,
const RecurrenceDescriptor &RdxDesc = Reduction.second;
RecurKind RK = RdxDesc.getRecurrenceKind();
return RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
- RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK);
+ RecurrenceDescriptor::isFindIVRecurrenceKind(RK);
});
if (HasSelectCmpReductions) {
LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
@@ -7240,8 +7240,8 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
using namespace VPlanPatternMatch;
- assert(RdxResult->getOpcode() == VPInstruction::ComputeFindLastIVResult &&
- "RdxResult must be ComputeFindLastIVResult");
+ assert(RdxResult->getOpcode() == VPInstruction::ComputeFindIVResult &&
+ "RdxResult must be ComputeFindIVResult");
VPValue *StartVPV = RdxResult->getOperand(1);
match(StartVPV, m_Freeze(m_VPValue(StartVPV)));
return StartVPV->getLiveInIRValue();
@@ -7259,7 +7259,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
if (!EpiRedResult ||
(EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
- EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
+ EpiRedResult->getOpcode() != VPInstruction::ComputeFindIVResult))
return;
auto *EpiRedHeaderPhi =
@@ -7285,7 +7285,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
- } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
@@ -9041,8 +9041,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurKind Kind = RdxDesc.getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
- !RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
- "AnyOf and FindLast reductions are not allowed for in-loop reductions");
+ !RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
+ "AnyOf and FindIV reductions are not allowed for in-loop reductions");
// Collect the chain of "link" recipes for the reduction starting at PhiR.
SetVector<VPSingleDefRecipe *> Worklist;
@@ -9200,7 +9200,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeReductionResult ||
cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ComputeFindLastIVResult);
+ VPInstruction::ComputeFindIVResult);
});
if (CM.usePredicatedReductionSelect())
PhiR->setOperand(1, NewExitingVPV);
@@ -9244,12 +9244,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
VPInstruction *FinalReductionResult;
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(MiddleVPBB, IP);
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ if (RecurrenceDescriptor::isFindIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
VPValue *Start = PhiR->getStartValue();
VPValue *Sentinel = Plan->getOrAddLiveIn(RdxDesc.getSentinelValue());
FinalReductionResult =
- Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
+ Builder.createNaryOp(VPInstruction::ComputeFindIVResult,
{PhiR, Start, Sentinel, NewExitingVPV}, ExitDL);
} else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
@@ -9312,16 +9312,16 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
}
- if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
+ if (RecurrenceDescriptor::isFindIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
- // Adjust the start value for FindLastIV recurrences to use the sentinel
- // value after generating the ResumePhi recipe, which uses the original
- // start value.
+ // Adjust the start value for FindFirstIV/FindLastIV recurrences to use
+ // the sentinel value after generating the ResumePhi recipe, which uses
+ // the original start value.
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
}
RecurKind RK = RdxDesc.getRecurrenceKind();
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
- !RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+ !RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
VPBuilder PHBuilder(Plan->getVectorPreheader());
VPValue *Iden = Plan->getOrAddLiveIn(
@@ -9704,18 +9704,18 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPlanTransforms::runPass(VPlanTransforms::removeDeadRecipes, MainPlan);
using namespace VPlanPatternMatch;
- // When vectorizing the epilogue, FindLastIV reductions can introduce multiple
- // uses of undef/poison. If the reduction start value may be undef or poison
- // it needs to be frozen and the frozen start has to be used when computing
- // the reduction result. We also need to use the frozen value in the resume
- // phi generated by the main vector loop, as this is also used to compute the
- // reduction result after the epilogue vector loop.
+ // When vectorizing the epilogue, FindFirstIV & FindLastIV reductions can
+ // introduce multiple uses of undef/poison. If the reduction start value may
+ // be undef or poison it needs to be frozen and the frozen start has to be
+ // used when computing the reduction result. We also need to use the frozen
+ // value in the resume phi generated by the main vector loop, as this is also
+ // used to compute the reduction result after the epilogue vector loop.
auto AddFreezeForFindLastIVReductions = [](VPlan &Plan,
bool UpdateResumePhis) {
VPBuilder Builder(Plan.getEntry());
for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
auto *VPI = dyn_cast<VPInstruction>(&R);
- if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindLastIVResult)
+ if (!VPI || VPI->getOpcode() != VPInstruction::ComputeFindIVResult)
continue;
VPValue *OrigStart = VPI->getOperand(1);
if (isGuaranteedNotToBeUndefOrPoison(OrigStart->getLiveInIRValue()))
@@ -9810,7 +9810,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
return VPI &&
(VPI->getOpcode() == VPInstruction::ComputeAnyOfResult ||
VPI->getOpcode() == VPInstruction::ComputeReductionResult ||
- VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult);
+ VPI->getOpcode() == VPInstruction::ComputeFindIVResult);
}));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
@@ -9828,20 +9828,20 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
BasicBlock *PBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt());
ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
- } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+ } else if (RecurrenceDescriptor::isFindIVRecurrenceKind(RK)) {
Value *StartV = getStartValueFromReductionResult(RdxResult);
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
- "start value from ComputeFindLastIVResult must match");
+ "start value from ComputeFinIVResult must match");
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
- // VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
- // to the resume value. The resume value is adjusted to the sentinel
- // value when the final value from the main vector loop equals the start
- // value. This ensures correctness when the start value might not be
- // less than the minimum value of a monotonically increasing induction
- // variable.
+ // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
+ // an adjustment to the resume value. The resume value is adjusted to
+ // the sentinel value when the final value from the main vector loop
+ // equals the start value. This ensures correctness when the start value
+ // might not be less than the minimum (FindLastIV) or greater than the
+ // maximum (FindFirstIV) value of the monotonic induction variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 27a7538ecd939..0941bf61953f1 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -23180,6 +23180,7 @@ class HorizontalReduction {
case RecurKind::FMul:
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
+ case RecurKind::FindFirstIVSMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
@@ -23315,6 +23316,7 @@ class HorizontalReduction {
case RecurKind::FMul:
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
+ case RecurKind::FindFirstIVSMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
@@ -23415,6 +23417,7 @@ class HorizontalReduction {
case RecurKind::FMul:
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
+ case RecurKind::FindFirstIVSMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 32e788be76cb7..61b5ccd85bc6e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -946,7 +946,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Compute the final result of a AnyOf reduction with select(cmp(),x,y),
/// where one of (x,y) is loop invariant, and both x and y are integer type.
ComputeAnyOfResult,
- ComputeFindLastIVResult,
+ ComputeFindIVResult,
ComputeReductionResult,
// Extracts the last lane from its operand if it is a vector, or the last
// part if scalar. In the latter case, the recipe will be removed during
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index da4a52203db3f..8b48dda5bcf71 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -91,7 +91,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
case VPInstruction::ComputeAnyOfResult:
- case VPInstruction::ComputeFindLastIVResult:
+ case VPInstruction::ComputeFindIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7b302407519e7..73d82319e13ec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -460,7 +460,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ReductionStartVector:
return 3;
- case VPInstruction::ComputeFindLastIVResult:
+ case VPInstruction::ComputeFindIVResult:
return 4;
case Instruction::Call:
case Instruction::GetElementPtr:
@@ -725,14 +725,14 @@ Value *VPInstruction::generate(VPTransformState &State) {
return createAnyOfReduction(Builder, ReducedPartRdx,
State.get(getOperand(1), VPLane(0)), OrigPhi);
}
- case VPInstruction::ComputeFindLastIVResult: {
+ case VPInstruction::ComputeFindIVResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
// Get its reduction variable descriptor.
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
- assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
+ assert(RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
"Unexpected reduction kind");
assert(!PhiR->isInLoop() &&
"In-loop FindLastIV reduction is not supported yet");
@@ -741,9 +741,17 @@ Value *VPInstruction::generate(VPTransformState &State) {
// sentinel value, followed by one operand for each part of the reduction.
unsigned UF = getNumOperands() - 3;
Value *ReducedPartRdx = State.get(getOperand(3));
- RecurKind MinMaxKind = RecurrenceDescriptor::isSignedRecurrenceKind(RK)
- ? RecurKind::SMax
- : RecurKind::UMax;
+ RecurKind MinMaxKind;
+ bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RK);
+ if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
+ MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
+ } else {
+ assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) &&
+ "Kind must either be a FindLastIV or FindFirstIV");
+ assert(IsSigned &&
+ "only FindFirstIV with SMax is supported at the moment");
+ MinMaxKind = RecurKind::SMin;
+ }
for (unsigned Part = 1; Part < UF; ++Part)
ReducedPartRdx = createMinMaxOp(Builder, MinMaxKind, ReducedPartRdx,
State.get(getOperand(3 + Part)));
@@ -761,8 +769,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
- assert(!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
- "should be handled by ComputeFindLastIVResult");
+ assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
+ "should be handled by ComputeFindIVResult");
Type *ResultTy = State.TypeAnalysis.inferScalarType(this);
// The recipe's operands are the reduction phi, followed by one operand for
@@ -977,7 +985,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
- getOpcode() == VPInstruction::ComputeFindLastIVResult ||
+ getOpcode() == VPInstruction::ComputeFindIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
@@ -1079,7 +1087,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
case VPInstruction::ComputeAnyOfResult:
- case VPInstruction::ComputeFindLastIVResult:
+ case VPInstruction::ComputeFindIVResult:
return Op == getOperand(1);
};
llvm_unreachable("switch should return");
@@ -1168,8 +1176,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeAnyOfResult:
O << "compute-anyof-result";
break;
- case VPInstruction::ComputeFindLastIVResult:
- O << "compute-find-last-iv-result";
+ case VPInstruction::ComputeFindIVResult:
+ O << "compute-find-iv-result";
break;
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 6c2b8210ffb7b..2dd43c092ff7a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -355,7 +355,7 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(), m_VPValue(Op1))) ||
- match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
+ match(&R, m_VPInstruction<VPInstruction::ComputeFindIVResult>(
m_VPValue(), m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
addUniformForAllParts(cast<VPInstruction>(&R));
for (unsigned Part = 1; Part != UF; ++Part)
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index cf10d32e063ab..ee0a64397e2ac 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -1,26 +1,185 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC1VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC4VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefix=IC4VF1 %s
define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
-; CHECK-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 19999, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
+; IC1VF4-SAME: ptr [[A:%.*]]) {
+; IC1VF4-NEXT: [[ENTRY:.*]]:
+; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4: [[VECTOR_PH]]:
+; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC1VF4: [[VECTOR_BODY]]:
+; IC1VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
+; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
+; IC1VF4-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; IC1VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
+; IC1VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC1VF4: [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP4]])
+; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP6]], 9223372036854775807
+; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP6]], i64 331
+; IC1VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4: [[SCALAR_PH]]:
+; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ]
+; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC1VF4-NEXT: br label %[[LOOP:.*]]
+; IC1VF4: [[LOOP]]:
+; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC1VF4: [[EXIT]]:
+; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
+; IC4VF4-SAME: ptr [[A:%.*]]) {
+; IC4VF4-NEXT: [[ENTRY:.*]]:
+; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4: [[VECTOR_PH]]:
+; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF4: [[VECTOR_BODY]]:
+; IC4VF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 19999, i64 19998, i64 19997, i64 19996>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
+; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 -4)
+; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4)
+; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
+; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0
+; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3
+; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4
+; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3
+; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3
+; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12
+; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3
+; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; IC4VF4-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD4]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
+; IC4VF4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD6]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8
+; IC4VF4-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD8]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC4VF4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i64> [[REVERSE5]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i64> [[REVERSE7]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i64> [[REVERSE9]], splat (i64 3)
+; IC4VF4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; IC4VF4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI1]]
+; IC4VF4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD_2]], <4 x i64> [[VEC_PHI2]]
+; IC4VF4-NEXT: [[TMP16]] = select <4 x i1> [[TMP12]], <4 x i64> [[STEP_ADD_3]], <4 x i64> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 -4)
+; IC4VF4-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
+; IC4VF4-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC4VF4: [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP13]], <4 x i64> [[TMP14]])
+; IC4VF4-NEXT: [[RDX_MINMAX10:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP15]])
+; IC4VF4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX10]], <4 x i64> [[TMP16]])
+; IC4VF4-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX11]])
+; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP18]], 9223372036854775807
+; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP18]], i64 331
+; IC4VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF4: [[SCALAR_PH]]:
+; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ]
+; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC4VF4-NEXT: br label %[[LOOP:.*]]
+; IC4VF4: [[LOOP]]:
+; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF4: [[EXIT]]:
+; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
+; IC4VF1-SAME: ptr [[A:%.*]]) {
+; IC4VF1-NEXT: [[ENTRY:.*]]:
+; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1: [[VECTOR_PH]]:
+; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF1: [[VECTOR_BODY]]:
+; IC4VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]]
+; IC4VF1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]]
+; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
+; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
+; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
+; IC4VF1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP3]], align 8
+; IC4VF1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8
+; IC4VF1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8
+; IC4VF1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8
+; IC4VF1-NEXT: [[TMP11:%.*]] = icmp sgt i64 [[TMP7]], 3
+; IC4VF1-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3
+; IC4VF1-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3
+; IC4VF1-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3
+; IC4VF1-NEXT: [[TMP15]] = select i1 [[TMP11]], i64 [[OFFSET_IDX]], i64 [[VEC_PHI]]
+; IC4VF1-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI1]]
+; IC4VF1-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI2]]
+; IC4VF1-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI3]]
+; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC4VF1-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
+; IC4VF1-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IC4VF1: [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP15]], i64 [[TMP16]])
+; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP17]])
+; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP18]])
+; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807
+; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331
+; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1: [[SCALAR_PH]]:
+; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 19999, %[[ENTRY]] ]
+; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
+; IC4VF1-NEXT: br label %[[LOOP:.*]]
+; IC4VF1: [[LOOP]]:
+; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IC4VF1: [[EXIT]]:
+; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %loop
@@ -43,23 +202,354 @@ exit: ; preds = %loop
@table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
-; CHECK-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
-; CHECK-SAME: i16 noundef [[VAL:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
-; CHECK-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
-; CHECK-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
-; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
+; IC1VF4-SAME: i16 noundef [[VAL:%.*]]) {
+; IC1VF4-NEXT: [[ENTRY:.*]]:
+; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4: [[VECTOR_PH]]:
+; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC1VF4: [[VECTOR_BODY]]:
+; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1
+; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]]
+; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
+; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC1VF4: [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
+; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
+; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
+; IC1VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4: [[SCALAR_PH]]:
+; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC1VF4-NEXT: br label %[[LOOP:.*]]
+; IC1VF4: [[LOOP]]:
+; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
+; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
+; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC1VF4: [[EXIT]]:
+; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
+; IC4VF4-SAME: i16 noundef [[VAL:%.*]]) {
+; IC4VF4-NEXT: [[ENTRY:.*]]:
+; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4: [[VECTOR_PH]]:
+; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[VAL]], i64 0
+; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF4: [[VECTOR_BODY]]:
+; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
+; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
+; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
+; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
+; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
+; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
+; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
+; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
+; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4: [[PRED_LOAD_IF]]:
+; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 1
+; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x i16> poison, i16 [[TMP7]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; IC4VF4: [[PRED_LOAD_CONTINUE]]:
+; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
+; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4: [[PRED_LOAD_IF15]]:
+; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 1
+; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP9]], i16 [[TMP13]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
+; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
+; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x i16> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
+; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4: [[PRED_LOAD_IF17]]:
+; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP18]], align 1
+; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
+; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
+; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
+; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4: [[PRED_LOAD_IF19]]:
+; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
+; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
+; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
+; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
+; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4: [[PRED_LOAD_IF21]]:
+; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
+; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
+; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
+; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
+; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
+; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4: [[PRED_LOAD_IF23]]:
+; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
+; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
+; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
+; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
+; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
+; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4: [[PRED_LOAD_IF25]]:
+; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
+; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
+; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
+; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
+; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
+; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4: [[PRED_LOAD_IF27]]:
+; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
+; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
+; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
+; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
+; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
+; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4: [[PRED_LOAD_IF29]]:
+; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
+; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
+; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
+; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
+; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
+; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4: [[PRED_LOAD_IF31]]:
+; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
+; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
+; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
+; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
+; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
+; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4: [[PRED_LOAD_IF33]]:
+; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
+; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
+; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
+; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
+; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
+; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4: [[PRED_LOAD_IF35]]:
+; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
+; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
+; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
+; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
+; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
+; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4: [[PRED_LOAD_IF37]]:
+; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
+; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
+; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
+; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
+; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
+; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4: [[PRED_LOAD_IF39]]:
+; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
+; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
+; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
+; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
+; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
+; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4: [[PRED_LOAD_IF41]]:
+; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
+; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT: [[TMP91:%.*]] = load i16, ptr [[TMP90]], align 1
+; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x i16> [[TMP87]], i16 [[TMP91]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
+; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
+; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x i16> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
+; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4: [[PRED_LOAD_IF43]]:
+; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
+; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT: [[TMP97:%.*]] = load i16, ptr [[TMP96]], align 1
+; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x i16> [[TMP93]], i16 [[TMP97]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4: [[PRED_LOAD_CONTINUE44]]:
+; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x i16> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ]
+; IC4VF4-NEXT: [[TMP100:%.*]] = icmp ugt <4 x i16> [[TMP27]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP101:%.*]] = icmp ugt <4 x i16> [[TMP51]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP102:%.*]] = icmp ugt <4 x i16> [[TMP75]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP103:%.*]] = icmp ugt <4 x i16> [[TMP99]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
+; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF4: [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
+; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
+; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
+; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
+; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
+; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
+; IC4VF4-NEXT: br label %[[EXIT:.*]]
+; IC4VF4: [[SCALAR_PH]]:
+; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ]
+; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; IC4VF4-NEXT: br label %[[LOOP:.*]]
+; IC4VF4: [[LOOP]]:
+; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
+; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC4VF4: [[EXIT]]:
+; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_i16(
+; IC4VF1-SAME: i16 noundef [[VAL:%.*]]) {
+; IC4VF1-NEXT: [[ENTRY:.*]]:
+; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1: [[VECTOR_PH]]:
+; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF1: [[VECTOR_BODY]]:
+; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
+; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
+; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
+; IC4VF1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP3]], align 1
+; IC4VF1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 1
+; IC4VF1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP5]], align 1
+; IC4VF1-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP6]], align 1
+; IC4VF1-NEXT: [[TMP11:%.*]] = icmp ugt i16 [[TMP7]], [[VAL]]
+; IC4VF1-NEXT: [[TMP12:%.*]] = icmp ugt i16 [[TMP8]], [[VAL]]
+; IC4VF1-NEXT: [[TMP13:%.*]] = icmp ugt i16 [[TMP9]], [[VAL]]
+; IC4VF1-NEXT: [[TMP14:%.*]] = icmp ugt i16 [[TMP10]], [[VAL]]
+; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
+; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
+; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
+; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
+; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
+; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
+; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
+; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IC4VF1: [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
+; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
+; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
+; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
+; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
+; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1: [[SCALAR_PH]]:
+; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC4VF1-NEXT: br label %[[LOOP:.*]]
+; IC4VF1: [[LOOP]]:
+; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load i16, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = icmp ugt i16 [[LD_TABLE]], [[VAL]]
+; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; IC4VF1: [[EXIT]]:
+; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %loop
@@ -83,23 +573,354 @@ exit: ; preds = %loop
@tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) {
-; CHECK-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
-; CHECK-SAME: half noundef [[VAL:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
-; CHECK-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
-; CHECK-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
-; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
-; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
+; IC1VF4-SAME: half noundef [[VAL:%.*]]) {
+; IC1VF4-NEXT: [[ENTRY:.*]]:
+; IC1VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC1VF4: [[VECTOR_PH]]:
+; IC1VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
+; IC1VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
+; IC1VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC1VF4: [[VECTOR_BODY]]:
+; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0
+; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3
+; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1
+; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]]
+; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
+; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC1VF4: [[MIDDLE_BLOCK]]:
+; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
+; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
+; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
+; IC1VF4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC1VF4: [[SCALAR_PH]]:
+; IC1VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC1VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC1VF4-NEXT: br label %[[LOOP:.*]]
+; IC1VF4: [[LOOP]]:
+; IC1VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC1VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
+; IC1VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
+; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC1VF4: [[EXIT]]:
+; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC1VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
+; IC4VF4-SAME: half noundef [[VAL:%.*]]) {
+; IC4VF4-NEXT: [[ENTRY:.*]]:
+; IC4VF4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF4: [[VECTOR_PH]]:
+; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x half> poison, half [[VAL]], i64 0
+; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x half> [[BROADCAST_SPLATINSERT]], <4 x half> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF4: [[VECTOR_BODY]]:
+; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
+; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
+; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
+; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
+; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
+; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
+; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
+; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
+; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
+; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
+; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
+; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
+; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; IC4VF4: [[PRED_LOAD_IF]]:
+; IC4VF4-NEXT: [[TMP5:%.*]] = add i16 [[OFFSET_IDX]], 0
+; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP5]]
+; IC4VF4-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP6]], align 1
+; IC4VF4-NEXT: [[TMP8:%.*]] = insertelement <4 x half> poison, half [[TMP7]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; IC4VF4: [[PRED_LOAD_CONTINUE]]:
+; IC4VF4-NEXT: [[TMP9:%.*]] = phi <4 x half> [ poison, %[[VECTOR_BODY]] ], [ [[TMP8]], %[[PRED_LOAD_IF]] ]
+; IC4VF4-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF15:.*]], label %[[PRED_LOAD_CONTINUE16:.*]]
+; IC4VF4: [[PRED_LOAD_IF15]]:
+; IC4VF4-NEXT: [[TMP11:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP11]]
+; IC4VF4-NEXT: [[TMP13:%.*]] = load half, ptr [[TMP12]], align 1
+; IC4VF4-NEXT: [[TMP14:%.*]] = insertelement <4 x half> [[TMP9]], half [[TMP13]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE16]]
+; IC4VF4: [[PRED_LOAD_CONTINUE16]]:
+; IC4VF4-NEXT: [[TMP15:%.*]] = phi <4 x half> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF15]] ]
+; IC4VF4-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF17:.*]], label %[[PRED_LOAD_CONTINUE18:.*]]
+; IC4VF4: [[PRED_LOAD_IF17]]:
+; IC4VF4-NEXT: [[TMP17:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP17]]
+; IC4VF4-NEXT: [[TMP19:%.*]] = load half, ptr [[TMP18]], align 1
+; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x half> [[TMP15]], half [[TMP19]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
+; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
+; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x half> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
+; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4: [[PRED_LOAD_IF19]]:
+; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
+; IC4VF4-NEXT: [[TMP25:%.*]] = load half, ptr [[TMP24]], align 1
+; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x half> [[TMP21]], half [[TMP25]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
+; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
+; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x half> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
+; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
+; IC4VF4: [[PRED_LOAD_IF21]]:
+; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
+; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
+; IC4VF4-NEXT: [[TMP31:%.*]] = load half, ptr [[TMP30]], align 1
+; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x half> poison, half [[TMP31]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
+; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
+; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
+; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
+; IC4VF4: [[PRED_LOAD_IF23]]:
+; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
+; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
+; IC4VF4-NEXT: [[TMP37:%.*]] = load half, ptr [[TMP36]], align 1
+; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x half> [[TMP33]], half [[TMP37]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
+; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
+; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x half> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
+; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
+; IC4VF4: [[PRED_LOAD_IF25]]:
+; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
+; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
+; IC4VF4-NEXT: [[TMP43:%.*]] = load half, ptr [[TMP42]], align 1
+; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x half> [[TMP39]], half [[TMP43]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
+; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
+; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x half> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
+; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
+; IC4VF4: [[PRED_LOAD_IF27]]:
+; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
+; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
+; IC4VF4-NEXT: [[TMP49:%.*]] = load half, ptr [[TMP48]], align 1
+; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x half> [[TMP45]], half [[TMP49]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
+; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
+; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x half> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
+; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
+; IC4VF4: [[PRED_LOAD_IF29]]:
+; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
+; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
+; IC4VF4-NEXT: [[TMP55:%.*]] = load half, ptr [[TMP54]], align 1
+; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x half> poison, half [[TMP55]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
+; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
+; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
+; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
+; IC4VF4: [[PRED_LOAD_IF31]]:
+; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
+; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
+; IC4VF4-NEXT: [[TMP61:%.*]] = load half, ptr [[TMP60]], align 1
+; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x half> [[TMP57]], half [[TMP61]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
+; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
+; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x half> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
+; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
+; IC4VF4: [[PRED_LOAD_IF33]]:
+; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
+; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
+; IC4VF4-NEXT: [[TMP67:%.*]] = load half, ptr [[TMP66]], align 1
+; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x half> [[TMP63]], half [[TMP67]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
+; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
+; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x half> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
+; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
+; IC4VF4: [[PRED_LOAD_IF35]]:
+; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
+; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
+; IC4VF4-NEXT: [[TMP73:%.*]] = load half, ptr [[TMP72]], align 1
+; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x half> [[TMP69]], half [[TMP73]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
+; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
+; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x half> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
+; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
+; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
+; IC4VF4: [[PRED_LOAD_IF37]]:
+; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
+; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
+; IC4VF4-NEXT: [[TMP79:%.*]] = load half, ptr [[TMP78]], align 1
+; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x half> poison, half [[TMP79]], i32 0
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
+; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
+; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x half> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
+; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
+; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
+; IC4VF4: [[PRED_LOAD_IF39]]:
+; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
+; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
+; IC4VF4-NEXT: [[TMP85:%.*]] = load half, ptr [[TMP84]], align 1
+; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x half> [[TMP81]], half [[TMP85]], i32 1
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
+; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
+; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x half> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
+; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
+; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
+; IC4VF4: [[PRED_LOAD_IF41]]:
+; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
+; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP89]]
+; IC4VF4-NEXT: [[TMP91:%.*]] = load half, ptr [[TMP90]], align 1
+; IC4VF4-NEXT: [[TMP92:%.*]] = insertelement <4 x half> [[TMP87]], half [[TMP91]], i32 2
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE42]]
+; IC4VF4: [[PRED_LOAD_CONTINUE42]]:
+; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x half> [ [[TMP87]], %[[PRED_LOAD_CONTINUE40]] ], [ [[TMP92]], %[[PRED_LOAD_IF41]] ]
+; IC4VF4-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
+; IC4VF4-NEXT: br i1 [[TMP94]], label %[[PRED_LOAD_IF43:.*]], label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4: [[PRED_LOAD_IF43]]:
+; IC4VF4-NEXT: [[TMP95:%.*]] = add i16 [[OFFSET_IDX]], -15
+; IC4VF4-NEXT: [[TMP96:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP95]]
+; IC4VF4-NEXT: [[TMP97:%.*]] = load half, ptr [[TMP96]], align 1
+; IC4VF4-NEXT: [[TMP98:%.*]] = insertelement <4 x half> [[TMP93]], half [[TMP97]], i32 3
+; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE44]]
+; IC4VF4: [[PRED_LOAD_CONTINUE44]]:
+; IC4VF4-NEXT: [[TMP99:%.*]] = phi <4 x half> [ [[TMP93]], %[[PRED_LOAD_CONTINUE42]] ], [ [[TMP98]], %[[PRED_LOAD_IF43]] ]
+; IC4VF4-NEXT: [[TMP100:%.*]] = fcmp ugt <4 x half> [[TMP27]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP101:%.*]] = fcmp ugt <4 x half> [[TMP51]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP102:%.*]] = fcmp ugt <4 x half> [[TMP75]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP103:%.*]] = fcmp ugt <4 x half> [[TMP99]], [[BROADCAST_SPLAT]]
+; IC4VF4-NEXT: [[TMP104:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP105:%.*]] = add nsw <4 x i16> [[STEP_ADD]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP106:%.*]] = add nsw <4 x i16> [[STEP_ADD_2]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP107:%.*]] = add nsw <4 x i16> [[STEP_ADD_3]], splat (i16 -1)
+; IC4VF4-NEXT: [[TMP108]] = select <4 x i1> [[TMP100]], <4 x i16> [[TMP104]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT: [[TMP109]] = select <4 x i1> [[TMP101]], <4 x i16> [[TMP105]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT: [[TMP110]] = select <4 x i1> [[TMP102]], <4 x i16> [[TMP106]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT: [[TMP111]] = select <4 x i1> [[TMP103]], <4 x i16> [[TMP107]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[TMP112:%.*]] = select <4 x i1> [[TMP0]], <4 x i16> [[TMP108]], <4 x i16> [[VEC_PHI]]
+; IC4VF4-NEXT: [[TMP113:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP109]], <4 x i16> [[VEC_PHI1]]
+; IC4VF4-NEXT: [[TMP114:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> [[TMP110]], <4 x i16> [[VEC_PHI2]]
+; IC4VF4-NEXT: [[TMP115:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP111]], <4 x i16> [[VEC_PHI3]]
+; IC4VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; IC4VF4-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD_3]], splat (i16 -4)
+; IC4VF4-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC4VF4: [[MIDDLE_BLOCK]]:
+; IC4VF4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP112]], <4 x i16> [[TMP113]])
+; IC4VF4-NEXT: [[RDX_MINMAX45:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX]], <4 x i16> [[TMP114]])
+; IC4VF4-NEXT: [[RDX_MINMAX46:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[RDX_MINMAX45]], <4 x i16> [[TMP115]])
+; IC4VF4-NEXT: [[TMP116:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[RDX_MINMAX46]])
+; IC4VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP116]], 32767
+; IC4VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP116]], i16 0
+; IC4VF4-NEXT: br label %[[EXIT:.*]]
+; IC4VF4: [[SCALAR_PH]]:
+; IC4VF4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 12, %[[ENTRY]] ]
+; IC4VF4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, %[[ENTRY]] ]
+; IC4VF4-NEXT: br label %[[LOOP:.*]]
+; IC4VF4: [[LOOP]]:
+; IC4VF4-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF4-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF4-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
+; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC4VF4: [[EXIT]]:
+; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF4-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i16 @select_decreasing_induction_icmp_table_half(
+; IC4VF1-SAME: half noundef [[VAL:%.*]]) {
+; IC4VF1-NEXT: [[ENTRY:.*]]:
+; IC4VF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; IC4VF1: [[VECTOR_PH]]:
+; IC4VF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; IC4VF1: [[VECTOR_BODY]]:
+; IC4VF1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI1:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI2:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[VEC_PHI3:%.*]] = phi i16 [ 32767, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
+; IC4VF1-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
+; IC4VF1-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
+; IC4VF1-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], -2
+; IC4VF1-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], -3
+; IC4VF1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
+; IC4VF1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP0]]
+; IC4VF1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP1]]
+; IC4VF1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP2]]
+; IC4VF1-NEXT: [[TMP7:%.*]] = load half, ptr [[TMP3]], align 1
+; IC4VF1-NEXT: [[TMP8:%.*]] = load half, ptr [[TMP4]], align 1
+; IC4VF1-NEXT: [[TMP9:%.*]] = load half, ptr [[TMP5]], align 1
+; IC4VF1-NEXT: [[TMP10:%.*]] = load half, ptr [[TMP6]], align 1
+; IC4VF1-NEXT: [[TMP11:%.*]] = fcmp ugt half [[TMP7]], [[VAL]]
+; IC4VF1-NEXT: [[TMP12:%.*]] = fcmp ugt half [[TMP8]], [[VAL]]
+; IC4VF1-NEXT: [[TMP13:%.*]] = fcmp ugt half [[TMP9]], [[VAL]]
+; IC4VF1-NEXT: [[TMP14:%.*]] = fcmp ugt half [[TMP10]], [[VAL]]
+; IC4VF1-NEXT: [[TMP15:%.*]] = add nsw i16 [[OFFSET_IDX]], -1
+; IC4VF1-NEXT: [[TMP16:%.*]] = add nsw i16 [[TMP0]], -1
+; IC4VF1-NEXT: [[TMP17:%.*]] = add nsw i16 [[TMP1]], -1
+; IC4VF1-NEXT: [[TMP18:%.*]] = add nsw i16 [[TMP2]], -1
+; IC4VF1-NEXT: [[TMP19]] = select i1 [[TMP11]], i16 [[TMP15]], i16 [[VEC_PHI]]
+; IC4VF1-NEXT: [[TMP20]] = select i1 [[TMP12]], i16 [[TMP16]], i16 [[VEC_PHI1]]
+; IC4VF1-NEXT: [[TMP21]] = select i1 [[TMP13]], i16 [[TMP17]], i16 [[VEC_PHI2]]
+; IC4VF1-NEXT: [[TMP22]] = select i1 [[TMP14]], i16 [[TMP18]], i16 [[VEC_PHI3]]
+; IC4VF1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; IC4VF1-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
+; IC4VF1-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC4VF1: [[MIDDLE_BLOCK]]:
+; IC4VF1-NEXT: [[RDX_MINMAX:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP19]], i16 [[TMP20]])
+; IC4VF1-NEXT: [[RDX_MINMAX4:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX]], i16 [[TMP21]])
+; IC4VF1-NEXT: [[RDX_MINMAX5:%.*]] = call i16 @llvm.smin.i16(i16 [[RDX_MINMAX4]], i16 [[TMP22]])
+; IC4VF1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[RDX_MINMAX5]], 32767
+; IC4VF1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[RDX_MINMAX5]], i16 0
+; IC4VF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; IC4VF1: [[SCALAR_PH]]:
+; IC4VF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; IC4VF1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; IC4VF1-NEXT: br label %[[LOOP:.*]]
+; IC4VF1: [[LOOP]]:
+; IC4VF1-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[RDX:%.*]] = phi i16 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[GEP_TABLE_IV:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[IV]]
+; IC4VF1-NEXT: [[LD_TABLE:%.*]] = load half, ptr [[GEP_TABLE_IV]], align 1
+; IC4VF1-NEXT: [[CMP_TABLE_VAL:%.*]] = fcmp ugt half [[LD_TABLE]], [[VAL]]
+; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i16 [[IV]], -1
+; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_TABLE_VAL]], i16 [[IV_NEXT]], i16 [[RDX]]
+; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i16 [[IV_NEXT]], 0
+; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; IC4VF1: [[EXIT]]:
+; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT]], %[[LOOP]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
+; IC4VF1-NEXT: ret i16 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %loop
@@ -141,6 +962,66 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
;
+; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
+; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
+; IC1VF4-NEXT: [[ENTRY:.*]]:
+; IC1VF4-NEXT: br label %[[LOOP:.*]]
+; IC1VF4: [[LOOP]]:
+; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
+; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
+; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
+; IC1VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
+; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
+; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
+; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
+; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; IC1VF4: [[EXIT]]:
+; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
+; IC1VF4-NEXT: ret i64 [[COND_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
+; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
+; IC4VF4-NEXT: [[ENTRY:.*]]:
+; IC4VF4-NEXT: br label %[[LOOP:.*]]
+; IC4VF4: [[LOOP]]:
+; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
+; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
+; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
+; IC4VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
+; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
+; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
+; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
+; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; IC4VF4: [[EXIT]]:
+; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
+; IC4VF4-NEXT: ret i64 [[COND_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
+; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
+; IC4VF1-NEXT: [[ENTRY:.*]]:
+; IC4VF1-NEXT: br label %[[LOOP:.*]]
+; IC4VF1: [[LOOP]]:
+; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
+; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
+; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
+; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
+; IC4VF1-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
+; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
+; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
+; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
+; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
+; IC4VF1: [[EXIT]]:
+; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
+; IC4VF1-NEXT: ret i64 [[COND_LCSSA]]
+;
entry:
br label %loop
@@ -164,23 +1045,59 @@ exit: ; preds = %loop
; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since
; the IV hits this value, it is impossible to vectorize this case.
define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) {
-; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; CHECK-SAME: ptr [[A:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; IC1VF4-SAME: ptr [[A:%.*]]) {
+; IC1VF4-NEXT: [[ENTRY:.*]]:
+; IC1VF4-NEXT: br label %[[LOOP:.*]]
+; IC1VF4: [[LOOP]]:
+; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; IC1VF4: [[EXIT]]:
+; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; IC4VF4-SAME: ptr [[A:%.*]]) {
+; IC4VF4-NEXT: [[ENTRY:.*]]:
+; IC4VF4-NEXT: br label %[[LOOP:.*]]
+; IC4VF4: [[LOOP]]:
+; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; IC4VF4: [[EXIT]]:
+; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+;
+; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; IC4VF1-SAME: ptr [[A:%.*]]) {
+; IC4VF1-NEXT: [[ENTRY:.*]]:
+; IC4VF1-NEXT: br label %[[LOOP:.*]]
+; IC4VF1: [[LOOP]]:
+; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; IC4VF1: [[EXIT]]:
+; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %loop
@@ -199,4 +1116,3 @@ loop: ; preds = %entry, %loop
exit: ; preds = %loop
ret i64 %spec.select
}
-
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 9428737814146..2e8109c18948e 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -238,7 +238,7 @@ define i64 @find_last_iv(ptr %a, i64 %n, i64 %start) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-last-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond>
+; CHECK-NEXT: EMIT vp<[[RDX_RES:%.+]]> = compute-find-iv-result ir<%rdx>, ir<%start>, ir<-9223372036854775808>, ir<%cond>
; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<%n>, vp<{{.+}}>
; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
>From 3d8d79aad7efb3c505cf19048efaeec3a276ed97 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 27 Jun 2025 13:02:43 +0100
Subject: [PATCH 2/3] !fixup address comments, thanks
---
llvm/include/llvm/Analysis/IVDescriptors.h | 2 +-
llvm/lib/Analysis/IVDescriptors.cpp | 6 +-
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 5 +-
.../LoopVectorize/iv-select-cmp-decreasing.ll | 136 +++---------------
4 files changed, 25 insertions(+), 124 deletions(-)
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 310d05b6f04af..3b92cbff28de4 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -294,7 +294,7 @@ class RecurrenceDescriptor {
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
- /// Returns the sentinel value for FindFirstIV &FindLastIV recurrences to
+ /// Returns the sentinel value for FindFirstIV & FindLastIV recurrences to
/// replace the start value.
Value *getSentinelValue() const {
Type *Ty = StartValue->getType();
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b49258e3b54ef..523f3694559e6 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -726,7 +726,6 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
// The maximum acceptable range for the increasing induction variable,
// called the valid range, will be defined as
- const ConstantRange IVRange = SE.getSignedRange(AR);
// Keep the minimum (FindLast) or maximum (FindFirst) value of the
// recurrence type as the sentinel value. The maximum acceptable range for
// the induction variable, called the valid range, will be defined as
@@ -746,9 +745,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
} else {
assert(isFindFirstIVRecurrenceKind(Kind) &&
- "Kind must either be a FindLastIV or FindFirstIV");
- assert(IsSigned &&
- "only FindFirstIV with SMax is supported at the moment");
+ "Kind must either be FindLastIV or FindFirstIV");
+ assert(IsSigned && "Only FindFirstIV with SMax is supported currently");
ValidRange =
ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
APInt::getSignedMaxValue(NumBits) - 1);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 73d82319e13ec..472b5700bd358 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -747,9 +747,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
MinMaxKind = IsSigned ? RecurKind::SMax : RecurKind::UMax;
} else {
assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK) &&
- "Kind must either be a FindLastIV or FindFirstIV");
- assert(IsSigned &&
- "only FindFirstIV with SMax is supported at the moment");
+ "Kind must either be FindLastIV or FindFirstIV");
+ assert(IsSigned && "Only FindFirstIV with SMax is currently supported");
MinMaxKind = RecurKind::SMin;
}
for (unsigned Part = 1; Part < UF; ++Part)
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index ee0a64397e2ac..d224da795997d 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC1VF4 %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefix=IC4VF4 %s
-; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefix=IC4VF1 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC1VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF4 %s
+; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck --check-prefixes=CHECK,IC4VF1 %s
define i64 @select_decreasing_induction_icmp_const_start(ptr %a) {
; IC1VF4-LABEL: define i64 @select_decreasing_induction_icmp_const_start(
@@ -962,66 +962,6 @@ define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr
; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
; CHECK-NEXT: ret i64 [[COND_LCSSA]]
;
-; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
-; IC1VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
-; IC1VF4-NEXT: [[ENTRY:.*]]:
-; IC1VF4-NEXT: br label %[[LOOP:.*]]
-; IC1VF4: [[LOOP]]:
-; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
-; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
-; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
-; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC1VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
-; IC1VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
-; IC1VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
-; IC1VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
-; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
-; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
-; IC1VF4: [[EXIT]]:
-; IC1VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
-; IC1VF4-NEXT: ret i64 [[COND_LCSSA]]
-;
-; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
-; IC4VF4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
-; IC4VF4-NEXT: [[ENTRY:.*]]:
-; IC4VF4-NEXT: br label %[[LOOP:.*]]
-; IC4VF4: [[LOOP]]:
-; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
-; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
-; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
-; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF4-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
-; IC4VF4-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
-; IC4VF4-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
-; IC4VF4-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
-; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
-; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
-; IC4VF4: [[EXIT]]:
-; IC4VF4-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
-; IC4VF4-NEXT: ret i64 [[COND_LCSSA]]
-;
-; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(
-; IC4VF1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[RDX_START:%.*]], i64 [[N:%.*]]) {
-; IC4VF1-NEXT: [[ENTRY:.*]]:
-; IC4VF1-NEXT: br label %[[LOOP:.*]]
-; IC4VF1: [[LOOP]]:
-; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[ENTRY]] ]
-; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ [[COND:%.*]], %[[LOOP]] ], [ [[RDX_START]], %[[ENTRY]] ]
-; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV_NEXT]]
-; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF1-NEXT: [[GEP_B_IV:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV_NEXT]]
-; IC4VF1-NEXT: [[LD_B:%.*]] = load i64, ptr [[GEP_B_IV]], align 8
-; IC4VF1-NEXT: [[CMP_A_B:%.*]] = icmp sgt i64 [[LD_A]], [[LD_B]]
-; IC4VF1-NEXT: [[COND]] = select i1 [[CMP_A_B]], i64 [[IV_NEXT]], i64 [[RDX]]
-; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp ugt i64 [[IV]], 1
-; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[LOOP]], label %[[EXIT:.*]]
-; IC4VF1: [[EXIT]]:
-; IC4VF1-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[LOOP]] ]
-; IC4VF1-NEXT: ret i64 [[COND_LCSSA]]
-;
entry:
br label %loop
@@ -1045,59 +985,23 @@ exit: ; preds = %loop
; The sentinel value for decreasing-IV vectorization is LONG_MAX, and since
; the IV hits this value, it is impossible to vectorize this case.
define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(ptr %a) {
-; IC1VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; IC1VF4-SAME: ptr [[A:%.*]]) {
-; IC1VF4-NEXT: [[ENTRY:.*]]:
-; IC1VF4-NEXT: br label %[[LOOP:.*]]
-; IC1VF4: [[LOOP]]:
-; IC1VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; IC1VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; IC1VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; IC1VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC1VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; IC1VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; IC1VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC1VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; IC1VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; IC1VF4: [[EXIT]]:
-; IC1VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; IC1VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
-;
-; IC4VF4-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; IC4VF4-SAME: ptr [[A:%.*]]) {
-; IC4VF4-NEXT: [[ENTRY:.*]]:
-; IC4VF4-NEXT: br label %[[LOOP:.*]]
-; IC4VF4: [[LOOP]]:
-; IC4VF4-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; IC4VF4-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; IC4VF4-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; IC4VF4-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF4-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; IC4VF4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; IC4VF4-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF4-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; IC4VF4-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; IC4VF4: [[EXIT]]:
-; IC4VF4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; IC4VF4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
-;
-; IC4VF1-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
-; IC4VF1-SAME: ptr [[A:%.*]]) {
-; IC4VF1-NEXT: [[ENTRY:.*]]:
-; IC4VF1-NEXT: br label %[[LOOP:.*]]
-; IC4VF1: [[LOOP]]:
-; IC4VF1-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; IC4VF1-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
-; IC4VF1-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
-; IC4VF1-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
-; IC4VF1-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
-; IC4VF1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
-; IC4VF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
-; IC4VF1-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
-; IC4VF1-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
-; IC4VF1: [[EXIT]]:
-; IC4VF1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
-; IC4VF1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
+; CHECK-LABEL: define i64 @not_vectorized_select_decreasing_induction_icmp_iv_out_of_bound(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 9223372036854775807, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi i64 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A_IV:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[LD_A:%.*]] = load i64, ptr [[GEP_A_IV]], align 8
+; CHECK-NEXT: [[CMP_A_3:%.*]] = icmp sgt i64 [[LD_A]], 3
+; CHECK-NEXT: [[SPEC_SELECT]] = select i1 [[CMP_A_3]], i64 [[IV]], i64 [[RDX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV]], 0
+; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i64 [[SPEC_SELECT_LCSSA]]
;
entry:
br label %loop
>From a87ebd6e292976eb21c9389d15e059a82ba55734 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 28 Jun 2025 22:38:15 +0100
Subject: [PATCH 3/3] !fixup address latest comments, thanks!
---
llvm/lib/Analysis/IVDescriptors.cpp | 9 ++-------
llvm/lib/Transforms/Utils/LoopUtils.cpp | 13 ++++++-------
2 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 523f3694559e6..b275b1064cef2 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -715,11 +715,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
return std::nullopt;
const SCEV *Step = AR->getStepRecurrence(SE);
-
- if (isFindFirstIVRecurrenceKind(Kind)) {
- if (!SE.isKnownNegative(Step))
- return std::nullopt;
- } else if (!SE.isKnownPositive(Step))
+ if ((isFindFirstIVRecurrenceKind(Kind) && !SE.isKnownNegative(Step)) ||
+ (isFindLastIVRecurrenceKind(Kind) && !SE.isKnownPositive(Step)))
return std::nullopt;
// Keep the minimum value of the recurrence type as the sentinel value.
@@ -744,8 +741,6 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
: APInt::getMinValue(NumBits);
ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
} else {
- assert(isFindFirstIVRecurrenceKind(Kind) &&
- "Kind must either be FindLastIV or FindFirstIV");
assert(IsSigned && "Only FindFirstIV with SMax is supported currently");
ValidRange =
ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index ac27ccf409d6b..e44fa6af29ffb 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1227,12 +1227,11 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
RecurKind RdxKind, Value *Start,
Value *Sentinel) {
bool IsSigned = RecurrenceDescriptor::isSignedRecurrenceKind(RdxKind);
- Value *MaxRdx =
- Src->getType()->isVectorTy()
- ? (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind)
- ? Builder.CreateIntMaxReduce(Src, IsSigned)
- : Builder.CreateIntMinReduce(Src, IsSigned))
- : Src;
+ bool IsMaxRdx = RecurrenceDescriptor::isFindLastIVRecurrenceKind(RdxKind);
+ Value *MaxRdx = Src->getType()->isVectorTy()
+ ? (IsMaxRdx ? Builder.CreateIntMaxReduce(Src, IsSigned)
+ : Builder.CreateIntMinReduce(Src, IsSigned))
+ : Src;
// Correct the final reduction result back to the start value if the maximum
// reduction is sentinel value.
Value *Cmp =
@@ -1328,7 +1327,7 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
RecurKind Kind, Value *Mask, Value *EVL) {
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
- "AnyOf, FindFirstIV and FindLastIV reductions are not supported.");
+ "AnyOf and FindIV reductions are not supported.");
Intrinsic::ID Id = getReductionIntrinsicID(Kind);
auto VPID = VPIntrinsic::getForIntrinsic(Id);
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
More information about the llvm-commits
mailing list