[llvm] 685f1bf - Revert "[LoopVectorize] Permit vectorisation of more select(cmp(), X, Y) reduction patterns"
Krasimir Georgiev via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 1 02:40:09 PDT 2021
Author: Krasimir Georgiev
Date: 2021-10-01T11:39:43+02:00
New Revision: 685f1bfd0af3d5ee6023056d2f656f73990ef2c9
URL: https://github.com/llvm/llvm-project/commit/685f1bfd0af3d5ee6023056d2f656f73990ef2c9
DIFF: https://github.com/llvm/llvm-project/commit/685f1bfd0af3d5ee6023056d2f656f73990ef2c9.diff
LOG: Revert "[LoopVectorize] Permit vectorisation of more select(cmp(), X, Y) reduction patterns"
It appears to cause stage2 clang build failures, e.g.,
https://lab.llvm.org/buildbot/#/builders/74/builds/7145.
This reverts commit 1fb37334bdb3cdb028977382fbd84cebde64ebb2.
Added:
Modified:
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/include/llvm/Transforms/Utils/LoopUtils.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
Removed:
llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
llvm/test/Transforms/LoopVectorize/select-cmp.ll
################################################################################
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 50ee4273f70bb..59ad0a335a3a0 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -36,24 +36,20 @@ class DominatorTree;
/// These are the kinds of recurrences that we support.
enum class RecurKind {
- None, ///< Not a recurrence.
- Add, ///< Sum of integers.
- Mul, ///< Product of integers.
- Or, ///< Bitwise or logical OR of integers.
- And, ///< Bitwise or logical AND of integers.
- Xor, ///< Bitwise or logical XOR of integers.
- SMin, ///< Signed integer min implemented in terms of select(cmp()).
- SMax, ///< Signed integer max implemented in terms of select(cmp()).
- UMin, ///< Unisgned integer min implemented in terms of select(cmp()).
- UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
- FAdd, ///< Sum of floats.
- FMul, ///< Product of floats.
- FMin, ///< FP min implemented in terms of select(cmp()).
- FMax, ///< FP max implemented in terms of select(cmp()).
- SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
- ///< invariant
- SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop
- ///< invariant
+ None, ///< Not a recurrence.
+ Add, ///< Sum of integers.
+ Mul, ///< Product of integers.
+ Or, ///< Bitwise or logical OR of integers.
+ And, ///< Bitwise or logical AND of integers.
+ Xor, ///< Bitwise or logical XOR of integers.
+ SMin, ///< Signed integer min implemented in terms of select(cmp()).
+ SMax, ///< Signed integer max implemented in terms of select(cmp()).
+ UMin, ///< Unisgned integer min implemented in terms of select(cmp()).
+ UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
+ FAdd, ///< Sum of floats.
+ FMul, ///< Product of floats.
+ FMin, ///< FP min implemented in terms of select(cmp()).
+ FMax ///< FP max implemented in terms of select(cmp()).
};
/// The RecurrenceDescriptor is used to identify recurrences variables in a
@@ -120,7 +116,7 @@ class RecurrenceDescriptor {
/// select(icmp()) this function advances the instruction pointer 'I' from the
/// compare instruction to the select instruction and stores this pointer in
/// 'PatternLastInst' member of the returned struct.
- static InstDesc isRecurrenceInstr(Loop *L, Instruction *I, RecurKind Kind,
+ static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind,
InstDesc &Prev, FastMathFlags FuncFMF);
/// Returns true if instruction I has multiple uses in Insts
@@ -139,21 +135,13 @@ class RecurrenceDescriptor {
static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind,
const InstDesc &Prev);
- /// Returns a struct describing whether the instruction is either a
- /// Select(ICmp(A, B), X, Y), or
- /// Select(FCmp(A, B), X, Y)
- /// where one of (X, Y) is a loop invariant integer and the other is a PHI
- /// value. \p Prev specifies the description of an already processed select
- /// instruction, so its corresponding cmp can be matched to it.
- static InstDesc isSelectCmpPattern(Loop *Loop, Instruction *I,
- InstDesc &Prev);
-
/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
/// Returns identity corresponding to the RecurrenceKind.
- Value *getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF);
+ static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp,
+ FastMathFlags FMF);
/// Returns the opcode corresponding to the RecurrenceKind.
static unsigned getOpcode(RecurKind Kind);
@@ -233,12 +221,6 @@ class RecurrenceDescriptor {
return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind);
}
- /// Returns true if the recurrence kind is of the form
- /// select(cmp(),x,y) where one of (x,y) is loop invariant.
- static bool isSelectCmpRecurrenceKind(RecurKind Kind) {
- return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp;
- }
-
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index aa450d5a8f16e..a425aa2a9fba1 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -348,15 +348,6 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
SinkAndHoistLICMFlags *LICMFlags = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
-/// See RecurrenceDescriptor::isSelectCmpPattern for a description of the
-/// pattern we are trying to match. In this pattern we are only ever selecting
-/// between two values: 1) an initial PHI start value, and 2) a loop invariant
-/// value. This function uses \p LoopExitInst to determine 2), which we then use
-/// to select between \p Left and \p Right. Any lane value in \p Left that
-/// matches 2) will be merged into \p Right.
-Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
- Value *Left, Value *Right);
-
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
@@ -384,22 +375,12 @@ Value *createSimpleTargetReduction(IRBuilderBase &B,
RecurKind RdxKind,
ArrayRef<Value *> RedOps = None);
-/// Create a target reduction of the given vector \p Src for a reduction of the
-/// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation
-/// is described by \p Desc.
-Value *createSelectCmpTargetReduction(IRBuilderBase &B,
- const TargetTransformInfo *TTI,
- Value *Src,
- const RecurrenceDescriptor &Desc,
- PHINode *OrigPhi);
-
/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
- const RecurrenceDescriptor &Desc, Value *Src,
- PHINode *OrigPhi = nullptr);
+ const RecurrenceDescriptor &Desc, Value *Src);
/// Create an ordered reduction intrinsic using the given recurrence
/// descriptor \p Desc.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 03d0ec87d11f0..c04083c2a6101 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -62,8 +62,6 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
return true;
}
return false;
@@ -329,7 +327,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).
if (Cur != Start) {
- ReduxDesc = isRecurrenceInstr(TheLoop, Cur, Kind, ReduxDesc, FuncFMF);
+ ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, FuncFMF);
if (!ReduxDesc.isRecurrence())
return false;
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -362,7 +360,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
- !isSelectCmpRecurrenceKind(Kind) &&
hasMultipleUsesOf(Cur, VisitedInsts, 1))
return false;
@@ -370,10 +367,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
- if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
+ if (isIntMinMaxRecurrenceKind(Kind) &&
(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
- if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
+ if (isFPMinMaxRecurrenceKind(Kind) &&
(isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
@@ -426,8 +423,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
(!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
- !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence() &&
- !isSelectCmpPattern(TheLoop, UI, IgnoredVal)
+ !isMinMaxPattern(UI, Kind, IgnoredVal)
.isRecurrence())))
return false;
@@ -446,9 +442,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
NumCmpSelectPatternInst != 0)
return false;
- if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
- return false;
-
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
@@ -515,63 +508,6 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
return true;
}
-// We are looking for loops that do something like this:
-// int r = 0;
-// for (int i = 0; i < n; i++) {
-// if (src[i] > 3)
-// r = 3;
-// }
-// where the reduction value (r) only has two states, in this example 0 or 3.
-// The generated LLVM IR for this type of loop will be like this:
-// for.body:
-// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
-// ...
-// %cmp = icmp sgt i32 %5, 3
-// %spec.select = select i1 %cmp, i32 3, i32 %r
-// ...
-// In general we can support vectorization of loops where 'r' flips between
-// any two non-constants, provided they are loop invariant. The only thing
-// we actually care about at the end of the loop is whether or not any lane
-// in the selected vector is different from the start value. The final
-// across-vector reduction after the loop simply involves choosing the start
-// value if nothing changed (0 in the example above) or the other selected
-// value (3 in the example above).
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, Instruction *I,
- InstDesc &Prev) {
- // We must handle the select(cmp(),x,y) as a single instruction. Advance to
- // the select.
- CmpInst::Predicate Pred;
- if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
- if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
- return InstDesc(Select, Prev.getRecKind());
- }
-
- // Only match select with single use cmp condition.
- if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
- m_Value())))
- return InstDesc(false, I);
-
- SelectInst *SI = cast<SelectInst>(I);
- Value *NonPhi = nullptr;
-
- if (isa<PHINode>(SI->getTrueValue()))
- NonPhi = SI->getFalseValue();
- else if (isa<PHINode>(SI->getFalseValue()))
- NonPhi = SI->getTrueValue();
- else
- return InstDesc(false, I);
-
- // We are looking for selects of the form:
- // select(cmp(), phi, loop_invariant) or
- // select(cmp(), loop_invariant, phi)
- if (!Loop->isLoopInvariant(NonPhi))
- return InstDesc(false, I);
-
- return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
- : RecurKind::SelectFCmp);
-}
-
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
const InstDesc &Prev) {
@@ -666,7 +602,7 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
}
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Loop *L, Instruction *I, RecurKind Kind,
+RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
InstDesc &Prev, FastMathFlags FuncFMF) {
assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
switch (I->getOpcode()) {
@@ -700,8 +636,6 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, Instruction *I, RecurKind Kind,
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Call:
- if (isSelectCmpRecurrenceKind(Kind))
- return isSelectCmpPattern(L, I, Prev);
if (isIntMinMaxRecurrenceKind(Kind) ||
(((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
(isa<FPMathOperator>(I) && I->hasNoNaNs() &&
@@ -730,6 +664,7 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes,
DemandedBits *DB, AssumptionCache *AC,
DominatorTree *DT) {
+
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
FastMathFlags FMF;
@@ -774,12 +709,6 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
- DT)) {
- LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
- << *Phi << "\n");
- return true;
- }
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
@@ -796,12 +725,6 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
- DT)) {
- LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
- << " PHI." << *Phi << "\n");
- return true;
- }
// Not a reduction of known type.
return false;
}
@@ -908,8 +831,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
/// This function returns the identity element (or neutral element) for
/// the operation K.
-Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
- FastMathFlags FMF) {
+Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
+ FastMathFlags FMF) {
switch (K) {
case RecurKind::Xor:
case RecurKind::Add:
@@ -949,9 +872,6 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
return ConstantFP::getInfinity(Tp, true);
case RecurKind::FMax:
return ConstantFP::getInfinity(Tp, false);
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
- return getRecurrenceStartValue();
default:
llvm_unreachable("Unknown recurrence kind");
}
@@ -977,11 +897,9 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
- case RecurKind::SelectICmp:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
- case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown recurrence operation");
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b6a8e0fb64e15..1f0494621b691 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1949,8 +1949,6 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::UMax:
case RecurKind::FMin:
case RecurKind::FMax:
- case RecurKind::SelectICmp:
- case RecurKind::SelectFCmp:
return true;
default:
return false;
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 1ca188847b826..b8f8ad579d58f 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -889,15 +889,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
- RecurKind RK, Value *Left, Value *Right) {
- if (auto VTy = dyn_cast<VectorType>(Left->getType()))
- StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
- Value *Cmp =
- Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
- return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
-}
-
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
CmpInst::Predicate Pred;
@@ -1001,46 +992,6 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
-Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
- const TargetTransformInfo *TTI,
- Value *Src,
- const RecurrenceDescriptor &Desc,
- PHINode *OrigPhi) {
- assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
- Desc.getRecurrenceKind()) &&
- "Unexpected reduction kind");
- Value *InitVal = Desc.getRecurrenceStartValue();
- Value *NewVal = nullptr;
-
- // First use the original phi to determine the new value we're trying to
- // select from in the loop.
- SelectInst *SI = nullptr;
- for (auto *U : OrigPhi->users()) {
- if ((SI = dyn_cast<SelectInst>(U)))
- break;
- }
- assert(SI && "One user of the original phi should be a select");
-
- if (SI->getTrueValue() == OrigPhi)
- NewVal = SI->getFalseValue();
- else {
- assert(SI->getFalseValue() == OrigPhi &&
- "At least one input to the select should be the original Phi");
- NewVal = SI->getTrueValue();
- }
-
- // Create a splat vector with the new value and compare this to the vector
- // we want to reduce.
- ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
- Value *Right = Builder.CreateVectorSplat(EC, InitVal);
- Value *Cmp =
- Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
-
- // If any predicate is true it means that we want to select the new value.
- Cmp = Builder.CreateOrReduce(Cmp);
- return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
-}
-
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
const TargetTransformInfo *TTI,
Value *Src, RecurKind RdxKind,
@@ -1081,19 +1032,14 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- const RecurrenceDescriptor &Desc, Value *Src,
- PHINode *OrigPhi) {
+ const RecurrenceDescriptor &Desc,
+ Value *Src) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
IRBuilderBase::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(Desc.getFastMathFlags());
-
- RecurKind RK = Desc.getRecurrenceKind();
- if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
- return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
-
- return createSimpleTargetReduction(B, TTI, Src, RK);
+ return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
}
Value *llvm::createOrderedReduction(IRBuilderBase &B,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6da10e1179888..6d7901c7d43b2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4414,11 +4414,9 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
- ReducedPartRdx = createSelectCmpOp(Builder, ReductionStartValue, RK,
- ReducedPartRdx, RdxPart);
- else
+ } else {
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
+ }
}
}
@@ -4426,7 +4424,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// target reduction in the loop using a Reduction recipe.
if (VF.isVector() && !PhiR->isInLoop()) {
ReducedPartRdx =
- createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi);
+ createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (PhiTy != RdxDesc.getRecurrenceType())
@@ -6520,22 +6518,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
unsigned StoresIC = IC / (NumStores ? NumStores : 1);
unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);
- // There is little point in interleaving for reductions containing selects
- // and compares when VF=1 since it may just create more overhead than it's
- // worth for loops with small trip counts. This is because we still have to
- // do the final reduction after the loop.
- bool HasSelectCmpReductions =
- HasReductions &&
- any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
- const RecurrenceDescriptor &RdxDesc = Reduction.second;
- return RecurrenceDescriptor::isSelectCmpRecurrenceKind(
- RdxDesc.getRecurrenceKind());
- });
- if (HasSelectCmpReductions) {
- LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
- return 1;
- }
-
// If we have a scalar reduction (vector reductions are already dealt with
// by this point), we can increase the critical path length if the loop
// we're interleaving is inside another loop. For tree-wise reductions
@@ -9261,8 +9243,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RecipeBuilder.recordRecipeOf(R);
// For min/max reducitons, where we have a pair of icmp/select, we also
// need to record the ICmp recipe, so it can be removed later.
- assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
- "Only min/max recurrences allowed for inloop reductions");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
RecipeBuilder.recordRecipeOf(cast<Instruction>(R->getOperand(0)));
}
@@ -9586,8 +9566,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
VPValue *ChainOp = Plan->getVPValue(Chain);
unsigned FirstOpId;
- assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
- "Only min/max recurrences allowed for inloop reductions");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
"Expected to replace a VPWidenSelectSC");
@@ -9760,10 +9738,10 @@ void VPReductionRecipe::execute(VPTransformState &State) {
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, Part);
VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
- Value *Iden = RdxDesc->getRecurrenceIdentity(
+ Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
- Value *IdenVec =
- State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
+ Constant *IdenVec =
+ ConstantVector::getSplat(VecTy->getElementCount(), Iden);
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
NewVecOp = Select;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index d5bb948e5f304..fa71b1c4528bf 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1336,8 +1336,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
- RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) {
// MinMax reduction have the start value as their identify.
if (ScalarPHI) {
Iden = StartV;
@@ -1348,11 +1347,12 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
} else {
- Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
- RdxDesc.getFastMathFlags());
+ Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity(
+ RK, VecTy->getScalarType(), RdxDesc.getFastMathFlags());
+ Iden = IdenC;
if (!ScalarPHI) {
- Iden = Builder.CreateVectorSplat(State.VF, Iden);
+ Iden = ConstantVector::getSplat(State.VF, IdenC);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
Constant *Zero = Builder.getInt32(0);
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
deleted file mode 100644
index a8285cfa08768..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll
+++ /dev/null
@@ -1,204 +0,0 @@
-; RUN: opt -loop-vectorize -scalable-vectorization=preferred -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
-; RUN: opt -loop-vectorize -scalable-vectorization=preferred -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
-
-target triple = "aarch64-linux-gnu"
-
-define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {
-; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
-
-; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp
-; CHECK-VF4IC4: vector.body:
-; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL1]] = select <vscale x 4 x i1> [[VEC_ICMP1]], <vscale x 4 x i32> [[VEC_PHI1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL2]] = select <vscale x 4 x i1> [[VEC_ICMP2]], <vscale x 4 x i32> [[VEC_PHI2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL3]] = select <vscale x 4 x i1> [[VEC_ICMP3]], <vscale x 4 x i32> [[VEC_PHI3]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL4]] = select <vscale x 4 x i1> [[VEC_ICMP4]], <vscale x 4 x i32> [[VEC_PHI4]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4: middle.block:
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP5]], <vscale x 4 x i32> [[VEC_SEL1]], <vscale x 4 x i32> [[VEC_SEL2]]
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP6]], <vscale x 4 x i32> [[VEC_SEL5]], <vscale x 4 x i32> [[VEC_SEL3]]
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP7]], <vscale x 4 x i32> [[VEC_SEL6]], <vscale x 4 x i32> [[VEC_SEL4]]
-; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select i1 %4, i32 %1, i32 7
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body, !llvm.loop !0
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 {
-; CHECK-VF4IC1-LABEL: @select_i32_from_icmp
-; CHECK-VF4IC1: vector.ph:
-; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %b, i32 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[SPLAT_OF_B]]
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
-; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[FIN_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], [[FIN_SPLAT]]
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_CMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a
-
-; CHECK-VF4IC4-LABEL: @select_i32_from_icmp
-; CHECK-VF4IC4: vector.body:
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ %a, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select i1 %4, i32 %1, i32 %b
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body, !llvm.loop !0
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) #0 {
-; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x float>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2
-
-; CHECK-VF4IC4-LABEL: @select_const_i32_from_fcmp
-; CHECK-VF4IC4: vector.body:
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds float, float* %v, i64 %0
- %3 = load float, float* %2, align 4
- %4 = fcmp fast ueq float %3, 3.0
- %5 = select i1 %4, i32 %1, i32 1
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body, !llvm.loop !0
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-define float @select_const_f32_from_icmp(i32* nocapture readonly %v, i64 %n) #0 {
-; CHECK-VF4IC1-LABEL: @select_const_f32_from_icmp
-; CHECK-VF4IC1-NOT: vector.body
-; CHECK-VF4IC4-LABEL: @select_const_f32_from_icmp
-; CHECK-VF4IC4-NOT: vector.body
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select fast i1 %4, float %1, float 7.0
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body, !llvm.loop !0
-
-exit: ; preds = %for.body
- ret float %5
-}
-
-define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) #0 {
-; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32>
-; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison)
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]]
-; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]]
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0
-
-; CHECK-VF4IC4-LABEL: @pred_select_const_i32_from_icmp
-; CHECK-VF4IC4: vector.body:
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.inc
- %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
- %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %i.013
- %0 = load i32, i32* %arrayidx, align 4
- %cmp1 = icmp sgt i32 %0, 35
- br i1 %cmp1, label %if.then, label %for.inc
-
-if.then: ; preds = %for.body
- %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %i.013
- %1 = load i32, i32* %arrayidx2, align 4
- %cmp3 = icmp eq i32 %1, 2
- %spec.select = select i1 %cmp3, i32 1, i32 %r.012
- br label %for.inc
-
-for.inc: ; preds = %if.then, %for.body
- %r.1 = phi i32 [ %r.012, %for.body ], [ %spec.select, %if.then ]
- %inc = add nuw nsw i64 %i.013, 1
- %exitcond.not = icmp eq i64 %inc, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body, !llvm.loop !0
-
-for.end.loopexit: ; preds = %for.inc
- %r.1.lcssa = phi i32 [ %r.1, %for.inc ]
- ret i32 %r.1.lcssa
-}
-
-
-attributes #0 = { "target-features"="+sve" }
-
-!0 = distinct !{!0, !1}
-!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
index 9fcebe076eb0b..9d0aa5ef082bc 100644
--- a/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll
@@ -7,8 +7,8 @@ define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
; CHECK-LABEL: @reduction_add_trunc(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 0, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer), i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
-; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 0, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer), %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ insertelement (<vscale x 8 x i32> zeroinitializer, i32 255, i32 0), %vector.ph ], [ [[TMP34:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP36:%.*]], %vector.body ]
; CHECK: [[TMP14:%.*]] = and <vscale x 8 x i32> [[VEC_PHI]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP15:%.*]] = and <vscale x 8 x i32> [[VEC_PHI1]], shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 255, i32 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, <vscale x 8 x i8>*
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
deleted file mode 100644
index 4e64e94459a1d..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll
+++ /dev/null
@@ -1,143 +0,0 @@
-; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s --check-prefix=CHECK-VF2IC1
-; RUN: opt -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC2
-
-define i32 @pred_select_const_i32_from_icmp(i32* noalias nocapture readonly %src1, i32* noalias nocapture readonly %src2, i64 %n) {
-; CHECK-VF2IC1-LABEL: @pred_select_const_i32_from_icmp(
-; CHECK-VF2IC1: vector.body:
-; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ]
-; CHECK-VF2IC1: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* {{%.*}}, align 4
-; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 35, i32 35>
-; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-VF2IC1-NEXT: br i1 [[TMP5]], label %pred.load.if, label %pred.load.continue
-; CHECK-VF2IC1: pred.load.if:
-; CHECK-VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC2:%.*]], i64 {{%.*}}
-; CHECK-VF2IC1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
-; CHECK-VF2IC1-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
-; CHECK-VF2IC1-NEXT: br label %pred.load.continue
-; CHECK-VF2IC1: pred.load.continue:
-; CHECK-VF2IC1-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %vector.body ], [ [[TMP8]], %pred.load.if ]
-; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
-; CHECK-VF2IC1-NEXT: br i1 [[TMP10]], label %pred.load.if1, label %pred.load.continue2
-; CHECK-VF2IC1: pred.load.if1:
-; CHECK-VF2IC1: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[SRC2]], i64 {{%.*}}
-; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
-; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
-; CHECK-VF2IC1-NEXT: br label %pred.load.continue2
-; CHECK-VF2IC1: pred.load.continue2:
-; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %pred.load.continue ], [ [[TMP14]], %pred.load.if1 ]
-; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], <i32 2, i32 2>
-; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i32> <i32 1, i32 1>, <2 x i32> [[VEC_PHI]]
-; CHECK-VF2IC1-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
-; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP17]], <2 x i32> [[VEC_PHI]]
-; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body
-; CHECK-VF2IC1: middle.block:
-; CHECK-VF2IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i32> [[PREDPHI]], zeroinitializer
-; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]])
-; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 1, i32 0
-; CHECK-VF2IC1: scalar.ph:
-; CHECK-VF2IC1: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ]
-; CHECK-VF2IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ]
-; CHECK-VF2IC1-NEXT: br label %for.body
-; CHECK-VF2IC1: for.body:
-; CHECK-VF2IC1: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ]
-; CHECK-VF2IC1: [[TMP21:%.*]] = load i32, i32* {{%.*}}, align 4
-; CHECK-VF2IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP21]], 35
-; CHECK-VF2IC1-NEXT: br i1 [[CMP1]], label %if.then, label %for.inc
-; CHECK-VF2IC1: if.then:
-; CHECK-VF2IC1: [[TMP22:%.*]] = load i32, i32* {{%.*}}, align 4
-; CHECK-VF2IC1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP22]], 2
-; CHECK-VF2IC1-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]]
-; CHECK-VF2IC1-NEXT: br label %for.inc
-; CHECK-VF2IC1: for.inc:
-; CHECK-VF2IC1-NEXT: [[R_1]] = phi i32 [ [[R_012]], %for.body ], [ [[SPEC_SELECT]], %if.then ]
-; CHECK-VF2IC1: for.end.loopexit:
-; CHECK-VF2IC1-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %for.inc ], [ [[RDX_SELECT]], %middle.block ]
-; CHECK-VF2IC1-NEXT: ret i32 [[R_1_LCSSA]]
-;
-; CHECK-VF1IC2-LABEL: @pred_select_const_i32_from_icmp(
-; CHECK-VF1IC2: vector.body:
-; CHECK-VF1IC2: [[VEC_PHI:%.*]] = phi i32 [ 0, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue4 ]
-; CHECK-VF1IC2-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %vector.ph ], [ [[PREDPHI5:%.*]], %pred.load.continue4 ]
-; CHECK-VF1IC2: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[SRC1:%.*]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[SRC1]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
-; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 35
-; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 35
-; CHECK-VF1IC2-NEXT: br i1 [[TMP4]], label %pred.load.if, label %pred.load.continue
-; CHECK-VF1IC2: pred.load.if:
-; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC2:%.*]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
-; CHECK-VF1IC2-NEXT: br label %pred.load.continue
-; CHECK-VF1IC2: pred.load.continue:
-; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP7]], %pred.load.if ]
-; CHECK-VF1IC2-NEXT: br i1 [[TMP5]], label %pred.load.if3, label %pred.load.continue4
-; CHECK-VF1IC2: pred.load.if3:
-; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[SRC2]], i64 {{%.*}}
-; CHECK-VF1IC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
-; CHECK-VF1IC2-NEXT: br label %pred.load.continue4
-; CHECK-VF1IC2: pred.load.continue4:
-; CHECK-VF1IC2-NEXT: [[TMP11:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP10]], %pred.load.if3 ]
-; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 2
-; CHECK-VF1IC2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP11]], 2
-; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = select i1 [[TMP12]], i32 1, i32 [[VEC_PHI]]
-; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], i32 1, i32 [[VEC_PHI2]]
-; CHECK-VF1IC2-NEXT: [[TMP16:%.*]] = xor i1 [[TMP4]], true
-; CHECK-VF1IC2-NEXT: [[TMP17:%.*]] = xor i1 [[TMP5]], true
-; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP4]], i32 [[TMP14]], i32 [[VEC_PHI]]
-; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i32 [[TMP15]], i32 [[VEC_PHI2]]
-; CHECK-VF1IC2: br i1 {{%.*}}, label %middle.block, label %vector.body
-; CHECK-VF1IC2: middle.block:
-; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[PREDPHI]], 0
-; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[PREDPHI]], i32 [[PREDPHI5]]
-; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %scalar.ph
-; CHECK-VF1IC2: scalar.ph:
-; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ]
-; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ]
-; CHECK-VF1IC2-NEXT: br label %for.body
-; CHECK-VF1IC2: for.body:
-; CHECK-VF1IC2-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %for.inc ], [ [[BC_RESUME_VAL]], %scalar.ph ]
-; CHECK-VF1IC2-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ]
-; CHECK-VF1IC2: [[TMP19:%.*]] = load i32, i32* {{%.*}}, align 4
-; CHECK-VF1IC2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP19]], 35
-; CHECK-VF1IC2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label %for.inc
-; CHECK-VF1IC2: if.then:
-; CHECK-VF1IC2: [[TMP20:%.*]] = load i32, i32* {{%.*}}, align 4
-; CHECK-VF1IC2-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP20]], 2
-; CHECK-VF1IC2-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]]
-; CHECK-VF1IC2-NEXT: br label %for.inc
-; CHECK-VF1IC2: for.inc:
-; CHECK-VF1IC2-NEXT: [[R_1]] = phi i32 [ [[R_012]], %for.body ], [ [[SPEC_SELECT]], %if.then ]
-; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %for.body
-; CHECK-VF1IC2: for.end.loopexit:
-; CHECK-VF1IC2-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %for.inc ], [ [[RDX_SELECT]], %middle.block ]
-; CHECK-VF1IC2-NEXT: ret i32 [[R_1_LCSSA]]
-;
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.inc
- %i.013 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
- %r.012 = phi i32 [ %r.1, %for.inc ], [ 0, %entry ]
- %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %i.013
- %0 = load i32, i32* %arrayidx, align 4
- %cmp1 = icmp sgt i32 %0, 35
- br i1 %cmp1, label %if.then, label %for.inc
-
-if.then: ; preds = %for.body
- %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %i.013
- %1 = load i32, i32* %arrayidx2, align 4
- %cmp3 = icmp eq i32 %1, 2
- %spec.select = select i1 %cmp3, i32 1, i32 %r.012
- br label %for.inc
-
-for.inc: ; preds = %if.then, %for.body
- %r.1 = phi i32 [ %r.012, %for.body ], [ %spec.select, %if.then ]
- %inc = add nuw nsw i64 %i.013, 1
- %exitcond.not = icmp eq i64 %inc, %n
- br i1 %exitcond.not, label %for.end.loopexit, label %for.body
-
-for.end.loopexit: ; preds = %for.inc
- %r.1.lcssa = phi i32 [ %r.1, %for.inc ]
- ret i32 %r.1.lcssa
-}
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
deleted file mode 100644
index 3bcf1c8e2fb63..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ /dev/null
@@ -1,288 +0,0 @@
-; RUN: opt -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1 --check-prefix=CHECK
-; RUN: opt -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 --check-prefix=CHECK
-; RUN: opt -loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 --check-prefix=CHECK
-
-define i32 @select_const_i32_from_icmp(i32* nocapture readonly %v, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_icmp
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
-
-; CHECK-VF4IC4: vector.body:
-; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
-; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
-; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
-; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
-; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = select <4 x i1> [[VEC_ICMP1]], <4 x i32> [[VEC_PHI1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = select <4 x i1> [[VEC_ICMP2]], <4 x i32> [[VEC_PHI2]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = select <4 x i1> [[VEC_ICMP3]], <4 x i32> [[VEC_PHI3]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = select <4 x i1> [[VEC_ICMP4]], <4 x i32> [[VEC_PHI4]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>
-; CHECK-VF4IC4: middle.block:
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne <4 x i32> [[VEC_SEL1]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select <4 x i1> [[VEC_ICMP5]], <4 x i32> [[VEC_SEL1]], <4 x i32> [[VEC_SEL2]]
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne <4 x i32> [[VEC_SEL5]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select <4 x i1> [[VEC_ICMP6]], <4 x i32> [[VEC_SEL5]], <4 x i32> [[VEC_SEL3]]
-; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne <4 x i32> [[VEC_SEL6]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[VEC_SEL_FIN:%.*]] = select <4 x i1> [[VEC_ICMP7]], <4 x i32> [[VEC_SEL6]], <4 x i32> [[VEC_SEL4]]
-; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL_FIN]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
-
-
-; CHECK-VF1IC4: vector.body:
-; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ]
-; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ]
-; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ]
-; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ]
-; CHECK-VF1IC4: [[VEC_LOAD1:%.*]] = load i32
-; CHECK-VF1IC4-NEXT: [[VEC_LOAD2:%.*]] = load i32
-; CHECK-VF1IC4-NEXT: [[VEC_LOAD3:%.*]] = load i32
-; CHECK-VF1IC4-NEXT: [[VEC_LOAD4:%.*]] = load i32
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP1:%.*]] = icmp eq i32 [[VEC_LOAD1]], 3
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq i32 [[VEC_LOAD2]], 3
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq i32 [[VEC_LOAD3]], 3
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq i32 [[VEC_LOAD4]], 3
-; CHECK-VF1IC4-NEXT: [[VEC_SEL1]] = select i1 [[VEC_ICMP1]], i32 [[VEC_PHI1]], i32 7
-; CHECK-VF1IC4-NEXT: [[VEC_SEL2]] = select i1 [[VEC_ICMP2]], i32 [[VEC_PHI2]], i32 7
-; CHECK-VF1IC4-NEXT: [[VEC_SEL3]] = select i1 [[VEC_ICMP3]], i32 [[VEC_PHI3]], i32 7
-; CHECK-VF1IC4-NEXT: [[VEC_SEL4]] = select i1 [[VEC_ICMP4]], i32 [[VEC_PHI4]], i32 7
-; CHECK-VF1IC4: middle.block:
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp ne i32 [[VEC_SEL1]], 3
-; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = select i1 [[VEC_ICMP4]], i32 [[VEC_SEL1]], i32 [[VEC_SEL2]]
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne i32 [[VEC_SEL5]], 3
-; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = select i1 [[VEC_ICMP5]], i32 [[VEC_SEL5]], i32 [[VEC_SEL3]]
-; CHECK-VF1IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne i32 [[VEC_SEL6]], 3
-; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[VEC_ICMP6]], i32 [[VEC_SEL6]], i32 [[VEC_SEL4]]
-
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select i1 %4, i32 %1, i32 7
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-
-define i32 @select_const_i32_from_icmp2(i32* nocapture readonly %v, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_icmp2
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32> [[VEC_PHI]]
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3
-
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ 3, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select i1 %4, i32 7, i32 %1
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-
-define i32 @select_i32_from_icmp(i32* nocapture readonly %v, i32 %a, i32 %b, i64 %n) {
-; CHECK-LABEL: @select_i32_from_icmp
-; CHECK-VF4IC1: vector.ph:
-; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i32 0
-; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32>
-; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]]
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i32 0
-; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]]
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ %a, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select i1 %4, i32 %1, i32 %b
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-
-define i32 @select_const_i32_from_fcmp_fast(float* nocapture readonly %v, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_fcmp_fast
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float>
-; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds float, float* %v, i64 %0
- %3 = load float, float* %2, align 4
- %4 = fcmp fast ueq float %3, 3.0
- %5 = select i1 %4, i32 %1, i32 1
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-
-define i32 @select_const_i32_from_fcmp(float* nocapture readonly %v, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_fcmp
-; CHECK-VF4IC1: vector.body:
-; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ]
-; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float>
-; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
-; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-VF4IC1: middle.block:
-; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]])
-; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi i32 [ 2, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds float, float* %v, i64 %0
- %3 = load float, float* %2, align 4
- %4 = fcmp ueq float %3, 3.0
- %5 = select i1 %4, i32 %1, i32 1
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret i32 %5
-}
-
-
-; Negative tests
-
-; We don't support FP reduction variables at the moment.
-define float @select_const_f32_from_icmp(i32* nocapture readonly %v, i64 %n) {
-; CHECK: @select_const_f32_from_icmp
-; CHECK-NOT: vector.body
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %6, %for.body ]
- %1 = phi fast float [ 3.0, %entry ], [ %5, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = icmp eq i32 %3, 3
- %5 = select fast i1 %4, float %1, float 7.0
- %6 = add nuw nsw i64 %0, 1
- %7 = icmp eq i64 %6, %n
- br i1 %7, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret float %5
-}
-
-
-; We don't support select/cmp reduction patterns where there is more than one
-; use of the icmp/fcmp.
-define i32 @select_const_i32_from_icmp_mul_use(i32* nocapture readonly %v1, i32* %v2, i64 %n) {
-; CHECK-LABEL: @select_const_i32_from_icmp_mul_use
-; CHECK-NOT: vector.body
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %8, %for.body ]
- %1 = phi i32 [ 3, %entry ], [ %6, %for.body ]
- %2 = phi i32 [ 0, %entry ], [ %7, %for.body ]
- %3 = getelementptr inbounds i32, i32* %v1, i64 %0
- %4 = load i32, i32* %3, align 4
- %5 = icmp eq i32 %4, 3
- %6 = select i1 %5, i32 %1, i32 7
- %7 = zext i1 %5 to i32
- %8 = add nuw nsw i64 %0, 1
- %9 = icmp eq i64 %8, %n
- br i1 %9, label %exit, label %for.body
-
-exit: ; preds = %for.body
- store i32 %7, i32* %v2, align 4
- ret i32 %6
-}
-
-
-; We don't support selecting loop-variant values.
-define i32 @select_variant_i32_from_icmp(i32* nocapture readonly %v1, i32* nocapture readonly %v2, i64 %n) {
-; CHECK-LABEL: @select_variant_i32_from_icmp
-; CHECK-NOT: vector.body
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %for.body
- %0 = phi i64 [ 0, %entry ], [ %8, %for.body ]
- %1 = phi i32 [ 3, %entry ], [ %7, %for.body ]
- %2 = getelementptr inbounds i32, i32* %v1, i64 %0
- %3 = load i32, i32* %2, align 4
- %4 = getelementptr inbounds i32, i32* %v2, i64 %0
- %5 = load i32, i32* %4, align 4
- %6 = icmp eq i32 %3, 3
- %7 = select i1 %6, i32 %1, i32 %5
- %8 = add nuw nsw i64 %0, 1
- %9 = icmp eq i64 %8, %n
- br i1 %9, label %exit, label %for.body
-
-exit: ; preds = %for.body
- ret i32 %7
-}
More information about the llvm-commits
mailing list