[llvm] [VPlan] Dispatch to multiple exit blocks via middle blocks. (PR #112138)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 21 19:16:27 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
A more lightweight variant of https://github.com/llvm/llvm-project/pull/109193,
which dispatches to multiple exit blocks via the middle blocks.
The patch also introduces a bit of required scaffolding to enable early-exit vectorization, including an option. At the moment, early-exit vectorization doesn't come with legality checks, and is only used if the option is provided and the loop has metadata forcing vectorization. This is only intended to be used for testing during bring-up; automatic early-exit vectorization is expected to be enabled separately by @<!-- -->david-arm, by plugging in the changes from https://github.com/llvm/llvm-project/pull/88385.
---
Patch is 48.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112138.diff
12 Files Affected:
- (modified) llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h (+3)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+29)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+51-30)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+36-8)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+20-5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+82)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+4)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (-8)
- (added) llvm/test/Transforms/LoopVectorize/X86/multi-exit-codegen.ll (+240)
- (modified) llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll (+9-9)
- (added) llvm/test/Transforms/LoopVectorize/X86/multi-exit-vplan.ll (+148)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index dc7e484a40a452..af6fae44cf0f09 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -287,6 +287,9 @@ class LoopVectorizationLegality {
/// we can use in-order reductions.
bool canVectorizeFPMath(bool EnableStrictReductions);
+ /// Returns true if the loop has an early exit that we can vectorize.
+ bool canVectorizeEarlyExit() const;
+
/// Return true if we can vectorize this loop while folding its tail by
/// masking.
bool canFoldTailByMasking() const;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 43be72f0f34d45..ee53d28a4c8282 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -43,6 +43,10 @@ AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
cl::desc("Enable recognition of non-constant strided "
"pointer induction variables."));
+static cl::opt<bool>
+ EnableEarlyExitVectorization("enable-early-exit-vectorization",
+ cl::init(false), cl::Hidden, cl::desc(""));
+
namespace llvm {
cl::opt<bool>
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
@@ -1378,6 +1382,10 @@ bool LoopVectorizationLegality::isFixedOrderRecurrence(
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
+ // When vectorizing early exits, create predicates for all blocks, except the
+ // header.
+ if (canVectorizeEarlyExit() && BB != TheLoop->getHeader())
+ return true;
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
@@ -1514,6 +1522,27 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
return true;
}
+bool LoopVectorizationLegality::canVectorizeEarlyExit() const {
+ // Currently only allow vectorizing loops with early exits, if early-exit
+ // vectorization is explicitly enabled and the loop has metadata to force
+ // vectorization.
+ if (!EnableEarlyExitVectorization)
+ return false;
+
+ SmallVector<BasicBlock *> Exiting;
+ TheLoop->getExitingBlocks(Exiting);
+ if (Exiting.size() == 1)
+ return false;
+
+ LoopVectorizeHints Hints(TheLoop, true, *ORE);
+ if (Hints.getForce() == LoopVectorizeHints::FK_Undefined)
+ return false;
+
+ Function *Fn = TheLoop->getHeader()->getParent();
+ return Hints.allowVectorization(Fn, TheLoop,
+ true /*VectorizeOnlyWhenForced*/);
+}
+
// Helper function to canVectorizeLoopNestCFG.
bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
bool UseVPlanNativePath) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e8653498d32a12..c80d45b1479b36 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1363,9 +1363,11 @@ class LoopVectorizationCostModel {
// If we might exit from anywhere but the latch, must run the exiting
// iteration in scalar form.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- LLVM_DEBUG(
- dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
- return true;
+ if (!Legal->canVectorizeEarlyExit()) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Loop requires scalar epilogue: multiple exits\n");
+ return true;
+ }
}
if (IsVectorizing && InterleaveInfo.requiresScalarEpilogue()) {
LLVM_DEBUG(dbgs() << "LV: Loop requires scalar epilogue: "
@@ -2575,7 +2577,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
assert(LoopVectorPreHeader && "Invalid loop structure");
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
- assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
+ assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector()) ||
+ Legal->canVectorizeEarlyExit()) &&
"multiple exit loop without required epilogue?");
LoopMiddleBlock =
@@ -2758,7 +2761,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
// value (the value that feeds into the phi from the loop latch).
// We allow both, but they, obviously, have different values.
- assert(OrigLoop->getUniqueExitBlock() && "Expected a single exit block");
DenseMap<Value *, Value *> MissingVals;
@@ -2819,6 +2821,9 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
PHI->addIncoming(I.second, MiddleBlock);
}
+
+ assert((MissingVals.empty() || OrigLoop->getUniqueExitBlock()) &&
+ "Expected a single exit block");
}
namespace {
@@ -3599,7 +3604,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
TheLoop->getExitingBlocks(Exiting);
for (BasicBlock *E : Exiting) {
auto *Cmp = dyn_cast<Instruction>(E->getTerminator()->getOperand(0));
- if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse() &&
+ (TheLoop->getLoopLatch() == E || !Legal->canVectorizeEarlyExit()))
AddToWorklistIfAllowed(Cmp);
}
@@ -7692,12 +7698,15 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan.execute(&State);
// 2.5 Collect reduction resume values.
- auto *ExitVPBB =
- cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
- for (VPRecipeBase &R : *ExitVPBB) {
- createAndCollectMergePhiForReduction(
- dyn_cast<VPInstruction>(&R), State, OrigLoop,
- State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
+ VPBasicBlock *ExitVPBB = nullptr;
+ if (BestVPlan.getVectorLoopRegion()->getSingleSuccessor()) {
+ ExitVPBB = cast<VPBasicBlock>(
+ BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
+ for (VPRecipeBase &R : *ExitVPBB) {
+ createAndCollectMergePhiForReduction(
+ dyn_cast<VPInstruction>(&R), State, OrigLoop,
+ State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
+ }
}
// 2.6. Maintain Loop Hints
@@ -7723,6 +7732,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
LoopVectorizeHints Hints(L, true, *ORE);
Hints.setAlreadyVectorized();
}
+
TargetTransformInfo::UnrollingPreferences UP;
TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
@@ -7735,15 +7745,17 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
ILV.printDebugTracesAtEnd();
// 4. Adjust branch weight of the branch in the middle block.
- auto *MiddleTerm =
- cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
- if (MiddleTerm->isConditional() &&
- hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
- // Assume that `Count % VectorTripCount` is equally distributed.
- unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
- assert(TripCount > 0 && "trip count should not be zero");
- const uint32_t Weights[] = {1, TripCount - 1};
- setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
+ if (ExitVPBB) {
+ auto *MiddleTerm =
+ cast<BranchInst>(State.CFG.VPBB2IRBB[ExitVPBB]->getTerminator());
+ if (MiddleTerm->isConditional() &&
+ hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) {
+ // Assume that `Count % VectorTripCount` is equally distributed.
+ unsigned TripCount = BestVPlan.getUF() * State.VF.getKnownMinValue();
+ assert(TripCount > 0 && "trip count should not be zero");
+ const uint32_t Weights[] = {1, TripCount - 1};
+ setBranchWeights(*MiddleTerm, Weights, /*IsExpected=*/false);
+ }
}
return State.ExpandedSCEVs;
@@ -8128,7 +8140,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
// If source is an exiting block, we know the exit edge is dynamically dead
// in the vector loop, and thus we don't need to restrict the mask. Avoid
// adding uses of an otherwise potentially dead instruction.
- if (OrigLoop->isLoopExiting(Src))
+ if (!Legal->canVectorizeEarlyExit() && OrigLoop->isLoopExiting(Src))
return EdgeMaskCache[Edge] = SrcMask;
VPValue *EdgeMask = getVPValueOrAddLiveIn(BI->getCondition());
@@ -8778,6 +8790,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
static SetVector<VPIRInstruction *> collectUsersInExitBlock(
Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
const MapVector<PHINode *, InductionDescriptor> &Inductions) {
+ if (!Plan.getVectorLoopRegion()->getSingleSuccessor())
+ return {};
auto *MiddleVPBB =
cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
// No edge from the middle block to the unique exit block has been inserted
@@ -8863,6 +8877,8 @@ static void addLiveOutsForFirstOrderRecurrences(
// TODO: Should be replaced by
// Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the
// scalar region is modeled as well.
+ if (!VectorRegion->getSingleSuccessor())
+ return;
auto *MiddleVPBB = cast<VPBasicBlock>(VectorRegion->getSingleSuccessor());
VPBasicBlock *ScalarPHVPBB = nullptr;
if (MiddleVPBB->getNumSuccessors() == 2) {
@@ -9146,10 +9162,15 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
- addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
- addUsersInExitBlock(*Plan, ExitUsersToFix);
+ if (Legal->canVectorizeEarlyExit()) {
+ VPlanTransforms::convertToMultiCond(*Plan, *PSE.getSE(), OrigLoop,
+ RecipeBuilder);
+ } else {
+ SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlock(
+ OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
+ addLiveOutsForFirstOrderRecurrences(*Plan, ExitUsersToFix);
+ addUsersInExitBlock(*Plan, ExitUsersToFix);
+ }
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9277,8 +9298,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
using namespace VPlanPatternMatch;
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
- VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
for (VPRecipeBase &R : Header->phis()) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
@@ -9297,8 +9316,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
for (VPUser *U : Cur->users()) {
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
- assert(UserRecipe->getParent() == MiddleVPBB &&
- "U must be either in the loop region or the middle block.");
continue;
}
Worklist.insert(UserRecipe);
@@ -9403,6 +9420,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
Builder.setInsertPoint(&*LatchVPBB->begin());
+ if (!VectorLoopRegion->getSingleSuccessor())
+ return;
+ VPBasicBlock *MiddleVPBB =
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
for (VPRecipeBase &R :
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index c1b97791331bcf..eb7c808551340d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -474,6 +474,14 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
// backedges. A backward successor is set when the branch is created.
const auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
+ if (TermBr->getSuccessor(idx) &&
+ PredVPBlock == getPlan()->getVectorLoopRegion() &&
+ PredVPBlock->getNumSuccessors()) {
+ // Update PRedBB and TermBr for BranchOnMultiCond in predecessor.
+ PredBB = TermBr->getSuccessor(1);
+ TermBr = cast<BranchInst>(PredBB->getTerminator());
+ idx = 0;
+ }
assert(!TermBr->getSuccessor(idx) &&
"Trying to reset an existing successor block.");
TermBr->setSuccessor(idx, IRBB);
@@ -908,8 +916,8 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
- VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
if (!RequiresScalarEpilogueCheck) {
+ VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
return Plan;
}
@@ -923,10 +931,14 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
// we unconditionally branch to the scalar preheader. Do nothing.
// 3) Otherwise, construct a runtime check.
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock();
- auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock);
- // The connection order corresponds to the operands of the conditional branch.
- VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+ if (IRExitBlock) {
+ auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock);
+ // The connection order corresponds to the operands of the conditional
+ // branch.
+ VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB);
+ VPBasicBlock *ScalarPH = new VPBasicBlock("scalar.ph");
+ VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+ }
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
// Here we use the same DebugLoc as the scalar loop latch terminator instead
@@ -1031,7 +1043,9 @@ void VPlan::execute(VPTransformState *State) {
// VPlan execution rather than earlier during VPlan construction.
BasicBlock *MiddleBB = State->CFG.ExitBB;
VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ getVectorLoopRegion()->getNumSuccessors() == 1
+ ? cast<VPBasicBlock>(getVectorLoopRegion()->getSuccessors()[0])
+ : cast<VPBasicBlock>(getVectorLoopRegion()->getSuccessors()[1]);
// Find the VPBB for the scalar preheader, relying on the current structure
// when creating the middle block and its successrs: if there's a single
// predecessor, it must be the scalar preheader. Otherwise, the second
@@ -1044,6 +1058,10 @@ void VPlan::execute(VPTransformState *State) {
MiddleSuccs.size() == 1 ? MiddleSuccs[0] : MiddleSuccs[1]);
assert(!isa<VPIRBasicBlock>(ScalarPhVPBB) &&
"scalar preheader cannot be wrapped already");
+ if (ScalarPhVPBB->getNumSuccessors() != 0) {
+ ScalarPhVPBB = cast<VPBasicBlock>(ScalarPhVPBB->getSuccessors()[1]);
+ MiddleVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ }
replaceVPBBWithIRVPBB(ScalarPhVPBB, ScalarPh);
replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB);
@@ -1056,12 +1074,19 @@ void VPlan::execute(VPTransformState *State) {
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
// Generate code in the loop pre-header and body.
- for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
+
+ for (VPBlockBase *Block : RPOT)
Block->execute(State);
VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
+ if (!getVectorLoopRegion()->getSingleSuccessor())
+ VectorLatchBB =
+ cast<BranchInst>(VectorLatchBB->getTerminator())->getSuccessor(1);
+
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
@@ -1088,7 +1113,10 @@ void VPlan::execute(VPTransformState *State) {
// Move the last step to the end of the latch block. This ensures
// consistent placement of all induction updates.
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+ if (VectorLatchBB->getTerminator() == &*VectorLatchBB->getFirstNonPHI())
+ Inc->moveBefore(VectorLatchBB->getTerminator());
+ else
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
// Use the steps for the last part as backedge value for the induction.
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 59a084401cc9bf..21f44eac188936 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1274,6 +1274,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
+ AnyOf,
};
private:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 41f13cc2d9a978..9d5c609ad26043 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -67,6 +67,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
default:
return true;
}
+ case VPExpandSCEVSC:
+ return getParent()->getPlan()->getTripCount() == getVPSingleValue();
case VPInterleaveSC:
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
case VPWidenStoreEVLSC:
@@ -160,6 +162,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPPredInstPHISC:
case VPScalarCastSC:
return false;
+ case VPExpandSCEVSC:
+ return getParent()->getPlan()->getTripCount() == getVPSingleValue();
case VPInstructionSC:
return mayWriteToMemory();
case VPWidenCallSC: {
@@ -399,6 +403,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
case VPInstruction::ExplicitVectorLength:
+ case VPInstruction::AnyOf:
return true;
default:
return false;
@@ -674,6 +679,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
return NewPhi;
}
+ case VPInstruction::AnyOf: {
+ Value *A = State.get(getOperand(0));
+ return Builder.CreateOrReduce(A);
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
@@ -682,7 +691,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
bool VPInstruction::isVectorToScalar() const {
return getOpcode() == VPInstruction::ExtractFromEnd ||
- getOpcode() == VPInstruction::ComputeReductionResult;
+ getOpcode() == VPInstruction::ComputeReductionResult ||
+ getOpcode() == VPInstruction::AnyOf;
}
bool VPInstruction::isSingleScalar() const {
@@ -745,6 +755,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return false;
case Instruction::ICmp:
case Instruction::Select:
+ case Instruction::Or:
case VPInstruction::PtrAdd:
// TODO: Cover additional opcodes.
return vputils::onlyFirstLaneUsed(this);
@@ -840,6 +851,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::PtrAdd:
O << "ptradd";
break;
+ case VPInstruction::AnyOf...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/112138
More information about the llvm-commits
mailing list