[llvm] [VPlan] Simplify branch to scalar ph in VPlan transform. (NFC) (PR #140409)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 17 14:34:21 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Simplify branch on false, starting with the branch from the middle block
to the scalar preheader. Initially, this helps simplify the initial VPlan construction.
Depends on https://github.com/llvm/llvm-project/pull/140405.
---
Patch is 101.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140409.diff
32 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+23-37)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+18-7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (-1)
- (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+21-23)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+11-26)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+23-12)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+1-3)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll (+5-5)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (+8-8)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-fixed-order-recurrence.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+9-11)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+16-16)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+6-12)
- (modified) llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll (+9-9)
- (modified) llvm/test/Transforms/LoopVectorize/uncountable-early-exit-vplan.ll (+25-25)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-iv-transforms.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll (+18-18)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+66-66)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll (+4-4)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll (+20-20)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-unused-interleave-group.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-widen-struct-return.ll (+2-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 148b187f4f25d..6c3a2ce941c54 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -490,7 +490,7 @@ class InnerLoopVectorizer {
MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
RTChecks(RTChecks), Plan(Plan),
- VectorPHVPB(Plan.getEntry()->getSingleSuccessor()) {}
+ VectorPHVPB(Plan.getEntry()->getSuccessors()[1]) {}
virtual ~InnerLoopVectorizer() = default;
@@ -2365,22 +2365,19 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
VPBlockBase *ScalarPH = Plan.getScalarPreheader();
VPBlockBase *PreVectorPH = VectorPHVPB->getSinglePredecessor();
- if (PreVectorPH->getNumSuccessors() != 1) {
- assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
- assert(PreVectorPH->getSuccessors()[0] == ScalarPH &&
- "Unexpected successor");
- VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
- VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
- PreVectorPH = CheckVPIRBB;
- }
+ assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors");
+ assert(PreVectorPH->getSuccessors()[0] == ScalarPH && "Unexpected successor");
+ VPIRBasicBlock *CheckVPIRBB = Plan.createVPIRBasicBlock(CheckIRBB);
+ VPBlockUtils::insertOnEdge(PreVectorPH, VectorPHVPB, CheckVPIRBB);
+ PreVectorPH = CheckVPIRBB;
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH);
PreVectorPH->swapSuccessors();
// We just connected a new block to the scalar preheader. Update all
// ResumePhis by adding an incoming value for it, replicating the last value.
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
- auto *ResumePhi = dyn_cast<VPInstruction>(&R);
- if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
+ auto *ResumePhi = cast<VPPhi>(&R);
+ if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors())
continue;
ResumePhi->addOperand(
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
@@ -2463,9 +2460,6 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);
-
- // TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here.
- introduceCheckBlockInVPlan(TCCheckBlock);
}
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
@@ -2530,7 +2524,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
for (auto &R : make_early_inc_range(*VPBB)) {
- assert(!R.isPhi() && "Tried to move phi recipe to end of block");
+ assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) &&
+ "Tried to move phi recipe to end of block");
R.moveBefore(*IRVPBB, IRVPBB->end());
}
@@ -7768,10 +7763,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
// over the incoming values correctly.
using namespace VPlanPatternMatch;
- auto IsResumePhi = [](VPUser *U) {
- auto *VPI = dyn_cast<VPInstruction>(U);
- return VPI && VPI->getOpcode() == VPInstruction::ResumePhi;
- };
+ auto IsResumePhi = [](VPUser *U) { return isa<VPPhi>(U); };
assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
"ResumePhi must have a single user");
auto *EpiResumePhiVPI =
@@ -7837,7 +7829,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 1. Set up the skeleton for vectorization, including vector pre-header and
// middle block. The vector loop is created during VPlan execution.
- VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSingleSuccessor());
+ VPBasicBlock *VectorPH = cast<VPBasicBlock>(Entry->getSuccessors()[1]);
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
if (VectorizingEpilogue)
VPlanTransforms::removeDeadRecipes(BestVPlan);
@@ -8070,7 +8062,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
- introduceCheckBlockInVPlan(TCCheckBlock);
+ if (!ForEpilogue)
+ introduceCheckBlockInVPlan(TCCheckBlock);
return TCCheckBlock;
}
@@ -8200,7 +8193,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
Plan.setEntry(NewEntry);
// OldEntry is now dead and will be cleaned up when the plan gets destroyed.
- introduceCheckBlockInVPlan(Insert);
return Insert;
}
@@ -9146,9 +9138,8 @@ static VPInstruction *addResumePhiRecipeForInduction(
WideIV->getDebugLoc());
}
- auto *ResumePhiRecipe =
- ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
- WideIV->getDebugLoc(), "bc.resume.val");
+ auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
+ {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
return ResumePhiRecipe;
}
@@ -9160,7 +9151,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
DenseMap<VPValue *, VPValue *> &IVEndValues) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
auto *ScalarPH = Plan.getScalarPreheader();
- auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
+ auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getPredecessors()[0]);
VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion();
VPBuilder VectorPHBuilder(
cast<VPBasicBlock>(VectorRegion->getSinglePredecessor()));
@@ -9177,8 +9168,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
&Plan.getVectorTripCount())) {
- assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
- "Expected a ResumePhi");
+ assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
continue;
@@ -9203,8 +9193,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
- auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
@@ -10406,9 +10395,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPI->setOperand(1, Freeze);
if (UpdateResumePhis)
OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
- return Freeze != &U && isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ResumePhi;
+ return Freeze != &U && isa<VPPhi>(&U);
});
}
};
@@ -10421,13 +10408,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
// scalar (which will become vector) epilogue loop we are done. Otherwise
// create it below.
if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
- return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
- m_Specific(VectorTC), m_SpecificInt(0)));
+ return match(&R, m_VPInstruction<Instruction::PHI>(m_Specific(VectorTC),
+ m_SpecificInt(0)));
}))
return;
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
- ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ ScalarPHBuilder.createScalarPhi(
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
"vec.epilog.resume.val");
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e634de1e17c69..ebbffef2a4afe 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -887,11 +887,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
SLPStore,
ActiveLaneMask,
ExplicitVectorLength,
- /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
- /// The first operand is the incoming value from the predecessor in VPlan,
- /// the second operand is the incoming value for all other predecessors
- /// (which are currently not modeled in VPlan).
- ResumePhi,
CalculateTripCountMinusVF,
// Increment the canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
@@ -1160,6 +1155,8 @@ class VPPhiAccessors {
return getAsRecipe()->getNumOperands();
}
+ void removeIncomingValue(VPBlockBase *VPB) const;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -1170,11 +1167,15 @@ struct VPPhi : public VPInstruction, public VPPhiAccessors {
VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
: VPInstruction(Instruction::PHI, Operands, DL, Name) {}
- static inline bool classof(const VPRecipeBase *U) {
+ static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPInstruction>(U);
return R && R->getOpcode() == Instruction::PHI;
}
+ VPPhi *clone() override {
+ return new VPPhi(operands(), getDebugLoc(), getName());
+ }
+
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -3737,6 +3738,15 @@ VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
}
+inline void VPPhiAccessors::removeIncomingValue(VPBlockBase *VPB) const {
+ VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
+ const VPBasicBlock *Parent = R->getParent();
+ assert(R->getNumOperands() == Parent->getNumPredecessors());
+ auto I = find(Parent->getPredecessors(), VPB);
+ R->getOperand(I - Parent->getPredecessors().begin())->removeUser(*R);
+ R->removeOperand(I - Parent->getPredecessors().begin());
+}
+
/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
@@ -4246,7 +4256,8 @@ class VPlan {
/// that this relies on unneeded branches to the scalar tail loop being
/// removed.
bool hasScalarTail() const {
- return getScalarPreheader()->getNumPredecessors() != 0;
+ return getScalarPreheader()->getNumPredecessors() != 0 &&
+ getScalarPreheader()->getSinglePredecessor() != getEntry();
}
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..926490bfad7d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -101,7 +101,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return inferScalarType(R->getOperand(0));
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::Not:
- case VPInstruction::ResumePhi:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::AnyOf:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index 287bc93ce496a..204af8f12311b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -507,8 +507,12 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
+ } else {
+ for (VPRecipeBase &R : cast<VPIRBasicBlock>(EB)->phis()) {
+ if (auto *PhiR = dyn_cast<VPIRPhi>(&R))
+ PhiR->removeIncomingValue(Pred);
+ }
}
-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
@@ -541,23 +545,13 @@ void VPlanTransforms::prepareForVectorization(
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.
+ VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
+ // Also connect the entry block to the scalar preheader.
+ VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
+ Plan.getEntry()->swapSuccessors();
- if (!RequiresScalarEpilogueCheck) {
- if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
- VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
- // The exit blocks are unreachable, remove their recipes to make sure no
- // users remain that may pessimize transforms.
- for (auto *EB : Plan.getExitBlocks()) {
- for (VPRecipeBase &R : make_early_inc_range(*EB))
- R.eraseFromParent();
- }
+ if (MiddleVPBB->getNumSuccessors() != 2)
return;
- }
-
- // The connection order corresponds to the operands of the conditional branch,
- // with the middle block already connected to the exit block.
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
// Here we use the same DebugLoc as the scalar loop latch terminator instead
@@ -565,13 +559,17 @@ void VPlanTransforms::prepareForVectorization(
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
VPBuilder Builder(MiddleVPBB);
- VPValue *Cmp =
- TailFolded
- ? Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())))
- : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
- &Plan.getVectorTripCount(),
- ScalarLatchTerm->getDebugLoc(), "cmp.n");
+ VPValue *Cmp;
+ if (TailFolded)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else if (!RequiresScalarEpilogueCheck)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else
+ Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
+ &Plan.getVectorTripCount(),
+ ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3fa6a21b80b17..f4beb5b6d11e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -754,17 +754,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Addend = State.get(getOperand(1), VPLane(0));
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
- case VPInstruction::ResumePhi: {
- auto *NewPhi =
- Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
- for (const auto &[IncVPV, PredVPBB] :
- zip(operands(), getParent()->getPredecessors())) {
- Value *IncV = State.get(IncVPV, /* IsScalar */ true);
- BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast<VPBasicBlock>(PredVPBB));
- NewPhi->addIncoming(IncV, PredBB);
- }
- return NewPhi;
- }
case VPInstruction::AnyOf: {
Value *A = State.get(getOperand(0));
return Builder.CreateOrReduce(A);
@@ -863,8 +852,7 @@ bool VPInstruction::isVectorToScalar() const {
}
bool VPInstruction::isSingleScalar() const {
- return getOpcode() == VPInstruction::ResumePhi ||
- getOpcode() == Instruction::PHI;
+ return getOpcode() == Instruction::PHI;
}
#if !defined(NDEBUG)
@@ -963,7 +951,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
- case VPInstruction::ResumePhi:
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1020,9 +1007,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ActiveLaneMask:
O << "active lane mask";
break;
- case VPInstruction::ResumePhi:
- O << "resume-phi";
- break;
case VPInstruction::ExplicitVectorLength:
O << "EXPLICIT-VECTOR-LENGTH";
break;
@@ -1130,15 +1114,16 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
void VPPhi::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- assert(getParent() ==
- getParent()->getPlan()->getVectorLoopRegion()->getEntry() &&
- "VPInstructions with PHI opcodes must be used for header phis only "
- "at the moment");
- BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
- Value *Start = State.get(getIncomingValue(0), VPLane(0));
- PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());
- Phi->addIncoming(Start, VectorPH);
- State.set(this, Phi, VPLane(0));
+ PHINode *NewPhi = State.Builder.CreatePHI(
+ State.TypeAnalysis.inferScalarType(this), 2, getName());
+ unsigned NumIncoming =
+ getParent()->getNumPredecessors() == 0 ? 1 : getNumIncoming();
+ for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+ Value *IncV = State.get(getIncomingValue(Idx), /* IsScalar */ true);
+ BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
+ NewPhi->addIncoming(IncV, PredBB);
+ }
+ State.set(this, NewPhi, VPLane(0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f1c466e3208be..e1c0e505d7c90 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1732,11 +1732,21 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
- if (VPBB->getNumSuccessors() != 2 ||
- !match(&VPBB->back(), m_BranchOnCond(m_True())))
+ VPValue *Cond;
+ if (VPBB->getNumSuccessors() != 2 || VPBB->empty() ||
+ !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
- VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+ unsigned RemovedIdx;
+ if (match(Cond, m_True()))
+ RemovedIdx = 1;
+ else if (match(Cond, m_False()))
+ RemovedIdx = 0;
+ else
+ continue;
+
+ VPBasicBlock *RemovedSucc =
+ cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
const auto &Preds = RemovedSucc->getPredecessors();
assert(count(Preds, VPBB) == 1 &&
"There must be a single edge between VPBB and its successor");
@@ -1745,12 +1755,14 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
// from these recipes.
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/140409
More information about the llvm-commits
mailing list