[llvm] [VPlan] Simplify branch on False in VPlan transform (NFC). (PR #140409)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 03:24:57 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/140409
>From 063d70f7c1425147f6fd78077609a4a043f7e7f1 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 5 May 2025 20:41:34 +0100
Subject: [PATCH 1/5] [VPlan] Remove ResumePhi opcode, use regular PHI instead
(NFC).
Use regular VPPhi instead of a separate opcode for resume phis. This
removes an unneeded specialized opcode and unifies the code
(verification, printing, updating when CFG is changed).
Depends on https://github.com/llvm/llvm-project/pull/140132.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 34 +++++++-----------
llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++---
.../Transforms/Vectorize/VPlanAnalysis.cpp | 1 -
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 36 ++++++++-----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 9 +++--
.../Transforms/Vectorize/VPlanVerifier.cpp | 4 +--
.../vplan-printing-before-execute.ll | 8 ++---
.../LoopVectorize/vplan-printing.ll | 26 +++++++-------
8 files changed, 54 insertions(+), 75 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4f305bb383465..f6b1913bcba60 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2381,8 +2381,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
// We just connected a new block to the scalar preheader. Update all
// ResumePhis by adding an incoming value for it, replicating the last value.
for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
- auto *ResumePhi = dyn_cast<VPInstruction>(&R);
- if (!ResumePhi || ResumePhi->getOpcode() != VPInstruction::ResumePhi)
+ auto *ResumePhi = cast<VPPhi>(&R);
+ if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors())
continue;
ResumePhi->addOperand(
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
@@ -2533,7 +2533,8 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
for (auto &R : make_early_inc_range(*VPBB)) {
- assert(!R.isPhi() && "Tried to move phi recipe to end of block");
+ assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) &&
+ "Tried to move phi recipe to end of block");
R.moveBefore(*IRVPBB, IRVPBB->end());
}
@@ -7591,10 +7592,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
// created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
// over the incoming values correctly.
using namespace VPlanPatternMatch;
- auto IsResumePhi = [](VPUser *U) {
- auto *VPI = dyn_cast<VPInstruction>(U);
- return VPI && VPI->getOpcode() == VPInstruction::ResumePhi;
- };
+ auto IsResumePhi = [](VPUser *U) { return isa<VPPhi>(U); };
assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
"ResumePhi must have a single user");
auto *EpiResumePhiVPI =
@@ -8776,9 +8774,8 @@ static VPInstruction *addResumePhiRecipeForInduction(
WideIV->getDebugLoc());
}
- auto *ResumePhiRecipe =
- ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
- WideIV->getDebugLoc(), "bc.resume.val");
+ auto *ResumePhiRecipe = ScalarPHBuilder.createScalarPhi(
+ {EndValue, Start}, WideIV->getDebugLoc(), "bc.resume.val");
return ResumePhiRecipe;
}
@@ -8807,8 +8804,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
&Plan.getVectorTripCount())) {
- assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
- "Expected a ResumePhi");
+ assert(isa<VPPhi>(ResumePhi) && "Expected a phi");
IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
continue;
@@ -8833,8 +8829,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
VPInstruction::ExtractLastElement, {ResumeFromVectorLoop}, {},
"vector.recur.extract");
StringRef Name = IsFOR ? "scalar.recur.init" : "bc.merge.rdx";
- auto *ResumePhiR = ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
{ResumeFromVectorLoop, VectorPhiR->getStartValue()}, {}, Name);
ScalarPhiIRI->addOperand(ResumePhiR);
}
@@ -10010,9 +10005,7 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPI->setOperand(1, Freeze);
if (UpdateResumePhis)
OrigStart->replaceUsesWithIf(Freeze, [Freeze](VPUser &U, unsigned) {
- return Freeze != &U && isa<VPInstruction>(&U) &&
- cast<VPInstruction>(&U)->getOpcode() ==
- VPInstruction::ResumePhi;
+ return Freeze != &U && isa<VPPhi>(&U);
});
}
};
@@ -10025,13 +10018,12 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
// scalar (which will become vector) epilogue loop we are done. Otherwise
// create it below.
if (any_of(*MainScalarPH, [VectorTC](VPRecipeBase &R) {
- return match(&R, m_VPInstruction<VPInstruction::ResumePhi>(
- m_Specific(VectorTC), m_SpecificInt(0)));
+ return match(&R, m_VPInstruction<Instruction::PHI>(m_Specific(VectorTC),
+ m_SpecificInt(0)));
}))
return;
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
- ScalarPHBuilder.createNaryOp(
- VPInstruction::ResumePhi,
+ ScalarPHBuilder.createScalarPhi(
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
"vec.epilog.resume.val");
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index fd58b8a8ec758..9646c3183f285 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -887,11 +887,6 @@ class VPInstruction : public VPRecipeWithIRFlags,
SLPStore,
ActiveLaneMask,
ExplicitVectorLength,
- /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
- /// The first operand is the incoming value from the predecessor in VPlan,
- /// the second operand is the incoming value for all other predecessors
- /// (which are currently not modeled in VPlan).
- ResumePhi,
CalculateTripCountMinusVF,
// Increment the canonical IV separately for each unrolled part.
CanonicalIVIncrementForPart,
@@ -1137,11 +1132,15 @@ struct VPPhi : public VPInstruction, public VPPhiAccessors {
VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
: VPInstruction(Instruction::PHI, Operands, DL, Name) {}
- static inline bool classof(const VPRecipeBase *U) {
+ static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPInstruction>(U);
return R && R->getOpcode() == Instruction::PHI;
}
+ VPPhi *clone() override {
+ return new VPPhi(operands(), getDebugLoc(), getName());
+ }
+
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..926490bfad7d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -101,7 +101,6 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return inferScalarType(R->getOperand(0));
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::Not:
- case VPInstruction::ResumePhi:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::AnyOf:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 18229780bc4a5..a5dbae00e402d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -732,17 +732,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Addend = State.get(getOperand(1), VPLane(0));
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
}
- case VPInstruction::ResumePhi: {
- auto *NewPhi =
- Builder.CreatePHI(State.TypeAnalysis.inferScalarType(this), 2, Name);
- for (const auto &[IncVPV, PredVPBB] :
- zip(operands(), getParent()->getPredecessors())) {
- Value *IncV = State.get(IncVPV, /* IsScalar */ true);
- BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(cast<VPBasicBlock>(PredVPBB));
- NewPhi->addIncoming(IncV, PredBB);
- }
- return NewPhi;
- }
case VPInstruction::AnyOf: {
Value *A = State.get(getOperand(0));
return Builder.CreateOrReduce(A);
@@ -841,8 +830,7 @@ bool VPInstruction::isVectorToScalar() const {
}
bool VPInstruction::isSingleScalar() const {
- return getOpcode() == VPInstruction::ResumePhi ||
- getOpcode() == Instruction::PHI;
+ return getOpcode() == Instruction::PHI;
}
void VPInstruction::execute(VPTransformState &State) {
@@ -928,7 +916,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
- case VPInstruction::ResumePhi:
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -985,9 +972,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ActiveLaneMask:
O << "active lane mask";
break;
- case VPInstruction::ResumePhi:
- O << "resume-phi";
- break;
case VPInstruction::ExplicitVectorLength:
O << "EXPLICIT-VECTOR-LENGTH";
break;
@@ -1095,11 +1079,19 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
void VPPhi::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
- BasicBlock *VectorPH = State.CFG.VPBB2IRBB.at(getIncomingBlock(0));
- Value *Start = State.get(getIncomingValue(0), VPLane(0));
- PHINode *Phi = State.Builder.CreatePHI(Start->getType(), 2, getName());
- Phi->addIncoming(Start, VectorPH);
- State.set(this, Phi, VPLane(0));
+ PHINode *NewPhi = State.Builder.CreatePHI(
+ State.TypeAnalysis.inferScalarType(this), 2, getName());
+ // TODO: Fixup incoming values once other recipes are enabled.
+ unsigned NumIncoming =
+ getParent() == getParent()->getPlan()->getScalarPreheader()
+ ? getNumIncoming()
+ : 1;
+ for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
+ Value *IncV = State.get(getIncomingValue(Idx), VPLane(0));
+ BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
+ NewPhi->addIncoming(IncV, PredBB);
+ }
+ State.set(this, NewPhi, VPLane(0));
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index dc5be520505eb..9d7d451bb5741 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1863,8 +1863,8 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
!isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
!isa<VPHeaderPHIRecipe>(&R) &&
"Cannot update VPIRInstructions wrapping phis or header phis yet");
- auto *VPI = dyn_cast<VPInstruction>(&R);
- if (!VPI || VPI->getOpcode() != VPInstruction::ResumePhi)
+ auto *VPI = dyn_cast<VPPhi>(&R);
+ if (!VPI)
break;
VPBuilder B(VPI);
SmallVector<VPValue *> NewOperands;
@@ -1874,9 +1874,8 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
continue;
NewOperands.push_back(Op);
}
- VPI->replaceAllUsesWith(B.createNaryOp(VPInstruction::ResumePhi,
- NewOperands, VPI->getDebugLoc(),
- VPI->getName()));
+ VPI->replaceAllUsesWith(
+ B.createScalarPhi(NewOperands, VPI->getDebugLoc(), VPI->getName()));
VPI->eraseFromParent();
}
// Disconnect blocks and remove the terminator. RemovedSucc will be deleted
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 54cf8ac2ed04a..45010d0021581 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -243,9 +243,7 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) {
continue;
}
// TODO: Also verify VPPredInstPHIRecipe.
- if (isa<VPPredInstPHIRecipe>(UI) ||
- (isa<VPInstruction>(UI) && (cast<VPInstruction>(UI)->getOpcode() ==
- VPInstruction::ResumePhi)))
+ if (isa<VPPredInstPHIRecipe>(UI))
continue;
// If the user is in the same block, check it comes after R in the
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 3816def9e13c2..5dcb7d214d359 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -49,8 +49,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and>
-; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A>
+; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -99,8 +99,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and>
-; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A>
+; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 3504558d1b545..9219c8bfba681 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -46,7 +46,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<for.body.preheader> ]
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -117,7 +117,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<for.body.preheader> ]
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -206,7 +206,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -289,7 +289,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -394,7 +394,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -479,7 +479,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -548,7 +548,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -616,7 +616,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -686,7 +686,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -776,7 +776,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -855,7 +855,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -972,8 +972,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = phi [ vp<[[RESUME_1]]>, middle.block ], [ ir<22>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
>From 74eddb586e3bec545515544b61a952e3e38963e5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 28 May 2025 16:27:26 +0100
Subject: [PATCH 2/5] !fixup address latest comments, thanks
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 ++---
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 23 ++++++++++------
.../vplan-printing-before-execute.ll | 8 +++---
.../LoopVectorize/vplan-printing.ll | 26 +++++++++----------
4 files changed, 34 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c9263c4cd6ed8..eadaa687fb009 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2380,10 +2380,8 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
// We just connected a new block to the scalar preheader. Update all
// ResumePhis by adding an incoming value for it, replicating the last value.
- for (VPRecipeBase &R : *cast<VPBasicBlock>(ScalarPH)) {
+ for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
auto *ResumePhi = cast<VPPhi>(&R);
- if (ResumePhi->getNumIncoming() == ScalarPH->getNumPredecessors())
- continue;
ResumePhi->addOperand(
ResumePhi->getOperand(ResumePhi->getNumOperands() - 1));
}
@@ -2534,7 +2532,7 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) {
VPIRBasicBlock *IRVPBB = VPBB->getPlan()->createVPIRBasicBlock(IRBB);
for (auto &R : make_early_inc_range(*VPBB)) {
assert((IRVPBB->empty() || IRVPBB->back().isPhi() || !R.isPhi()) &&
- "Tried to move phi recipe to end of block");
+ "Tried to move phi recipe after a non-phi recipe");
R.moveBefore(*IRVPBB, IRVPBB->end());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 3464e833710b7..8a7dde5eddebb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1081,11 +1081,12 @@ void VPPhi::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
PHINode *NewPhi = State.Builder.CreatePHI(
State.TypeAnalysis.inferScalarType(this), 2, getName());
- // TODO: Fixup incoming values once other recipes are enabled.
- unsigned NumIncoming =
- getParent() == getParent()->getPlan()->getScalarPreheader()
- ? getNumIncoming()
- : 1;
+ unsigned NumIncoming = getNumIncoming();
+ if (getParent() != getParent()->getPlan()->getScalarPreheader()) {
+ // TODO: Fixup all incoming values of header phis once recipes defining them
+ // are introduced.
+ NumIncoming = 1;
+ }
for (unsigned Idx = 0; Idx != NumIncoming; ++Idx) {
Value *IncV = State.get(getIncomingValue(Idx), VPLane(0));
BasicBlock *PredBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(Idx));
@@ -1099,9 +1100,15 @@ void VPPhi::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
- O << " = phi ";
-
- printPhiOperands(O, SlotTracker);
+ const VPBasicBlock *Parent = getParent();
+ if (Parent == Parent->getPlan()->getScalarPreheader()) {
+ // TODO: Use regular printing for resume-phis as well
+ O << " = resume-phi ";
+ printOperands(O, SlotTracker);
+ } else {
+ O << " = phi ";
+ printPhiOperands(O, SlotTracker);
+ }
}
#endif
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 76052886ea7af..b487911379f30 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -49,8 +49,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and>
+; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -98,8 +98,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<scalar.ph>:
-; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = phi [ vp<[[END1]]>, middle.block ], [ ir<%and>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = phi [ vp<[[END2]]>, middle.block ], [ ir<%A>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME1:%.+]]> = resume-phi vp<[[END1]]>, ir<%and>
+; CHECK-NEXT: EMIT vp<[[RESUME2:%.+]]>.1 = resume-phi vp<[[END2]]>, ir<%A>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 9219c8bfba681..3504558d1b545 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -46,7 +46,7 @@ define void @print_call_and_memory(i64 %n, ptr noalias %y, ptr noalias %x) nounw
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<for.body.preheader> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -117,7 +117,7 @@ define void @print_widen_gep_and_select(i64 %n, ptr noalias %y, ptr noalias %x,
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<for.body.preheader> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -206,7 +206,7 @@ define void @print_replicate_predicated_phi(i64 %n, ptr %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -289,7 +289,7 @@ define void @print_interleave_groups(i32 %C, i32 %D) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -394,7 +394,7 @@ define void @debug_loc_vpinstruction(ptr nocapture %asd, ptr nocapture %bsd) !db
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph:
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -479,7 +479,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[IV_END]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[IV_END]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -548,7 +548,7 @@ define i32 @print_exit_value(ptr %ptr, i32 %off) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -616,7 +616,7 @@ define void @print_fast_math_flags(i64 %n, ptr noalias %y, ptr noalias %x, ptr %
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -686,7 +686,7 @@ define void @print_exact_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -776,7 +776,7 @@ define void @print_call_flags(ptr readonly %src, ptr noalias %dest, i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<for.body>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<for.body>:
@@ -855,7 +855,7 @@ define void @print_disjoint_flags(i64 %n, ptr noalias %x) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
@@ -972,8 +972,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
-; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = phi [ vp<[[RESUME_1]]>, middle.block ], [ ir<22>, ir-bb<entry> ]
-; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = phi [ vp<[[VTC]]>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22>
+; CHECK-NEXT: EMIT vp<[[RESUME_IV:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
; CHECK-NEXT: Successor(s): ir-bb<loop>
; CHECK-EMPTY:
; CHECK-NEXT: ir-bb<loop>:
>From 801b98dd57d1bf8fb3e95d8db421ce830e5e2a55 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 28 May 2025 16:36:18 +0100
Subject: [PATCH 3/5] !fixup update comment
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index eadaa687fb009..10e888e694e11 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2379,7 +2379,7 @@ void InnerLoopVectorizer::introduceCheckBlockInVPlan(BasicBlock *CheckIRBB) {
PreVectorPH->swapSuccessors();
// We just connected a new block to the scalar preheader. Update all
- // ResumePhis by adding an incoming value for it, replicating the last value.
+ // VPPhis by adding an incoming value for it, replicating the last value.
for (VPRecipeBase &R : cast<VPBasicBlock>(ScalarPH)->phis()) {
auto *ResumePhi = cast<VPPhi>(&R);
ResumePhi->addOperand(
>From d3df2c398c0d729198f1c0fa82df1a424f80a79f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 29 May 2025 11:03:01 +0100
Subject: [PATCH 4/5] !fixup update after merging
5b85e4b08d2e356c40f5fbe8da98f94467c2f0cb.
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 267bf88e72b69..5565c4b8aac1e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1104,7 +1104,7 @@ void VPPhi::execute(VPTransformState &State) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPPhi::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- O << Indent << "EMIT ";
+ O << Indent << "EMIT" << (isSingleScalar() ? "-SCALAR" : "") << " ";
printAsOperand(O, SlotTracker);
const VPBasicBlock *Parent = getParent();
if (Parent == Parent->getPlan()->getScalarPreheader()) {
>From 69ad1de102cd2e17ad0bd241b3d3511cff9c8838 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 5 May 2025 19:10:45 +0100
Subject: [PATCH 5/5] [VPlan] Simplify branch to scalar ph in VPlan transform.
(NFC)
Simplify branch on false, starting with the branch from the middle block
to the scalar preheader.
Depends on https://github.com/llvm/llvm-project/pull/140405.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++++
.../Vectorize/VPlanConstruction.cpp | 46 ++++++++-----------
.../Transforms/Vectorize/VPlanTransforms.cpp | 24 +++++++---
llvm/lib/Transforms/Vectorize/VPlanValue.h | 5 ++
4 files changed, 53 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 16c063c0a4a27..be0f79fbf016b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1122,6 +1122,8 @@ class VPPhiAccessors {
return getAsRecipe()->getNumOperands();
}
+ void removeIncomingValue(VPBlockBase *VPB) const;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const;
@@ -3715,6 +3717,15 @@ VPPhiAccessors::getIncomingBlock(unsigned Idx) const {
return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
}
+inline void VPPhiAccessors::removeIncomingValue(VPBlockBase *VPB) const {
+ VPRecipeBase *R = const_cast<VPRecipeBase *>(getAsRecipe());
+ const VPBasicBlock *Parent = R->getParent();
+ assert(R->getNumOperands() == Parent->getNumPredecessors());
+ auto I = find(Parent->getPredecessors(), VPB);
+ R->getOperand(I - Parent->getPredecessors().begin())->removeUser(*R);
+ R->removeOperand(I - Parent->getPredecessors().begin());
+}
+
/// A special type of VPBasicBlock that wraps an existing IR basic block.
/// Recipes of the block get added before the first non-phi instruction in the
/// wrapped block.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
index b92ea757060cb..a70845a46694b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
@@ -501,8 +501,12 @@ void VPlanTransforms::prepareForVectorization(
cast<VPBasicBlock>(HeaderVPB),
cast<VPBasicBlock>(LatchVPB), Range);
HandledUncountableEarlyExit = true;
+ } else {
+ for (VPRecipeBase &R : cast<VPIRBasicBlock>(EB)->phis()) {
+ if (auto *PhiR = dyn_cast<VPIRPhi>(&R))
+ PhiR->removeIncomingValue(Pred);
+ }
}
-
cast<VPBasicBlock>(Pred)->getTerminator()->eraseFromParent();
VPBlockUtils::disconnectBlocks(Pred, EB);
}
@@ -535,25 +539,6 @@ void VPlanTransforms::prepareForVectorization(
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can set the condition to true.
// 3) Otherwise, construct a runtime check.
-
- if (!RequiresScalarEpilogueCheck) {
- if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor())
- VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB);
- VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
- VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
- Plan.getEntry()->swapSuccessors();
-
- // The exit blocks are unreachable, remove their recipes to make sure no
- // users remain that may pessimize transforms.
- for (auto *EB : Plan.getExitBlocks()) {
- for (VPRecipeBase &R : make_early_inc_range(*EB))
- R.eraseFromParent();
- }
- return;
- }
-
- // The connection order corresponds to the operands of the conditional branch,
- // with the middle block already connected to the exit block.
VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
// Also connect the entry block to the scalar preheader.
// TODO: Also introduce a branch recipe together with the minimum trip count
@@ -561,19 +546,26 @@ void VPlanTransforms::prepareForVectorization(
VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
Plan.getEntry()->swapSuccessors();
+ if (MiddleVPBB->getNumSuccessors() != 2)
+ return;
+
auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator();
// Here we use the same DebugLoc as the scalar loop latch terminator instead
// of the corresponding compare because they may have ended up with
// different line numbers and we want to avoid awkward line stepping while
// debugging. Eg. if the compare has got a line number inside the loop.
VPBuilder Builder(MiddleVPBB);
- VPValue *Cmp =
- TailFolded
- ? Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(TripCount->getType()->getContext())))
- : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
- &Plan.getVectorTripCount(),
- ScalarLatchTerm->getDebugLoc(), "cmp.n");
+ VPValue *Cmp;
+ if (TailFolded)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getTrue(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else if (!RequiresScalarEpilogueCheck)
+ Cmp = Plan.getOrAddLiveIn(ConstantInt::getFalse(
+ IntegerType::getInt1Ty(TripCount->getType()->getContext())));
+ else
+ Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
+ &Plan.getVectorTripCount(),
+ ScalarLatchTerm->getDebugLoc(), "cmp.n");
Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp},
ScalarLatchTerm->getDebugLoc());
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index beab52fc3b133..98f220ee4f9af 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1847,11 +1847,21 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
using namespace llvm::VPlanPatternMatch;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(Plan.getEntry()))) {
+ VPValue *Cond;
if (VPBB->getNumSuccessors() != 2 || VPBB == Plan.getEntry() ||
- !match(&VPBB->back(), m_BranchOnCond(m_True())))
+ !match(&VPBB->back(), m_BranchOnCond(m_VPValue(Cond))))
continue;
- VPBasicBlock *RemovedSucc = cast<VPBasicBlock>(VPBB->getSuccessors()[1]);
+ unsigned RemovedIdx;
+ if (match(Cond, m_True()))
+ RemovedIdx = 1;
+ else if (match(Cond, m_False()))
+ RemovedIdx = 0;
+ else
+ continue;
+
+ VPBasicBlock *RemovedSucc =
+ cast<VPBasicBlock>(VPBB->getSuccessors()[RemovedIdx]);
const auto &Preds = RemovedSucc->getPredecessors();
assert(count(Preds, VPBB) == 1 &&
"There must be a single edge between VPBB and its successor");
@@ -1860,10 +1870,12 @@ static void removeBranchOnCondTrue(VPlan &Plan) {
// Values coming from VPBB into ResumePhi recipes of RemoveSucc are removed
// from these recipes.
for (VPRecipeBase &R : make_early_inc_range(*RemovedSucc)) {
- assert((!isa<VPIRInstruction>(&R) ||
- !isa<PHINode>(cast<VPIRInstruction>(&R)->getInstruction())) &&
- !isa<VPHeaderPHIRecipe>(&R) &&
- "Cannot update VPIRInstructions wrapping phis or header phis yet");
+ if (isa<VPIRPhi>(&R)) {
+ assert(RemovedSucc->getNumPredecessors() == 1);
+ cast<VPIRPhi>(&R)->removeIncomingValue(VPBB);
+ continue;
+ }
+
auto *VPI = dyn_cast<VPPhi>(&R);
if (!VPI)
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 6cc792627f60d..db8385e909a9c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -38,6 +38,7 @@ class VPSlotTracker;
class VPUser;
class VPRecipeBase;
class VPInterleaveRecipe;
+class VPPhiAccessors;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@@ -199,8 +200,12 @@ raw_ostream &operator<<(raw_ostream &OS, const VPRecipeBase &R);
/// This class augments VPValue with operands which provide the inverse def-use
/// edges from VPValue's users to their defs.
class VPUser {
+ friend class VPPhiAccessors;
+
SmallVector<VPValue *, 2> Operands;
+ void removeOperand(unsigned Idx) { Operands.erase(Operands.begin() + Idx); }
+
protected:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the operands to \p O.
More information about the llvm-commits
mailing list