[llvm] [VPlan] Use ResumePhi to create reduction resume phis. (PR #110004)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 25 08:44:10 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Use VPInstruction::ResumePhi to create phi nodes for reduction resume values.
This allows simplifying createAndCollectMergePhiForReduction to only collect reduction resume phis when vectorizing epilogue loops and adding extra incoming edges from the main vector loop.
---
Full diff: https://github.com/llvm/llvm-project/pull/110004.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+41-41)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll (+9)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll (+2)
- (modified) llvm/test/Transforms/LoopVectorize/vplan-printing.ll (+9)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index cac0b57fc69649..4f9e990e5483da 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7422,8 +7422,9 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
}
// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
-// create a merge phi node for it and add it to \p ReductionResumeValues.
-static void createAndCollectMergePhiForReduction(
+// add it to \p ReductionResumeValues and add incoming values from the main
+// vector loop.
+static void updateAndCollectMergePhiForReductionForEpilogueVectorization(
VPInstruction *RedResult,
DenseMap<const RecurrenceDescriptor *, Value *> &ReductionResumeValues,
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
@@ -7432,15 +7433,24 @@ static void createAndCollectMergePhiForReduction(
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
return;
+ using namespace VPlanPatternMatch;
+ VPValue *ResumePhiVPV =
+ cast<VPInstruction>(*find_if(RedResult->users(), [](VPUser *U) {
+ return match(U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(),
+ m_VPValue()));
+ }));
+ auto *BCBlockPhi = cast<PHINode>(State.get(ResumePhiVPV, true));
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ if (!VectorizingEpilogue) {
+ ReductionResumeValues[&RdxDesc] = BCBlockPhi;
+ return;
+ }
- Value *FinalValue =
- State.get(RedResult, VPIteration(0, VPLane::getFirstLane()));
auto *ResumePhi =
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
- if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
- RdxDesc.getRecurrenceKind())) {
+ if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
@@ -7450,42 +7460,15 @@ static void createAndCollectMergePhiForReduction(
"when vectorizing the epilogue loop, we need a resume phi from main "
"vector loop");
- // TODO: bc.merge.rdx should not be created here, instead it should be
- // modeled in VPlan.
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
- // Create a phi node that merges control-flow from the backedge-taken check
- // block and the middle block.
- auto *BCBlockPhi =
- PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
- LoopScalarPreHeader->getTerminator()->getIterator());
-
// If we are fixing reductions in the epilogue loop then we should already
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
// we carry over the incoming values correctly.
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
- if (Incoming == LoopMiddleBlock)
- BCBlockPhi->addIncoming(FinalValue, Incoming);
- else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
- BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
- Incoming);
- else
- BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
+ if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
+ BCBlockPhi->setIncomingValueForBlock(
+ Incoming, ResumePhi->getIncomingValueForBlock(Incoming));
}
-
- auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
- // TODO: This fixup should instead be modeled in VPlan.
- // Fix the scalar loop reduction variable with the incoming reduction sum
- // from the vector body and from the backedge value.
- int IncomingEdgeBlockIdx =
- OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
- assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
- // Pick the other block.
- int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
- Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
- OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
-
- ReductionResumeValues[&RdxDesc] = BCBlockPhi;
}
std::pair<DenseMap<const SCEV *, Value *>,
@@ -7579,11 +7562,12 @@ LoopVectorizationPlanner::executePlan(
DenseMap<const RecurrenceDescriptor *, Value *> ReductionResumeValues;
auto *ExitVPBB =
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
- for (VPRecipeBase &R : *ExitVPBB) {
- createAndCollectMergePhiForReduction(
- dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
- State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
- }
+ if (IsEpilogueVectorization)
+ for (VPRecipeBase &R : *ExitVPBB) {
+ updateAndCollectMergePhiForReductionForEpilogueVectorization(
+ dyn_cast<VPInstruction>(&R), ReductionResumeValues, State, OrigLoop,
+ State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
+ }
// 2.6. Maintain Loop Hints
// Keep all loop hints from the original loop on the vector loop (we'll
@@ -9369,6 +9353,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
m_VPValue()));
});
+ VPBasicBlock *ScalarPHVPBB = nullptr;
+ if (MiddleVPBB->getNumSuccessors() == 2) {
+ // Order is strict: first is the exit block, second is the scalar
+ // preheader.
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
+ } else {
+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
+ }
+
+ VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
+ VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
+ {}, "bc.merge.rdx");
+ auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
+ Plan->addLiveOut(RedPhi, ResumePhiRecipe);
+
// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
// any iteration. The final value is selected by the final
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 90c209cf3f5186..6a435709aeb2b2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: No successors
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: scalar.ph:
+; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
; IF-EVL-INLOOP-NEXT: No successors
+; IF-EVL-INLOOP-EMPTY:
+; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
; IF-EVL-INLOOP-NEXT: }
;
@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: No successors
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: scalar.ph:
+; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
; NO-VP-OUTLOOP-NEXT: No successors
+; NO-VP-OUTLOOP-EMPTY:
+; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
; NO-VP-OUTLOOP-NEXT: }
;
@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: No successors
; NO-VP-INLOOP-EMPTY:
; NO-VP-INLOOP-NEXT: scalar.ph:
+; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
; NO-VP-INLOOP-NEXT: No successors
+; NO-VP-INLOOP-EMPTY:
+; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
; NO-VP-INLOOP-NEXT: }
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 8e56614a2e3d5c..b05980bef1b38f 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
+; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
; CHECK-NEXT: }
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 26974c2307065a..8b2597f7b1cbab 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
; CHECK-NEXT: }
;
entry:
@@ -220,7 +223,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
; CHECK-NEXT: }
;
entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
+; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
; CHECK-NEXT:}
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/110004
More information about the llvm-commits
mailing list