[llvm] [VPlan] Introduce explicit ExtractFromEnd recipes for live-outs. (PR #100658)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 12 12:44:47 PDT 2024
================
@@ -8517,20 +8534,121 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) {
}
VPValue *OneVPV = Plan.getOrAddLiveIn(
ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1));
+ VPValue *TwoVPV = Plan.getOrAddLiveIn(
+ ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 2));
for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) {
auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhi);
if (!FOR)
continue;
+ // This is the second phase of vectorizing first-order recurrences. An
+ // overview of the transformation is described below. Suppose we have the
+ // following loop with some use after the loop of the last a[i-1],
+ //
+ // for (int i = 0; i < n; ++i) {
+ // t = a[i - 1];
+ // b[i] = a[i] - t;
+ // }
+ // use t;
+ //
+ // There is a first-order recurrence on "a". For this loop, the shorthand
+ // scalar IR looks like:
+ //
+ // scalar.ph:
+ // s_init = a[-1]
+ // br scalar.body
+ //
+ // scalar.body:
+ // i = phi [0, scalar.ph], [i+1, scalar.body]
+ // s1 = phi [s_init, scalar.ph], [s2, scalar.body]
+ // s2 = a[i]
+ // b[i] = s2 - s1
+ // br cond, scalar.body, exit.block
+ //
+ // exit.block:
+ // use = lcssa.phi [s1, scalar.body]
+ //
+ // In this example, s1 is a recurrence because it's value depends on the
+ // previous iteration. In the first phase of vectorization, we created a
+ // vector phi v1 for s1. We now complete the vectorization and produce the
+ // shorthand vector IR shown below (for VF = 4, UF = 1).
+ //
+ // vector.ph:
+ // v_init = vector(..., ..., ..., a[-1])
+ // br vector.body
+ //
+ // vector.body
+ // i = phi [0, vector.ph], [i+4, vector.body]
+ // v1 = phi [v_init, vector.ph], [v2, vector.body]
+ // v2 = a[i, i+1, i+2, i+3];
+ // v3 = vector(v1(3), v2(0, 1, 2))
+ // b[i, i+1, i+2, i+3] = v2 - v3
+ // br cond, vector.body, middle.block
+ //
+ // middle.block:
+ // s_penultimate = v2(2) = v3(3)
+ // s_resume = v2(3)
+ // br cond, scalar.ph, exit.block
+ //
+ // scalar.ph:
+ // s_init' = phi [s_resume, middle.block], [s_init, otherwise]
+ // br scalar.body
+ //
+ // scalar.body:
+ // i = phi [0, scalar.ph], [i+1, scalar.body]
+ // s1 = phi [s_init', scalar.ph], [s2, scalar.body]
+ // s2 = a[i]
+ // b[i] = s2 - s1
+ // br cond, scalar.body, exit.block
+ //
+ // exit.block:
+ // lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block]
+ //
+ // After execution completes the vector loop, we extract the next value of
+ // the recurrence (x) to use as the initial value in the scalar loop. This
+ // is modeled by ExtractFromEnd.
+ //
+
// Extract the resume value and create a new VPLiveOut for it.
auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd,
{FOR->getBackedgeValue(), OneVPV},
{}, "vector.recur.extract");
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {},
"scalar.recur.init");
- Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), ResumePhiRecipe);
+ auto *FORPhi = cast<PHINode>(FOR->getUnderlyingInstr());
+ Plan.addLiveOut(FORPhi, ResumePhiRecipe);
+
+ // Now create VPLiveOuts for users in the exit block.
+ // Extract the penultimate value of the recurrence and add VPLiveOut
+ // users of the recurrence splice. Note that the extract of the final
+ // value used to resume in the scalar loop is created earlier during VPlan
+ // construction.
+
+ // No edge from the middle block to the unique exit block has been inserted
+ // and there is nothing to fix from vector loop; phis should have incoming
+ // from scalar loop only.
+ if (RequiresScalarEpilogue)
+ continue;
----------------
fhahn wrote:
Update to check #of successors, thanks!
https://github.com/llvm/llvm-project/pull/100658
More information about the llvm-commits
mailing list