[llvm] [VPlan] Expand VPWidenPointerInductionRecipe into separate recipes (PR #148274)

Mon Jul 28 05:39:49 PDT 2025

================
@@ -2679,6 +2680,102 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
   WidenIVR->replaceAllUsesWith(WidePHI);
 }
 
+/// Expand a VPWidenPointerInductionRecipe into executable recipes, for the
+/// initial value, phi and backedge value. In the following example:
+///
+///  <x1> vector loop: {
+///    vector.body:
+///      EMIT ir<%ptr.iv> = WIDEN-POINTER-INDUCTION %start, %step, %vf
+///      ...
+///      EMIT branch-on-count ...
+///  }
+///
+/// WIDEN-POINTER-INDUCTION will get expanded to:
+///
+///  <x1> vector loop: {
+///    vector.body:
+///      EMIT-SCALAR %pointer.phi = phi %start, %ptr.ind
+///      EMIT %mul = mul %stepvector, %step
+///      EMIT %vector.gep = ptradd %pointer.phi, %mul
+///      ...
+///      EMIT %ptr.ind = ptradd %pointer.phi, %vf
+///      EMIT branch-on-count ...
+///  }
+static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
+                                          VPTypeAnalysis &TypeInfo) {
+  VPlan *Plan = R->getParent()->getPlan();
+
+  assert(R->getInductionDescriptor().getKind() ==
+             InductionDescriptor::IK_PtrInduction &&
+         "Not a pointer induction according to InductionDescriptor!");
+  assert(TypeInfo.inferScalarType(R)->isPointerTy() && "Unexpected type.");
+  assert(!R->onlyScalarsGenerated(Plan->hasScalableVF()) &&
+         "Recipe should have been replaced");
+
+  unsigned CurrentPart = R->getCurrentPart();
+
+  VPBuilder Builder(R);
+  DebugLoc DL = R->getDebugLoc();
+
+  // Build a pointer phi
+  VPPhi *Phi;
+  if (CurrentPart == 0) {
+    Phi = Builder.createScalarPhi({R->getStartValue()}, DL, "pointer.phi");
+  } else {
+    // The recipe has been unrolled. In that case, fetch the single pointer phi
+    // shared among all unrolled parts of the recipe.
+    auto *PtrAdd = cast<VPInstruction>(R->getFirstUnrolledPartOperand());
+    Phi = cast<VPPhi>(PtrAdd->getOperand(0)->getDefiningRecipe());
+  }
+
+  Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
+
+  // A pointer induction, performed by using a gep
+  Type *PhiType = TypeInfo.inferScalarType(R->getStepValue());
+  VPValue *RuntimeVF = Builder.createScalarZExtOrTrunc(
+      &Plan->getVF(), PhiType, TypeInfo.inferScalarType(&Plan->getVF()), DL);
+  if (CurrentPart == 0) {
+    // The recipe represents the first part of the pointer induction. Create the
+    // GEP to increment the phi across all unrolled parts.
+    VPValue *NumUnrolledElems = Builder.createScalarZExtOrTrunc(
+        R->getOperand(2), PhiType, TypeInfo.inferScalarType(R->getOperand(2)),
+        DL);
+    VPValue *Offset = Builder.createNaryOp(
+        Instruction::Mul, {R->getStepValue(), NumUnrolledElems});
+
+    VPBuilder::InsertPointGuard Guard(Builder);
+    VPBasicBlock *ExitingBB =
+        Plan->getVectorLoopRegion()->getExitingBasicBlock();
+    Builder.setInsertPoint(ExitingBB,
+                           ExitingBB->getTerminator()->getIterator());
+
+    VPValue *InductionGEP = Builder.createPtrAdd(Phi, Offset, DL, "ptr.ind");
+    Phi->addOperand(InductionGEP);
+  }
+
+  VPValue *CurrentPartV =
+      Plan->getOrAddLiveIn(ConstantInt::get(PhiType, CurrentPart));
+
+  // Create actual address geps that use the pointer phi as base and a
+  // vectorized version of the step value (<step*0, ..., step*N>) as offset.
+  VPValue *StartOffsetScalar =
+      Builder.createNaryOp(Instruction::Mul, {RuntimeVF, CurrentPartV});
+  VPValue *StartOffset =
+      Builder.createNaryOp(VPInstruction::Broadcast, StartOffsetScalar);
+  // Create a vector of consecutive numbers from zero to VF.
+  StartOffset = Builder.createNaryOp(
+      Instruction::Add,
+      {StartOffset,
+       Builder.createNaryOp(VPInstruction::StepVector, {}, PhiType)});
----------------
lukel97 wrote:

I tried this out and I was able to handle the unrolling in `UnrollState::unrollWidenInductionByUF`, alongside `VPWidenIntOrFpInductionRecipe`. 

Just to double-check though: we still need `VPInstruction::WidePtrAdd`, since we still emit a vector of pointers, which a regular `VPInstruction::PtrAdd` can't do.

Just after unrolling, the VPlan looks like:

```
vector.ph:
  vp<%3> = DERIVED-IV ir<%p> + vp<%2> * ir<32>
  EMIT vp<%4> = wide-iv-step vp<%0>, ir<32>
Successor(s): vector loop

<x1> vector loop: {
  vector.body:
    EMIT vp<%5> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
    EMIT ir<%p.iv> = WIDEN-POINTER-INDUCTION ir<%p>, ir<32>, vp<%1>, vp<%4>, vp<%step.add.3>
    EMIT vp<%step.add> = wide-ptradd ir<%p.iv>, vp<%4>
    EMIT vp<%step.add.2> = wide-ptradd vp<%step.add>, vp<%4>
    EMIT vp<%step.add.3> = wide-ptradd vp<%step.add.2>, vp<%4>
    vp<%6> = SCALAR-STEPS vp<%5>, ir<1>, vp<%0>
    CLONE ir<%gep> = getelementptr ir<%p>, vp<%6>
    vp<%7> = vector-pointer ir<%gep>
    vp<%8> = vector-pointer ir<%gep>, ir<1>
    vp<%9> = vector-pointer ir<%gep>, ir<2>
    vp<%10> = vector-pointer ir<%gep>, ir<3>
    WIDEN store vp<%7>, ir<%p.iv>
    WIDEN store vp<%8>, vp<%step.add>
    WIDEN store vp<%9>, vp<%step.add.2>
    WIDEN store vp<%10>, vp<%step.add.3>
    EMIT vp<%index.next> = add nuw vp<%5>, vp<%1>
    EMIT branch-on-count vp<%index.next>, vp<%2>
  No successors
}

```

And then when converted to concrete recipes:

```
ir-bb<vector.ph>:
  IR   %n.mod.vf = urem i64 %n, 16
  IR   %n.vec = sub i64 %n, %n.mod.vf
  vp<%1> = DERIVED-IV ir<%p> + ir<%n.vec> * ir<32>
  EMIT vp<%2> = mul ir<4>, ir<32>
Successor(s): vector.body

vector.body:
  EMIT-SCALAR vp<%index> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
  EMIT-SCALAR vp<%pointer.phi> = phi [ ir<%p>, ir-bb<vector.ph> ], [ vp<%ptr.ind>, vector.body ]
  EMIT vp<%3> = step-vector i64
  EMIT vp<%4> = mul vp<%3>, ir<32>
  EMIT vp<%vector.gep> = wide-ptradd vp<%pointer.phi>, vp<%4>
  EMIT vp<%step.add> = wide-ptradd vp<%vector.gep>, vp<%2>
  EMIT vp<%step.add.2> = wide-ptradd vp<%step.add>, vp<%2>
  EMIT vp<%step.add.3> = wide-ptradd vp<%step.add.2>, vp<%2>
  CLONE ir<%gep> = getelementptr ir<%p>, vp<%index>
  vp<%5> = vector-pointer ir<%gep>, ir<1>
  vp<%6> = vector-pointer ir<%gep>, ir<2>
  vp<%7> = vector-pointer ir<%gep>, ir<3>
  WIDEN store ir<%gep>, vp<%vector.gep>
  WIDEN store vp<%5>, vp<%step.add>
  WIDEN store vp<%6>, vp<%step.add.2>
  WIDEN store vp<%7>, vp<%step.add.3>
  EMIT vp<%index.next> = add nuw vp<%index>, ir<16>
  EMIT vp<%ptr.ind> = ptradd vp<%step.add.3>, vp<%2>
  EMIT branch-on-count vp<%index.next>, ir<%n.vec>
Successor(s): middle.block, vector.body
```

https://github.com/llvm/llvm-project/pull/148274