[llvm] [VPlan] Delay adding canonical IV increment. (PR #82270)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 15:00:26 PST 2025


================
@@ -2043,6 +1986,98 @@ void VPlanTransforms::createInterleaveGroups(
   }
 }
 
+void VPlanTransforms::convertCanonicalIV(
+    VPlan &Plan, bool HasNUW, bool DataAndControlFlowWithoutRuntimeCheck) {
+  auto *CanIV = Plan.getCanonicalIV();
+
+  VPBasicBlock *Latch = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+  auto *LatchTerm = Latch->getTerminator();
+  VPBuilder Builder(LatchTerm);
+  DebugLoc DL = CanIV->getDebugLoc();
+  // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
+  auto *CanonicalIVIncrement =
+      Builder.createOverflowingOp(Instruction::Add, {CanIV, &Plan.getVFxUF()},
+                                  {HasNUW, false}, DL, "index.next");
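+  // The increment becomes the backedge value of the canonical IV phi.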
+  CanIV->addOperand(CanonicalIVIncrement);
+
+  auto FoundLaneMaskPhi = find_if(
+      Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
+      [](VPRecipeBase &P) { return isa<VPActiveLaneMaskPHIRecipe>(P); });
+
+  if (FoundLaneMaskPhi ==
+      Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis().end()) {
+    // Update the BranchOnCount VPInstruction in the latch to use the
+    // increment.
+    // TODO: Should have separate opcodes for separate semantics.
+    LatchTerm->setOperand(0, CanonicalIVIncrement);
+    return;
+  }
+
+  // Now adjust the start value of the active-lane-mask depending on
+  // DataAndControlFlowWithoutRuntimeCheck, introduce its increment and a
+  // conditional branch to control the loop until the lane mask is exhausted.
+  // Concretely, we add the following recipes:
+  //
+  // vector.ph:
+  //   %TripCount = calculate-trip-count-minus-VF (original TC)
+  //       [if DataAndControlFlowWithoutRuntimeCheck]
+  //   %EntryInc = canonical-iv-increment-for-part %StartV
+  //   %EntryALM = active-lane-mask %EntryInc, %TripCount (replaces the start
+  //       value of the existing active-lane-mask-phi)
+  //
+  // vector.body:
+  //   ...
+  //   (existing) %P = active-lane-mask-phi [ %EntryALM, %vector.ph ],
+  //   (added increment)                    [ %ALM, %vector.body ]
+  //   ...
+  //   %InLoopInc = canonical-iv-increment-for-part %IncrementValue
+  //   %ALM = active-lane-mask %InLoopInc, %TripCount
+  //   %Negated = Not %ALM
+  //   branch-on-cond %Negated (replaces existing BranchOnCount)
+
+  auto *LaneMaskPhi = cast<VPActiveLaneMaskPHIRecipe>(&*FoundLaneMaskPhi);
+  auto *VecPreheader =
+      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
+  Builder.setInsertPoint(VecPreheader);
+
+  VPValue *TC = Plan.getTripCount();
+
+  // TODO: Check if dropping the flags is needed if
+  // !DataAndControlFlowWithoutRuntimeCheck.
+  CanonicalIVIncrement->dropPoisonGeneratingFlags();
+  VPValue *TripCount, *IncrementValue;
+  if (!DataAndControlFlowWithoutRuntimeCheck) {
+    // When the loop is guarded by a runtime overflow check for the loop
+    // induction variable increment by VF, we can increment the value before
+    // the get.active.lane.mask intrinsic and use the unmodified trip count.
+    IncrementValue = CanonicalIVIncrement;
+    TripCount = TC;
+  } else {
+    // When avoiding a runtime check, the active.lane.mask inside the loop
+    // uses a modified trip count and the induction variable increment is
+    // done after the active.lane.mask intrinsic is called.
+    IncrementValue = CanIV;
+    TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
+                                     {TC}, DL);
+  }
+  // Create the active lane mask for the next iteration of the loop before the
+  // original terminator.
+  Builder.setInsertPoint(Latch);
+  auto *InLoopIncrement =
+      Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
+                                  {IncrementValue}, {false, false}, DL);
+  auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
+                                   {InLoopIncrement, TripCount}, DL,
+                                   "active.lane.mask.next");
+  LaneMaskPhi->addOperand(ALM);
+
+  // Replace the original terminator with BranchOnCond. We have to invert the
+  // mask here because a true condition means jumping to the exit block.
+  auto *NotMask = Builder.createNot(ALM, DL);
+  Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
----------------
ayalz wrote:

BranchOnCond operates on a single condition bit, so NotMask (or only its last part?) should effectively be reduced using an AnyOf reduction - although its execute/generate seems to look only at the first lane, rather than the last?
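
A minimal sketch of the kind of IR this lowers to on the
!DataAndControlFlowWithoutRuntimeCheck path may help (assuming VF=4, UF=1,
an i64 IV; value names are illustrative, not taken from the patch):

  vector.body:
    ...
    %index.next = add i64 %index, 4
    %active.lane.mask.next = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index.next, i64 %trip.count)
    %not.mask = xor <4 x i1> %active.lane.mask.next, <i1 true, i1 true, i1 true, i1 true>
    %exit.cond = extractelement <4 x i1> %not.mask, i64 0
    br i1 %exit.cond, label %middle.block, label %vector.body

Since @llvm.get.active.lane.mask produces a prefix mask (lane i is active iff
%index.next + i < %trip.count), lane 0 of the negated mask is true exactly
when every lane is inactive, so extracting the first lane checks that no
active lanes remain without an explicit AnyOf reduction.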

https://github.com/llvm/llvm-project/pull/82270

