[llvm] [VPlan] Explicitly unroll replicate-regions without live-outs by VF. (PR #170212)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 06:20:46 PDT 2026


================
@@ -729,3 +733,150 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
   for (auto *R : reverse(ToRemove))
     R->eraseFromParent();
 }
+
+/// Rewrite the recipes in \p RegionBlocks in place so they compute lane 0 only.
+/// Operands not known to be scalar get an ExtractElement of index 0 (\p IdxTy),
+/// each VPReplicateRecipe is replaced by an unmasked single-scalar copy, and
+/// each VPBranchOnMaskRecipe is replaced by a BranchOnCond on its operand 0.
+static void convertRecipesInRegionBlocksToSingleScalar(
+    VPlan &Plan, Type *IdxTy, ElementCount VF,
+    ArrayRef<VPBlockBase *> RegionBlocks) {
+  for (VPBlockBase *VPB : RegionBlocks) {
+    for (VPRecipeBase &NewR : make_early_inc_range(*cast<VPBasicBlock>(VPB))) {
+      VPBuilder Builder(&NewR);
+      for (const auto &[I, Op] : enumerate(NewR.operands())) {
+        // Skip operands that don't need extraction: scalar VF (no vectors),
+        // values defined in the same block (already scalar), or values that
+        // are already single scalars.
+        auto *DefR = Op->getDefiningRecipe();
+        if (VF.isScalar() || (DefR && DefR->getParent() == VPB) ||
+            vputils::isSingleScalar(Op))
+          continue;
+
+        // Extract lane 0 from every operand not filtered out by the check above.
+        VPValue *Idx = Plan.getConstantInt(IdxTy, 0);
+        VPValue *Extract = Builder.createNaryOp(Instruction::ExtractElement,
+                                                {Op, Idx}, NewR.getDebugLoc());
+        NewR.setOperand(I, Extract);
+      }
+
+      if (auto *RepR = dyn_cast<VPReplicateRecipe>(&NewR)) {
+        auto *New =
+            new VPReplicateRecipe(RepR->getUnderlyingInstr(), RepR->operands(),
+                                  /*IsSingleScalar=*/true, /*Mask=*/nullptr,
+                                  *RepR, *RepR, RepR->getDebugLoc());
+        New->insertBefore(RepR);
+        RepR->replaceAllUsesWith(New);
+        RepR->eraseFromParent(); // OK mid-loop: range is early-incremented.
+      } else if (auto *BranchOnMask = dyn_cast<VPBranchOnMaskRecipe>(&NewR)) {
+        Builder.createNaryOp(VPInstruction::BranchOnCond,
+                             {BranchOnMask->getOperand(0)},
+                             BranchOnMask->getDebugLoc());
+        BranchOnMask->eraseFromParent(); // Replaced by the BranchOnCond above.
+      }
+    }
+  }
+}
+
+/// Specialize the clones of \p RegionBlocks (looked up in \p Old2NewBlocks) for
+/// \p Lane: adjust scalar IV steps and lane-0 extracts, then remap operands.
+static void processLaneForReplicateRegion(
+    VPlan &Plan, Type *IdxTy, unsigned Lane,
+    ArrayRef<VPBlockBase *> RegionBlocks,
+    DenseMap<VPBlockBase *, VPBlockBase *> &Old2NewBlocks) {
+  DenseMap<VPValue *, VPValue *> Old2NewVPValues;
+  for (VPBlockBase *OldVPB : RegionBlocks) {
+    auto *OldBB = cast<VPBasicBlock>(OldVPB);
+    auto *NewBB = cast<VPBasicBlock>(Old2NewBlocks.lookup(OldVPB));
+    for (const auto &[OldR, NewR] : zip(*OldBB, *NewBB)) { // Paired by position.
+      for (const auto &[OldV, NewV] :
+           zip(OldR.definedValues(), NewR.definedValues()))
+        Old2NewVPValues[OldV] = NewV; // Original value -> its clone.
+    }
+
+    // Make each cloned recipe lane-specific, then point its operands at clones.
+    for (VPRecipeBase &NewR : make_early_inc_range(*NewBB)) {
+      if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&NewR)) {
+        VPBuilder Builder(Steps);
+        addStartIndexForLane(Steps, Lane, Plan, Builder);
+      } else if (match(&NewR, m_ExtractElement(m_VPValue(), m_ZeroInt()))) {
+        NewR.setOperand(1, Plan.getConstantInt(IdxTy, Lane)); // Index 0 -> Lane.
+      }
+
+      // Operands without an entry in Old2NewVPValues are left untouched.
+      for (const auto &[I, Op] : enumerate(NewR.operands())) {
+        // Use the clone if Op was defined in this or an earlier-visited block.
+        if (auto *New = Old2NewVPValues.lookup(Op))
+          NewR.setOperand(I, New);
+      }
+    }
+  }
+}
+
+void VPlanTransforms::unrollReplicateRegions(VPlan &Plan, ElementCount VF) {
+  // Collect all replicate regions in the plan before modifying the CFG.
+  SmallVector<VPRegionBlock *> ReplicateRegions;
+  for (VPRegionBlock *Region : VPBlockUtils::blocksOnly<VPRegionBlock>(
+           vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+    if (Region->isReplicator())
+      ReplicateRegions.push_back(Region);
+  }
+
+  Type *IdxTy = IntegerType::get(Plan.getContext(), 32);
+  for (VPRegionBlock *Region : ReplicateRegions) {
+    assert(!VF.isScalable() && "cannot replicate across scalable VFs");
+
+    // Skip regions with live-outs as packing scalar results back into vectors
+    // is not yet implemented.
+    VPBlockBase *Exiting = Region->getExiting();
+    if (any_of(*cast<VPBasicBlock>(Exiting), IsaPred<VPPredInstPHIRecipe>))
+      continue;
+
+    // Disconnect and dissolve the region.
+    VPBlockBase *Pred = Region->getSinglePredecessor();
+    assert(Pred && "Replicate region must have a single predecessor");
+    SmallVector<VPBlockBase *> Successors(Region->successors());
+    VPBlockUtils::disconnectBlocks(Pred, Region);
+    for (VPBlockBase *Succ : Successors)
+      VPBlockUtils::disconnectBlocks(Region, Succ);
+
+    VPBlockBase *Entry = Region->getEntry();
+    SmallVector<VPBlockBase *> RegionBlocks(vp_depth_first_shallow(Entry));
+    VPRegionBlock *ParentRegion = Region->getParent();
+    for (VPBlockBase *Block : RegionBlocks)
+      Block->setParent(ParentRegion);
+    VPBlockUtils::connectBlocks(Pred, Entry);
+
+    // Process lane 0: convert original blocks to single-scalar.
+    convertRecipesInRegionBlocksToSingleScalar(Plan, IdxTy, VF, RegionBlocks);
+    SmallVector<std::pair<VPBlockBase *, VPBlockBase *>> LaneClones;
----------------
fhahn wrote:

This has now been folded into a single vector that contains all cloned blocks per lane.

https://github.com/llvm/llvm-project/pull/170212


More information about the llvm-commits mailing list