[llvm] [VPlan] Dissolve replicate regions with vector live-outs. (PR #189022)

via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 4 10:28:38 PDT 2026


================
@@ -808,6 +819,73 @@ static void dissolveReplicateRegion(VPRegionBlock *Region, ElementCount VF,
   // FirstLaneExiting.
   VPBlockUtils::connectBlocks(Predecessor, FirstLaneEntry);
   VPBlockUtils::connectBlocks(FirstLaneExiting, NextLaneEntry);
+
+  // Collect per-lane predicated scalar phis and create InsertElement chains or
+  // BuildStructVector to pack them into a vector result. Phis are in the same
+  // order across all cloned exit blocks, so we can match them by position.
+  SmallVector<SmallVector<VPValue *, 4>> PhisByLane;
+  for (VPBasicBlock *Exit : ExitingBlocks) {
+    auto &Phis = PhisByLane.emplace_back();
+    for (auto &Phi : Exit->phis())
+      Phis.push_back(Phi.getVPSingleValue());
+  }
+
+  VPTypeAnalysis TypeInfo(Plan);
+  SmallVector<VPValue *> LaneValues;
+  for (unsigned I = 0; I < PhisByLane[0].size(); ++I) {
+    LaneValues.clear();
+    for (auto &LanePhis : PhisByLane)
+      LaneValues.push_back(LanePhis[I]);
+
+    // If only the first lane is used, no need to pack into a vector.
+    // The lane-0 scalar phi can be used directly. Only lane 0 has external
+    // users; the other lane phis are clones without downstream users.
+    if (vputils::onlyFirstLaneUsed(LaneValues[0])) {
+      assert(all_of(drop_begin(LaneValues),
+                    [](VPValue *V) { return V->getNumUsers() == 0; }) &&
+             "cloned lane phis should have no users");
+      continue;
+    }
+
+    Type *ScalarTy = TypeInfo.inferScalarType(LaneValues[0]);
+
+    // Struct types need BuildStructVector as InsertElement doesn't apply.
+    if (isa<StructType>(ScalarTy)) {
+      auto *LastExit = ExitingBlocks.back();
+      VPBuilder Builder(LastExit, LastExit->end());
+      auto *BV =
+          Builder.createNaryOp(VPInstruction::BuildStructVector, LaneValues);
+      LaneValues[0]->replaceUsesWithIf(
+          BV, [BV](VPUser &U, unsigned) { return &U != BV; });
+      continue;
+    }
+
+    // Convert chain of scalar phis to InsertElement + VPWidenPHIRecipe chains.
+    VPValue *RunningVec = Plan.getOrAddLiveIn(PoisonValue::get(ScalarTy));
+    for (auto [Lane, LaneVal] : enumerate(LaneValues)) {
+      auto *LanePhi = cast<VPPhi>(LaneVal);
+      assert(LanePhi->getNumOperands() == 2 &&
+             match(LanePhi->getOperand(0), m_Poison()) &&
+             "expected predicated phi");
+      auto *MergeBB = LanePhi->getParent();
+      VPValue *PredVal = LanePhi->getOperand(1);
+      auto *ThenBB = PredVal->getDefiningRecipe()->getParent();
+
+      VPBuilder ThenBuilder(
+          ThenBB, std::next(PredVal->getDefiningRecipe()->getIterator()));
+      VPValue *Idx = Plan.getConstantInt(IdxTy, Lane);
+      auto *Insert = ThenBuilder.createNaryOp(Instruction::InsertElement,
+                                              {RunningVec, PredVal, Idx},
+                                              LanePhi->getDebugLoc());
+
+      auto *VecPhi =
+          new VPWidenPHIRecipe(nullptr, RunningVec, LanePhi->getDebugLoc());
----------------
ayalz wrote:

Could these InsertElement and WidenPHI recipes be introduced into the original lane 0, attached to its PredInstPhi, prior to cloning, so that processLaneForReplicateRegion() chains their defs/uses together?

https://github.com/llvm/llvm-project/pull/189022


More information about the llvm-commits mailing list