[llvm] [VPlan] Dissolve replicate regions with vector live-outs. (PR #189022)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 4 10:28:38 PDT 2026
================
@@ -808,6 +819,73 @@ static void dissolveReplicateRegion(VPRegionBlock *Region, ElementCount VF,
// FirstLaneExiting.
VPBlockUtils::connectBlocks(Predecessor, FirstLaneEntry);
VPBlockUtils::connectBlocks(FirstLaneExiting, NextLaneEntry);
+
+ // Collect per-lane predicated scalar phis and create InsertElement chains or
+ // BuildStructVector to pack them into a vector result. Phis are in the same
+ // order across all cloned exit blocks, so we can match them by position.
+ SmallVector<SmallVector<VPValue *, 4>> PhisByLane;
+ for (VPBasicBlock *Exit : ExitingBlocks) {
+ auto &Phis = PhisByLane.emplace_back();
+ for (auto &Phi : Exit->phis())
+ Phis.push_back(Phi.getVPSingleValue());
+ }
+
+ VPTypeAnalysis TypeInfo(Plan);
+ SmallVector<VPValue *> LaneValues;
+ for (unsigned I = 0; I < PhisByLane[0].size(); ++I) {
+ LaneValues.clear();
+ for (auto &LanePhis : PhisByLane)
+ LaneValues.push_back(LanePhis[I]);
+
+ // If only the first lane is used, no need to pack into a vector.
+ // The lane-0 scalar phi can be used directly. Only lane 0 has external
+ // users; the other lane phis are clones without downstream users.
+ if (vputils::onlyFirstLaneUsed(LaneValues[0])) {
+ assert(all_of(drop_begin(LaneValues),
+ [](VPValue *V) { return V->getNumUsers() == 0; }) &&
+ "cloned lane phis should have no users");
+ continue;
+ }
+
+ Type *ScalarTy = TypeInfo.inferScalarType(LaneValues[0]);
+
+ // Struct types need BuildStructVector as InsertElement doesn't apply.
+ if (isa<StructType>(ScalarTy)) {
+ auto *LastExit = ExitingBlocks.back();
+ VPBuilder Builder(LastExit, LastExit->end());
+ auto *BV =
+ Builder.createNaryOp(VPInstruction::BuildStructVector, LaneValues);
+ LaneValues[0]->replaceUsesWithIf(
+ BV, [BV](VPUser &U, unsigned) { return &U != BV; });
+ continue;
+ }
+
+ // Convert chain of scalar phis to InsertElement + VPWidenPHIRecipe chains.
+ VPValue *RunningVec = Plan.getOrAddLiveIn(PoisonValue::get(ScalarTy));
+ for (auto [Lane, LaneVal] : enumerate(LaneValues)) {
+ auto *LanePhi = cast<VPPhi>(LaneVal);
+ assert(LanePhi->getNumOperands() == 2 &&
+ match(LanePhi->getOperand(0), m_Poison()) &&
+ "expected predicated phi");
+ auto *MergeBB = LanePhi->getParent();
+ VPValue *PredVal = LanePhi->getOperand(1);
+ auto *ThenBB = PredVal->getDefiningRecipe()->getParent();
+
+ VPBuilder ThenBuilder(
+ ThenBB, std::next(PredVal->getDefiningRecipe()->getIterator()));
+ VPValue *Idx = Plan.getConstantInt(IdxTy, Lane);
+ auto *Insert = ThenBuilder.createNaryOp(Instruction::InsertElement,
+ {RunningVec, PredVal, Idx},
+ LanePhi->getDebugLoc());
+
+ auto *VecPhi =
+ new VPWidenPHIRecipe(nullptr, RunningVec, LanePhi->getDebugLoc());
----------------
ayalz wrote:
Could these InsertElement and WidenPHI recipes be introduced into the original lane 0 attached to its PredInstPhi prior to cloning, and have processLaneForReplicateRegion() chain their def/uses together?
https://github.com/llvm/llvm-project/pull/189022
More information about the llvm-commits mailing list