[llvm] [VPlan] Unroll VPReplicateRecipe by VF. (PR #142433)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 22 04:26:45 PDT 2025
================
@@ -450,3 +451,86 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
VPlanTransforms::removeDeadRecipes(Plan);
}
+
+/// Create a single-scalar clone of \p RepR for lane \p Lane, insert it before \p RepR and return it.
+static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
+ Type *IdxTy, VPReplicateRecipe *RepR,
+ VPLane Lane) {
+ // Collect the operands at Lane, creating extracts (through Builder) as needed.
+ SmallVector<VPValue *> NewOps;
+ for (VPValue *Op : RepR->operands()) {
+ if (vputils::isSingleScalar(Op)) { // Single-scalar operands are reused unchanged.
+ NewOps.push_back(Op);
+ continue;
+ }
+ if (Lane.getKind() == VPLane::Kind::ScalableLast) { // No constant lane index is used here; extract via ExtractLastElement.
+ NewOps.push_back(
+ Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
+ continue;
+ }
+ // Look through buildvector to avoid unnecessary extracts: its operand at index Lane.getKnownLane() is used directly.
+ if (match(Op, m_BuildVector())) {
+ NewOps.push_back(
+ cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane()));
+ continue;
+ }
+ VPValue *Idx =
+ Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane())); // Known lane number as an IdxTy constant live-in.
+ VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx}); // Extract the lane's element from Op.
+ NewOps.push_back(Ext);
+ }
+
+ auto *New =
+ new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps,
+ /*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); // No mask is passed; NOTE(review): *RepR presumably carries over flags/metadata -- confirm against the constructor.
+ New->insertBefore(RepR); // The clone is placed immediately before the recipe it was cloned from.
+ return New;
+}
+
+void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
+ Type *IdxTy = IntegerType::get(
+ Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32);
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ if (!RepR || RepR->isSingleScalar())
+ continue;
+
+ VPBuilder Builder(RepR);
+ SmallVector<VPValue *> LaneDefs;
+ // Stores to invariant addresses need to store the last lane only.
+ if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
+ vputils::isSingleScalar(RepR->getOperand(1))) {
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF));
+ RepR->eraseFromParent();
+ continue;
+ }
+
+ /// Create single-scalar version of RepR for all lanes.
+ for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
+ LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
+
+ if (RepR->getNumUsers() == 0) {
+ RepR->eraseFromParent();
+ continue;
+ }
+
+ /// Users that only demand the first lane can use the definition for lane
+ /// 0.
+ RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
+ return U.onlyFirstLaneUsed(RepR);
+ });
+
+ // If needed, create a Build(Struct)Vector recipe to insert the scalar
+ // lane values into a vector.
----------------
ayalz wrote:
So a pair of replicating recipes, one feeding the other, is replaced by VF recipes feeding a BuildVector, from which VF other recipes extract; the extracts are then optimized away by cloneForLane(), and the BuildVector possibly by DCE?
https://github.com/llvm/llvm-project/pull/142433
More information about the llvm-commits
mailing list