[llvm] 7e99893 - [VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) (#151487)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 12:49:45 PDT 2025
Author: Florian Hahn
Date: 2025-08-18T20:49:42+01:00
New Revision: 7e9989390d95cbb382cb2dc9eb44b37717e23738
URL: https://github.com/llvm/llvm-project/commit/7e9989390d95cbb382cb2dc9eb44b37717e23738
DIFF: https://github.com/llvm/llvm-project/commit/7e9989390d95cbb382cb2dc9eb44b37717e23738.diff
LOG: [VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) (#151487)
Materialize Build(Struct)Vectors explicitly for VPReplicateRecipes, to
serve their users requiring a vector, instead of doing so when unrolling
by VF.
Now we only need to implicitly build vectors in VPTransformState::get
for VPInstructions. Once they are also unrolled by VF we can remove the
code-path altogether.
PR: https://github.com/llvm/llvm-project/pull/151487
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e009b81afd0ed..9c00e51ff5213 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7254,8 +7254,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF);
- VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
+ VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
+ VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
bool HasBranchWeights =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
if (HasBranchWeights) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 724a38e565304..f972efa07eb7e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -355,6 +355,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ assert(isa<VPInstruction>(Def) &&
+ "Explicit BuildVector recipes must have"
+ "handled packing for non-VPInstructions.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0609510ac8212..96ef6e7cf8243 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -460,6 +460,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Load:
case VPInstruction::AnyOf:
case VPInstruction::BranchOnCond:
+ case VPInstruction::BuildStructVector:
+ case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 14532244d5748..81088c9a81392 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3282,6 +3282,52 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan,
BTC->replaceAllUsesWith(TCMO);
}
+void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
+ if (Plan.hasScalarVFOnly())
+ return; // Plans that only have a scalar VF are left untouched.
+
+ VPTypeAnalysis TypeInfo(Plan);
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()));
+ auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(LoopRegion->getEntry()));
+ // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
+ // excluding ones in replicate regions. Those are not materialized explicitly
+ // yet. Those vector users are still handled in VPReplicateRegion::execute(),
+ // via shouldPack().
+ // TODO: materialize build vectors for replicating recipes in replicating
+ // regions.
+ // TODO: materialize build vectors for VPInstructions.
+ for (VPBasicBlock *VPBB :
+ concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { // A new recipe is inserted after R below.
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R); // May be null; checked before the lambda is invoked.
+ auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
+ VPRegionBlock *ParentRegion =
+ cast<VPRecipeBase>(U)->getParent()->getParent();
+ return !U->usesScalars(RepR) || ParentRegion != LoopRegion; // U does not use RepR as scalars, or U's block's parent region is not LoopRegion.
+ };
+ if (!RepR || RepR->isSingleScalar() ||
+ none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
+ continue; // Not a replicate recipe, single-scalar, or no user matches the predicate.
+
+ Type *ScalarTy = TypeInfo.inferScalarType(RepR);
+ unsigned Opcode = ScalarTy->isStructTy()
+ ? VPInstruction::BuildStructVector
+ : VPInstruction::BuildVector;
+ auto *BuildVector = new VPInstruction(Opcode, {RepR}); // RepR is the sole operand at this point.
+ BuildVector->insertAfter(RepR);
+
+ RepR->replaceUsesWithIf(
+ BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
+ VPUser &U, unsigned) {
+ return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U); // BuildVector itself keeps RepR as its operand.
+ });
+ }
+ }
+}
+
void VPlanTransforms::materializeVectorTripCount(VPlan &Plan,
VPBasicBlock *VectorPHVPBB,
bool TailByMasking,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 35fa45ced53e0..5b3d18b237efb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -274,6 +274,10 @@ struct VPlanTransforms {
static void materializeBackedgeTakenCount(VPlan &Plan,
VPBasicBlock *VectorPH);
+ /// Add explicit Build[Struct]Vector recipes that combine multiple scalar
+ /// values into single vectors.
+ static void materializeBuildVectors(VPlan &Plan);
+
/// Materialize VF and VFxUF to be computed explicitly using VPInstructions.
static void materializeVFAndVFxUF(VPlan &Plan, VPBasicBlock *VectorPH,
ElementCount VF);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 9a6b7b70cc9f9..62fd83a5e092a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -464,10 +464,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) {
VPlanTransforms::removeDeadRecipes(Plan);
}
-/// Create a single-scalar clone of \p RepR for lane \p Lane.
-static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
- Type *IdxTy, VPReplicateRecipe *RepR,
- VPLane Lane) {
+/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
+/// Def2LaneDefs to look up scalar definitions for operands of \p RepR.
+static VPReplicateRecipe *
+cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
+ VPReplicateRecipe *RepR, VPLane Lane,
+ const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : RepR->operands()) {
@@ -480,6 +482,14 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
}
+ // If Op is a definition that has been unrolled, directly use the clone for
+ // the corresponding lane.
+ auto LaneDefs = Def2LaneDefs.find(Op);
+ if (LaneDefs != Def2LaneDefs.end()) {
+ NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
+ continue;
+ }
+
// Look through buildvector to avoid unnecessary extracts.
if (match(Op, m_BuildVector())) {
NewOps.push_back(
@@ -512,6 +522,13 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
auto VPBBsToUnroll =
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
+ // A mapping of current VPValue definitions to collections of new VPValues
+ // defined per lane. Serves to hook-up potential users of current VPValue
+ // definition that are replicated-per-VF later.
+ DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
+ // The removal of current recipes being replaced by new ones needs to be
+ // delayed after Def2LaneDefs is no longer in use.
+ SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -523,12 +540,12 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
vputils::isSingleScalar(RepR->getOperand(1))) {
// Stores to invariant addresses need to store the last lane only.
- cloneForLane(Plan, Builder, IdxTy, RepR,
- VPLane::getLastLaneForVF(VF));
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
+ Def2LaneDefs);
} else {
// Create single-scalar version of RepR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
}
RepR->eraseFromParent();
continue;
@@ -536,23 +553,33 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
/// Create single-scalar version of RepR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
+ LaneDefs.push_back(
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
+ Def2LaneDefs[RepR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(RepR);
});
- // If needed, create a Build(Struct)Vector recipe to insert the scalar
- // lane values into a vector.
- Type *ResTy = RepR->getUnderlyingInstr()->getType();
- VPValue *VecRes = Builder.createNaryOp(
- ResTy->isStructTy() ? VPInstruction::BuildStructVector
- : VPInstruction::BuildVector,
- LaneDefs);
- RepR->replaceAllUsesWith(VecRes);
- RepR->eraseFromParent();
+ // Update each build vector user that currently has RepR as its only
+ // operand, to have all LaneDefs as its operands.
+ for (VPUser *U : to_vector(RepR->users())) {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
+ VPI->getOpcode() != VPInstruction::BuildStructVector))
+ continue;
+ assert(VPI->getNumOperands() == 1 &&
+ "Build(Struct)Vector must have a single operand before "
+ "replicating by VF");
+ VPI->setOperand(0, LaneDefs[0]);
+ for (VPValue *LaneDef : drop_begin(LaneDefs))
+ VPI->addOperand(LaneDef);
+ }
+ ToRemove.push_back(RepR);
}
}
+ for (auto *R : reverse(ToRemove))
+ R->eraseFromParent();
}
More information about the llvm-commits
mailing list