[llvm] [VPlan] Materialize Build(Struct)Vectors for VPReplicateRecipes. (NFCI) (PR #151487)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 11 02:53:29 PDT 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/151487
>From d5857574a0ddf1a44aa51e49c1d8d49a5a5ecc1a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 21 Jun 2025 14:22:08 +0100
Subject: [PATCH 1/5] [VPlan] Materialize Build(Struct)Vectors for
VPReplicateRecipes. (NFCI)
Materialize Build(Struct)Vectors explicitly for VPReplicateRecipes, to
serve their users requiring a vector, instead of doing so when unrolling
by VF.
Now we only need to implicitly build vectors in VPTransformState::get
for VPInstructions. Once they are also unrolled by VF we can remove the
code-path altogether.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 45 +++++++++++++++++++
.../Transforms/Vectorize/VPlanTransforms.h | 4 ++
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 45 ++++++++++++-------
6 files changed, 83 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 20528089c0008..c7f350c5634fe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7284,6 +7284,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
OrigLoop->getHeader()->getContext());
+ VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
bool HasBranchWeights =
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 25b9616880bf4..96ccf5bf50a25 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -372,6 +372,8 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
+ assert(isa<VPInstruction>(Def) && "Explicit BuildVector recipes must "
+ "handle packing for non-VPInstructions.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 11b4677ec102e..ebf43eecb34c9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -461,6 +461,8 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::Load:
case VPInstruction::AnyOf:
case VPInstruction::BranchOnCond:
+ case VPInstruction::BuildStructVector:
+ case VPInstruction::BuildVector:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::ExplicitVectorLength:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index fcbc86f5e4c58..9b51fcdf7ad8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3178,6 +3178,51 @@ void VPlanTransforms::materializeVectorTripCount(
Plan.getVectorTripCount().setUnderlyingValue(NewC->getValue());
}
+void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
+ if (Plan.hasScalarVFOnly())
+ return;
+
+ VPTypeAnalysis TypeInfo(Plan);
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ auto VPBBsOutsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(Plan.getEntry()));
+ auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_shallow(LoopRegion->getEntry()));
+ for (VPBasicBlock *VPBB :
+ concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
+ for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ if (!RepR || RepR->isSingleScalar())
+ continue;
+ VPInstruction *BuildVector = nullptr;
+ for (VPUser *U : to_vector(RepR->users())) {
+ VPRegionBlock *ParentRegion =
+ cast<VPRecipeBase>(U)->getParent()->getParent();
+ if (U->usesScalars(RepR) && ParentRegion == LoopRegion)
+ continue;
+
+ if (!BuildVector) {
+ Type *ScalarTy = TypeInfo.inferScalarType(RepR);
+ unsigned Opc = ScalarTy->isStructTy()
+ ? VPInstruction::BuildStructVector
+ : VPInstruction::BuildVector;
+ BuildVector = new VPInstruction(Opc, {RepR});
+ BuildVector->insertAfter(RepR);
+ }
+
+ // Only update a single operand per users, as the same user is added
+ // multiple times, once per use.
+ // TODO: Introduce de-duplicating iterator over users.
+ for (unsigned Idx = 0; Idx != U->getNumOperands(); ++Idx)
+ if (U->getOperand(Idx) == RepR) {
+ U->setOperand(Idx, BuildVector);
+ break;
+ }
+ }
+ }
+ }
+}
+
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be
/// converted to a narrower recipe. \p V is used by a wide recipe that feeds a
/// store interleave group at index \p Idx, \p WideMember0 is the recipe feeding
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 880159f760922..1a19e15bbaa25 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -256,6 +256,10 @@ struct VPlanTransforms {
unsigned BestUF,
PredicatedScalarEvolution &PSE);
+ /// Add explicit Build[Struct]Vector recipes that combine scalar values
+ /// produced by VPReplicateRecipes to a single vector.
+ static void materializeBuildVectors(VPlan &Plan);
+
/// Try to convert a plan with interleave groups with VF elements to a plan
/// with the interleave groups replaced by wide loads and stores processing VF
/// elements, if all transformed interleave groups access the full vector
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 871e37ef3966a..7f23fb5b7d11a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -463,9 +463,10 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
}
/// Create a single-scalar clone of \p RepR for lane \p Lane.
-static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
- Type *IdxTy, VPReplicateRecipe *RepR,
- VPLane Lane) {
+static VPReplicateRecipe *
+cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
+ VPReplicateRecipe *RepR, VPLane Lane,
+ DenseMap<VPValue *, SmallVector<VPValue *>> &Value2Lanes) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : RepR->operands()) {
@@ -478,6 +479,11 @@ static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder,
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
}
+ if (Value2Lanes.contains(Op)) {
+ NewOps.push_back(Value2Lanes[Op][Lane.getKnownLane()]);
+ continue;
+ }
+
// Look through buildvector to avoid unnecessary extracts.
if (match(Op, m_BuildVector())) {
NewOps.push_back(
@@ -510,6 +516,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
auto VPBBsToUnroll =
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
+ DenseMap<VPValue *, SmallVector<VPValue *>> Value2Lanes;
+ SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
@@ -521,12 +529,12 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
if (isa<StoreInst>(RepR->getUnderlyingInstr()) &&
vputils::isSingleScalar(RepR->getOperand(1))) {
// Stores to invariant addresses need to store the last lane only.
- cloneForLane(Plan, Builder, IdxTy, RepR,
- VPLane::getLastLaneForVF(VF));
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
+ Value2Lanes);
} else {
// Create single-scalar version of RepR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I));
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes);
}
RepR->eraseFromParent();
continue;
@@ -534,23 +542,28 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
/// Create single-scalar version of RepR for all lanes.
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)));
+ LaneDefs.push_back(
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes));
+ Value2Lanes[RepR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(RepR);
});
- // If needed, create a Build(Struct)Vector recipe to insert the scalar
- // lane values into a vector.
- Type *ResTy = RepR->getUnderlyingInstr()->getType();
- VPValue *VecRes = Builder.createNaryOp(
- ResTy->isStructTy() ? VPInstruction::BuildStructVector
- : VPInstruction::BuildVector,
- LaneDefs);
- RepR->replaceAllUsesWith(VecRes);
- RepR->eraseFromParent();
+ for (VPUser *U : to_vector(RepR->users())) {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
+ VPI->getOpcode() != VPInstruction::BuildStructVector))
+ continue;
+ VPI->setOperand(0, LaneDefs[0]);
+ for (VPValue *Def : drop_begin(LaneDefs))
+ VPI->addOperand(Def);
+ }
+ ToRemove.push_back(RepR);
}
}
+ for (auto *R : reverse(ToRemove))
+ R->eraseFromParent();
}
>From 66dea83df9e729b2db358592bd5be2201062c4e4 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Fri, 1 Aug 2025 20:57:15 +0100
Subject: [PATCH 2/5] !fixup add comment and assert
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 ++
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 ++
2 files changed, 4 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 9b51fcdf7ad8d..23ed7e570e01d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3188,6 +3188,8 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
vp_depth_first_shallow(Plan.getEntry()));
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
+ // Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
+ // excluding ones in replicate regions. Those are not unrolled explicitly yet.
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 7f23fb5b7d11a..2a3e30932f12e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -557,6 +557,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
continue;
+ assert(VPI->getNumOperands() == 1 &&
+ "Build(Struct)Vector must have a single operand");
VPI->setOperand(0, LaneDefs[0]);
for (VPValue *Def : drop_begin(LaneDefs))
VPI->addOperand(Def);
>From 8ad1800c89a9625c13d6ff0567d7e1122dd31aae Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 9 Aug 2025 21:03:16 +0100
Subject: [PATCH 3/5] !fixup address comments, thanks
---
.../Transforms/Vectorize/LoopVectorize.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VPlan.cpp | 5 +-
.../Transforms/Vectorize/VPlanTransforms.cpp | 49 +++++++++----------
.../Transforms/Vectorize/VPlanTransforms.h | 4 +-
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 26 ++++++----
5 files changed, 46 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1202130fd2377..35f072e18f7b5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7267,8 +7267,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
OrigLoop->getHeader()->getContext());
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan);
- VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
VPlanTransforms::runPass(VPlanTransforms::materializeBroadcasts, BestVPlan);
+ VPlanTransforms::runPass(VPlanTransforms::replicateByVF, BestVPlan, BestVF);
bool HasBranchWeights =
hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator());
if (HasBranchWeights) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index fdc5ddc254b19..d7d101e457dd3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -372,8 +372,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
set(Def, VectorValue);
} else {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- assert(isa<VPInstruction>(Def) && "Explicit BuildVector recipes must "
- "handle packing for non-VPInstructions.");
+ assert(isa<VPInstruction>(Def) &&
+ "Explicit BuildVector recipes must have"
+ "handled packing for non-VPInstructions.");
// Initialize packing with insertelements to start from poison.
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF));
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 94fba8ee396b1..c75ee526f1a41 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3289,38 +3289,37 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>(
vp_depth_first_shallow(LoopRegion->getEntry()));
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes,
- // excluding ones in replicate regions. Those are not unrolled explicitly yet.
+ // excluding ones in replicate regions. Those are not materialized explicitly
+ // yet. Those vector users are still handled in VPReplicateRegion::execute(),
+ // via shouldPack().
+ // TODO: materialize build vectors for replicating recipes in replicating
+ // regions.
+ // TODO: materialize build vectors for VPInstructions.
for (VPBasicBlock *VPBB :
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR || RepR->isSingleScalar())
- continue;
- VPInstruction *BuildVector = nullptr;
- for (VPUser *U : to_vector(RepR->users())) {
+ auto UsesVectorOrInsideReplicateRegions = [RepR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
- if (U->usesScalars(RepR) && ParentRegion == LoopRegion)
- continue;
-
- if (!BuildVector) {
- Type *ScalarTy = TypeInfo.inferScalarType(RepR);
- unsigned Opc = ScalarTy->isStructTy()
- ? VPInstruction::BuildStructVector
- : VPInstruction::BuildVector;
- BuildVector = new VPInstruction(Opc, {RepR});
- BuildVector->insertAfter(RepR);
- }
+ return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
+ };
+ if (!RepR || RepR->isSingleScalar() ||
+ none_of(RepR->users(), UsesVectorOrInsideReplicateRegions))
+ continue;
- // Only update a single operand per users, as the same user is added
- // multiple times, once per use.
- // TODO: Introduce de-duplicating iterator over users.
- for (unsigned Idx = 0; Idx != U->getNumOperands(); ++Idx)
- if (U->getOperand(Idx) == RepR) {
- U->setOperand(Idx, BuildVector);
- break;
- }
- }
+ Type *ScalarTy = TypeInfo.inferScalarType(RepR);
+ unsigned Opcode = ScalarTy->isStructTy()
+ ? VPInstruction::BuildStructVector
+ : VPInstruction::BuildVector;
+ auto *BuildVector = new VPInstruction(Opcode, {RepR});
+ BuildVector->insertAfter(RepR);
+
+ RepR->replaceUsesWithIf(
+ BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegions](
+ VPUser &U, unsigned) {
+ return &U != BuildVector && UsesVectorOrInsideReplicateRegions(&U);
+ });
}
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e9b35d21d1d98..1e6f7acd03e11 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -276,8 +276,8 @@ struct VPlanTransforms {
static void materializeBackedgeTakenCount(VPlan &Plan,
VPBasicBlock *VectorPH);
- /// Add explicit Build[Struct]Vector recipes that combine scalar values
- /// produced by VPReplicateRecipes to a single vector.
+ /// Add explicit Build[Struct]Vector recipes that combine multiple scalar
+ /// values into single vectors.
static void materializeBuildVectors(VPlan &Plan);
/// Try to convert a plan with interleave groups with VF elements to a plan
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 26d11e8f99ce8..21db4f3b1baa4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -465,11 +465,12 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) {
VPlanTransforms::removeDeadRecipes(Plan);
}
-/// Create a single-scalar clone of \p RepR for lane \p Lane.
+/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p
+/// Def2LaneDefs to look up scalar definitions for operands of \p RepR.
static VPReplicateRecipe *
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
VPReplicateRecipe *RepR, VPLane Lane,
- DenseMap<VPValue *, SmallVector<VPValue *>> &Value2Lanes) {
+ const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) {
// Collect the operands at Lane, creating extracts as needed.
SmallVector<VPValue *> NewOps;
for (VPValue *Op : RepR->operands()) {
@@ -482,8 +483,11 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy,
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op}));
continue;
}
- if (Value2Lanes.contains(Op)) {
- NewOps.push_back(Value2Lanes[Op][Lane.getKnownLane()]);
+ // If Op is a definition that has been unrolled, directly use the clone for
+ // the corresponding lane.
+ auto LaneDefs = Def2LaneDefs.find(Op);
+ if (LaneDefs != Def2LaneDefs.end()) {
+ NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]);
continue;
}
@@ -519,7 +523,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
auto VPBBsToUnroll =
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
- DenseMap<VPValue *, SmallVector<VPValue *>> Value2Lanes;
+ DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -533,11 +537,11 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
vputils::isSingleScalar(RepR->getOperand(1))) {
// Stores to invariant addresses need to store the last lane only.
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF),
- Value2Lanes);
+ Def2LaneDefs);
} else {
// Create single-scalar version of RepR for all lanes.
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes);
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs);
}
RepR->eraseFromParent();
continue;
@@ -546,22 +550,24 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
SmallVector<VPValue *> LaneDefs;
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I)
LaneDefs.push_back(
- cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Value2Lanes));
+ cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs));
- Value2Lanes[RepR] = LaneDefs;
+ Def2LaneDefs[RepR] = LaneDefs;
/// Users that only demand the first lane can use the definition for lane
/// 0.
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) {
return U.onlyFirstLaneUsed(RepR);
});
+ // Update each build vector user that currently has RepR as its only
+ // operand, to have all LaneDefs as its operands.
for (VPUser *U : to_vector(RepR->users())) {
auto *VPI = dyn_cast<VPInstruction>(U);
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector &&
VPI->getOpcode() != VPInstruction::BuildStructVector))
continue;
assert(VPI->getNumOperands() == 1 &&
- "Build(Struct)Vector must have a single operand");
+ "Build(Struct)Vector must have a single operand before replicating by VF"");
VPI->setOperand(0, LaneDefs[0]);
for (VPValue *Def : drop_begin(LaneDefs))
VPI->addOperand(Def);
>From d4994de1e8d689c52c4d3810d9ec8e6599279225 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 11 Aug 2025 09:51:37 +0100
Subject: [PATCH 4/5] !fixup remove stray " causing build failure
---
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 21db4f3b1baa4..95e69d01591e3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -567,7 +567,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
VPI->getOpcode() != VPInstruction::BuildStructVector))
continue;
assert(VPI->getNumOperands() == 1 &&
- "Build(Struct)Vector must have a single operand before replicating by VF"");
+ "Build(Struct)Vector must have a single operand before "
+ "replicating by VF");
VPI->setOperand(0, LaneDefs[0]);
for (VPValue *Def : drop_begin(LaneDefs))
VPI->addOperand(Def);
>From 31bbac2702c61f538671a212c3d2e8a23ceef5d2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 11 Aug 2025 10:52:44 +0100
Subject: [PATCH 5/5] !fixup address comments, thanks
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 8 ++++----
llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 9 +++++++--
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8c6d3a1c15ba3..ef59550cab8bc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3301,13 +3301,13 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- auto UsesVectorOrInsideReplicateRegions = [RepR, LoopRegion](VPUser *U) {
+ auto UsesVectorOrInsideReplicateRegion = [RepR, LoopRegion](VPUser *U) {
VPRegionBlock *ParentRegion =
cast<VPRecipeBase>(U)->getParent()->getParent();
return !U->usesScalars(RepR) || ParentRegion != LoopRegion;
};
if (!RepR || RepR->isSingleScalar() ||
- none_of(RepR->users(), UsesVectorOrInsideReplicateRegions))
+ none_of(RepR->users(), UsesVectorOrInsideReplicateRegion))
continue;
Type *ScalarTy = TypeInfo.inferScalarType(RepR);
@@ -3318,9 +3318,9 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) {
BuildVector->insertAfter(RepR);
RepR->replaceUsesWithIf(
- BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegions](
+ BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
VPUser &U, unsigned) {
- return &U != BuildVector && UsesVectorOrInsideReplicateRegions(&U);
+ return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
});
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index 95e69d01591e3..ff251fbe94f3c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -523,7 +523,12 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()));
auto VPBBsToUnroll =
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion);
+ // A mapping of current VPValue definitions to collections of new VPValues
+ // defined per lane. Serves to hook-up potential users of current VPValue
+ // definition that are replicated-per-VF later.
DenseMap<VPValue *, SmallVector<VPValue *>> Def2LaneDefs;
+ // The removal of current recipes being replaced by new ones needs to be
+ // delayed after Def2LaneDefs is no longer in use.
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBBsToUnroll) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
@@ -570,8 +575,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) {
"Build(Struct)Vector must have a single operand before "
"replicating by VF");
VPI->setOperand(0, LaneDefs[0]);
- for (VPValue *Def : drop_begin(LaneDefs))
- VPI->addOperand(Def);
+ for (VPValue *LaneDef : drop_begin(LaneDefs))
+ VPI->addOperand(LaneDef);
}
ToRemove.push_back(RepR);
}
More information about the llvm-commits
mailing list