[llvm] [VPlan] Factor collectGroupedReplicateMemOps (NFC) (PR #179506)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 06:11:10 PST 2026
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/179506
>From d3b8829f71559696557fd89f00f82ecbf08df447 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 3 Feb 2026 12:31:50 +0000
Subject: [PATCH 1/3] [VPlan] Factor collectGroupedMemOps for
hoistInvariantLoads (NFCI)
Factor out collectGroupedMemOps, so that hoistInvariantLoads can reuse
canHoistOrSinkWithNoAliasCheck.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 1 +
.../Transforms/Vectorize/VPlanTransforms.cpp | 137 +++++++++---------
.../Transforms/Vectorize/VPlanTransforms.h | 3 +-
.../VPlan/vplan-print-after-all.ll | 1 -
4 files changed, 71 insertions(+), 71 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 36c8c0560c9eb..c548f3b07f22f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8131,6 +8131,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
if (auto Plan = tryToBuildVPlanWithVPRecipes(
std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
// Now optimize the initial VPlan.
+ VPlanTransforms::hoistInvariantLoads(*Plan, PSE, OrigLoop);
VPlanTransforms::hoistPredicatedLoads(*Plan, PSE, OrigLoop);
VPlanTransforms::sinkPredicatedStores(*Plan, PSE, OrigLoop);
RUN_VPLAN_PASS(VPlanTransforms::truncateToMinimalBitwidths, *Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 877a8d77c810d..8019b6789c756 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -230,6 +230,46 @@ canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
return true;
}
+// Collect either Loads or Stores grouped by their address SCEV.
+template <unsigned Opcode>
+static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
+collectGroupedMemOps(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L,
+ function_ref<bool(VPReplicateRecipe *)> FilterFn) {
+ static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
+ "Only Load and Store opcodes supported");
+ constexpr bool IsLoad = (Opcode == Instruction::Load);
+ SmallDenseMap<const SCEV *, SmallVector<VPReplicateRecipe *, 4>>
+ RecipesByAddress;
+ for (VPBlockBase *Block :
+ vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+ auto *VPBB = cast<VPBasicBlock>(Block);
+ for (VPRecipeBase &R : *VPBB) {
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
+ continue;
+
+ // For loads, operand 0 is address; for stores, operand 1 is address.
+ VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
+ const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, PSE, L);
+ if (!isa<SCEVCouldNotCompute>(AddrSCEV))
+ RecipesByAddress[AddrSCEV].push_back(RepR);
+ }
+ }
+ auto Groups = to_vector(RecipesByAddress.values());
+ VPDominatorTree VPDT(Plan);
+ for (auto &Group : Groups) {
+ // Sort mem ops by dominance order, with earliest (most dominating) first.
+ stable_sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return VPDT.properlyDominates(A, B);
+ });
+ }
+ // Sort groups by leader dominance order.
+ stable_sort(Groups, [&VPDT](auto A, auto B) {
+ return VPDT.properlyDominates(A[0], B[0]);
+ });
+ return Groups;
+}
+
/// Return true if we do not know how to (mechanically) hoist or sink \p R out
/// of a loop region.
static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
@@ -2813,7 +2853,6 @@ void VPlanTransforms::optimize(VPlan &Plan) {
RUN_VPLAN_PASS(removeDeadRecipes, Plan);
RUN_VPLAN_PASS(createAndOptimizeReplicateRegions, Plan);
- RUN_VPLAN_PASS(hoistInvariantLoads, Plan);
RUN_VPLAN_PASS(mergeBlocksIntoPredecessors, Plan);
RUN_VPLAN_PASS(licm, Plan);
}
@@ -4596,51 +4635,31 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
}
}
-void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
- VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
-
- // Collect candidate loads with invariant addresses and noalias scopes
- // metadata and memory-writing recipes with noalias metadata.
- SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
- SmallVector<MemoryLocation> Stores;
- for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
- vp_depth_first_shallow(LoopRegion->getEntry()))) {
- for (VPRecipeBase &R : *VPBB) {
- // Only handle single-scalar replicated loads with invariant addresses.
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
- if (RepR->isPredicated() || !RepR->isSingleScalar() ||
- RepR->getOpcode() != Instruction::Load)
- continue;
+void VPlanTransforms::hoistInvariantLoads(VPlan &Plan,
+ PredicatedScalarEvolution &PSE,
+ const Loop *L) {
+ VPBasicBlock *Preheader = Plan.getVectorPreheader();
+ auto IsInvariantLoad = [](VPReplicateRecipe *RepR) {
+ if (RepR->isPredicated() || !RepR->isSingleScalar() ||
+ RepR->getOpcode() != Instruction::Load)
+ return false;
+ VPValue *Addr = RepR->getOperand(0);
+ return Addr->isDefinedOutsideLoopRegions();
+ };
+ auto Groups =
+ collectGroupedMemOps<Instruction::Load>(Plan, PSE, L, IsInvariantLoad);
+ for (auto Group : Groups) {
+ VPReplicateRecipe *EarliestLoad = Group[0];
+ VPBasicBlock *FirstBB = EarliestLoad->getParent();
+ VPBasicBlock *LastBB = Group.back()->getParent();
- VPValue *Addr = RepR->getOperand(0);
- if (Addr->isDefinedOutsideLoopRegions()) {
- MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
- if (!Loc.AATags.Scope)
- continue;
- CandidateLoads.push_back({RepR, Loc});
- }
- }
- if (R.mayWriteToMemory()) {
- auto Loc = vputils::getMemoryLocation(R);
- if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
- return;
- Stores.push_back(*Loc);
- }
- }
- }
+ // Check that the load doesn't alias with stores between FirstBB and LastBB.
+ auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
+ if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
+ continue;
- VPBasicBlock *Preheader = Plan.getVectorPreheader();
- for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
- // Hoist the load to the preheader if it doesn't alias with any stores
- // according to the noalias metadata. Other loads should have been hoisted
- // by other passes
- const AAMDNodes &LoadAA = LoadLoc.AATags;
- if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
- return !ScopedNoAliasAAResult::mayAliasInScopes(
- LoadAA.Scope, StoreLoc.AATags.NoAlias);
- })) {
- LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
- }
+ for (VPReplicateRecipe *Load : Group)
+ Load->moveBefore(*Preheader, Preheader->getFirstNonPhi());
}
}
@@ -4661,33 +4680,18 @@ collectComplementaryPredicatedMemOps(VPlan &Plan,
static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
"Only Load and Store opcodes supported");
constexpr bool IsLoad = (Opcode == Instruction::Load);
- VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
- VPDominatorTree VPDT(Plan);
VPTypeAnalysis TypeInfo(Plan);
- // Group predicated operations by their address SCEV.
- DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> RecipesByAddress;
- for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
- auto *VPBB = cast<VPBasicBlock>(Block);
- for (VPRecipeBase &R : *VPBB) {
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
- continue;
-
- // For loads, operand 0 is address; for stores, operand 1 is address.
- VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
- const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, PSE, L);
- if (!isa<SCEVCouldNotCompute>(AddrSCEV))
- RecipesByAddress[AddrSCEV].push_back(RepR);
- }
- }
-
// For each address, collect operations with the same or complementary masks.
SmallVector<SmallVector<VPReplicateRecipe *, 4>> AllGroups;
auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) {
return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
};
- for (auto &[Addr, Recipes] : RecipesByAddress) {
+ auto Groups =
+ collectGroupedMemOps<Opcode>(Plan, PSE, L, [](VPReplicateRecipe *RepR) {
+ return RepR->isPredicated();
+ });
+ for (auto Recipes : Groups) {
if (Recipes.size() < 2)
continue;
@@ -4722,11 +4726,6 @@ collectComplementaryPredicatedMemOps(VPlan &Plan,
if (HasComplementaryMask) {
assert(Group.size() >= 2 && "must have at least 2 entries");
- // Sort replicates by dominance order, with earliest (most dominating)
- // first.
- sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
- return VPDT.properlyDominates(A, B);
- });
AllGroups.push_back(std::move(Group));
}
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 16f7ae2daeb5e..23d42250598da 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -379,7 +379,8 @@ struct VPlanTransforms {
/// Hoist single-scalar loads with invariant addresses out of the vector loop
/// to the preheader, if they are proven not to alias with any stores in the
/// plan using noalias metadata.
- static void hoistInvariantLoads(VPlan &Plan);
+ static void hoistInvariantLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
+ const Loop *L);
/// Hoist predicated loads from the same address to the loop entry block, if
/// they are guaranteed to execute on both paths (i.e., in replicate regions
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
index bc9367942ac27..a1369299897dd 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
@@ -31,7 +31,6 @@
; CHECK: VPlan for loop in 'foo' after removeBranchOnConst
; CHECK: VPlan for loop in 'foo' after removeDeadRecipes
; CHECK: VPlan for loop in 'foo' after createAndOptimizeReplicateRegions
-; CHECK: VPlan for loop in 'foo' after hoistInvariantLoads
; CHECK: VPlan for loop in 'foo' after mergeBlocksIntoPredecessors
; CHECK: VPlan for loop in 'foo' after licm
; CHECK: VPlan for loop in 'foo' after VPlanTransforms::optimize
>From c02a55ca76ce0cca7cc1e8864ea38d1f1f659537 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 24 Feb 2026 17:25:58 +0000
Subject: [PATCH 2/3] [VPlan] Address aes's review
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 23 ++++++++++---------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8019b6789c756..6e72a2d575da6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -233,8 +233,9 @@ canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
// Collect either Loads or Stores grouped by their address SCEV.
template <unsigned Opcode>
static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
-collectGroupedMemOps(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L,
- function_ref<bool(VPReplicateRecipe *)> FilterFn) {
+collectGroupedReplicateMemOps(
+ VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L,
+ function_ref<bool(VPReplicateRecipe *)> FilterFn) {
static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
"Only Load and Store opcodes supported");
constexpr bool IsLoad = (Opcode == Instruction::Load);
@@ -4646,16 +4647,17 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan,
VPValue *Addr = RepR->getOperand(0);
return Addr->isDefinedOutsideLoopRegions();
};
- auto Groups =
- collectGroupedMemOps<Instruction::Load>(Plan, PSE, L, IsInvariantLoad);
+ auto Groups = collectGroupedReplicateMemOps<Instruction::Load>(
+ Plan, PSE, L, IsInvariantLoad);
for (auto Group : Groups) {
VPReplicateRecipe *EarliestLoad = Group[0];
- VPBasicBlock *FirstBB = EarliestLoad->getParent();
+ VPBasicBlock *EntryBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPBasicBlock *LastBB = Group.back()->getParent();
- // Check that the load doesn't alias with stores between FirstBB and LastBB.
+ // Check that the load doesn't alias with stores between EntryBB and
+ // LastBB.
auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
- if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
+ if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, EntryBB, LastBB))
continue;
for (VPReplicateRecipe *Load : Group)
@@ -4687,10 +4689,9 @@ collectComplementaryPredicatedMemOps(VPlan &Plan,
auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) {
return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
};
- auto Groups =
- collectGroupedMemOps<Opcode>(Plan, PSE, L, [](VPReplicateRecipe *RepR) {
- return RepR->isPredicated();
- });
+ auto Groups = collectGroupedReplicateMemOps<Opcode>(
+ Plan, PSE, L,
+ [](VPReplicateRecipe *RepR) { return RepR->isPredicated(); });
for (auto Recipes : Groups) {
if (Recipes.size() < 2)
continue;
>From 80ab493e0313de91484c83bed6ffb6a0a9fceb9b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 3 Mar 2026 14:03:19 +0000
Subject: [PATCH 3/3] [VPlan] Fix another thinko; patch is now really NFC
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e72a2d575da6..3a22c6035695f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4652,12 +4652,13 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan,
for (auto Group : Groups) {
VPReplicateRecipe *EarliestLoad = Group[0];
VPBasicBlock *EntryBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
- VPBasicBlock *LastBB = Group.back()->getParent();
+ VPBasicBlock *ExitBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
- // Check that the load doesn't alias with stores between EntryBB and
- // LastBB.
+ // Check that the load doesn't alias with stores in the vector loop: if the
+ // load is before a store in the loop, we would need to re-load the value on
+ // each iteration.
auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
- if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, EntryBB, LastBB))
+ if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, EntryBB, ExitBB))
continue;
for (VPReplicateRecipe *Load : Group)
More information about the llvm-commits mailing list