[llvm] [VPlan] Factor collectGroupedReplicateMemOps (NFC) (PR #179506)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 3 06:11:10 PST 2026


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/179506

From d3b8829f71559696557fd89f00f82ecbf08df447 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 3 Feb 2026 12:31:50 +0000
Subject: [PATCH 1/3] [VPlan] Factor collectGroupedMemOps for
 hoistInvariantLoads (NFCI)

Factor out collectGroupedMemOps from
collectComplementaryPredicatedMemOps, enabling the reuse of
canHoistOrSinkWithNoAliasCheck in hoistInvariantLoads.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |   1 +
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 137 +++++++++---------
 .../Transforms/Vectorize/VPlanTransforms.h    |   3 +-
 .../VPlan/vplan-print-after-all.ll            |   1 -
 4 files changed, 71 insertions(+), 71 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 36c8c0560c9eb..c548f3b07f22f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8131,6 +8131,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
     if (auto Plan = tryToBuildVPlanWithVPRecipes(
             std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
       // Now optimize the initial VPlan.
+      VPlanTransforms::hoistInvariantLoads(*Plan, PSE, OrigLoop);
       VPlanTransforms::hoistPredicatedLoads(*Plan, PSE, OrigLoop);
       VPlanTransforms::sinkPredicatedStores(*Plan, PSE, OrigLoop);
       RUN_VPLAN_PASS(VPlanTransforms::truncateToMinimalBitwidths, *Plan,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 877a8d77c810d..8019b6789c756 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -230,6 +230,46 @@ canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
   return true;
 }
 
+// Collect either Loads or Stores grouped by their address SCEV.
+template <unsigned Opcode>
+static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
+collectGroupedMemOps(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L,
+                     function_ref<bool(VPReplicateRecipe *)> FilterFn) {
+  static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
+                "Only Load and Store opcodes supported");
+  constexpr bool IsLoad = (Opcode == Instruction::Load);
+  SmallDenseMap<const SCEV *, SmallVector<VPReplicateRecipe *, 4>>
+      RecipesByAddress;
+  for (VPBlockBase *Block :
+       vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry())) {
+    auto *VPBB = cast<VPBasicBlock>(Block);
+    for (VPRecipeBase &R : *VPBB) {
+      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+      if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
+        continue;
+
+      // For loads, operand 0 is address; for stores, operand 1 is address.
+      VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
+      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, PSE, L);
+      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
+        RecipesByAddress[AddrSCEV].push_back(RepR);
+    }
+  }
+  auto Groups = to_vector(RecipesByAddress.values());
+  VPDominatorTree VPDT(Plan);
+  for (auto &Group : Groups) {
+    // Sort mem ops by dominance order, with earliest (most dominating) first.
+    stable_sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+      return VPDT.properlyDominates(A, B);
+    });
+  }
+  // Sort groups by leader dominance order.
+  stable_sort(Groups, [&VPDT](auto A, auto B) {
+    return VPDT.properlyDominates(A[0], B[0]);
+  });
+  return Groups;
+}
+
 /// Return true if we do not know how to (mechanically) hoist or sink \p R out
 /// of a loop region.
 static bool cannotHoistOrSinkRecipe(const VPRecipeBase &R) {
@@ -2813,7 +2853,6 @@ void VPlanTransforms::optimize(VPlan &Plan) {
   RUN_VPLAN_PASS(removeDeadRecipes, Plan);
 
   RUN_VPLAN_PASS(createAndOptimizeReplicateRegions, Plan);
-  RUN_VPLAN_PASS(hoistInvariantLoads, Plan);
   RUN_VPLAN_PASS(mergeBlocksIntoPredecessors, Plan);
   RUN_VPLAN_PASS(licm, Plan);
 }
@@ -4596,51 +4635,31 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
   }
 }
 
-void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
-  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
-
-  // Collect candidate loads with invariant addresses and noalias scopes
-  // metadata and memory-writing recipes with noalias metadata.
-  SmallVector<std::pair<VPRecipeBase *, MemoryLocation>> CandidateLoads;
-  SmallVector<MemoryLocation> Stores;
-  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
-           vp_depth_first_shallow(LoopRegion->getEntry()))) {
-    for (VPRecipeBase &R : *VPBB) {
-      // Only handle single-scalar replicated loads with invariant addresses.
-      if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
-        if (RepR->isPredicated() || !RepR->isSingleScalar() ||
-            RepR->getOpcode() != Instruction::Load)
-          continue;
+void VPlanTransforms::hoistInvariantLoads(VPlan &Plan,
+                                          PredicatedScalarEvolution &PSE,
+                                          const Loop *L) {
+  VPBasicBlock *Preheader = Plan.getVectorPreheader();
+  auto IsInvariantLoad = [](VPReplicateRecipe *RepR) {
+    if (RepR->isPredicated() || !RepR->isSingleScalar() ||
+        RepR->getOpcode() != Instruction::Load)
+      return false;
+    VPValue *Addr = RepR->getOperand(0);
+    return Addr->isDefinedOutsideLoopRegions();
+  };
+  auto Groups =
+      collectGroupedMemOps<Instruction::Load>(Plan, PSE, L, IsInvariantLoad);
+  for (auto Group : Groups) {
+    VPReplicateRecipe *EarliestLoad = Group[0];
+    VPBasicBlock *FirstBB = EarliestLoad->getParent();
+    VPBasicBlock *LastBB = Group.back()->getParent();
 
-        VPValue *Addr = RepR->getOperand(0);
-        if (Addr->isDefinedOutsideLoopRegions()) {
-          MemoryLocation Loc = *vputils::getMemoryLocation(*RepR);
-          if (!Loc.AATags.Scope)
-            continue;
-          CandidateLoads.push_back({RepR, Loc});
-        }
-      }
-      if (R.mayWriteToMemory()) {
-        auto Loc = vputils::getMemoryLocation(R);
-        if (!Loc || !Loc->AATags.Scope || !Loc->AATags.NoAlias)
-          return;
-        Stores.push_back(*Loc);
-      }
-    }
-  }
+    // Check that the load doesn't alias with stores between FirstBB and LastBB.
+    auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
+    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
+      continue;
 
-  VPBasicBlock *Preheader = Plan.getVectorPreheader();
-  for (auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
-    // Hoist the load to the preheader if it doesn't alias with any stores
-    // according to the noalias metadata. Other loads should have been hoisted
-    // by other passes
-    const AAMDNodes &LoadAA = LoadLoc.AATags;
-    if (all_of(Stores, [&](const MemoryLocation &StoreLoc) {
-          return !ScopedNoAliasAAResult::mayAliasInScopes(
-              LoadAA.Scope, StoreLoc.AATags.NoAlias);
-        })) {
-      LoadRecipe->moveBefore(*Preheader, Preheader->getFirstNonPhi());
-    }
+    for (VPReplicateRecipe *Load : Group)
+      Load->moveBefore(*Preheader, Preheader->getFirstNonPhi());
   }
 }
 
@@ -4661,33 +4680,18 @@ collectComplementaryPredicatedMemOps(VPlan &Plan,
   static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
                 "Only Load and Store opcodes supported");
   constexpr bool IsLoad = (Opcode == Instruction::Load);
-  VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
-  VPDominatorTree VPDT(Plan);
   VPTypeAnalysis TypeInfo(Plan);
 
-  // Group predicated operations by their address SCEV.
-  DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> RecipesByAddress;
-  for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
-    auto *VPBB = cast<VPBasicBlock>(Block);
-    for (VPRecipeBase &R : *VPBB) {
-      auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
-      if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
-        continue;
-
-      // For loads, operand 0 is address; for stores, operand 1 is address.
-      VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
-      const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, PSE, L);
-      if (!isa<SCEVCouldNotCompute>(AddrSCEV))
-        RecipesByAddress[AddrSCEV].push_back(RepR);
-    }
-  }
-
   // For each address, collect operations with the same or complementary masks.
   SmallVector<SmallVector<VPReplicateRecipe *, 4>> AllGroups;
   auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) {
     return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
   };
-  for (auto &[Addr, Recipes] : RecipesByAddress) {
+  auto Groups =
+      collectGroupedMemOps<Opcode>(Plan, PSE, L, [](VPReplicateRecipe *RepR) {
+        return RepR->isPredicated();
+      });
+  for (auto Recipes : Groups) {
     if (Recipes.size() < 2)
       continue;
 
@@ -4722,11 +4726,6 @@ collectComplementaryPredicatedMemOps(VPlan &Plan,
 
       if (HasComplementaryMask) {
         assert(Group.size() >= 2 && "must have at least 2 entries");
-        // Sort replicates by dominance order, with earliest (most dominating)
-        // first.
-        sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
-          return VPDT.properlyDominates(A, B);
-        });
         AllGroups.push_back(std::move(Group));
       }
     }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 16f7ae2daeb5e..23d42250598da 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -379,7 +379,8 @@ struct VPlanTransforms {
   /// Hoist single-scalar loads with invariant addresses out of the vector loop
   /// to the preheader, if they are proven not to alias with any stores in the
   /// plan using noalias metadata.
-  static void hoistInvariantLoads(VPlan &Plan);
+  static void hoistInvariantLoads(VPlan &Plan, PredicatedScalarEvolution &PSE,
+                                  const Loop *L);
 
   /// Hoist predicated loads from the same address to the loop entry block, if
   /// they are guaranteed to execute on both paths (i.e., in replicate regions
diff --git a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
index bc9367942ac27..a1369299897dd 100644
--- a/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
+++ b/llvm/test/Transforms/LoopVectorize/VPlan/vplan-print-after-all.ll
@@ -31,7 +31,6 @@
 ; CHECK: VPlan for loop in 'foo' after removeBranchOnConst
 ; CHECK: VPlan for loop in 'foo' after removeDeadRecipes
 ; CHECK: VPlan for loop in 'foo' after createAndOptimizeReplicateRegions
-; CHECK: VPlan for loop in 'foo' after hoistInvariantLoads
 ; CHECK: VPlan for loop in 'foo' after mergeBlocksIntoPredecessors
 ; CHECK: VPlan for loop in 'foo' after licm
 ; CHECK: VPlan for loop in 'foo' after VPlanTransforms::optimize

From c02a55ca76ce0cca7cc1e8864ea38d1f1f659537 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 24 Feb 2026 17:25:58 +0000
Subject: [PATCH 2/3] [VPlan] Address aes's review

---
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 8019b6789c756..6e72a2d575da6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -233,8 +233,9 @@ canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
 // Collect either Loads or Stores grouped by their address SCEV.
 template <unsigned Opcode>
 static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
-collectGroupedMemOps(VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L,
-                     function_ref<bool(VPReplicateRecipe *)> FilterFn) {
+collectGroupedReplicateMemOps(
+    VPlan &Plan, PredicatedScalarEvolution &PSE, const Loop *L,
+    function_ref<bool(VPReplicateRecipe *)> FilterFn) {
   static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
                 "Only Load and Store opcodes supported");
   constexpr bool IsLoad = (Opcode == Instruction::Load);
@@ -4646,16 +4647,17 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan,
     VPValue *Addr = RepR->getOperand(0);
     return Addr->isDefinedOutsideLoopRegions();
   };
-  auto Groups =
-      collectGroupedMemOps<Instruction::Load>(Plan, PSE, L, IsInvariantLoad);
+  auto Groups = collectGroupedReplicateMemOps<Instruction::Load>(
+      Plan, PSE, L, IsInvariantLoad);
   for (auto Group : Groups) {
     VPReplicateRecipe *EarliestLoad = Group[0];
-    VPBasicBlock *FirstBB = EarliestLoad->getParent();
+    VPBasicBlock *EntryBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
     VPBasicBlock *LastBB = Group.back()->getParent();
 
-    // Check that the load doesn't alias with stores between FirstBB and LastBB.
+    // Check that the load doesn't alias with stores between EntryBB and
+    // LastBB.
     auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
-    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
+    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, EntryBB, LastBB))
       continue;
 
     for (VPReplicateRecipe *Load : Group)
@@ -4687,10 +4689,9 @@ collectComplementaryPredicatedMemOps(VPlan &Plan,
   auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) {
     return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
   };
-  auto Groups =
-      collectGroupedMemOps<Opcode>(Plan, PSE, L, [](VPReplicateRecipe *RepR) {
-        return RepR->isPredicated();
-      });
+  auto Groups = collectGroupedReplicateMemOps<Opcode>(
+      Plan, PSE, L,
+      [](VPReplicateRecipe *RepR) { return RepR->isPredicated(); });
   for (auto Recipes : Groups) {
     if (Recipes.size() < 2)
       continue;

From 80ab493e0313de91484c83bed6ffb6a0a9fceb9b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 3 Mar 2026 14:03:19 +0000
Subject: [PATCH 3/3] [VPlan] Fix another thinko; patch is now really NFC

---
 llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e72a2d575da6..3a22c6035695f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4652,12 +4652,13 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan,
   for (auto Group : Groups) {
     VPReplicateRecipe *EarliestLoad = Group[0];
     VPBasicBlock *EntryBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
-    VPBasicBlock *LastBB = Group.back()->getParent();
+    VPBasicBlock *ExitBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
 
-    // Check that the load doesn't alias with stores between EntryBB and
-    // LastBB.
+    // Check that the load doesn't alias with stores in the vector loop: if the
+    // load is before a store in the loop, we would need to re-load the value on
+    // each iteration.
     auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
-    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, EntryBB, LastBB))
+    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, EntryBB, ExitBB))
       continue;
 
     for (VPReplicateRecipe *Load : Group)



More information about the llvm-commits mailing list