[llvm] [VPlan] Sink predicated stores with complementary masks. (PR #168771)
Julian Nagele via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 28 08:10:16 PST 2025
================
@@ -4127,119 +4143,217 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
}
}
-// Returns the intersection of metadata from a group of loads.
-static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
- VPIRMetadata CommonMetadata = *Loads.front();
- for (VPReplicateRecipe *Load : drop_begin(Loads))
- CommonMetadata.intersect(*Load);
+// Collect common metadata from a group of replicate recipes by intersecting
+// metadata from all recipes in the group.
+static VPIRMetadata getCommonMetadata(ArrayRef<VPReplicateRecipe *> Recipes) {
+ VPIRMetadata CommonMetadata = *Recipes.front();
+ for (VPReplicateRecipe *Recipe : drop_begin(Recipes))
+ CommonMetadata.intersect(*Recipe);
return CommonMetadata;
}
-void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
- const Loop *L) {
+template <unsigned Opcode>
+static SmallVector<SmallVector<VPReplicateRecipe *, 4>>
+collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L) {
+ static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
+ "Only Load and Store opcodes supported");
+ constexpr bool IsLoad = (Opcode == Instruction::Load);
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
VPTypeAnalysis TypeInfo(Plan);
- VPDominatorTree VPDT(Plan);
- // Group predicated loads by their address SCEV.
- DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
+ // Group predicated operations by their address SCEV.
+ DenseMap<const SCEV *, SmallVector<VPReplicateRecipe *>> RecipesByAddress;
for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
auto *VPBB = cast<VPBasicBlock>(Block);
for (VPRecipeBase &R : *VPBB) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
- if (!RepR || RepR->getOpcode() != Instruction::Load ||
- !RepR->isPredicated())
+ if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
continue;
- VPValue *Addr = RepR->getOperand(0);
+ // For loads, the address is operand 0; for stores, it is operand 1.
+ VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
if (!isa<SCEVCouldNotCompute>(AddrSCEV))
- LoadsByAddress[AddrSCEV].push_back(RepR);
+ RecipesByAddress[AddrSCEV].push_back(RepR);
}
}
- // For each address, collect loads with complementary masks, sort by
- // dominance, and use the earliest load.
- for (auto &[Addr, Loads] : LoadsByAddress) {
- if (Loads.size() < 2)
+ // For each address, collect operations with the same or complementary masks.
+ SmallVector<SmallVector<VPReplicateRecipe *, 4>> AllGroups;
+ auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) {
+ return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
+ };
+ for (auto &[Addr, Recipes] : RecipesByAddress) {
+ if (Recipes.size() < 2)
continue;
- // Collect groups of loads with complementary masks.
- SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
- for (VPReplicateRecipe *&LoadI : Loads) {
- if (!LoadI)
+ // Collect groups with the same or complementary masks.
+ for (VPReplicateRecipe *&RecipeI : Recipes) {
+ if (!RecipeI)
continue;
- VPValue *MaskI = LoadI->getMask();
- Type *TypeI = TypeInfo.inferScalarType(LoadI);
+ VPValue *MaskI = RecipeI->getMask();
+ Type *TypeI = GetLoadStoreValueType(RecipeI);
SmallVector<VPReplicateRecipe *, 4> Group;
- Group.push_back(LoadI);
- LoadI = nullptr;
+ Group.push_back(RecipeI);
+ RecipeI = nullptr;
- // Find all loads with the same type.
- for (VPReplicateRecipe *&LoadJ : Loads) {
- if (!LoadJ)
+ // Find all other operations of the same type, recording whether any
+ // pair of masks is complementary.
+ bool HasComplementaryMask = false;
+ for (VPReplicateRecipe *&RecipeJ : Recipes) {
+ if (!RecipeJ)
continue;
- Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
+ VPValue *MaskJ = RecipeJ->getMask();
+ Type *TypeJ = GetLoadStoreValueType(RecipeJ);
if (TypeI == TypeJ) {
- Group.push_back(LoadJ);
- LoadJ = nullptr;
+ // Check if any operation in the group has a complementary mask with
+ // another, that is M1 == NOT(M2) or M2 == NOT(M1).
+ HasComplementaryMask |= match(MaskI, m_Not(m_Specific(MaskJ))) ||
+ match(MaskJ, m_Not(m_Specific(MaskI)));
+ Group.push_back(RecipeJ);
+ RecipeJ = nullptr;
}
}
- // Check if any load in the group has a complementary mask with another,
- // that is M1 == NOT(M2) or M2 == NOT(M1).
- bool HasComplementaryMask =
- any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
- VPValue *MaskJ = Load->getMask();
- return match(MaskI, m_Not(m_Specific(MaskJ))) ||
- match(MaskJ, m_Not(m_Specific(MaskI)));
- });
+ if (HasComplementaryMask) {
+ assert(Group.size() >= 2 && "must have at least 2 entries");
+ AllGroups.push_back(std::move(Group));
+ }
+ }
+ }
+
+ return AllGroups;
+}
+
+// Find the recipe with minimum alignment in the group.
+template <typename InstType>
+static VPReplicateRecipe *
+findRecipeWithMinAlign(ArrayRef<VPReplicateRecipe *> Group) {
+ return *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return cast<InstType>(A->getUnderlyingInstr())->getAlign() <
+ cast<InstType>(B->getUnderlyingInstr())->getAlign();
+ });
+}
+
+void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L) {
+ auto Groups =
+ collectComplementaryPredicatedMemOps<Instruction::Load>(Plan, SE, L);
+ if (Groups.empty())
+ return;
+
+ VPDominatorTree VPDT(Plan);
- if (HasComplementaryMask)
- LoadGroups.push_back(std::move(Group));
+ // Process each group of loads.
+ for (auto &Group : Groups) {
+ // Sort loads by dominance order, with earliest (most dominating) first.
+ sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return VPDT.properlyDominates(A, B);
+ });
+
+ // Try to use the earliest (most dominating) load to replace all others.
+ VPReplicateRecipe *EarliestLoad = Group[0];
+ VPBasicBlock *FirstBB = EarliestLoad->getParent();
+ VPBasicBlock *LastBB = Group.back()->getParent();
+
+ // Check that the load doesn't alias with stores between first and last.
+ auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
+ if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB,
+ /*CheckReads=*/false))
+ continue;
+
+ // Collect common metadata from all loads in the group.
+ VPIRMetadata CommonMetadata = getCommonMetadata(Group);
+
+ // Find the load with minimum alignment to use.
+ auto *LoadWithMinAlign = findRecipeWithMinAlign<LoadInst>(Group);
+
+ // Create an unpredicated version of the earliest load with common
+ // metadata.
+ auto *UnpredicatedLoad = new VPReplicateRecipe(
+ LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
+ /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad,
+ CommonMetadata);
+
+ UnpredicatedLoad->insertBefore(EarliestLoad);
+
+ // Replace all loads in the group with the unpredicated load.
+ for (VPReplicateRecipe *Load : Group) {
+ Load->replaceAllUsesWith(UnpredicatedLoad);
+ Load->eraseFromParent();
}
+ }
+}
- // For each group, check memory dependencies and hoist the earliest load.
- for (auto &Group : LoadGroups) {
- // Sort loads by dominance order, with earliest (most dominating) first.
- sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
- return VPDT.properlyDominates(A, B);
- });
+static bool
+canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
+ auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front());
+ if (!StoreLoc || !StoreLoc->AATags.Scope)
+ return false;
- VPReplicateRecipe *EarliestLoad = Group.front();
- VPBasicBlock *FirstBB = EarliestLoad->getParent();
- VPBasicBlock *LastBB = Group.back()->getParent();
+ // When sinking a group of stores, all members of the group alias each other.
+ // Skip them during the alias checks.
----------------
juliannagele wrote:
```suggestion
// Skip them during the alias checks.
```
https://github.com/llvm/llvm-project/pull/168771
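For readers following the transform, below is a minimal standalone model of the grouping step in collectComplementaryPredicatedMemOps. All names here (MemOp, collectGroups) are hypothetical stand-ins: plain integers take the place of the address SCEVs and mask VPValues, and a boolean flag models the m_Not(m_Specific(...)) pattern match from the patch. The value-type check, and the same-mask members the real code also admits into a group, are omitted for brevity.

```cpp
#include <cstdint>
#include <map>
#include <vector>

// Stand-in for a predicated VPReplicateRecipe: an address key (the real code
// uses the address SCEV) and a mask, where MaskIsNegated models NOT(Mask).
struct MemOp {
  uint64_t Addr;
  int Mask;
  bool MaskIsNegated;
};

// Group ops by address and keep only groups in which at least one pair of
// masks is complementary, i.e. M and NOT(M) of the same underlying mask.
static std::vector<std::vector<MemOp>>
collectGroups(const std::vector<MemOp> &Ops) {
  std::map<uint64_t, std::vector<MemOp>> ByAddr;
  for (const MemOp &Op : Ops)
    ByAddr[Op.Addr].push_back(Op);

  std::vector<std::vector<MemOp>> Groups;
  for (auto &Entry : ByAddr) {
    std::vector<MemOp> &Group = Entry.second;
    if (Group.size() < 2)
      continue;
    bool HasComplementaryMask = false;
    for (size_t I = 0; I < Group.size() && !HasComplementaryMask; ++I)
      for (size_t J = I + 1; J < Group.size(); ++J)
        if (Group[I].Mask == Group[J].Mask &&
            Group[I].MaskIsNegated != Group[J].MaskIsNegated) {
          HasComplementaryMask = true;
          break;
        }
    if (HasComplementaryMask)
      Groups.push_back(Group);
  }
  return Groups;
}

int main() {
  // Two ops at the same address under masks M and NOT(M) form one group;
  // the lone op at 0x20 has no complementary partner and is dropped.
  std::vector<MemOp> Ops = {{0x10, 1, false}, {0x10, 1, true}, {0x20, 2, false}};
  return collectGroups(Ops).size() == 1 ? 0 : 1;
}
```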
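A second sketch covers why the patch's findRecipeWithMinAlign picks the group member with the smallest alignment: the merged, unpredicated access executes unconditionally, so it may only claim an alignment that every member of the group guaranteed. LoadInfo and mergedAlignment below are illustrative names, not the VPlan API.

```cpp
#include <algorithm>
#include <vector>

struct LoadInfo {
  unsigned Align; // alignment in bytes annotated on one predicated load
};

// The conservative alignment for the merged load is the group's minimum:
// over-claiming alignment on the unconditional replacement would be UB.
static unsigned mergedAlignment(const std::vector<LoadInfo> &Group) {
  return std::min_element(Group.begin(), Group.end(),
                          [](const LoadInfo &A, const LoadInfo &B) {
                            return A.Align < B.Align;
                          })
      ->Align;
}

int main() {
  // Merging loads annotated with alignments {16, 4, 8} yields 4.
  std::vector<LoadInfo> Group = {{16}, {4}, {8}};
  return mergedAlignment(Group) == 4 ? 0 : 1;
}
```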
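Finally, a sketch of the hoist step in hoistPredicatedLoads: after sorting a group so the most-dominating load comes first, one unpredicated load replaces every member. Again, the names (Load, hoistGroup) are invented for illustration; the real code builds a mask-less VPReplicateRecipe, inserts it before the earliest load, and calls replaceAllUsesWith on each member.

```cpp
#include <algorithm>
#include <map>
#include <vector>

// Stand-in for a predicated load: a program position (models dominance as a
// total order, which the patch derives from the VPDominatorTree) and an id.
struct Load {
  unsigned Pos;
  int Id;
};

// Replace every load in the group with one unpredicated load placed at the
// earliest (most dominating) position; record the rewrites in Replacements.
static Load hoistGroup(std::vector<Load> Group,
                       std::map<int, int> &Replacements) {
  std::sort(Group.begin(), Group.end(),
            [](const Load &A, const Load &B) { return A.Pos < B.Pos; });
  Load Merged{Group.front().Pos, /*Id=*/-1}; // the new unpredicated load
  for (const Load &L : Group)
    Replacements[L.Id] = Merged.Id; // models replaceAllUsesWith
  return Merged;
}

int main() {
  std::map<int, int> Replacements;
  Load Merged = hoistGroup({{7, 2}, {3, 1}}, Replacements);
  // The merged load lands at position 3 and both originals point to it.
  return (Merged.Pos == 3 && Replacements.at(1) == -1 &&
          Replacements.at(2) == -1)
             ? 0
             : 1;
}
```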