[llvm] [VPlan] Hoist predicated loads with complementary masks. (PR #168373)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 21 03:36:28 PST 2025
================
@@ -3974,6 +3974,152 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) {
}
}
+// Returns the metadata shared by every load in Loads: the first load's metadata intersected with that of each remaining load. Loads must be non-empty, since front() is read unconditionally.
+static VPIRMetadata getCommonLoadMetadata(ArrayRef<VPReplicateRecipe *> Loads) {
+ VPIRMetadata CommonMetadata = *Loads.front(); // Seed with the first load's metadata.
+ for (VPReplicateRecipe *Load : drop_begin(Loads)) // Fold in every load after the first.
+ CommonMetadata.intersect(*Load);
+ return CommonMetadata;
+}
+
+// Returns true if Load may be hoisted: scans every recipe in the blocks from FirstBB through LastBB (following single successors) and requires, via scoped noalias metadata, that no recipe that may write to memory can alias Load.
+// Returns false when Load has no memory location or no alias scope. NOTE(review): if the successor chain ends before LastBB is reached, the scan stops there and true is still returned -- confirm callers guarantee LastBB is on the chain.
+static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load,
+ VPBasicBlock *FirstBB,
+ VPBasicBlock *LastBB) {
+ // Without a memory location or alias-scope metadata on the load there is
+ // nothing to compare writers against, so conservatively refuse to hoist.
+ auto LoadLoc = vputils::getMemoryLocation(*Load);
+ if (!LoadLoc || !LoadLoc->AATags.Scope)
+ return false;
+
+ const AAMDNodes &LoadAA = LoadLoc->AATags;
+ for (VPBlockBase *Block = FirstBB; Block;
+ Block = Block->getSingleSuccessor()) {
+ // This function assumes a simple linear chain of blocks. If there are
+ // multiple successors, we would need more complex analysis.
+ assert(Block->getNumSuccessors() <= 1 &&
+ "Expected at most one successor in block chain");
+ auto *VPBB = cast<VPBasicBlock>(Block); // cast<>, not dyn_cast<>: every block on the chain is expected to be a VPBasicBlock.
+ for (VPRecipeBase &R : *VPBB) {
+ if (R.mayWriteToMemory()) {
+ auto Loc = vputils::getMemoryLocation(R);
+ // Refuse to hoist if the writer's location is unknown, or if checking the
+ // load's scopes against the writer's noalias metadata reports possible aliasing.
+ if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes(
+ LoadAA.Scope, Loc->AATags.NoAlias))
+ return false;
+ }
+ }
+
+ if (Block == LastBB) // LastBB itself has just been scanned; stop here.
+ break;
+ }
+ return true; // No potentially aliasing writer was found in the scanned blocks.
+}
+
+void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
+ const Loop *L) {
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPTypeAnalysis TypeInfo(Plan);
+ VPDominatorTree VPDT(Plan);
+
+ // Group predicated loads by their address SCEV.
+ MapVector<const SCEV *, SmallVector<VPReplicateRecipe *>> LoadsByAddress;
+ for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) {
+ auto *VPBB = cast<VPBasicBlock>(Block);
+ for (VPRecipeBase &R : *VPBB) {
+ auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
+ if (!RepR || RepR->getOpcode() != Instruction::Load ||
+ !RepR->isPredicated())
+ continue;
+
+ VPValue *Addr = RepR->getOperand(0);
+ const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L);
+ if (!isa<SCEVCouldNotCompute>(AddrSCEV))
+ LoadsByAddress[AddrSCEV].push_back(RepR);
+ }
+ }
+
+ // For each address, collect loads with complementary masks, sort by
+ // dominance, and use the earliest load.
+ for (auto &[Addr, Loads] : LoadsByAddress) {
+ if (Loads.size() < 2)
+ continue;
+
+ // Collect groups of loads with complementary masks.
+ SmallVector<SmallVector<VPReplicateRecipe *, 4>> LoadGroups;
+ for (VPReplicateRecipe *&LoadI : Loads) {
+ if (!LoadI)
+ continue;
+
+ VPValue *MaskI = LoadI->getMask();
+ Type *TypeI = TypeInfo.inferScalarType(LoadI);
+ SmallVector<VPReplicateRecipe *, 4> Group;
+ Group.push_back(LoadI);
+ LoadI = nullptr;
+
+ // Find all loads with the same type.
+ for (VPReplicateRecipe *&LoadJ : Loads) {
+ if (!LoadJ)
+ continue;
+
+ Type *TypeJ = TypeInfo.inferScalarType(LoadJ);
+ if (TypeI == TypeJ) {
+ Group.push_back(LoadJ);
+ LoadJ = nullptr;
+ }
+ }
+
+ // Check if any load in the group has a complementary mask with another,
+ // that is M1 == NOT(M2) or M2 == NOT(M1).
+ bool HasComplementaryMask =
+ any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) {
+ VPValue *MaskJ = Load->getMask();
+ return match(MaskI, m_Not(m_Specific(MaskJ))) ||
+ match(MaskJ, m_Not(m_Specific(MaskI)));
+ });
+
+ if (HasComplementaryMask)
+ LoadGroups.push_back(std::move(Group));
+ }
+
+ // For each group, check memory dependencies and hoist the earliest load.
+ for (auto &Group : LoadGroups) {
+ // Sort loads by dominance order, with earliest (most dominating) first.
+ sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
+ return VPDT.properlyDominates(A, B);
+ });
+
+ VPReplicateRecipe *EarliestLoad = Group.front();
+ VPBasicBlock *FirstBB = EarliestLoad->getParent();
+ VPBasicBlock *LastBB = Group.back()->getParent();
+
+ // Check that the load doesn't alias with stores between first and last.
+ if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB))
+ continue;
+
+ // Collect common metadata from all loads in the group.
+ VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group);
+
+ // Create an unpredicated version of the earliest load with common
+ // metadata.
+ auto *UnpredicatedLoad = new VPReplicateRecipe(
+ EarliestLoad->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
----------------
artagnon wrote:
```suggestion
EarliestLoad->getUnderlyingInstr(), EarliestLoad->operands(),
```
https://github.com/llvm/llvm-project/pull/168373
More information about the llvm-commits mailing list