[llvm] [SLP][NFC] Redesign schedule bundle, separate from schedule data, NFC (PR #131625)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 04:20:33 PDT 2025
================
@@ -18324,182 +18506,199 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
}
if (NextLoadStore) {
if (CurrentLoadStore)
- CurrentLoadStore->NextLoadStore = NextLoadStore;
+ CurrentLoadStore->setNextLoadStore(NextLoadStore);
} else {
LastLoadStoreInRegion = CurrentLoadStore;
}
}
-void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
+void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
bool InsertInReadyList,
BoUpSLP *SLP) {
- assert(SD->isSchedulingEntity());
+ SmallVector<ScheduleData *> WorkList;
+ auto ProcessNode = [&](ScheduleData *BundleMember) {
+ assert(!BundleMember->hasValidDependencies() && "invalid deps expected.");
+ BundleMember->initDependencies();
+ BundleMember->resetUnscheduledDeps();
+ // Handle def-use chain dependencies.
+ for (User *U : BundleMember->getInst()->users()) {
+ if (ScheduleData *UseSD = getScheduleData(U)) {
+ BundleMember->incDependencies();
+ if (!UseSD->isScheduled())
+ BundleMember->incrementUnscheduledDeps(1);
+ WorkList.push_back(UseSD);
+ }
+ }
- SmallVector<ScheduleData *, 10> WorkList;
- WorkList.push_back(SD);
+ auto MakeControlDependent = [&](Instruction *I) {
+ auto *DepDest = getScheduleData(I);
+ assert(DepDest && "must be in schedule window");
+ DepDest->addControlDependency(BundleMember);
+ BundleMember->incDependencies();
+ if (!DepDest->isScheduled())
+ BundleMember->incrementUnscheduledDeps(1);
+ WorkList.push_back(DepDest);
+ };
- while (!WorkList.empty()) {
- ScheduleData *SD = WorkList.pop_back_val();
- for (ScheduleData *BundleMember = SD; BundleMember;
- BundleMember = BundleMember->NextInBundle) {
- assert(isInSchedulingRegion(BundleMember));
- if (BundleMember->hasValidDependencies())
- continue;
+ // Any instruction which isn't safe to speculate at the beginning of the
+ // block is control dependent on any early exit or non-willreturn call
+ // which precedes it.
+ if (!isGuaranteedToTransferExecutionToSuccessor(BundleMember->getInst())) {
+ for (Instruction *I = BundleMember->getInst()->getNextNode();
+ I != ScheduleEnd; I = I->getNextNode()) {
+ if (isSafeToSpeculativelyExecute(I, &*BB->begin(), SLP->AC))
+ continue;
- LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember
- << "\n");
- BundleMember->Dependencies = 0;
- BundleMember->resetUnscheduledDeps();
-
- // Handle def-use chain dependencies.
- for (User *U : BundleMember->Inst->users()) {
- if (ScheduleData *UseSD = getScheduleData(cast<Instruction>(U))) {
- BundleMember->Dependencies++;
- ScheduleData *DestBundle = UseSD->FirstInBundle;
- if (!DestBundle->IsScheduled)
- BundleMember->incrementUnscheduledDeps(1);
- if (!DestBundle->hasValidDependencies())
- WorkList.push_back(DestBundle);
- }
- }
+ // Add the dependency
+ MakeControlDependent(I);
- auto MakeControlDependent = [&](Instruction *I) {
- auto *DepDest = getScheduleData(I);
- assert(DepDest && "must be in schedule window");
- DepDest->ControlDependencies.push_back(BundleMember);
- BundleMember->Dependencies++;
- ScheduleData *DestBundle = DepDest->FirstInBundle;
- if (!DestBundle->IsScheduled)
- BundleMember->incrementUnscheduledDeps(1);
- if (!DestBundle->hasValidDependencies())
- WorkList.push_back(DestBundle);
- };
+ if (!isGuaranteedToTransferExecutionToSuccessor(I))
+ // Everything past here must be control dependent on I.
+ break;
+ }
+ }
- // Any instruction which isn't safe to speculate at the beginning of the
- // block is control dependend on any early exit or non-willreturn call
- // which proceeds it.
- if (!isGuaranteedToTransferExecutionToSuccessor(BundleMember->Inst)) {
- for (Instruction *I = BundleMember->Inst->getNextNode();
+ if (RegionHasStackSave) {
+ // If we have an inalloc alloca instruction, it needs to be scheduled
+ // after any preceding stacksave. We also need to prevent any alloca
+ // from reordering above a preceding stackrestore.
+ if (match(BundleMember->getInst(), m_Intrinsic<Intrinsic::stacksave>()) ||
+ match(BundleMember->getInst(),
+ m_Intrinsic<Intrinsic::stackrestore>())) {
+ for (Instruction *I = BundleMember->getInst()->getNextNode();
I != ScheduleEnd; I = I->getNextNode()) {
- if (isSafeToSpeculativelyExecute(I, &*BB->begin(), SLP->AC))
+ if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
+ match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+ // Any allocas past here must be control dependent on I, and I
+ // must be memory dependent on BundleMember->Inst.
+ break;
+
+ if (!isa<AllocaInst>(I))
continue;
// Add the dependency
MakeControlDependent(I);
-
- if (!isGuaranteedToTransferExecutionToSuccessor(I))
- // Everything past here must be control dependent on I.
- break;
}
}
- if (RegionHasStackSave) {
- // If we have an inalloc alloca instruction, it needs to be scheduled
- // after any preceeding stacksave. We also need to prevent any alloca
- // from reordering above a preceeding stackrestore.
- if (match(BundleMember->Inst, m_Intrinsic<Intrinsic::stacksave>()) ||
- match(BundleMember->Inst, m_Intrinsic<Intrinsic::stackrestore>())) {
- for (Instruction *I = BundleMember->Inst->getNextNode();
- I != ScheduleEnd; I = I->getNextNode()) {
- if (match(I, m_Intrinsic<Intrinsic::stacksave>()) ||
- match(I, m_Intrinsic<Intrinsic::stackrestore>()))
- // Any allocas past here must be control dependent on I, and I
- // must be memory dependend on BundleMember->Inst.
- break;
-
- if (!isa<AllocaInst>(I))
- continue;
+ // In addition to the cases handled just above, we need to prevent
+ // allocas and loads/stores from moving below a stacksave or a
+ // stackrestore. Avoiding moving allocas below stackrestore is currently
+ // thought to be conservatism. Moving loads/stores below a stackrestore
+ // can lead to incorrect code.
+ if (isa<AllocaInst>(BundleMember->getInst()) ||
+ BundleMember->getInst()->mayReadOrWriteMemory()) {
+ for (Instruction *I = BundleMember->getInst()->getNextNode();
+ I != ScheduleEnd; I = I->getNextNode()) {
+ if (!match(I, m_Intrinsic<Intrinsic::stacksave>()) &&
+ !match(I, m_Intrinsic<Intrinsic::stackrestore>()))
+ continue;
- // Add the dependency
- MakeControlDependent(I);
- }
+ // Add the dependency
+ MakeControlDependent(I);
+ break;
}
+ }
+ }
- // In addition to the cases handle just above, we need to prevent
- // allocas and loads/stores from moving below a stacksave or a
- // stackrestore. Avoiding moving allocas below stackrestore is currently
- // thought to be conservatism. Moving loads/stores below a stackrestore
- // can lead to incorrect code.
- if (isa<AllocaInst>(BundleMember->Inst) ||
- BundleMember->Inst->mayReadOrWriteMemory()) {
- for (Instruction *I = BundleMember->Inst->getNextNode();
- I != ScheduleEnd; I = I->getNextNode()) {
- if (!match(I, m_Intrinsic<Intrinsic::stacksave>()) &&
- !match(I, m_Intrinsic<Intrinsic::stackrestore>()))
- continue;
-
- // Add the dependency
- MakeControlDependent(I);
- break;
- }
- }
+ // Handle the memory dependencies (if any).
+ ScheduleData *NextLoadStore = BundleMember->getNextLoadStore();
+ if (!NextLoadStore)
+ return;
+ Instruction *SrcInst = BundleMember->getInst();
+ assert(SrcInst->mayReadOrWriteMemory() &&
+ "NextLoadStore list for non memory effecting bundle?");
+ MemoryLocation SrcLoc = getLocation(SrcInst);
+ bool SrcMayWrite = SrcInst->mayWriteToMemory();
+ unsigned NumAliased = 0;
+ unsigned DistToSrc = 1;
+ bool IsNonSimpleSrc = !SrcLoc.Ptr || !isSimple(SrcInst);
+
+ for (ScheduleData *DepDest = NextLoadStore; DepDest;
+ DepDest = DepDest->getNextLoadStore()) {
+ assert(isInSchedulingRegion(DepDest) && "Expected to be in region");
+
+ // We have two limits to reduce the complexity:
+ // 1) AliasedCheckLimit: It's a small limit to reduce calls to
+ // SLP->isAliased (which is the expensive part in this loop).
+ // 2) MaxMemDepDistance: It's for very large blocks and it aborts
+ // the whole loop (even if the loop is fast, it's quadratic).
+ // It's important for the loop break condition (see below) to
+ // check this limit even between two read-only instructions.
+ if (DistToSrc >= MaxMemDepDistance ||
+ ((SrcMayWrite || DepDest->getInst()->mayWriteToMemory()) &&
+ (IsNonSimpleSrc || NumAliased >= AliasedCheckLimit ||
+ SLP->isAliased(SrcLoc, SrcInst, DepDest->getInst())))) {
+
+ // We increment the counter only if the locations are aliased
+ // (instead of counting all alias checks). This gives a better
+ // balance between reduced runtime and accurate dependencies.
+ NumAliased++;
+
+ DepDest->addMemoryDependency(BundleMember);
+ BundleMember->incDependencies();
+ if (!DepDest->isScheduled())
+ BundleMember->incrementUnscheduledDeps(1);
+ WorkList.push_back(DepDest);
}
- // Handle the memory dependencies (if any).
- ScheduleData *DepDest = BundleMember->NextLoadStore;
- if (!DepDest)
- continue;
- Instruction *SrcInst = BundleMember->Inst;
- assert(SrcInst->mayReadOrWriteMemory() &&
- "NextLoadStore list for non memory effecting bundle?");
- MemoryLocation SrcLoc = getLocation(SrcInst);
- bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
- unsigned NumAliased = 0;
- unsigned DistToSrc = 1;
-
- for (; DepDest; DepDest = DepDest->NextLoadStore) {
- assert(isInSchedulingRegion(DepDest));
-
- // We have two limits to reduce the complexity:
- // 1) AliasedCheckLimit: It's a small limit to reduce calls to
- // SLP->isAliased (which is the expensive part in this loop).
- // 2) MaxMemDepDistance: It's for very large blocks and it aborts
- // the whole loop (even if the loop is fast, it's quadratic).
- // It's important for the loop break condition (see below) to
- // check this limit even between two read-only instructions.
- if (DistToSrc >= MaxMemDepDistance ||
- ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
- (NumAliased >= AliasedCheckLimit ||
- SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
-
- // We increment the counter only if the locations are aliased
- // (instead of counting all alias checks). This gives a better
- // balance between reduced runtime and accurate dependencies.
- NumAliased++;
-
- DepDest->MemoryDependencies.push_back(BundleMember);
- BundleMember->Dependencies++;
- ScheduleData *DestBundle = DepDest->FirstInBundle;
- if (!DestBundle->IsScheduled) {
- BundleMember->incrementUnscheduledDeps(1);
- }
- if (!DestBundle->hasValidDependencies()) {
- WorkList.push_back(DestBundle);
- }
- }
+ // Example, explaining the loop break condition: Let's assume our
+ // starting instruction is i0 and MaxMemDepDistance = 3.
+ //
+ // +--------v--v--v
+ // i0,i1,i2,i3,i4,i5,i6,i7,i8
+ // +--------^--^--^
+ //
+ // MaxMemDepDistance lets us stop alias-checking at i3 and we add
+ // dependencies from i0 to i3,i4,.. (even if they are not aliased).
+ // Previously we already added dependencies from i3 to i6,i7,i8
+ // (because of MaxMemDepDistance). As we added a dependency from
+ // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
+ // and we can abort this loop at i6.
+ if (DistToSrc >= 2 * MaxMemDepDistance)
+ break;
+ DistToSrc++;
+ }
+ };
- // Example, explaining the loop break condition: Let's assume our
- // starting instruction is i0 and MaxMemDepDistance = 3.
- //
- // +--------v--v--v
- // i0,i1,i2,i3,i4,i5,i6,i7,i8
- // +--------^--^--^
- //
- // MaxMemDepDistance let us stop alias-checking at i3 and we add
- // dependencies from i0 to i3,i4,.. (even if they are not aliased).
- // Previously we already added dependencies from i3 to i6,i7,i8
- // (because of MaxMemDepDistance). As we added a dependency from
- // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
- // and we can abort this loop at i6.
- if (DistToSrc >= 2 * MaxMemDepDistance)
- break;
- DistToSrc++;
+ WorkList.push_back(Bundle.getBundle().front());
+ SmallPtrSet<ScheduleBundle *, 16> Visited;
+ while (!WorkList.empty()) {
+ ScheduleData *SD = WorkList.pop_back_val();
+ ArrayRef<ScheduleBundle *> Bundles = getScheduleBundles(SD->getInst());
+ if (!Bundles.empty()) {
+ for (ScheduleBundle *Bundle : Bundles) {
+ if (!Visited.insert(Bundle).second || Bundle->hasValidDependencies())
+ continue;
+ assert(isInSchedulingRegion(*Bundle) &&
+ "ScheduleData not in scheduling region");
+ for (ScheduleData *BundleMember : Bundle->getBundle()) {
+ if (BundleMember->hasValidDependencies())
+ continue;
+ LLVM_DEBUG(dbgs()
+ << "SLP: update deps of " << *BundleMember << "\n");
+ ProcessNode(BundleMember);
----------------
alexey-bataev wrote:
Not so much, and the code is pretty simple
https://github.com/llvm/llvm-project/pull/131625
More information about the llvm-commits mailing list