[llvm] [MachinePipeliner] Fix constraints aren't considered in certain cases (PR #95356)
Ryotaro KASUGA via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 12 22:15:09 PDT 2024
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/95356
when scheduling
When scheduling an instruction, if both any predecessors and any successors of the instruction are already scheduled, `SchedStart` isn't taken into account. It may result generating incorrect code. This patch fixes the problem. Also, this patch merges `SchedStart` into `EarlyStart` (same for `SchedEnd`).
I checked the schedule changes due to this patch in llvm-test-suite. In conclusion, as far as I observed, there was no change in most cases. Just in case, details of the results are as follows.
Compile options:
```
-O3 \
-mcpu=neoverse-v1 \
-mllvm -aarch64-enable-pipeliner \
-mllvm -pipeliner-max-stages=1000 \
-mllvm -pipeliner-max-mii=1000 \
-mllvm -pipeliner-enable-copytophi=0 \
-mllvm -pipeliner-ii-search-range=100 \
-mllvm -enable-misched=0 \
-mllvm -enable-post-misched=0
```
Observed changes:
| Test | Summary |
|-|-|
| MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 | The order of some instructions was changed |
| MultiSource/Benchmarks/TSVC/CrossingThresholds-flt/CrossingThresholds-flt | The order of some instructions was changed |
| MultiSource/Benchmarks/ASC_Sequoia/AMGmk/AMGmk | The order of some instructions was changed |
>From e3e0d5919d230f873f7a5d5f6444fb4e9e398637 Mon Sep 17 00:00:00 2001
From: "ryotaro.kasuga" <kasuga.ryotaro at jp.fujitsu.com>
Date: Fri, 7 Jun 2024 06:00:05 +0000
Subject: [PATCH] [MachinePipeliner] Fix constraints aren't considered in
certain cases when scheduling
When scheduling an instruction, if both any predecessors and any
successors of the instruction are already scheduled, `SchedStart` isn't
taken into account. It may result generating incorrect code. This patch
fixes the problem.
---
llvm/include/llvm/CodeGen/MachinePipeliner.h | 7 +-
llvm/lib/CodeGen/MachinePipeliner.cpp | 77 ++++++++++++--------
2 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 8f0a17cf99967..50df76db2d98d 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -594,8 +594,8 @@ class SMSchedule {
/// chain.
int latestCycleInChain(const SDep &Dep);
- void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
- int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
+ void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
+ SwingSchedulerDAG *DAG);
bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
/// Iterators for the cycle to instruction map.
@@ -653,6 +653,9 @@ class SMSchedule {
bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const;
bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def,
MachineOperand &MO) const;
+
+ bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
+ SwingSchedulerDAG *DAG) const;
void print(raw_ostream &os) const;
void dump() const;
};
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6c24cfca793fc..e1639f7d4f72c 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -2420,47 +2420,48 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
// upon the scheduled time for any predecessors/successors.
int EarlyStart = INT_MIN;
int LateStart = INT_MAX;
- // These values are set when the size of the schedule window is limited
- // due to chain dependences.
- int SchedEnd = INT_MAX;
- int SchedStart = INT_MIN;
- Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
- II, this);
+ Schedule.computeStart(SU, &EarlyStart, &LateStart, II, this);
LLVM_DEBUG({
dbgs() << "\n";
dbgs() << "Inst (" << SU->NodeNum << ") ";
SU->getInstr()->dump();
dbgs() << "\n";
});
- LLVM_DEBUG({
- dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
- LateStart, SchedEnd, SchedStart);
- });
+ LLVM_DEBUG(
+ dbgs() << format("\tes: %8x ls: %8x\n", EarlyStart, LateStart));
- if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
- SchedStart > LateStart)
+ if (EarlyStart > LateStart)
scheduleFound = false;
- else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
- SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
- scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
- } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
- SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
- scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
- } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
- SchedEnd =
- std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
- // When scheduling a Phi it is better to start at the late cycle and go
- // backwards. The default order may insert the Phi too far away from
- // its first dependence.
- if (SU->getInstr()->isPHI())
- scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
- else
- scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ else if (EarlyStart != INT_MIN && LateStart == INT_MAX)
+ scheduleFound =
+ Schedule.insert(SU, EarlyStart, EarlyStart + (int)II - 1, II);
+ else if (EarlyStart == INT_MIN && LateStart != INT_MAX)
+ scheduleFound =
+ Schedule.insert(SU, LateStart, LateStart - (int)II + 1, II);
+ else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
+ // To keep original behaviour, start scheduling at the late cycle and go
+ // backwards when all scheduled predecessors are loop-carried
+ // output/order dependencies. Empirically, there are also cases where
+ // scheduling becomes possible with backward search.
+ if (Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this)) {
+ EarlyStart = std::max(EarlyStart, LateStart - (int)II + 1);
+ scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
+ } else {
+ LateStart = std::min(LateStart, EarlyStart + (int)II - 1);
+ // When scheduling a Phi it is better to start at the late cycle and
+ // go backwards. The default order may insert the Phi too far away
+ // from its first dependence.
+ if (SU->getInstr()->isPHI())
+ scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
+ else
+ scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
+ }
} else {
int FirstCycle = Schedule.getFirstCycle();
scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
FirstCycle + getASAP(SU) + II - 1, II);
}
+
// Even if we find a schedule, make sure the schedule doesn't exceed the
// allowable number of stages. We keep trying if this happens.
if (scheduleFound)
@@ -2868,8 +2869,7 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
/// Compute the scheduling start slot for the instruction. The start slot
/// depends on any predecessor or successor nodes scheduled already.
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
- int *MinEnd, int *MaxStart, int II,
- SwingSchedulerDAG *DAG) {
+ int II, SwingSchedulerDAG *DAG) {
// Iterate over each instruction that has been scheduled already. The start
// slot computation depends on whether the previously scheduled instruction
// is a predecessor or successor of the specified instruction.
@@ -2888,7 +2888,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
*MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
if (DAG->isLoopCarriedDep(SU, Dep, false)) {
int End = earliestCycleInChain(Dep) + (II - 1);
- *MinEnd = std::min(*MinEnd, End);
+ *MinLateStart = std::min(*MinLateStart, End);
}
} else {
int LateStart = cycle - Dep.getLatency() +
@@ -2912,7 +2912,7 @@ void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
*MinLateStart = std::min(*MinLateStart, LateStart);
if (DAG->isLoopCarriedDep(SU, Dep)) {
int Start = latestCycleInChain(Dep) + 1 - II;
- *MaxStart = std::max(*MaxStart, Start);
+ *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
}
} else {
int EarlyStart = cycle + Dep.getLatency() -
@@ -3105,6 +3105,19 @@ bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,
return false;
}
+/// Return true if all scheduled predecessors are loop-carried output/order
+/// dependencies.
+bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds(
+ SUnit *SU, SwingSchedulerDAG *DAG) const {
+ for (const SDep &Pred : SU->Preds)
+ if (InstrToCycle.count(Pred.getSUnit()) && !DAG->isBackedge(SU, Pred))
+ return false;
+ for (const SDep &Succ : SU->Succs)
+ if (InstrToCycle.count(Succ.getSUnit()) && DAG->isBackedge(SU, Succ))
+ return false;
+ return true;
+}
+
/// Determine transitive dependences of unpipelineable instructions
SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
More information about the llvm-commits
mailing list