[llvm] [MachinePipeliner] Add an abstract layer to manipulate Data Dependenc… (PR #109918)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 23 15:19:01 PST 2024
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/109918
>From 65c5f46434d3c3ebd3b6664b0008f07f849bf9b2 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 25 Sep 2024 15:42:04 +0900
Subject: [PATCH 1/5] [MachinePipeliner] Add an abstract layer to manipulate
Data Dependence Graph
In MachinePipeliner, a DAG class is used to represent the Data
Dependence Graph. A Data Dependence Graph generally contains cycles, so
a DAG class is not an appropriate representation. In fact, some "hacks"
are used to express back-edges in the current implementation. This patch
adds a new class to provide a better interface for manipulating
dependencies.
Our approach is as follows:
- To build the graph, we use the ScheduleDAGInstrs class as it is,
because it has powerful functions and the current implementation
depends heavily on it.
- After the graph construction is finished (i.e., during scheduling), we
use the new class SwingSchedulerDDG to manipulate the dependencies.
Since we don't change the dependencies during scheduling, the new class
only provides functions to read them. Also, this patch is only a
refactoring, i.e., scheduling results should not change with or without
this patch.
---
llvm/include/llvm/CodeGen/MachinePipeliner.h | 162 ++++--
llvm/lib/CodeGen/MachinePipeliner.cpp | 521 ++++++++++---------
2 files changed, 421 insertions(+), 262 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 0cc862590d0c02..ebeb933f8c8508 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -40,7 +40,9 @@
#ifndef LLVM_CODEGEN_MACHINEPIPELINER_H
#define LLVM_CODEGEN_MACHINEPIPELINER_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -59,6 +61,8 @@ namespace llvm {
class AAResults;
class NodeSet;
class SMSchedule;
+class SwingSchedulerDDGEdge;
+class SwingSchedulerDDG;
extern cl::opt<bool> SwpEnableCopyToPhi;
extern cl::opt<int> SwpForceIssueWidth;
@@ -114,10 +118,123 @@ class MachinePipeliner : public MachineFunctionPass {
bool useWindowScheduler(bool Changed);
};
+/// Represents a dependence between two instructions.
+class SwingSchedulerDDGEdge {
+ SUnit *Dst = nullptr;
+ SDep Pred;
+ unsigned Distance = 0;
+
+public:
+ /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
+ /// \p Dep in the original DAG. This pair has no information about the
+ /// direction of the edge, so we need to pass an additional argument \p
+ /// IsSucc.
+ SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)
+ : Dst(PredOrSucc), Pred(Dep), Distance(0u) {
+ SUnit *Src = Dep.getSUnit();
+
+ if (IsSucc) {
+ std::swap(Src, Dst);
+ Pred.setSUnit(Src);
+ }
+
+ // An anti-dependence to PHI means loop-carried dependence.
+ if (Pred.getKind() == SDep::Anti && Src->getInstr()->isPHI()) {
+ Distance = 1;
+ std::swap(Src, Dst);
+ auto Reg = Pred.getReg();
+ Pred = SDep(Src, SDep::Kind::Data, Reg);
+ }
+ }
+
+ /// Returns the SUnit from which the edge comes (source node).
+ SUnit *getSrc() const { return Pred.getSUnit(); }
+
+ /// Returns the SUnit to which the edge points (destination node).
+ SUnit *getDst() const { return Dst; }
+
+ /// Returns the latency value for the edge.
+ unsigned getLatency() const { return Pred.getLatency(); }
+
+ /// Sets the latency for the edge.
+ void setLatency(unsigned Latency) { Pred.setLatency(Latency); }
+
+ /// Returns the distance value for the edge.
+ unsigned getDistance() const { return Distance; }
+
+ /// Sets the distance value for the edge.
+ void setDistance(unsigned D) { Distance = D; }
+
+ /// Returns the register associated with the edge.
+ Register getReg() const { return Pred.getReg(); }
+
+ /// Returns true if the edge represents anti dependence.
+ bool isAntiDep() const { return Pred.getKind() == SDep::Kind::Anti; }
+
+ /// Returns true if the edge represents output dependence.
+ bool isOutputDep() const { return Pred.getKind() == SDep::Kind::Output; }
+
+ /// Returns true if the edge represents a dependence that is not data, anti or
+ /// output dependence.
+ bool isOrderDep() const { return Pred.getKind() == SDep::Kind::Order; }
+
+ /// Returns true if the edge represents unknown scheduling barrier.
+ bool isBarrier() const { return Pred.isBarrier(); }
+
+ /// Returns true if the edge represents an artificial dependence.
+ bool isArtificial() const { return Pred.isArtificial(); }
+
+ /// Tests if this is a Data dependence that is associated with a register.
+ bool isAssignedRegDep() const { return Pred.isAssignedRegDep(); }
+
+ /// Returns true for DDG edges that we ignore when computing the cost
+ /// functions. We ignore the back-edge recurrence in order to avoid unbounded
+ /// recursion in the calculation of the ASAP, ALAP, etc functions.
+ bool ignoreDependence(bool IgnoreAnti) const;
+};
+
+/// Represents dependencies between instructions. This class is a wrapper
+/// around `SUnits` and their dependencies to manipulate back-edges in a
+/// natural way. Currently it only supports back-edges via PHI, which are
+/// expressed as anti-dependencies in the original DAG.
+/// FIXME: Support any other loop-carried dependencies
+class SwingSchedulerDDG {
+ using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>;
+
+ struct SwingSchedulerDDGEdges {
+ EdgesType Preds;
+ EdgesType Succs;
+ };
+
+ void initEdges(SUnit *SU);
+
+ SUnit *EntrySU;
+ SUnit *ExitSU;
+
+ std::vector<SwingSchedulerDDGEdges> EdgesVec;
+ SwingSchedulerDDGEdges EntrySUEdges;
+ SwingSchedulerDDGEdges ExitSUEdges;
+
+ void addEdge(SUnit *SU, const SwingSchedulerDDGEdge &Edge);
+
+ SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
+ const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
+
+public:
+ SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU);
+
+ const EdgesType &getInEdges(const SUnit *SU) const;
+
+ const EdgesType &getOutEdges(const SUnit *SU) const;
+};
+
/// This class builds the dependence graph for the instructions in a loop,
/// and attempts to schedule the instructions using the SMS algorithm.
class SwingSchedulerDAG : public ScheduleDAGInstrs {
MachinePipeliner &Pass;
+
+ std::unique_ptr<SwingSchedulerDDG> DDG;
+
/// The minimum initiation interval between iterations for this schedule.
unsigned MII = 0;
/// The maximum initiation interval between iterations for this schedule.
@@ -130,7 +247,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
unsigned II_setByPragma = 0;
TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
- /// A toplogical ordering of the SUnits, which is needed for changing
+ /// A topological ordering of the SUnits, which is needed for changing
/// dependences and iterating over the SUnits.
ScheduleDAGTopologicalSort Topo;
@@ -252,27 +369,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
}
- /// Return true if the dependence is a back-edge in the data dependence graph.
- /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
- /// using an anti dependence from a Phi to an instruction.
- bool isBackedge(SUnit *Source, const SDep &Dep) {
- if (Dep.getKind() != SDep::Anti)
- return false;
- return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
- }
-
- bool isLoopCarriedDep(SUnit *Source, const SDep &Dep,
- bool isSucc = true) const;
-
- /// The distance function, which indicates that operation V of iteration I
- /// depends on operations U of iteration I-distance.
- unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
- // Instructions that feed a Phi have a distance of 1. Computing larger
- // values for arrays requires data dependence information.
- if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
- return 1;
- return 0;
- }
+ bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
@@ -294,6 +391,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
static bool classof(const ScheduleDAGInstrs *DAG) { return true; }
+ const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
+
private:
void addLoopCarriedDependences(AAResults *AA);
void updatePhiDependences();
@@ -357,6 +456,7 @@ class NodeSet {
//
// Hold a map from each SUnit in the circle to the maximum distance from the
// source node by only considering the nodes.
+ const SwingSchedulerDDG *DDG = DAG->getDDG();
DenseMap<SUnit *, unsigned> SUnitToDistance;
for (auto *Node : Nodes)
SUnitToDistance[Node] = 0;
@@ -364,8 +464,8 @@ class NodeSet {
for (unsigned I = 1, E = Nodes.size(); I <= E; ++I) {
SUnit *U = Nodes[I - 1];
SUnit *V = Nodes[I % Nodes.size()];
- for (const SDep &Succ : U->Succs) {
- SUnit *SuccSUnit = Succ.getSUnit();
+ for (const SwingSchedulerDDGEdge &Succ : DDG->getOutEdges(U)) {
+ SUnit *SuccSUnit = Succ.getDst();
if (V != SuccSUnit)
continue;
if (SUnitToDistance[U] + Succ.getLatency() > SUnitToDistance[V]) {
@@ -377,13 +477,13 @@ class NodeSet {
SUnit *FirstNode = Nodes[0];
SUnit *LastNode = Nodes[Nodes.size() - 1];
- for (auto &PI : LastNode->Preds) {
+ for (auto &PI : DDG->getInEdges(LastNode)) {
// If we have an order dep that is potentially loop carried then a
// back-edge exists between the last node and the first node that isn't
// modeled in the DAG. Handle it manually by adding 1 to the distance of
// the last node.
- if (PI.getSUnit() != FirstNode || PI.getKind() != SDep::Order ||
- !DAG->isLoopCarriedDep(LastNode, PI, false))
+ if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
+ !DAG->isLoopCarriedDep(PI))
continue;
SUnitToDistance[FirstNode] =
std::max(SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1);
@@ -627,11 +727,13 @@ class SMSchedule {
/// Return the cycle of the earliest scheduled instruction in the dependence
/// chain.
- int earliestCycleInChain(const SDep &Dep);
+ int earliestCycleInChain(const SwingSchedulerDDGEdge &Dep,
+ const SwingSchedulerDDG *DDG);
/// Return the cycle of the latest scheduled instruction in the dependence
/// chain.
- int latestCycleInChain(const SDep &Dep);
+ int latestCycleInChain(const SwingSchedulerDDGEdge &Dep,
+ const SwingSchedulerDDG *DDG);
void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, int II,
SwingSchedulerDAG *DAG);
@@ -694,7 +796,7 @@ class SMSchedule {
MachineOperand &MO) const;
bool onlyHasLoopCarriedOutputOrOrderPreds(SUnit *SU,
- SwingSchedulerDAG *DAG) const;
+ const SwingSchedulerDDG *DDG) const;
void print(raw_ostream &os) const;
void dump() const;
};
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index b7d03a10266b0b..b7203358a026fd 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -567,6 +567,7 @@ void SwingSchedulerDAG::schedule() {
Topo.InitDAGTopologicalSorting();
changeDependences();
postProcessDAG();
+ DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU);
LLVM_DEBUG(dump());
NodeSetType NodeSets;
@@ -1583,29 +1584,6 @@ unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
return RecMII;
}
-/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
-/// but we do this to find the circuits, and then change them back.
-static void swapAntiDependences(std::vector<SUnit> &SUnits) {
- SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
- for (SUnit &SU : SUnits) {
- for (SDep &Pred : SU.Preds)
- if (Pred.getKind() == SDep::Anti)
- DepsAdded.push_back(std::make_pair(&SU, Pred));
- }
- for (std::pair<SUnit *, SDep> &P : DepsAdded) {
- // Remove this anti dependency and add one in the reverse direction.
- SUnit *SU = P.first;
- SDep &D = P.second;
- SUnit *TargetSU = D.getSUnit();
- unsigned Reg = D.getReg();
- unsigned Lat = D.getLatency();
- SU->removePred(D);
- SDep Dep(SU, SDep::Anti, Reg);
- Dep.setLatency(Lat);
- TargetSU->addPred(Dep);
- }
-}
-
/// Create the adjacency structure of the nodes in the graph.
void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
SwingSchedulerDAG *DAG) {
@@ -1614,11 +1592,11 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
for (int i = 0, e = SUnits.size(); i != e; ++i) {
Added.reset();
// Add any successor to the adjacency matrix and exclude duplicates.
- for (auto &SI : SUnits[i].Succs) {
+ for (auto &OE : DAG->DDG->getOutEdges(&SUnits[i])) {
// Only create a back-edge on the first and last nodes of a dependence
// chain. This records any chains and adds them later.
- if (SI.getKind() == SDep::Output) {
- int N = SI.getSUnit()->NodeNum;
+ if (OE.isOutputDep()) {
+ int N = OE.getDst()->NodeNum;
int BackEdge = i;
auto Dep = OutputDeps.find(BackEdge);
if (Dep != OutputDeps.end()) {
@@ -1628,11 +1606,15 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
OutputDeps[N] = BackEdge;
}
// Do not process a boundary node, an artificial node.
- // A back-edge is processed only if it goes to a Phi.
- if (SI.getSUnit()->isBoundaryNode() || SI.isArtificial() ||
- (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
+ if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
+ continue;
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on performance
+ if (OE.isAntiDep())
continue;
- int N = SI.getSUnit()->NodeNum;
+
+ int N = OE.getDst()->NodeNum;
if (!Added.test(N)) {
AdjK[i].push_back(N);
Added.set(N);
@@ -1640,12 +1622,13 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
}
// A chain edge between a store and a load is treated as a back-edge in the
// adjacency matrix.
- for (auto &PI : SUnits[i].Preds) {
- if (!SUnits[i].getInstr()->mayStore() ||
- !DAG->isLoopCarriedDep(&SUnits[i], PI, false))
+ for (auto &IE : DAG->DDG->getInEdges(&SUnits[i])) {
+ SUnit *Src = IE.getSrc();
+ SUnit *Dst = IE.getDst();
+ if (!Dst->getInstr()->mayStore() || !DAG->isLoopCarriedDep(IE))
continue;
- if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
- int N = PI.getSUnit()->NodeNum;
+ if (IE.isOrderDep() && Src->getInstr()->mayLoad()) {
+ int N = Src->NodeNum;
if (!Added.test(N)) {
AdjK[i].push_back(N);
Added.set(N);
@@ -1720,10 +1703,6 @@ void SwingSchedulerDAG::Circuits::unblock(int U) {
/// Identify all the elementary circuits in the dependence graph using
/// Johnson's circuit algorithm.
void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
- // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
- // but we do this to find the circuits, and then change them back.
- swapAntiDependences(SUnits);
-
Circuits Cir(SUnits, Topo);
// Create the adjacency structure.
Cir.createAdjacencyStructure(this);
@@ -1731,9 +1710,6 @@ void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
Cir.reset();
Cir.circuit(I, I, NodeSets, this);
}
-
- // Change the dependences back so that we've created a DAG again.
- swapAntiDependences(SUnits);
}
// Create artificial dependencies between the source of COPY/REG_SEQUENCE that
@@ -1816,15 +1792,6 @@ void SwingSchedulerDAG::CopyToPhiMutation::apply(ScheduleDAGInstrs *DAG) {
}
}
-/// Return true for DAG nodes that we ignore when computing the cost functions.
-/// We ignore the back-edge recurrence in order to avoid unbounded recursion
-/// in the calculation of the ASAP, ALAP, etc functions.
-static bool ignoreDependence(const SDep &D, bool isPred) {
- if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
- return true;
- return D.getKind() == SDep::Anti && isPred;
-}
-
/// Compute several functions need to order the nodes for scheduling.
/// ASAP - Earliest time to schedule a node.
/// ALAP - Latest time to schedule a node.
@@ -1847,15 +1814,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
int asap = 0;
int zeroLatencyDepth = 0;
SUnit *SU = &SUnits[I];
- for (const SDep &P : SU->Preds) {
- SUnit *pred = P.getSUnit();
- if (P.getLatency() == 0)
+ for (const auto &IE : DDG->getInEdges(SU)) {
+ SUnit *Pred = IE.getSrc();
+ if (IE.getLatency() == 0)
zeroLatencyDepth =
- std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
- if (ignoreDependence(P, true))
+ std::max(zeroLatencyDepth, getZeroLatencyDepth(Pred) + 1);
+ if (IE.ignoreDependence(true))
continue;
- asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
- getDistance(pred, SU, P) * MII));
+ asap = std::max(asap, (int)(getASAP(Pred) + IE.getLatency() -
+ IE.getDistance() * MII));
}
maxASAP = std::max(maxASAP, asap);
ScheduleInfo[I].ASAP = asap;
@@ -1867,17 +1834,17 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
int alap = maxASAP;
int zeroLatencyHeight = 0;
SUnit *SU = &SUnits[I];
- for (const SDep &S : SU->Succs) {
- SUnit *succ = S.getSUnit();
- if (succ->isBoundaryNode())
+ for (const auto &OE : DDG->getOutEdges(SU)) {
+ SUnit *Succ = OE.getDst();
+ if (Succ->isBoundaryNode())
continue;
- if (S.getLatency() == 0)
+ if (OE.getLatency() == 0)
zeroLatencyHeight =
- std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
- if (ignoreDependence(S, true))
+ std::max(zeroLatencyHeight, getZeroLatencyHeight(Succ) + 1);
+ if (OE.ignoreDependence(true))
continue;
- alap = std::min(alap, (int)(getALAP(succ) - S.getLatency() +
- getDistance(SU, succ, S) * MII));
+ alap = std::min(alap, (int)(getALAP(Succ) - OE.getLatency() +
+ OE.getDistance() * MII));
}
ScheduleInfo[I].ALAP = alap;
@@ -1906,26 +1873,31 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
/// as the predecessors of the elements of NodeOrder that are not also in
/// NodeOrder.
static bool pred_L(SetVector<SUnit *> &NodeOrder,
- SmallSetVector<SUnit *, 8> &Preds,
+ SmallSetVector<SUnit *, 8> &Preds, SwingSchedulerDDG *DDG,
const NodeSet *S = nullptr) {
Preds.clear();
- for (const SUnit *SU : NodeOrder) {
- for (const SDep &Pred : SU->Preds) {
- if (S && S->count(Pred.getSUnit()) == 0)
+
+ for (SUnit *SU : NodeOrder) {
+ for (const auto &IE : DDG->getInEdges(SU)) {
+ SUnit *PredSU = IE.getSrc();
+ if (S && S->count(PredSU) == 0)
continue;
- if (ignoreDependence(Pred, true))
+ if (IE.ignoreDependence(true))
continue;
- if (NodeOrder.count(Pred.getSUnit()) == 0)
- Preds.insert(Pred.getSUnit());
+ if (NodeOrder.count(PredSU) == 0)
+ Preds.insert(PredSU);
}
- // Back-edges are predecessors with an anti-dependence.
- for (const SDep &Succ : SU->Succs) {
- if (Succ.getKind() != SDep::Anti)
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on performance
+ for (const auto &OE : DDG->getOutEdges(SU)) {
+ SUnit *SuccSU = OE.getDst();
+ if (!OE.isAntiDep())
continue;
- if (S && S->count(Succ.getSUnit()) == 0)
+ if (S && S->count(SuccSU) == 0)
continue;
- if (NodeOrder.count(Succ.getSUnit()) == 0)
- Preds.insert(Succ.getSUnit());
+ if (NodeOrder.count(SuccSU) == 0)
+ Preds.insert(SuccSU);
}
}
return !Preds.empty();
@@ -1935,25 +1907,31 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
/// as the successors of the elements of NodeOrder that are not also in
/// NodeOrder.
static bool succ_L(SetVector<SUnit *> &NodeOrder,
- SmallSetVector<SUnit *, 8> &Succs,
+ SmallSetVector<SUnit *, 8> &Succs, SwingSchedulerDDG *DDG,
const NodeSet *S = nullptr) {
Succs.clear();
- for (const SUnit *SU : NodeOrder) {
- for (const SDep &Succ : SU->Succs) {
- if (S && S->count(Succ.getSUnit()) == 0)
+
+ for (SUnit *SU : NodeOrder) {
+ for (const auto &OE : DDG->getOutEdges(SU)) {
+ SUnit *SuccSU = OE.getDst();
+ if (S && S->count(SuccSU) == 0)
continue;
- if (ignoreDependence(Succ, false))
+ if (OE.ignoreDependence(false))
continue;
- if (NodeOrder.count(Succ.getSUnit()) == 0)
- Succs.insert(Succ.getSUnit());
+ if (NodeOrder.count(SuccSU) == 0)
+ Succs.insert(SuccSU);
}
- for (const SDep &Pred : SU->Preds) {
- if (Pred.getKind() != SDep::Anti)
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on performance
+ for (const auto &IE : DDG->getInEdges(SU)) {
+ SUnit *PredSU = IE.getSrc();
+ if (!IE.isAntiDep())
continue;
- if (S && S->count(Pred.getSUnit()) == 0)
+ if (S && S->count(PredSU) == 0)
continue;
- if (NodeOrder.count(Pred.getSUnit()) == 0)
- Succs.insert(Pred.getSUnit());
+ if (NodeOrder.count(PredSU) == 0)
+ Succs.insert(PredSU);
}
}
return !Succs.empty();
@@ -1964,7 +1942,8 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
SetVector<SUnit *> &DestNodes,
SetVector<SUnit *> &Exclude,
- SmallPtrSet<SUnit *, 8> &Visited) {
+ SmallPtrSet<SUnit *, 8> &Visited,
+ SwingSchedulerDDG *DDG) {
if (Cur->isBoundaryNode())
return false;
if (Exclude.contains(Cur))
@@ -1974,14 +1953,14 @@ static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
if (!Visited.insert(Cur).second)
return Path.contains(Cur);
bool FoundPath = false;
- for (auto &SI : Cur->Succs)
- if (!ignoreDependence(SI, false))
+ for (const auto &OE : DDG->getOutEdges(Cur))
+ if (!OE.ignoreDependence(false))
FoundPath |=
- computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
- for (auto &PI : Cur->Preds)
- if (PI.getKind() == SDep::Anti)
+ computePath(OE.getDst(), Path, DestNodes, Exclude, Visited, DDG);
+ for (const auto &IE : DDG->getInEdges(Cur))
+ if (IE.isAntiDep() && IE.getDistance() == 0)
FoundPath |=
- computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ computePath(IE.getSrc(), Path, DestNodes, Exclude, Visited, DDG);
if (FoundPath)
Path.insert(Cur);
return FoundPath;
@@ -2078,14 +2057,14 @@ void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
for (int i = 0, e = NodeSets.size(); i < e; ++i) {
NodeSet &N1 = NodeSets[i];
SmallSetVector<SUnit *, 8> S1;
- if (N1.empty() || !succ_L(N1, S1))
+ if (N1.empty() || !succ_L(N1, S1, DDG.get()))
continue;
for (int j = i + 1; j < e; ++j) {
NodeSet &N2 = NodeSets[j];
if (N1.compareRecMII(N2) != 0)
continue;
SmallSetVector<SUnit *, 8> S2;
- if (N2.empty() || !succ_L(N2, S2))
+ if (N2.empty() || !succ_L(N2, S2, DDG.get()))
continue;
if (llvm::set_is_subset(S1, S2) && S1.size() == S2.size()) {
N1.setColocate(++Colocate);
@@ -2126,22 +2105,22 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
for (NodeSet &I : NodeSets) {
SmallSetVector<SUnit *, 8> N;
// Add the nodes from the current node set to the previous node set.
- if (succ_L(I, N)) {
+ if (succ_L(I, N, DDG.get())) {
SetVector<SUnit *> Path;
for (SUnit *NI : N) {
Visited.clear();
- computePath(NI, Path, NodesAdded, I, Visited);
+ computePath(NI, Path, NodesAdded, I, Visited, DDG.get());
}
if (!Path.empty())
I.insert(Path.begin(), Path.end());
}
// Add the nodes from the previous node set to the current node set.
N.clear();
- if (succ_L(NodesAdded, N)) {
+ if (succ_L(NodesAdded, N, DDG.get())) {
SetVector<SUnit *> Path;
for (SUnit *NI : N) {
Visited.clear();
- computePath(NI, Path, I, NodesAdded, Visited);
+ computePath(NI, Path, I, NodesAdded, Visited, DDG.get());
}
if (!Path.empty())
I.insert(Path.begin(), Path.end());
@@ -2153,7 +2132,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
// in a recurrent set.
NodeSet NewSet;
SmallSetVector<SUnit *, 8> N;
- if (succ_L(NodesAdded, N))
+ if (succ_L(NodesAdded, N, DDG.get()))
for (SUnit *I : N)
addConnectedNodes(I, NewSet, NodesAdded);
if (!NewSet.empty())
@@ -2162,7 +2141,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
// Create a new node set with the connected nodes of any predecessor of a node
// in a recurrent set.
NewSet.clear();
- if (pred_L(NodesAdded, N))
+ if (pred_L(NodesAdded, N, DDG.get()))
for (SUnit *I : N)
addConnectedNodes(I, NewSet, NodesAdded);
if (!NewSet.empty())
@@ -2185,15 +2164,15 @@ void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
SetVector<SUnit *> &NodesAdded) {
NewSet.insert(SU);
NodesAdded.insert(SU);
- for (auto &SI : SU->Succs) {
- SUnit *Successor = SI.getSUnit();
- if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
+ for (auto &OE : DDG->getOutEdges(SU)) {
+ SUnit *Successor = OE.getDst();
+ if (!OE.isArtificial() && !Successor->isBoundaryNode() &&
NodesAdded.count(Successor) == 0)
addConnectedNodes(Successor, NewSet, NodesAdded);
}
- for (auto &PI : SU->Preds) {
- SUnit *Predecessor = PI.getSUnit();
- if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
+ for (auto &IE : DDG->getInEdges(SU)) {
+ SUnit *Predecessor = IE.getSrc();
+ if (!IE.isArtificial() && NodesAdded.count(Predecessor) == 0)
addConnectedNodes(Predecessor, NewSet, NodesAdded);
}
}
@@ -2259,11 +2238,12 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
LLVM_DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
OrderKind Order;
SmallSetVector<SUnit *, 8> N;
- if (pred_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
+ if (pred_L(NodeOrder, N, DDG.get()) && llvm::set_is_subset(N, Nodes)) {
R.insert(N.begin(), N.end());
Order = BottomUp;
LLVM_DEBUG(dbgs() << " Bottom up (preds) ");
- } else if (succ_L(NodeOrder, N) && llvm::set_is_subset(N, Nodes)) {
+ } else if (succ_L(NodeOrder, N, DDG.get()) &&
+ llvm::set_is_subset(N, Nodes)) {
R.insert(N.begin(), N.end());
Order = TopDown;
LLVM_DEBUG(dbgs() << " Top down (succs) ");
@@ -2313,30 +2293,35 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
NodeOrder.insert(maxHeight);
LLVM_DEBUG(dbgs() << maxHeight->NodeNum << " ");
R.remove(maxHeight);
- for (const auto &I : maxHeight->Succs) {
- if (Nodes.count(I.getSUnit()) == 0)
+ for (const auto &OE : DDG->getOutEdges(maxHeight)) {
+ SUnit *SU = OE.getDst();
+ if (Nodes.count(SU) == 0)
continue;
- if (NodeOrder.contains(I.getSUnit()))
+ if (NodeOrder.contains(SU))
continue;
- if (ignoreDependence(I, false))
+ if (OE.ignoreDependence(false))
continue;
- R.insert(I.getSUnit());
+ R.insert(SU);
}
- // Back-edges are predecessors with an anti-dependence.
- for (const auto &I : maxHeight->Preds) {
- if (I.getKind() != SDep::Anti)
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on
+ // performance
+ for (const auto &IE : DDG->getInEdges(maxHeight)) {
+ SUnit *SU = IE.getSrc();
+ if (!IE.isAntiDep())
continue;
- if (Nodes.count(I.getSUnit()) == 0)
+ if (Nodes.count(SU) == 0)
continue;
- if (NodeOrder.contains(I.getSUnit()))
+ if (NodeOrder.contains(SU))
continue;
- R.insert(I.getSUnit());
+ R.insert(SU);
}
}
Order = BottomUp;
LLVM_DEBUG(dbgs() << "\n Switching order to bottom up ");
SmallSetVector<SUnit *, 8> N;
- if (pred_L(NodeOrder, N, &Nodes))
+ if (pred_L(NodeOrder, N, DDG.get(), &Nodes))
R.insert(N.begin(), N.end());
} else {
// Choose the node with the maximum depth. If more than one, choose
@@ -2364,28 +2349,33 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
R.insert(Nodes.getNode(0));
break;
}
- for (const auto &I : maxDepth->Preds) {
- if (Nodes.count(I.getSUnit()) == 0)
+ for (const auto &IE : DDG->getInEdges(maxDepth)) {
+ SUnit *SU = IE.getSrc();
+ if (Nodes.count(SU) == 0)
continue;
- if (NodeOrder.contains(I.getSUnit()))
+ if (NodeOrder.contains(SU))
continue;
- R.insert(I.getSUnit());
+ R.insert(SU);
}
- // Back-edges are predecessors with an anti-dependence.
- for (const auto &I : maxDepth->Succs) {
- if (I.getKind() != SDep::Anti)
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on
+ // performance
+ for (const auto &OE : DDG->getOutEdges(maxDepth)) {
+ SUnit *SU = OE.getDst();
+ if (!OE.isAntiDep())
continue;
- if (Nodes.count(I.getSUnit()) == 0)
+ if (Nodes.count(SU) == 0)
continue;
- if (NodeOrder.contains(I.getSUnit()))
+ if (NodeOrder.contains(SU))
continue;
- R.insert(I.getSUnit());
+ R.insert(SU);
}
}
Order = TopDown;
LLVM_DEBUG(dbgs() << "\n Switching order to top down ");
SmallSetVector<SUnit *, 8> N;
- if (succ_L(NodeOrder, N, &Nodes))
+ if (succ_L(NodeOrder, N, DDG.get(), &Nodes))
R.insert(N.begin(), N.end());
}
}
@@ -2458,7 +2448,7 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
// loop-carried output/order dependencies. Empirically, there are also
// cases where scheduling becomes possible with backward search.
if (SU->getInstr()->isPHI() ||
- Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this))
+ Schedule.onlyHasLoopCarriedOutputOrOrderPreds(SU, this->getDDG()))
scheduleFound = Schedule.insert(SU, LateStart, EarlyStart, II);
else
scheduleFound = Schedule.insert(SU, EarlyStart, LateStart, II);
@@ -2678,22 +2668,20 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
/// Return true for an order or output dependence that is loop carried
/// potentially. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
-bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
- bool isSucc) const {
- if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
- Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
+bool SwingSchedulerDAG::isLoopCarriedDep(
+ const SwingSchedulerDDGEdge &Edge) const {
+ if ((!Edge.isOrderDep() && !Edge.isOutputDep()) || Edge.isArtificial() ||
+ Edge.getDst()->isBoundaryNode())
return false;
if (!SwpPruneLoopCarried)
return true;
- if (Dep.getKind() == SDep::Output)
+ if (Edge.isOutputDep())
return true;
- MachineInstr *SI = Source->getInstr();
- MachineInstr *DI = Dep.getSUnit()->getInstr();
- if (!isSucc)
- std::swap(SI, DI);
+ MachineInstr *SI = Edge.getSrc()->getInstr();
+ MachineInstr *DI = Edge.getDst()->getInstr();
assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
// Assume ordered loads and stores may have a loop carried dependence.
@@ -2815,46 +2803,48 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
}
// Return the cycle of the earliest scheduled instruction in the chain.
-int SMSchedule::earliestCycleInChain(const SDep &Dep) {
+int SMSchedule::earliestCycleInChain(const SwingSchedulerDDGEdge &Dep,
+ const SwingSchedulerDDG *DDG) {
SmallPtrSet<SUnit *, 8> Visited;
- SmallVector<SDep, 8> Worklist;
+ SmallVector<SwingSchedulerDDGEdge, 8> Worklist;
Worklist.push_back(Dep);
int EarlyCycle = INT_MAX;
while (!Worklist.empty()) {
- const SDep &Cur = Worklist.pop_back_val();
- SUnit *PrevSU = Cur.getSUnit();
+ const SwingSchedulerDDGEdge &Cur = Worklist.pop_back_val();
+ SUnit *PrevSU = Cur.getSrc();
if (Visited.count(PrevSU))
continue;
std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
if (it == InstrToCycle.end())
continue;
EarlyCycle = std::min(EarlyCycle, it->second);
- for (const auto &PI : PrevSU->Preds)
- if (PI.getKind() == SDep::Order || PI.getKind() == SDep::Output)
- Worklist.push_back(PI);
+ for (const auto &IE : DDG->getInEdges(PrevSU))
+ if (IE.isOrderDep() || IE.isOutputDep())
+ Worklist.push_back(IE);
Visited.insert(PrevSU);
}
return EarlyCycle;
}
// Return the cycle of the latest scheduled instruction in the chain.
-int SMSchedule::latestCycleInChain(const SDep &Dep) {
+int SMSchedule::latestCycleInChain(const SwingSchedulerDDGEdge &Dep,
+ const SwingSchedulerDDG *DDG) {
SmallPtrSet<SUnit *, 8> Visited;
- SmallVector<SDep, 8> Worklist;
+ SmallVector<SwingSchedulerDDGEdge, 8> Worklist;
Worklist.push_back(Dep);
int LateCycle = INT_MIN;
while (!Worklist.empty()) {
- const SDep &Cur = Worklist.pop_back_val();
- SUnit *SuccSU = Cur.getSUnit();
+ const SwingSchedulerDDGEdge &Cur = Worklist.pop_back_val();
+ SUnit *SuccSU = Cur.getDst();
if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
continue;
std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
if (it == InstrToCycle.end())
continue;
LateCycle = std::max(LateCycle, it->second);
- for (const auto &SI : SuccSU->Succs)
- if (SI.getKind() == SDep::Order || SI.getKind() == SDep::Output)
- Worklist.push_back(SI);
+ for (const auto &OE : DDG->getOutEdges(SuccSU))
+ if (OE.isOrderDep() || OE.isOutputDep())
+ Worklist.push_back(OE);
Visited.insert(SuccSU);
}
return LateCycle;
@@ -2865,7 +2855,7 @@ int SMSchedule::latestCycleInChain(const SDep &Dep) {
/// to a Phi, which contains a reference to another Phi.
static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
for (auto &P : SU->Preds)
- if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
+ if (P.getKind() == SDep::Anti && P.getSUnit()->getInstr()->isPHI())
for (auto &S : P.getSUnit()->Succs)
if (S.getKind() == SDep::Data && S.getSUnit()->getInstr()->isPHI())
return P.getSUnit();
@@ -2876,57 +2866,47 @@ static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
/// depends on any predecessor or successor nodes scheduled already.
void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
int II, SwingSchedulerDAG *DAG) {
+ const SwingSchedulerDDG *DDG = DAG->getDDG();
+
// Iterate over each instruction that has been scheduled already. The start
// slot computation depends on whether the previously scheduled instruction
// is a predecessor or successor of the specified instruction.
for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
-
- // Iterate over each instruction in the current cycle.
for (SUnit *I : getInstructions(cycle)) {
- // Because we're processing a DAG for the dependences, we recognize
- // the back-edge in recurrences by anti dependences.
- for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
- const SDep &Dep = SU->Preds[i];
- if (Dep.getSUnit() == I) {
- if (!DAG->isBackedge(SU, Dep)) {
- int EarlyStart = cycle + Dep.getLatency() -
- DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
- *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
- if (DAG->isLoopCarriedDep(SU, Dep, false)) {
- int End = earliestCycleInChain(Dep) + (II - 1);
- *MinLateStart = std::min(*MinLateStart, End);
- }
- } else {
- int LateStart = cycle - Dep.getLatency() +
- DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
- *MinLateStart = std::min(*MinLateStart, LateStart);
+ for (const auto &IE : DDG->getInEdges(SU)) {
+ if (IE.getSrc() == I) {
+ // FIXME: Add reverse edge to `DDG` instead of calling
+ // `isLoopCarriedDep`
+ if (DAG->isLoopCarriedDep(IE)) {
+ int End = earliestCycleInChain(IE, DDG) + (II - 1);
+ *MinLateStart = std::min(*MinLateStart, End);
}
+ int EarlyStart = cycle + IE.getLatency() - IE.getDistance() * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
}
+ }
+
+ for (const auto &OE : DDG->getOutEdges(SU)) {
+ if (OE.getDst() == I) {
+ // FIXME: Add reverse edge to `DDG` instead of calling
+ // `isLoopCarriedDep`
+ if (DAG->isLoopCarriedDep(OE)) {
+ int Start = latestCycleInChain(OE, DDG) + 1 - II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
+ }
+ int LateStart = cycle - OE.getLatency() + OE.getDistance() * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ }
+ }
+
+ SUnit *BE = multipleIterations(I, DAG);
+ for (const auto &Dep : SU->Preds) {
// For instruction that requires multiple iterations, make sure that
// the dependent instruction is not scheduled past the definition.
- SUnit *BE = multipleIterations(I, DAG);
if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
!SU->isPred(I))
*MinLateStart = std::min(*MinLateStart, cycle);
}
- for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i) {
- if (SU->Succs[i].getSUnit() == I) {
- const SDep &Dep = SU->Succs[i];
- if (!DAG->isBackedge(SU, Dep)) {
- int LateStart = cycle - Dep.getLatency() +
- DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
- *MinLateStart = std::min(*MinLateStart, LateStart);
- if (DAG->isLoopCarriedDep(SU, Dep)) {
- int Start = latestCycleInChain(Dep) + 1 - II;
- *MaxEarlyStart = std::max(*MaxEarlyStart, Start);
- }
- } else {
- int EarlyStart = cycle + Dep.getLatency() -
- DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
- *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
- }
- }
- }
}
}
}
@@ -2943,6 +2923,7 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
unsigned MoveDef = 0;
unsigned MoveUse = 0;
int StageInst1 = stageScheduled(SU);
+ const SwingSchedulerDDG *DDG = SSD->getDDG();
unsigned Pos = 0;
for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
@@ -3000,10 +2981,10 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
}
// Check for order dependences between instructions. Make sure the source
// is ordered before the destination.
- for (auto &S : SU->Succs) {
- if (S.getSUnit() != *I)
+ for (auto &OE : DDG->getOutEdges(SU)) {
+ if (OE.getDst() != *I)
continue;
- if (S.getKind() == SDep::Order && stageScheduled(*I) == StageInst1) {
+ if (OE.isOrderDep() && stageScheduled(*I) == StageInst1) {
OrderBeforeUse = true;
if (Pos < MoveUse)
MoveUse = Pos;
@@ -3011,18 +2992,17 @@ void SMSchedule::orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU,
// We did not handle HW dependences in previous for loop,
// and we normally set Latency = 0 for Anti/Output deps,
// so may have nodes in same cycle with Anti/Output dependent on HW regs.
- else if ((S.getKind() == SDep::Anti || S.getKind() == SDep::Output) &&
+ else if ((OE.isAntiDep() || OE.isOutputDep()) &&
stageScheduled(*I) == StageInst1) {
OrderBeforeUse = true;
if ((MoveUse == 0) || (Pos < MoveUse))
MoveUse = Pos;
}
}
- for (auto &P : SU->Preds) {
- if (P.getSUnit() != *I)
+ for (auto &IE : DDG->getInEdges(SU)) {
+ if (IE.getSrc() != *I)
continue;
- if ((P.getKind() == SDep::Order || P.getKind() == SDep::Anti ||
- P.getKind() == SDep::Output) &&
+ if ((IE.isAntiDep() || IE.isOutputDep() || IE.isOrderDep()) &&
stageScheduled(*I) == StageInst1) {
OrderAfterDef = true;
MoveDef = Pos;
@@ -3117,12 +3097,9 @@ bool SMSchedule::isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD,
/// Return true if all scheduled predecessors are loop-carried output/order
/// dependencies.
bool SMSchedule::onlyHasLoopCarriedOutputOrOrderPreds(
- SUnit *SU, SwingSchedulerDAG *DAG) const {
- for (const SDep &Pred : SU->Preds)
- if (InstrToCycle.count(Pred.getSUnit()) && !DAG->isBackedge(SU, Pred))
- return false;
- for (const SDep &Succ : SU->Succs)
- if (InstrToCycle.count(Succ.getSUnit()) && DAG->isBackedge(SU, Succ))
+ SUnit *SU, const SwingSchedulerDDG *DDG) const {
+ for (const auto &IE : DDG->getInEdges(SU))
+ if (InstrToCycle.count(IE.getSrc()))
return false;
return true;
}
@@ -3137,18 +3114,21 @@ SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
Worklist.push_back(&SU);
+ const SwingSchedulerDDG *DDG = SSD->getDDG();
while (!Worklist.empty()) {
auto SU = Worklist.pop_back_val();
if (DoNotPipeline.count(SU))
continue;
LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
DoNotPipeline.insert(SU);
- for (auto &Dep : SU->Preds)
- Worklist.push_back(Dep.getSUnit());
- if (SU->getInstr()->isPHI())
- for (auto &Dep : SU->Succs)
- if (Dep.getKind() == SDep::Anti)
- Worklist.push_back(Dep.getSUnit());
+ for (const auto &IE : DDG->getInEdges(SU))
+ Worklist.push_back(IE.getSrc());
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on performance
+ for (const auto &OE : DDG->getOutEdges(SU))
+ if (OE.getDistance() == 1)
+ Worklist.push_back(OE.getDst());
}
return DoNotPipeline;
}
@@ -3170,8 +3150,15 @@ bool SMSchedule::normalizeNonPipelinedInstructions(
// Put the non-pipelined instruction as early as possible in the schedule
int NewCycle = getFirstCycle();
- for (auto &Dep : SU.Preds)
- NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
+ for (const auto &IE : SSD->getDDG()->getInEdges(&SU))
+ if (IE.getDistance() == 0)
+ NewCycle = std::max(InstrToCycle[IE.getSrc()], NewCycle);
+
+ // To preserve previous behavior and prevent regression
+ // FIXME: Remove if this doesn't have significant impact on performance
+ for (auto &OE : SSD->getDDG()->getOutEdges(&SU))
+ if (OE.getDistance() == 1)
+ NewCycle = std::max(InstrToCycle[OE.getDst()], NewCycle);
int OldCycle = InstrToCycle[&SU];
if (OldCycle != NewCycle) {
@@ -3204,14 +3191,16 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
int StageDef = stageScheduled(&SU);
int CycleDef = InstrToCycle[&SU];
assert(StageDef != -1 && "Instruction should have been scheduled.");
- for (auto &SI : SU.Succs)
- if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
- if (Register::isPhysicalRegister(SI.getReg())) {
- if (stageScheduled(SI.getSUnit()) != StageDef)
+ for (auto &OE : SSD->getDDG()->getOutEdges(&SU)) {
+ SUnit *Dst = OE.getDst();
+ if (OE.isAssignedRegDep() && !Dst->isBoundaryNode())
+ if (Register::isPhysicalRegister(OE.getReg())) {
+ if (stageScheduled(Dst) != StageDef)
return false;
- if (InstrToCycle[SI.getSUnit()] <= CycleDef)
+ if (InstrToCycle[Dst] <= CycleDef)
return false;
}
+ }
}
return true;
}
@@ -3223,7 +3212,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
/// The method below checks whether the property is met.
/// If not, debug information is printed and statistics information updated.
/// Note that we do not use an assert statement.
-/// The reason is that although an invalid node oder may prevent
+/// The reason is that although an invalid node order may prevent
/// the pipeliner from finding a pipelined schedule for arbitrary II,
/// it does not lead to the generation of incorrect code.
void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
@@ -3261,8 +3250,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
(void)Succ;
(void)Pred;
- for (SDep &PredEdge : SU->Preds) {
- SUnit *PredSU = PredEdge.getSUnit();
+ for (const auto &IE : DDG->getInEdges(SU)) {
+ SUnit *PredSU = IE.getSrc();
unsigned PredIndex = std::get<1>(
*llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
@@ -3272,8 +3261,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
}
}
- for (SDep &SuccEdge : SU->Succs) {
- SUnit *SuccSU = SuccEdge.getSUnit();
+ for (const auto &OE : DDG->getOutEdges(SU)) {
+ SUnit *SuccSU = OE.getDst();
// Do not process a boundary node, it was not included in NodeOrder,
// hence not in Indices either, call to std::lower_bound() below will
// return Indices.end().
@@ -3750,3 +3739,71 @@ void ResourceManager::init(int II) {
NumScheduledMops.clear();
NumScheduledMops.resize(II);
}
+
+bool SwingSchedulerDDGEdge::ignoreDependence(bool IgnoreAnti) const {
+ if (Pred.isArtificial() || Dst->isBoundaryNode())
+ return true;
+ // Currently, an anti-dependence that is not loop-carried is also ignored.
+ // This behavior is preserved to prevent regression.
+ // FIXME: Remove if this doesn't have significant impact on performance
+ return IgnoreAnti && (Pred.getKind() == SDep::Kind::Anti || Distance != 0);
+}
+
+SwingSchedulerDDG::SwingSchedulerDDGEdges &
+SwingSchedulerDDG::getEdges(const SUnit *SU) {
+ if (SU == EntrySU)
+ return EntrySUEdges;
+ if (SU == ExitSU)
+ return ExitSUEdges;
+ return EdgesVec[SU->NodeNum];
+}
+
+const SwingSchedulerDDG::SwingSchedulerDDGEdges &
+SwingSchedulerDDG::getEdges(const SUnit *SU) const {
+ if (SU == EntrySU)
+ return EntrySUEdges;
+ if (SU == ExitSU)
+ return ExitSUEdges;
+ return EdgesVec[SU->NodeNum];
+}
+
+void SwingSchedulerDDG::addEdge(SUnit *SU, const SwingSchedulerDDGEdge &Edge) {
+ auto &Edges = getEdges(SU);
+ if (Edge.getSrc() == SU)
+ Edges.Succs.push_back(Edge);
+ else
+ Edges.Preds.push_back(Edge);
+}
+
+void SwingSchedulerDDG::initEdges(SUnit *SU) {
+ for (const auto &PI : SU->Preds) {
+ SwingSchedulerDDGEdge Edge(SU, PI, false);
+ addEdge(SU, Edge);
+ }
+
+ for (const auto &SI : SU->Succs) {
+ SwingSchedulerDDGEdge Edge(SU, SI, true);
+ addEdge(SU, Edge);
+ }
+}
+
+SwingSchedulerDDG::SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU,
+ SUnit *ExitSU)
+ : EntrySU(EntrySU), ExitSU(ExitSU) {
+ EdgesVec.resize(SUnits.size());
+
+ initEdges(EntrySU);
+ initEdges(ExitSU);
+ for (auto &SU : SUnits)
+ initEdges(&SU);
+}
+
+const SwingSchedulerDDG::EdgesType &
+SwingSchedulerDDG::getInEdges(const SUnit *SU) const {
+ return getEdges(SU).Preds;
+}
+
+const SwingSchedulerDDG::EdgesType &
+SwingSchedulerDDG::getOutEdges(const SUnit *SU) const {
+ return getEdges(SU).Succs;
+}
>From f0e162587ea3373d1952d1a31565963477dee46b Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 29 Oct 2024 17:56:59 +0900
Subject: [PATCH 2/5] fixup! [MachinePipeliner] Add an abstract layer to
manipulate Data Dependence Graph
---
llvm/lib/CodeGen/MachinePipeliner.cpp | 34 +++++++++++++++++----------
1 file changed, 22 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index b7203358a026fd..b048a117c34b60 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1609,8 +1609,12 @@ void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
if (OE.getDst()->isBoundaryNode() || OE.isArtificial())
continue;
- // To preserve previous behavior and prevent regression
- // FIXME: Remove if this doesn't have significant impact on performance
+ // This code is retained to preserve previous behavior and prevent
+ // regression. This condition means that anti-dependencies within an
+ // iteration are ignored when searching circuits. Therefore it's natural
+ // to consider this dependence as well.
+ // FIXME: Remove this code if it doesn't have significant impact on
+ // performance.
if (OE.isAntiDep())
continue;
@@ -1888,8 +1892,10 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
Preds.insert(PredSU);
}
- // To preserve previous behavior and prevent regression
- // FIXME: Remove if this doesn't have significant impact on performance
+ // FIXME: The following loop-carried dependencies may also need to be
+ // considered.
+ // - Physical register dependnecies (true-dependnece and WAW).
+ // - Memory dependencies.
for (const auto &OE : DDG->getOutEdges(SU)) {
SUnit *SuccSU = OE.getDst();
if (!OE.isAntiDep())
@@ -1922,8 +1928,10 @@ static bool succ_L(SetVector<SUnit *> &NodeOrder,
Succs.insert(SuccSU);
}
- // To preserve previous behavior and prevent regression
- // FIXME: Remove if this doesn't have significant impact on performance
+ // FIXME: The following loop-carried dependencies may also need to be
+ // considered.
+ // - Physical register dependencies (true-dependence and WAW).
+ // - Memory dependencies.
for (const auto &IE : DDG->getInEdges(SU)) {
SUnit *PredSU = IE.getSrc();
if (!IE.isAntiDep())
@@ -2304,9 +2312,10 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
R.insert(SU);
}
- // To preserve previous behavior and prevent regression
- // FIXME: Remove if this doesn't have significant impact on
- // performance
+ // FIXME: The following loop-carried dependencies may also need to be
+ // considered.
+ // - Physical register dependencies (true-dependence and WAW).
+ // - Memory dependencies.
for (const auto &IE : DDG->getInEdges(maxHeight)) {
SUnit *SU = IE.getSrc();
if (!IE.isAntiDep())
@@ -2358,9 +2367,10 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
R.insert(SU);
}
- // To preserve previous behavior and prevent regression
- // FIXME: Remove if this doesn't have significant impact on
- // performance
+ // FIXME: The following loop-carried dependencies may also need to be
+ // considered.
+ // - Physical register dependencies (true-dependence and WAW).
+ // - Memory dependencies.
for (const auto &OE : DDG->getOutEdges(maxDepth)) {
SUnit *SU = OE.getDst();
if (!OE.isAntiDep())
>From c0e17504a7c6abeb39fe5a60990e47a52eb60feb Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 29 Nov 2024 07:17:05 +0000
Subject: [PATCH 3/5] Fix typo
---
llvm/include/llvm/CodeGen/MachinePipeliner.h | 2 +-
llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index ebeb933f8c8508..bce551d0a5f345 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -118,7 +118,7 @@ class MachinePipeliner : public MachineFunctionPass {
bool useWindowScheduler(bool Changed);
};
-/// Represents a dependnece between two instruction.
+/// Represents a dependence between two instructions.
class SwingSchedulerDDGEdge {
SUnit *Dst = nullptr;
SDep Pred;
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index b048a117c34b60..039d15ffea8452 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1894,7 +1894,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
// FIXME: The following loop-carried dependencies may also need to be
// considered.
- // - Physical register dependnecies (true-dependnece and WAW).
+ // - Physical register dependencies (true-dependence and WAW).
// - Memory dependencies.
for (const auto &OE : DDG->getOutEdges(SU)) {
SUnit *SuccSU = OE.getDst();
>From 32feeba4d53420623a2570c73651460e8a5257c4 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 29 Nov 2024 07:20:04 +0000
Subject: [PATCH 4/5] Add const qualifier to an `addEdge` argument
---
llvm/include/llvm/CodeGen/MachinePipeliner.h | 2 +-
llvm/lib/CodeGen/MachinePipeliner.cpp | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index bce551d0a5f345..2f133958a7313f 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -215,7 +215,7 @@ class SwingSchedulerDDG {
SwingSchedulerDDGEdges EntrySUEdges;
SwingSchedulerDDGEdges ExitSUEdges;
- void addEdge(SUnit *SU, const SwingSchedulerDDGEdge &Edge);
+ void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 039d15ffea8452..acd42aa497c6fe 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -3777,7 +3777,8 @@ SwingSchedulerDDG::getEdges(const SUnit *SU) const {
return EdgesVec[SU->NodeNum];
}
-void SwingSchedulerDDG::addEdge(SUnit *SU, const SwingSchedulerDDGEdge &Edge) {
+void SwingSchedulerDDG::addEdge(const SUnit *SU,
+ const SwingSchedulerDDGEdge &Edge) {
auto &Edges = getEdges(SU);
if (Edge.getSrc() == SU)
Edges.Succs.push_back(Edge);
>From f6f6c72322bfef93e21e3b043b4c4fc4f8a13fba Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 18 Dec 2024 17:17:21 +0900
Subject: [PATCH 5/5] Remove unnecessary includes and forward declarations
---
llvm/include/llvm/CodeGen/MachinePipeliner.h | 3 ---
1 file changed, 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 2f133958a7313f..8e47d0cead7571 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -42,7 +42,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -61,8 +60,6 @@ namespace llvm {
class AAResults;
class NodeSet;
class SMSchedule;
-class SwingSchedulerDDGEdge;
-class SwingSchedulerDDG;
extern cl::opt<bool> SwpEnableCopyToPhi;
extern cl::opt<int> SwpForceIssueWidth;
More information about the llvm-commits
mailing list