[llvm-commits] CVS: llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp DependenceAnalyzer.h MSchedGraph.cpp MSchedGraph.h ModuloScheduling.cpp ModuloScheduling.h
Tanya Brethour
tbrethou at cs.uiuc.edu
Thu Apr 21 23:33:05 PDT 2005
Changes in directory llvm/lib/Target/SparcV9/ModuloScheduling:
DependenceAnalyzer.cpp updated: 1.4 -> 1.5
DependenceAnalyzer.h updated: 1.2 -> 1.3
MSchedGraph.cpp updated: 1.18 -> 1.19
MSchedGraph.h updated: 1.11 -> 1.12
ModuloScheduling.cpp updated: 1.47 -> 1.48
ModuloScheduling.h updated: 1.28 -> 1.29
---
Log message:
Updated dependence analyzer. Fixed numerous bugs. Same stage scheduling, etc.
---
Diffs of the changes: (+640 -296)
DependenceAnalyzer.cpp | 329 ++++++++++++++++++++++++++++++++++++++-----------
DependenceAnalyzer.h | 23 +++
MSchedGraph.cpp | 323 +++++++++++++++++++++++++++++-------------------
MSchedGraph.h | 4
ModuloScheduling.cpp | 241 +++++++++++++++++++++--------------
ModuloScheduling.h | 16 +-
6 files changed, 640 insertions(+), 296 deletions(-)
Index: llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp
diff -u llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp:1.4 llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp:1.5
--- llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp:1.4 Thu Apr 21 18:29:16 2005
+++ llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.cpp Fri Apr 22 01:32:48 2005
@@ -1,4 +1,4 @@
-//===-- DependenceAnalyzer.cpp - DependenceAnalyzer ----------------*- C++ -*-===//
+//===-- DependenceAnalyzer.cpp - DependenceAnalyzer ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,94 +16,281 @@
#include "DependenceAnalyzer.h"
#include "llvm/Type.h"
#include "llvm/Support/Debug.h"
-using namespace llvm;
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+using namespace llvm;
-/// Create ModuloSchedulingPass
-///
namespace llvm {
-FunctionPass *createDependenceAnalyzer() {
- return new DependenceAnalyzer();
+
+ /// Create the DependenceAnalyzer pass
+ FunctionPass *createDependenceAnalyzer() {
+ return new DependenceAnalyzer();
+ }
}
+
+Statistic<> NoDeps("depanalyzer-nodeps", "Number of dependences eliminated");
+Statistic<> NumDeps("depanalyzer-deps",
+ "Number of dependences could not eliminate");
+Statistic<> AdvDeps("depanalyzer-advdeps",
+ "Number of dependences using advanced techniques");
+
+bool DependenceAnalyzer::runOnFunction(Function &F) {
+ AA = &getAnalysis<AliasAnalysis>();
+ TD = &getAnalysis<TargetData>();
+ SE = &getAnalysis<ScalarEvolution>();
+
+ return false;
}
- bool DependenceAnalyzer::runOnFunction(Function &F) {
- AA = &getAnalysis<AliasAnalysis>();
- TD = &getAnalysis<TargetData>();
+static RegisterAnalysis<DependenceAnalyzer>X("depanalyzer",
+ "Dependence Analyzer");
+
+// - Get inter and intra dependences between loads and stores
+//
+// Overview of Method:
+// Step 1: Use alias analysis to determine dependencies if values are loop
+// invariant
+// Step 2: If pointers are not GEP, then there is a dependence.
+// Step 3: Compare GEP base pointers with AA. If no alias, no dependence.
+// If may alias, then add a dependence. If must alias, then analyze
+// further (Step 4)
+// Step 4: do advanced analysis
+void DependenceAnalyzer::AnalyzeDeps(Value *val, Value *val2, bool valLoad,
+ bool val2Load,
+ std::vector<Dependence> &deps,
+ BasicBlock *BB,
+ bool srcBeforeDest) {
+
+ bool loopInvariant = true;
+
+ //Check if both are instructions and prove not loop invariant if possible
+ if(Instruction *valInst = dyn_cast<Instruction>(val))
+ if(valInst->getParent() == BB)
+ loopInvariant = false;
+ if(Instruction *val2Inst = dyn_cast<Instruction>(val2))
+ if(val2Inst->getParent() == BB)
+ loopInvariant = false;
+
+
+ //If Loop invariant, let AA decide
+ if(loopInvariant) {
+ if(AA->alias(val, (unsigned)TD->getTypeSize(val->getType()),
+ val2,(unsigned)TD->getTypeSize(val2->getType()))
+ != AliasAnalysis::NoAlias) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ }
+ else
+ ++NoDeps;
+ return;
+ }
+
+ //Otherwise, continue with step 2
+
+ GetElementPtrInst *GP = dyn_cast<GetElementPtrInst>(val);
+ GetElementPtrInst *GP2 = dyn_cast<GetElementPtrInst>(val2);
- return false;
+ //If either one is not a GEP instruction, we cannot do further analysis
+ if(!GP || !GP2) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
}
- static RegisterAnalysis<DependenceAnalyzer>X("depanalyzer", "Dependence Analyzer");
- DependenceResult DependenceAnalyzer::getDependenceInfo(Instruction *inst1, Instruction *inst2) {
- std::vector<Dependence> deps;
+ //Otherwise, compare GEP bases (op #0) with Alias Analysis
- DEBUG(std::cerr << "Inst1: " << *inst1 << "\n");
- DEBUG(std::cerr << "Inst2: " << *inst2 << "\n");
+ Value *GPop = GP->getOperand(0);
+ Value *GP2op = GP2->getOperand(0);
+ int alias = AA->alias(GPop, (unsigned)TD->getTypeSize(GPop->getType()),
+ GP2op,(unsigned)TD->getTypeSize(GP2op->getType()));
- if(LoadInst *ldInst = dyn_cast<LoadInst>(inst1)) {
+ if(alias == AliasAnalysis::MustAlias) {
+ //Further dep analysis to do
+ advancedDepAnalysis(GP, GP2, valLoad, val2Load, deps, srcBeforeDest);
+ ++AdvDeps;
+ }
+ else if(alias == AliasAnalysis::MayAlias) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ }
+ //Otherwise no dependence since there is no alias
+ else
+ ++NoDeps;
+}
- if(StoreInst *stInst = dyn_cast<StoreInst>(inst2)) {
- //Get load mem ref
- Value *ldOp = ldInst->getOperand(0);
-
- //Get store mem ref
- Value *stOp = stInst->getOperand(1);
-
- if(AA->alias(ldOp, (unsigned)TD->getTypeSize(ldOp->getType()),
- stOp,(unsigned)TD->getTypeSize(stOp->getType()))
- != AliasAnalysis::NoAlias) {
-
- //Anti Dep
- deps.push_back(Dependence(0, Dependence::AntiDep));
- }
- }
- }
- else if(StoreInst *stInst = dyn_cast<StoreInst>(inst1)) {
+// advancedDepAnalysis - Do advanced data dependence tests
+void DependenceAnalyzer::advancedDepAnalysis(GetElementPtrInst *gp1,
+ GetElementPtrInst *gp2,
+ bool valLoad,
+ bool val2Load,
+ std::vector<Dependence> &deps,
+ bool srcBeforeDest) {
+
+ //Check if both GEPs are in a simple form: 3 ops, constant 0 as second arg
+ if(gp1->getNumOperands() != 3 || gp2->getNumOperands() != 3) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
+ }
+
+ //Check second arg is constant 0
+ bool GPok = false;
+ if(Constant *c1 = dyn_cast<Constant>(gp1->getOperand(1)))
+ if(Constant *c2 = dyn_cast<Constant>(gp2->getOperand(1)))
+ if(c1->isNullValue() && c2->isNullValue())
+ GPok = true;
+
+ if(!GPok) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
- if(LoadInst *ldInst = dyn_cast<LoadInst>(inst2)) {
- //Get load mem ref
- Value *ldOp = ldInst->getOperand(0);
-
- //Get store mem ref
- Value *stOp = stInst->getOperand(1);
-
-
- if(AA->alias(ldOp, (unsigned)TD->getTypeSize(ldOp->getType()),
- stOp,(unsigned)TD->getTypeSize(stOp->getType()))
- != AliasAnalysis::NoAlias) {
-
- //Anti Dep
- deps.push_back(Dependence(0, Dependence::TrueDep));
- }
- }
- else if(StoreInst *stInst2 = dyn_cast<StoreInst>(inst2)) {
-
- //Get load mem ref
- Value *stOp1 = stInst->getOperand(1);
-
- //Get store mem ref
- Value *stOp2 = stInst2->getOperand(1);
-
-
- if(AA->alias(stOp1, (unsigned)TD->getTypeSize(stOp1->getType()),
- stOp2,(unsigned)TD->getTypeSize(stOp2->getType()))
- != AliasAnalysis::NoAlias) {
-
- //Anti Dep
- deps.push_back(Dependence(0, Dependence::OutputDep));
- }
- }
+ }
+ Value *Gep1Idx = gp1->getOperand(2);
+ Value *Gep2Idx = gp2->getOperand(2);
- }
- else
- assert("Expected a load or a store\n");
+ if(CastInst *c1 = dyn_cast<CastInst>(Gep1Idx))
+ Gep1Idx = c1->getOperand(0);
+ if(CastInst *c2 = dyn_cast<CastInst>(Gep2Idx))
+ Gep2Idx = c2->getOperand(0);
+
+ //Get SCEV for each index into the area
+ SCEVHandle SV1 = SE->getSCEV(Gep1Idx);
+ SCEVHandle SV2 = SE->getSCEV(Gep2Idx);
+
+ //Now handle special cases of dependence analysis
+ SV1->print(std::cerr);
+ std::cerr << "\n";
+ SV2->print(std::cerr);
+ std::cerr << "\n";
+
+ //Check if we have an SCEVAddRecExpr, because we can only handle those
+ SCEVAddRecExpr *SVAdd1 = dyn_cast<SCEVAddRecExpr>(SV1);
+ SCEVAddRecExpr *SVAdd2 = dyn_cast<SCEVAddRecExpr>(SV2);
+
+ //Default to having a dependence since we can't analyze further
+ if(!SVAdd1 || !SVAdd2) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
+ }
- DependenceResult dr = DependenceResult(deps);
- return dr;
+ //Check if not Affine, we can't handle those
+ if(!SVAdd1->isAffine( ) || !SVAdd2->isAffine()) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
}
+ //We know the SCEV is in the form A + B*x, check that B is the same for both
+ SCEVConstant *B1 = dyn_cast<SCEVConstant>(SVAdd1->getOperand(1));
+ SCEVConstant *B2 = dyn_cast<SCEVConstant>(SVAdd2->getOperand(1));
+
+ if(B1->getValue() != B2->getValue()) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
+ }
+
+ if(B1->getValue()->getRawValue() != 1 || B2->getValue()->getRawValue() != 1) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
+ }
+
+
+ SCEVConstant *A1 = dyn_cast<SCEVConstant>(SVAdd1->getOperand(0));
+ SCEVConstant *A2 = dyn_cast<SCEVConstant>(SVAdd2->getOperand(0));
+
+ //Come back and deal with nested SCEV!
+ if(!A1 || !A2) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
+ }
+
+ //If equal, create dep as normal
+ if(A1->getValue() == A2->getValue()) {
+ createDep(deps, valLoad, val2Load, srcBeforeDest);
+ return;
+ }
+ //Eliminate a dep if this is a intra dep
+ else if(srcBeforeDest) {
+ ++NoDeps;
+ return;
+ }
+
+ //Find constant index difference
+ int diff = A1->getValue()->getRawValue() - A2->getValue()->getRawValue();
+ std::cerr << diff << "\n";
+
+ if(diff > 0)
+ createDep(deps, valLoad, val2Load, srcBeforeDest, diff);
+
+ //assert(diff > 0 && "Expected diff to be greater then 0");
+}
+
+// Create dependences once it is determined that these two instructions
+// reference the same memory
+void DependenceAnalyzer::createDep(std::vector<Dependence> &deps,
+ bool valLoad, bool val2Load,
+ bool srcBeforeDest, int diff) {
+
+ //If the source instruction occurs after the destination instruction
+ //(execution order), then this dependence is across iterations
+ if(!srcBeforeDest && (diff==0))
+ diff = 1;
+
+ //If load/store pair
+ if(valLoad && !val2Load) {
+ //Anti Dep
+ deps.push_back(Dependence(diff, Dependence::AntiDep));
+ ++NumDeps;
+ }
+ //If store/load pair
+ else if(!valLoad && val2Load) {
+ //True Dep
+ deps.push_back(Dependence(diff, Dependence::TrueDep));
+ ++NumDeps;
+ }
+ //If store/store pair
+ else if(!valLoad && !val2Load) {
+ //Output Dep
+ deps.push_back(Dependence(diff, Dependence::OutputDep));
+ ++NumDeps;
+ }
+}
+
+
+
+//Get Dependence Info for a pair of Instructions
+DependenceResult DependenceAnalyzer::getDependenceInfo(Instruction *inst1,
+ Instruction *inst2,
+ bool srcBeforeDest) {
+ std::vector<Dependence> deps;
+
+ DEBUG(std::cerr << "Inst1: " << *inst1 << "\n");
+ DEBUG(std::cerr << "Inst2: " << *inst2 << "\n");
+
+ //No self deps
+ if(inst1 == inst2)
+ return DependenceResult(deps);
+
+ if(LoadInst *ldInst = dyn_cast<LoadInst>(inst1)) {
+
+ if(StoreInst *stInst = dyn_cast<StoreInst>(inst2))
+ AnalyzeDeps(ldInst->getOperand(0), stInst->getOperand(1),
+ true, false, deps, ldInst->getParent(), srcBeforeDest);
+ }
+ else if(StoreInst *stInst = dyn_cast<StoreInst>(inst1)) {
+
+ if(LoadInst *ldInst = dyn_cast<LoadInst>(inst2))
+ AnalyzeDeps(stInst->getOperand(1), ldInst->getOperand(0), false, true,
+ deps, ldInst->getParent(), srcBeforeDest);
+
+ else if(StoreInst *stInst2 = dyn_cast<StoreInst>(inst2))
+ AnalyzeDeps(stInst->getOperand(1), stInst2->getOperand(1), false, false,
+ deps, stInst->getParent(), srcBeforeDest);
+ }
+ else
+ assert(0 && "Expected a load or a store\n");
+
+ DependenceResult dr = DependenceResult(deps);
+ return dr;
+}
+
Index: llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h
diff -u llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h:1.2 llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h:1.3
--- llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h:1.2 Thu Apr 21 18:29:16 2005
+++ llvm/lib/Target/SparcV9/ModuloScheduling/DependenceAnalyzer.h Fri Apr 22 01:32:48 2005
@@ -17,10 +17,12 @@
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Target/TargetData.h"
#include <vector>
namespace llvm {
+
//class to represent a dependence
struct Dependence {
@@ -47,11 +49,25 @@
class DependenceAnalyzer : public FunctionPass {
+
+
AliasAnalysis *AA;
TargetData *TD;
+ ScalarEvolution *SE;
+
+ void advancedDepAnalysis(GetElementPtrInst *gp1, GetElementPtrInst *gp2,
+ bool valLoad, bool val2Load,
+ std::vector<Dependence> &deps, bool srcBeforeDest);
+
+ void AnalyzeDeps(Value *val, Value *val2, bool val1Load, bool val2Load,
+ std::vector<Dependence> &deps, BasicBlock *BB,
+ bool srcBeforeDest);
+
+ void createDep(std::vector<Dependence> &deps, bool valLoad, bool val2Load,
+ bool srcBeforeDest, int diff = 0);
public:
- DependenceAnalyzer() { AA = 0; TD = 0; }
+ DependenceAnalyzer() { AA = 0; TD = 0; SE = 0; }
virtual bool runOnFunction(Function &F);
virtual const char* getPassName() const { return "DependenceAnalyzer"; }
@@ -59,10 +75,13 @@
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<TargetData>();
+ AU.addRequired<ScalarEvolution>();
+ AU.setPreservesAll();
}
//get dependence info
- DependenceResult getDependenceInfo(Instruction *inst1, Instruction *inst2);
+ DependenceResult getDependenceInfo(Instruction *inst1, Instruction *inst2,
+ bool srcBeforeDest);
};
Index: llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp
diff -u llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp:1.18 llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp:1.19
--- llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp:1.18 Thu Apr 21 18:29:16 2005
+++ llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.cpp Fri Apr 22 01:32:48 2005
@@ -19,6 +19,7 @@
#include "../SparcV9RegisterInfo.h"
#include "../MachineCodeForInstruction.h"
#include "llvm/BasicBlock.h"
+#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -33,8 +34,9 @@
//MSchedGraphNode constructor
MSchedGraphNode::MSchedGraphNode(const MachineInstr* inst,
MSchedGraph *graph, unsigned idx,
- unsigned late, bool isBranch)
- : Inst(inst), Parent(graph), index(idx), latency(late), isBranchInstr(isBranch) {
+ unsigned late, bool isBranch)
+ : Inst(inst), Parent(graph), index(idx), latency(late),
+ isBranchInstr(isBranch) {
//Add to the graph
graph->addNode(inst, this);
@@ -73,7 +75,8 @@
//Get the iteration difference for the edge from this node to its successor
unsigned MSchedGraphNode::getIteDiff(MSchedGraphNode *succ) {
- for(std::vector<MSchedGraphEdge>::iterator I = Successors.begin(), E = Successors.end();
+ for(std::vector<MSchedGraphEdge>::iterator I = Successors.begin(),
+ E = Successors.end();
I != E; ++I) {
if(I->getDest() == succ)
return I->getIteDiff();
@@ -86,7 +89,8 @@
//Loop over all the successors of our predecessor
//return the edge the corresponds to this in edge
int count = 0;
- for(MSchedGraphNode::succ_iterator I = pred->succ_begin(), E = pred->succ_end();
+ for(MSchedGraphNode::succ_iterator I = pred->succ_begin(),
+ E = pred->succ_end();
I != E; ++I) {
if(*I == this)
return count;
@@ -106,7 +110,8 @@
//Dtermine if pred is a predecessor of this node
bool MSchedGraphNode::isPredecessor(MSchedGraphNode *pred) {
- if(std::find( Predecessors.begin(), Predecessors.end(), pred) != Predecessors.end())
+ if(std::find( Predecessors.begin(), Predecessors.end(),
+ pred) != Predecessors.end())
return true;
else
return false;
@@ -138,13 +143,16 @@
}
-//Create a graph for a machine block. The ignoreInstrs map is so that we ignore instructions
-//associated to the index variable since this is a special case in Modulo Scheduling.
-//We only want to deal with the body of the loop.
-MSchedGraph::MSchedGraph(const MachineBasicBlock *bb, const TargetMachine &targ,
- std::map<const MachineInstr*, unsigned> &ignoreInstrs,
- DependenceAnalyzer &DA, std::map<MachineInstr*, Instruction*> &machineTollvm
- )
+
+//Create a graph for a machine block. The ignoreInstrs map is so that
+//we ignore instructions associated to the index variable since this
+//is a special case in Modulo Scheduling. We only want to deal with
+//the body of the loop.
+MSchedGraph::MSchedGraph(const MachineBasicBlock *bb,
+ const TargetMachine &targ,
+ std::map<const MachineInstr*, unsigned> &ignoreInstrs,
+ DependenceAnalyzer &DA,
+ std::map<MachineInstr*, Instruction*> &machineTollvm)
: BB(bb), Target(targ) {
//Make sure BB is not null,
@@ -160,13 +168,15 @@
}
//Copies the graph and keeps a map from old to new nodes
-MSchedGraph::MSchedGraph(const MSchedGraph &G, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes)
+MSchedGraph::MSchedGraph(const MSchedGraph &G,
+ std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes)
: BB(G.BB), Target(G.Target) {
std::map<MSchedGraphNode*, MSchedGraphNode*> oldToNew;
//Copy all nodes
- for(MSchedGraph::const_iterator N = G.GraphMap.begin(), NE = G.GraphMap.end();
- N != NE; ++N) {
+ for(MSchedGraph::const_iterator N = G.GraphMap.begin(),
+ NE = G.GraphMap.end(); N != NE; ++N) {
+
MSchedGraphNode *newNode = new MSchedGraphNode(*(N->second));
oldToNew[&*(N->second)] = newNode;
newNodes[newNode] = &*(N->second);
@@ -174,7 +184,8 @@
}
//Loop over nodes and update edges to point to new nodes
- for(MSchedGraph::iterator N = GraphMap.begin(), NE = GraphMap.end(); N != NE; ++N) {
+ for(MSchedGraph::iterator N = GraphMap.begin(), NE = GraphMap.end();
+ N != NE; ++N) {
//Get the node we are dealing with
MSchedGraphNode *node = &*(N->second);
@@ -196,15 +207,49 @@
//Deconstructor, deletes all nodes in the graph
MSchedGraph::~MSchedGraph () {
- for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I)
+ for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end();
+ I != E; ++I)
delete I->second;
}
+//Print out graph
+void MSchedGraph::print(std::ostream &os) const {
+ for(MSchedGraph::const_iterator N = GraphMap.begin(), NE = GraphMap.end();
+ N != NE; ++N) {
+
+ //Get the node we are dealing with
+ MSchedGraphNode *node = &*(N->second);
+
+ os << "Node Start\n";
+ node->print(os);
+ os << "Successors:\n";
+ //print successors
+ for(unsigned i = 0; i < node->succ_size(); ++i) {
+ MSchedGraphEdge *edge = node->getSuccessor(i);
+ MSchedGraphNode *oldDest = edge->getDest();
+ oldDest->print(os);
+ }
+ os << "Node End\n";
+ }
+}
+
+//Calculate total delay
+int MSchedGraph::totalDelay() {
+ int sum = 0;
+ for(MSchedGraph::const_iterator N = GraphMap.begin(), NE = GraphMap.end();
+ N != NE; ++N) {
+
+ //Get the node we are dealing with
+ MSchedGraphNode *node = &*(N->second);
+ sum += node->getLatency();
+ }
+ return sum;
+}
//Experimental code to add edges from the branch to all nodes dependent upon it.
-void hasPath(MSchedGraphNode *node, std::set<MSchedGraphNode*> &visited,
- std::set<MSchedGraphNode*> &branches, MSchedGraphNode *startNode,
- std::set<std::pair<MSchedGraphNode*,MSchedGraphNode*> > &newEdges ) {
+void hasPath(MSchedGraphNode *node, std::set<MSchedGraphNode*> &visited,
+ std::set<MSchedGraphNode*> &branches, MSchedGraphNode *startNode,
+ std::set<std::pair<MSchedGraphNode*,MSchedGraphNode*> > &newEdges ) {
visited.insert(node);
DEBUG(std::cerr << "Visiting: " << *node << "\n");
@@ -229,7 +274,8 @@
std::set<MSchedGraphNode*> branches;
std::set<MSchedGraphNode*> nodes;
- for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I) {
+ for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end();
+ I != E; ++I) {
if(I->second->isBranch())
if(I->second->hasPredecessors())
branches.insert(I->second);
@@ -238,7 +284,8 @@
//See if there is a path first instruction to the branches, if so, add an
//iteration dependence between that node and the branch
std::set<std::pair<MSchedGraphNode*, MSchedGraphNode*> > newEdges;
- for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end(); I != E; ++I) {
+ for(MSchedGraph::iterator I = GraphMap.begin(), E = GraphMap.end();
+ I != E; ++I) {
std::set<MSchedGraphNode*> visited;
hasPath((I->second), visited, branches, (I->second), newEdges);
}
@@ -275,7 +322,8 @@
//Add edges between the nodes
void MSchedGraph::buildNodesAndEdges(std::map<const MachineInstr*, unsigned> &ignoreInstrs,
DependenceAnalyzer &DA,
- std::map<MachineInstr*, Instruction*> &machineTollvm) {
+ std::map<MachineInstr*, Instruction*> &machineTollvm) {
+
//Get Machine target information for calculating latency
const TargetInstrInfo *MTI = Target.getInstrInfo();
@@ -289,7 +337,8 @@
unsigned index = 0;
//Loop over instructions in MBB and add nodes and edges
- for (MachineBasicBlock::const_iterator MI = BB->begin(), e = BB->end(); MI != e; ++MI) {
+ for (MachineBasicBlock::const_iterator MI = BB->begin(), e = BB->end();
+ MI != e; ++MI) {
//Ignore indvar instructions
if(ignoreInstrs.count(MI)) {
@@ -329,11 +378,13 @@
isBranch = true;
//Node is created and added to the graph automatically
- MSchedGraphNode *node = new MSchedGraphNode(MI, this, index, delay, isBranch);
+ MSchedGraphNode *node = new MSchedGraphNode(MI, this, index, delay,
+ isBranch);
DEBUG(std::cerr << "Created Node: " << *node << "\n");
- //Check OpCode to keep track of memory operations to add memory dependencies later.
+ //Check OpCode to keep track of memory operations to add memory
+ //dependencies later.
if(MTI->isLoad(opCode) || MTI->isStore(opCode))
memInstructions.push_back(node);
@@ -359,7 +410,8 @@
//Add virtual registers dependencies
//Check if any exist in the value map already and create dependencies
//between them.
- if(mOp.getType() == MachineOperand::MO_VirtualRegister || mOp.getType() == MachineOperand::MO_CCRegister) {
+ if(mOp.getType() == MachineOperand::MO_VirtualRegister
+ || mOp.getType() == MachineOperand::MO_CCRegister) {
//Make sure virtual register value is not null
assert((mOp.getVRegValue() != NULL) && "Null value is defined");
@@ -395,9 +447,11 @@
++index;
}
- //Loop over LLVM BB, examine phi instructions, and add them to our phiInstr list to process
+ //Loop over LLVM BB, examine phi instructions, and add them to our
+ //phiInstr list to process
const BasicBlock *llvm_bb = BB->getBasicBlock();
- for(BasicBlock::const_iterator I = llvm_bb->begin(), E = llvm_bb->end(); I != E; ++I) {
+ for(BasicBlock::const_iterator I = llvm_bb->begin(), E = llvm_bb->end();
+ I != E; ++I) {
if(const PHINode *PN = dyn_cast<PHINode>(I)) {
MachineCodeForInstruction & tempMvec = MachineCodeForInstruction::get(PN);
for (unsigned j = 0; j < tempMvec.size(); j++) {
@@ -414,7 +468,8 @@
addMachRegEdges(regNumtoNodeMap);
//Finally deal with PHI Nodes and Value*
- for(std::vector<const MachineInstr*>::iterator I = phiInstrs.begin(), E = phiInstrs.end(); I != E; ++I) {
+ for(std::vector<const MachineInstr*>::iterator I = phiInstrs.begin(),
+ E = phiInstrs.end(); I != E; ++I) {
//Get Node for this instruction
std::map<const MachineInstr*, MSchedGraphNode*>::iterator X;
@@ -431,7 +486,8 @@
for(unsigned i=0; i < (*I)->getNumOperands(); ++i) {
//Get Operand
const MachineOperand &mOp = (*I)->getOperand(i);
- if((mOp.getType() == MachineOperand::MO_VirtualRegister || mOp.getType() == MachineOperand::MO_CCRegister) && mOp.isUse()) {
+ if((mOp.getType() == MachineOperand::MO_VirtualRegister
+ || mOp.getType() == MachineOperand::MO_CCRegister) && mOp.isUse()) {
//find the value in the map
if (const Value* srcI = mOp.getVRegValue()) {
@@ -444,7 +500,8 @@
//those instructions
//to this one we are processing
if(V != valuetoNodeMap.end()) {
- addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(), phiInstrs, 1);
+ addValueEdges(V->second, node, mOp.isUse(), mOp.isDef(),
+ phiInstrs, 1);
}
}
}
@@ -496,7 +553,8 @@
//Loop over all machine registers in the map, and add dependencies
//between the instructions that use it
typedef std::map<int, std::vector<OpIndexNodePair> > regNodeMap;
- for(regNodeMap::iterator I = regNumtoNodeMap.begin(); I != regNumtoNodeMap.end(); ++I) {
+ for(regNodeMap::iterator I = regNumtoNodeMap.begin();
+ I != regNumtoNodeMap.end(); ++I) {
//Get the register number
int regNum = (*I).first;
@@ -527,24 +585,29 @@
if(Nodes[j].second->getInst()->getOperand(Nodes[j].first).isDef()) {
//Src only uses the register (read)
if(srcIsUse)
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
MSchedGraphEdge::AntiDep);
else if(srcIsUseandDef) {
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
MSchedGraphEdge::AntiDep);
-
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
+
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
MSchedGraphEdge::OutputDep);
}
else
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
MSchedGraphEdge::OutputDep);
}
//Dest node is a read
else {
if(!srcIsUse || srcIsUseandDef)
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
MSchedGraphEdge::TrueDep);
}
@@ -557,25 +620,29 @@
if(Nodes[j].second->getInst()->getOperand(Nodes[j].first).isDef()) {
//Src only uses the register (read)
if(srcIsUse)
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
- MSchedGraphEdge::AntiDep, 1);
-
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
+ MSchedGraphEdge::AntiDep, 1);
else if(srcIsUseandDef) {
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
- MSchedGraphEdge::AntiDep, 1);
-
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
- MSchedGraphEdge::OutputDep, 1);
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
+ MSchedGraphEdge::AntiDep, 1);
+
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
+ MSchedGraphEdge::OutputDep, 1);
}
else
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
- MSchedGraphEdge::OutputDep, 1);
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
+ MSchedGraphEdge::OutputDep, 1);
}
//Dest node is a read
else {
if(!srcIsUse || srcIsUseandDef)
- srcNode->addOutEdge(Nodes[j].second, MSchedGraphEdge::MachineRegister,
- MSchedGraphEdge::TrueDep,1 );
+ srcNode->addOutEdge(Nodes[j].second,
+ MSchedGraphEdge::MachineRegister,
+ MSchedGraphEdge::TrueDep,1 );
}
@@ -589,8 +656,9 @@
//Add edges between all loads and stores
//Can be less strict with alias analysis and data dependence analysis.
-void MSchedGraph::addMemEdges(const std::vector<MSchedGraphNode*>& memInst, DependenceAnalyzer &DA,
- std::map<MachineInstr*, Instruction*> &machineTollvm) {
+void MSchedGraph::addMemEdges(const std::vector<MSchedGraphNode*>& memInst,
+ DependenceAnalyzer &DA,
+ std::map<MachineInstr*, Instruction*> &machineTollvm) {
//Get Target machine instruction info
const TargetInstrInfo *TMI = Target.getInstrInfo();
@@ -603,68 +671,73 @@
//Get the machine opCode to determine type of memory instruction
MachineOpCode srcNodeOpCode = srcInst->getOpcode();
+
+ //Pair this instruction with every other memory instruction, whether it
+ //comes before or after this one in execution order
+ for(unsigned destIndex = 0; destIndex < memInst.size(); ++destIndex) {
- //All instructions after this one in execution order have an iteration delay of 0
- for(unsigned destIndex = srcIndex + 1; destIndex < memInst.size(); ++destIndex) {
+ //No self loops
+ if(destIndex == srcIndex)
+ continue;
MachineInstr *destInst = (MachineInstr*) memInst[destIndex]->getInst();
DEBUG(std::cerr << "MInst1: " << *srcInst << "\n");
- DEBUG(std::cerr << "Inst1: " << *machineTollvm[srcInst] << "\n");
DEBUG(std::cerr << "MInst2: " << *destInst << "\n");
- DEBUG(std::cerr << "Inst2: " << *machineTollvm[destInst] << "\n");
-
- DependenceResult dr = DA.getDependenceInfo(machineTollvm[srcInst], machineTollvm[destInst]);
-
- for(std::vector<Dependence>::iterator d = dr.dependences.begin(), de = dr.dependences.end();
- d != de; ++d) {
- //Add edge from load to store
- memInst[srcIndex]->addOutEdge(memInst[destIndex],
- MSchedGraphEdge::MemoryDep,
- d->getDepType(), d->getIteDiff());
-
- }
-
- }
-
- //All instructions before the src in execution order have an iteration delay of 1
- for(unsigned destIndex = 0; destIndex < srcIndex; ++destIndex) {
-
- MachineInstr *destInst = (MachineInstr*) memInst[destIndex]->getInst();
- bool malias = false;
-
- //source is a Load, so add anti-dependencies (store after load)
- if(TMI->isLoad(srcNodeOpCode)) {
+
+ //Assuming instructions without corresponding llvm instructions
+ //are from constant pools.
+ if (!machineTollvm.count(srcInst) || !machineTollvm.count(destInst))
+ continue;
+
+ bool useDepAnalyzer = true;
- //Get the Value* that we are reading from the load, always the first op
- const MachineOperand &mOp = srcInst->getOperand(0);
- const MachineOperand &mOp2 = destInst->getOperand(0);
-
- if(mOp.hasAllocatedReg())
- if(mOp.getReg() == SparcV9::g0)
+ //Some machine loads and stores are generated by casts, so be
+ //conservative and always add deps
+ Instruction *srcLLVM = machineTollvm[srcInst];
+ Instruction *destLLVM = machineTollvm[destInst];
+ if(!isa<LoadInst>(srcLLVM)
+ && !isa<StoreInst>(srcLLVM)) {
+ if(isa<BinaryOperator>(srcLLVM)) {
+ if(isa<ConstantFP>(srcLLVM->getOperand(0)) || isa<ConstantFP>(srcLLVM->getOperand(1)))
continue;
- else
- malias = true;
- if(mOp2.hasAllocatedReg())
- if(mOp2.getReg() == SparcV9::g0)
+ }
+ useDepAnalyzer = false;
+ }
+ if(!isa<LoadInst>(destLLVM)
+ && !isa<StoreInst>(destLLVM)) {
+ if(isa<BinaryOperator>(destLLVM)) {
+ if(isa<ConstantFP>(destLLVM->getOperand(0)) || isa<ConstantFP>(destLLVM->getOperand(1)))
continue;
- else
- malias = true;
-
- //Only add the edge if we can't verify that they do not alias
- /*if(AA.alias(mOp2.getVRegValue(),
- (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()),
- mOp.getVRegValue(),
- (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType()))
- != AliasAnalysis::NoAlias) {*/
- if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode()))
- memInst[srcIndex]->addOutEdge(memInst[destIndex],
- MSchedGraphEdge::MemoryDep,
- MSchedGraphEdge::AntiDep, 1);
- //}
+ }
+ useDepAnalyzer = false;
}
- if(TMI->isStore(srcNodeOpCode)) {
-
+
+ //Use dep analysis when we have corresponding llvm loads/stores
+ if(useDepAnalyzer) {
+ bool srcBeforeDest = true;
+ if(destIndex < srcIndex)
+ srcBeforeDest = false;
+
+ DependenceResult dr = DA.getDependenceInfo(machineTollvm[srcInst],
+ machineTollvm[destInst],
+ srcBeforeDest);
+
+ for(std::vector<Dependence>::iterator d = dr.dependences.begin(),
+ de = dr.dependences.end(); d != de; ++d) {
+ //Add a memory edge for each dependence that was found
+ memInst[srcIndex]->addOutEdge(memInst[destIndex],
+ MSchedGraphEdge::MemoryDep,
+ d->getDepType(), d->getIteDiff());
+
+ }
+ }
+ //Otherwise, we can not do any further analysis and must make a dependence
+ else {
+
+ //Get the machine opCode to determine type of memory instruction
+ MachineOpCode destNodeOpCode = destInst->getOpcode();
+
//Get the Value* that we are reading from the load, always the first op
const MachineOperand &mOp = srcInst->getOperand(0);
const MachineOperand &mOp2 = destInst->getOperand(0);
@@ -672,33 +745,31 @@
if(mOp.hasAllocatedReg())
if(mOp.getReg() == SparcV9::g0)
continue;
- else
- malias = true;
if(mOp2.hasAllocatedReg())
if(mOp2.getReg() == SparcV9::g0)
continue;
- else
- malias = true;
- //Only add the edge if we can't verify that they do not alias
- /*if(AA.alias(mOp2.getVRegValue(),
- (unsigned)TD.getTypeSize(mOp2.getVRegValue()->getType()),
- mOp.getVRegValue(),
- (unsigned)TD.getTypeSize(mOp.getVRegValue()->getType()))
- != AliasAnalysis::NoAlias) {*/
-
- if(TMI->isStore(memInst[destIndex]->getInst()->getOpcode()))
- memInst[srcIndex]->addOutEdge(memInst[destIndex],
- MSchedGraphEdge::MemoryDep,
- MSchedGraphEdge::OutputDep, 1);
+ DEBUG(std::cerr << "Adding dependence for machine instructions\n");
+ //Load-Store deps
+ if(TMI->isLoad(srcNodeOpCode)) {
+
+ if(TMI->isStore(destNodeOpCode))
+ memInst[srcIndex]->addOutEdge(memInst[destIndex],
+ MSchedGraphEdge::MemoryDep,
+ MSchedGraphEdge::AntiDep, 0);
+ }
+ else if(TMI->isStore(srcNodeOpCode)) {
+ if(TMI->isStore(destNodeOpCode))
+ memInst[srcIndex]->addOutEdge(memInst[destIndex],
+ MSchedGraphEdge::MemoryDep,
+ MSchedGraphEdge::OutputDep, 0);
+
else
- memInst[srcIndex]->addOutEdge(memInst[destIndex],
- MSchedGraphEdge::MemoryDep,
- MSchedGraphEdge::TrueDep, 1);
- //}
+ memInst[srcIndex]->addOutEdge(memInst[destIndex],
+ MSchedGraphEdge::MemoryDep,
+ MSchedGraphEdge::TrueDep, 0);
+ }
}
-
}
-
}
}
Index: llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h
diff -u llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h:1.11 llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h:1.12
--- llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h:1.11 Thu Apr 21 18:29:16 2005
+++ llvm/lib/Target/SparcV9/ModuloScheduling/MSchedGraph.h Fri Apr 22 01:32:48 2005
@@ -258,6 +258,9 @@
//Copy constructor with maps to link old nodes to new nodes
MSchedGraph(const MSchedGraph &G, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes);
+
+ //Print graph
+ void print(std::ostream &os) const;
//Deconstructor!
~MSchedGraph();
@@ -265,6 +268,7 @@
//Add or delete nodes from the Graph
void addNode(const MachineInstr* MI, MSchedGraphNode *node);
void deleteNode(MSchedGraphNode *node);
+ int totalDelay();
//iterators
typedef std::map<const MachineInstr*, MSchedGraphNode*>::iterator iterator;
Index: llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp
diff -u llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp:1.47 llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp:1.48
--- llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp:1.47 Thu Apr 21 18:29:16 2005
+++ llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.cpp Fri Apr 22 01:32:48 2005
@@ -74,12 +74,23 @@
//Graph Traits for printing out the dependence graph
namespace llvm {
+
+ //Loop statistics
Statistic<> ValidLoops("modulosched-validLoops", "Number of candidate loops modulo-scheduled");
- Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
- Statistic<> IncreasedII("modulosched-increasedII", "Number of times we had to increase II");
+ Statistic<> JumboBB("modulosched-jumboBB", "Basic Blocks with more then 100 instructions");
+ Statistic<> LoopsWithCalls("modulosched-loopCalls", "Loops with calls");
+ Statistic<> LoopsWithCondMov("modulosched-loopCondMov", "Loops with conditional moves");
+ Statistic<> InvalidLoops("modulosched-invalidLoops", "Loops with unknown trip counts or loop invariant trip counts");
Statistic<> SingleBBLoops("modulosched-singeBBLoops", "Number of single basic block loops");
+
+ //Scheduling Statistics
+ Statistic<> MSLoops("modulosched-schedLoops", "Number of loops successfully modulo-scheduled");
Statistic<> NoSched("modulosched-noSched", "No schedule");
Statistic<> SameStage("modulosched-sameStage", "Max stage is 0");
+ Statistic<> ResourceConstraint("modulosched-resourceConstraint", "Loops constrained by resources");
+ Statistic<> RecurrenceConstraint("modulosched-recurrenceConstraint", "Loops constrained by recurrences");
+ Statistic<> FinalIISum("modulosched-finalIISum", "Sum of all final II");
+ Statistic<> IISum("modulosched-IISum", "Sum of all theoretical II");
template<>
struct DOTGraphTraits<MSchedGraph*> : public DefaultDOTGraphTraits {
@@ -142,7 +153,7 @@
/// 3) Scheduling
///
bool ModuloSchedulingPass::runOnFunction(Function &F) {
- alarm(300);
+ alarm(100);
bool Changed = false;
int numMS = 0;
@@ -160,9 +171,14 @@
//Iterate over BasicBlocks and put them into our worklist if they are valid
for (MachineFunction::iterator BI = MF.begin(); BI != MF.end(); ++BI)
- if(MachineBBisValid(BI)) {
- Worklist.push_back(&*BI);
- ++ValidLoops;
+ if(MachineBBisValid(BI)) {
+ if(BI->size() < 100) {
+ Worklist.push_back(&*BI);
+ ++ValidLoops;
+ }
+ else
+ ++JumboBB;
+ std::cerr << "BB Size: " << BI->size() << "\n";
}
defaultInst = 0;
@@ -174,6 +190,7 @@
BE = Worklist.end(); BI != BE; ++BI) {
//Print out BB for debugging
+ DEBUG(std::cerr << "BB Size: " << (*BI)->size() << "\n");
DEBUG(std::cerr << "ModuloScheduling BB: \n"; (*BI)->print(std::cerr));
//Print out LLVM BB
@@ -195,6 +212,7 @@
//Write Graph out to file
DEBUG(WriteGraphToFile(std::cerr, F.getName(), MSG));
+ DEBUG(MSG->print(std::cerr));
//Calculate Resource II
int ResMII = calculateResMII(*BI);
@@ -204,11 +222,15 @@
DEBUG(std::cerr << "Number of reccurrences found: " << recurrenceList.size() << "\n");
-
-
-
//Our starting initiation interval is the maximum of RecMII and ResMII
+ if(RecMII < ResMII)
+ ++RecurrenceConstraint;
+ else
+ ++ResourceConstraint;
+
II = std::max(RecMII, ResMII);
+ int mII = II;
+ IISum += mII;
//Print out II, RecMII, and ResMII
DEBUG(std::cerr << "II starts out as " << II << " ( RecMII=" << RecMII << " and ResMII=" << ResMII << ")\n");
@@ -252,7 +274,7 @@
});
//Finally schedule nodes
- bool haveSched = computeSchedule(*BI);
+ bool haveSched = computeSchedule(*BI, MSG);
//Print out final schedule
DEBUG(schedule.print(std::cerr));
@@ -363,9 +385,11 @@
MachineOpCode OC = I->getOpcode();
//Look for calls
- if(TMI->isCall(OC))
+ if(TMI->isCall(OC)) {
+ ++LoopsWithCalls;
return false;
-
+ }
+
//Look for conditional move
if(OC == V9::MOVRZr || OC == V9::MOVRZi || OC == V9::MOVRLEZr || OC == V9::MOVRLEZi
|| OC == V9::MOVRLZr || OC == V9::MOVRLZi || OC == V9::MOVRNZr || OC == V9::MOVRNZi
@@ -373,8 +397,10 @@
|| OC == V9::MOVRGEZi || OC == V9::MOVLEr || OC == V9::MOVLEi || OC == V9::MOVLEUr
|| OC == V9::MOVLEUi || OC == V9::MOVFLEr || OC == V9::MOVFLEi
|| OC == V9::MOVNEr || OC == V9::MOVNEi || OC == V9::MOVNEGr || OC == V9::MOVNEGi
- || OC == V9::MOVFNEr || OC == V9::MOVFNEi)
+ || OC == V9::MOVFNEr || OC == V9::MOVFNEi) {
+ ++LoopsWithCondMov;
return false;
+ }
indexMap[I] = count;
@@ -406,14 +432,19 @@
if(Instruction *I = dyn_cast<Instruction>(cond))
if(I->getParent() == BB) {
- if (!assocIndVar(I, indVar, stack, BB))
+ if (!assocIndVar(I, indVar, stack, BB)) {
+ ++InvalidLoops;
return false;
+ }
}
- else
+ else {
+ ++InvalidLoops;
return false;
- else
+ }
+ else {
+ ++InvalidLoops;
return false;
-
+ }
//The indVar set must be >= 3 instructions for this loop to match (FIX ME!)
if(indVar.size() < 3 )
return false;
@@ -523,7 +554,7 @@
//Loop over resources in each cycle and increments their usage count
for(unsigned i=0; i < resources.size(); ++i)
for(unsigned j=0; j < resources[i].size(); ++j) {
- if( resourceUsageCount.find(resources[i][j]) == resourceUsageCount.end()) {
+ if(!resourceUsageCount.count(resources[i][j])) {
resourceUsageCount[resources[i][j]] = 1;
}
else {
@@ -913,67 +944,8 @@
for(std::set<MSchedGraphNode*>::iterator I = AkV.begin(), E = AkV.end(); I != E; ++I) {
if(*I == s) {
//We have a circuit, so add it to our list
-
- std::vector<MSchedGraphNode*> recc;
- //Dump recurrence for now
- DEBUG(std::cerr << "Starting Recc\n");
-
- int totalDelay = 0;
- int totalDistance = 0;
- MSchedGraphNode *lastN = 0;
- MSchedGraphNode *start = 0;
- MSchedGraphNode *end = 0;
-
- //Loop over recurrence, get delay and distance
- for(std::vector<MSchedGraphNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) {
- totalDelay += (*N)->getLatency();
- if(lastN) {
- int iteDiff = (*N)->getInEdge(lastN).getIteDiff();
- totalDistance += iteDiff;
-
- if(iteDiff > 0) {
- start = lastN;
- end = *N;
- }
- }
- //Get the original node
- lastN = *N;
- recc.push_back(newNodes[*N]);
-
- DEBUG(std::cerr << *lastN << "\n");
- }
-
- //Get the loop edge
- totalDistance += lastN->getIteDiff(*stack.begin());
-
- DEBUG(std::cerr << "End Recc\n");
+ addRecc(stack, newNodes);
f = true;
- CircCount++;
-
- if(start && end) {
- //Insert reccurrence into the list
- DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
- edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
- }
- else {
- //Insert reccurrence into the list
- DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n");
- edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN])));
-
- }
- //Adjust II until we get close to the inequality delay - II*distance <= 0
- int RecMII = II; //Starting value
- int value = totalDelay-(RecMII * totalDistance);
- int lastII = II;
- while(value <= 0) {
-
- lastII = RecMII;
- RecMII--;
- value = totalDelay-(RecMII * totalDistance);
- }
-
- recurrenceList.insert(std::make_pair(lastII, recc));
-
}
else if(!blocked.count(*I)) {
if(circuit(*I, stack, blocked, SCC, s, B, II, newNodes))
@@ -1000,6 +972,70 @@
}
+void ModuloSchedulingPass::addRecc(std::vector<MSchedGraphNode*> &stack, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes) {
+ std::vector<MSchedGraphNode*> recc;
+ //Record the circuit held in 'stack' as a recurrence: total its latency and iteration distance, pick one edge to ignore, and insert it into recurrenceList. Nodes are dumped under DEBUG as they are visited.
+ DEBUG(std::cerr << "Starting Recc\n");
+
+ int totalDelay = 0;
+ int totalDistance = 0;
+ MSchedGraphNode *lastN = 0;
+ MSchedGraphNode *start = 0;
+ MSchedGraphNode *end = 0;
+
+ //Walk the circuit in stack order, summing node latency (delay) and the iteration difference of each edge between consecutive nodes (distance); start/end remember the last such edge whose iteration difference is > 0
+ for(std::vector<MSchedGraphNode*>::iterator N = stack.begin(), NE = stack.end(); N != NE; ++N) {
+ DEBUG(std::cerr << **N << "\n");
+ totalDelay += (*N)->getLatency();
+ if(lastN) {
+ int iteDiff = (*N)->getInEdge(lastN).getIteDiff();
+ totalDistance += iteDiff;
+
+ if(iteDiff > 0) {
+ start = lastN;
+ end = *N;
+ }
+ }
+ //Advance lastN and record the node that newNodes maps *N to (presumably the node in the original graph -- confirm against the caller)
+ lastN = *N;
+ recc.push_back(newNodes[*N]);
+
+
+ }
+
+ //Add the iteration difference of the closing edge from the last node back to the first node on the stack (lastN is dereferenced here, so an empty stack is not handled)
+ totalDistance += lastN->getIteDiff(*stack.begin());
+
+ DEBUG(std::cerr << "End Recc\n");
+ CircCount++;
+
+ if(start && end) {
+ //An edge with iteDiff > 0 was seen inside the circuit: add the last one found to edgesToIgnore
+ DEBUG(std::cerr << "Ignore Edge from!!: " << *start << " to " << *end << "\n");
+ edgesToIgnore.insert(std::make_pair(newNodes[start], (newNodes[end])->getInEdgeNum(newNodes[start])));
+ }
+ else {
+ //Otherwise add the closing edge (last node back to the first node on the stack) to edgesToIgnore
+ DEBUG(std::cerr << "Ignore Edge from: " << *lastN << " to " << **stack.begin() << "\n");
+ edgesToIgnore.insert(std::make_pair(newNodes[lastN], newNodes[(*stack.begin())]->getInEdgeNum(newNodes[lastN])));
+
+ }
+ //Starting at II, keep lowering RecMII while delay - RecMII*distance is negative; lastII ends up as the last value tested for which it was negative (or II if the loop body never ran)
+ int RecMII = II; //Starting value
+ int value = totalDelay-(RecMII * totalDistance);
+ int lastII = II;
+ while(value < 0) {
+
+ lastII = RecMII;
+ RecMII--;
+ value = totalDelay-(RecMII * totalDistance);
+ }
+
+ recurrenceList.insert(std::make_pair(lastII, recc));
+
+}
+
+
void ModuloSchedulingPass::findAllCircuits(MSchedGraph *g, int II) {
CircCount = 0;
@@ -1086,12 +1122,13 @@
if(Vk.size() > 1) {
circuit(s, stack, blocked, Vk, s, B, II, newNodes);
+ //Delete nodes from the graph
//Find all nodes up to s and delete them
std::vector<MSchedGraphNode*> nodesToRemove;
nodesToRemove.push_back(s);
for(MSchedGraph::iterator N = MSG->begin(), NE = MSG->end(); N != NE; ++N) {
if(N->second < s )
- nodesToRemove.push_back(N->second);
+ nodesToRemove.push_back(N->second);
}
for(std::vector<MSchedGraphNode*>::iterator N = nodesToRemove.begin(), NE = nodesToRemove.end(); N != NE; ++N) {
DEBUG(std::cerr << "Deleting Node: " << **N << "\n");
@@ -1100,7 +1137,7 @@
}
else
break;
- }
+ }
DEBUG(std::cerr << "Num Circuits found: " << CircCount << "\n");
}
@@ -1253,17 +1290,21 @@
void ModuloSchedulingPass::computePartialOrder() {
TIME_REGION(X, "calculatePartialOrder");
+
+ DEBUG(std::cerr << "Computing Partial Order\n");
- //Only push BA branches onto the final node order, we put other branches after it
- //FIXME: Should we really be pushing branches on it a specific order instead of relying
- //on BA being there?
- std::vector<MSchedGraphNode*> branches;
+ //Only push BA branches onto the final node order, we put other
+ //branches after it FIXME: Should we really be pushing branches on
+ //it a specific order instead of relying on BA being there?
- //Steps to add a recurrence to the partial order
- // 1) Find reccurrence with the highest RecMII. Add it to the partial order.
- // 2) For each recurrence with decreasing RecMII, add it to the partial order along with
- // any nodes that connect this recurrence to recurrences already in the partial order
- for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::reverse_iterator
+ std::vector<MSchedGraphNode*> branches;
+
+ //Steps to add a recurrence to the partial order 1) Find reccurrence
+ //with the highest RecMII. Add it to the partial order. 2) For each
+ //recurrence with decreasing RecMII, add it to the partial order
+ //along with any nodes that connect this recurrence to recurrences
+ //already in the partial order
+ for(std::set<std::pair<int, std::vector<MSchedGraphNode*> > >::reverse_iterator
I = recurrenceList.rbegin(), E=recurrenceList.rend(); I !=E; ++I) {
std::set<MSchedGraphNode*> new_recurrence;
@@ -1296,6 +1337,10 @@
std::vector<MSchedGraphNode*> path;
std::set<MSchedGraphNode*> nodesToAdd;
+ //Dump recc we are dealing with (minus nodes already in PO)
+ DEBUG(std::cerr << "Recc: ");
+ DEBUG(for(std::set<MSchedGraphNode*>::iterator R = new_recurrence.begin(), RE = new_recurrence.end(); R != RE; ++R) { std::cerr << **R ; });
+
//Add nodes that connect this recurrence to recurrences in the partial path
for(std::set<MSchedGraphNode*>::iterator N = new_recurrence.begin(),
NE = new_recurrence.end(); N != NE; ++N)
@@ -1318,6 +1363,15 @@
partialOrder.push_back(new_recurrence);
+
+ //Dump out partial order
+ DEBUG(for(std::vector<std::set<MSchedGraphNode*> >::iterator I = partialOrder.begin(),
+ E = partialOrder.end(); I !=E; ++I) {
+ std::cerr << "Start set in PO\n";
+ for(std::set<MSchedGraphNode*>::iterator J = I->begin(), JE = I->end(); J != JE; ++J)
+ std::cerr << "PO:" << **J << "\n";
+ });
+
}
}
@@ -1649,7 +1703,7 @@
//return FinalNodeOrder;
}
-bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB) {
+bool ModuloSchedulingPass::computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG) {
TIME_REGION(X, "computeSchedule");
@@ -1657,7 +1711,7 @@
//FIXME: Should be set to max II of the original loop
//Cap II in order to prevent infinite loop
- int capII = 100;
+ int capII = MSG->totalDelay();
while(!success) {
@@ -1768,8 +1822,7 @@
success = scheduleNode(*I, EarlyStart, EarlyStart + II - 1);
if(!success) {
- ++IncreasedII;
- ++II;
+ ++II;
schedule.clear();
break;
}
@@ -1781,11 +1834,11 @@
success = schedule.constructKernel(II, branches, indVarInstrs[BB]);
DEBUG(std::cerr << "Done Constructing Schedule Kernel\n");
if(!success) {
- ++IncreasedII;
++II;
schedule.clear();
}
DEBUG(std::cerr << "Final II: " << II << "\n");
+ FinalIISum += II;
}
if(II >= capII) {
Index: llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h
diff -u llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h:1.28 llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h:1.29
--- llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h:1.28 Thu Apr 21 18:29:16 2005
+++ llvm/lib/Target/SparcV9/ModuloScheduling/ModuloScheduling.h Fri Apr 22 01:32:48 2005
@@ -19,6 +19,8 @@
#include "llvm/Pass.h"
#include "DependenceAnalyzer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include <set>
namespace llvm {
@@ -107,7 +109,9 @@
void unblock(MSchedGraphNode *u, std::set<MSchedGraphNode*> &blocked,
std::map<MSchedGraphNode*, std::set<MSchedGraphNode*> > &B);
- void searchPath(MSchedGraphNode *node,
+ void addRecc(std::vector<MSchedGraphNode*> &stack, std::map<MSchedGraphNode*, MSchedGraphNode*> &newNodes);
+
+ void searchPath(MSchedGraphNode *node,
std::vector<MSchedGraphNode*> &path,
std::set<MSchedGraphNode*> &nodesToAdd);
@@ -117,8 +121,8 @@
void computePartialOrder();
- bool computeSchedule(const MachineBasicBlock *BB);
- bool scheduleNode(MSchedGraphNode *node,
+ bool computeSchedule(const MachineBasicBlock *BB, MSchedGraph *MSG);
+ bool scheduleNode(MSchedGraphNode *node,
int start, int end);
void predIntersect(std::set<MSchedGraphNode*> &CurrentSet, std::set<MSchedGraphNode*> &IntersectResult);
@@ -148,6 +152,12 @@
// getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ /// HACK: This pass does not use LoopInfo or ScalarEvolution itself,
+ /// but both are listed as required so that the pass manager does
+ /// not free them before this pass runs.
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+
AU.addRequired<DependenceAnalyzer>();
}
More information about the llvm-commits
mailing list