[llvm] r259213 - Temporarily revert "[ScheduleDAGInstrs::buildSchedGraph()] Handling of memory dependencies rewritten."
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 29 09:22:43 PST 2016
Author: jonpa
Date: Fri Jan 29 11:22:43 2016
New Revision: 259213
URL: http://llvm.org/viewvc/llvm-project?rev=259213&view=rev
Log:
Temporarily revert "[ScheduleDAGInstrs::buildSchedGraph()] Handling of memory dependencies rewritten."
Some buildbot failures need to be debugged.
Modified:
llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h
llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
Modified: llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h (original)
+++ llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h Fri Jan 29 11:22:43 2016
@@ -27,8 +27,6 @@ class MachineMemOperand;
class raw_ostream;
raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
-class PseudoSourceValue;
-raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
/// Special value supplied for machine level alias analysis. It indicates that
/// a memory access references the functions stack frame (e.g., a spill slot),
@@ -47,8 +45,6 @@ public:
private:
PSVKind Kind;
- friend raw_ostream &llvm::operator<<(raw_ostream &OS,
- const PseudoSourceValue* PSV);
friend class MachineMemOperand; // For printCustom().
Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Fri Jan 29 11:22:43 2016
@@ -396,17 +396,6 @@ namespace llvm {
/// specified node.
bool addPred(const SDep &D, bool Required = true);
- /// addPredBarrier - This adds a barrier edge to SU by calling
- /// addPred(), with latency 0 generally or latency 1 for a store
- /// followed by a load.
- bool addPredBarrier(SUnit *SU) {
- SDep Dep(SU, SDep::Barrier);
- unsigned TrueMemOrderLatency =
- ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0);
- Dep.setLatency(TrueMemOrderLatency);
- return addPred(Dep);
- }
-
/// removePred - This removes the specified edge as a pred of the current
/// node if it exists. It also removes the current node as a successor of
/// the specified node.
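For reference, the removed addPredBarrier() helper encodes a simple latency rule: a barrier edge normally carries latency 0, but a store followed by a load gets latency 1 to approximate store-to-load forwarding. A minimal standalone sketch of that rule (plain C++, not the LLVM API):

#include <iostream>

struct InstrInfo {
  bool MayStore;
  bool MayLoad;
};

// Latency a barrier edge from Pred to its successor Succ would carry:
// 1 only for the store -> load case, 0 otherwise.
unsigned barrierLatency(const InstrInfo &Pred, const InstrInfo &Succ) {
  return (Pred.MayStore && Succ.MayLoad) ? 1 : 0;
}

int main() {
  InstrInfo Store{true, false}, Load{false, true};
  std::cout << barrierLatency(Store, Load) << "\n"; // prints 1
  std::cout << barrierLatency(Load, Store) << "\n"; // prints 0
}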
Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Fri Jan 29 11:22:43 2016
@@ -15,14 +15,12 @@
#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
-#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SparseMultiSet.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include <list>
namespace llvm {
class MachineFrameInfo;
@@ -86,10 +84,6 @@ namespace llvm {
typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
VReg2SUnitOperIdxMultiMap;
- typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
- typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
- UnderlyingObjectsVector;
-
/// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
/// MachineInstrs.
class ScheduleDAGInstrs : public ScheduleDAG {
@@ -155,66 +149,10 @@ namespace llvm {
/// Tracks the last instructions in this region using each virtual register.
VReg2SUnitOperIdxMultiMap CurrentVRegUses;
- AliasAnalysis *AAForDep;
-
- /// Remember a generic side-effecting instruction as we proceed.
- /// No other SU ever gets scheduled around it (except in the special
- /// case of a huge region that gets reduced).
- SUnit *BarrierChain;
-
- public:
-
- /// A list of SUnits, used in Value2SUsMap, during DAG construction.
- /// Note: to gain speed it might be worth investigating an optimized
- /// implementation of this data structure, such as a singly linked list
- /// with a memory pool (SmallVector was tried but slow and SparseSet is not
- /// applicable).
- typedef std::list<SUnit *> SUList;
- protected:
- /// A map from ValueType to SUList, used during DAG construction,
- /// as a means of remembering which SUs depend on which memory
- /// locations.
- class Value2SUsMap;
-
- /// Remove in FIFO order some SUs from huge maps.
- void reduceHugeMemNodeMaps(Value2SUsMap &stores,
- Value2SUsMap &loads, unsigned N);
-
- /// Add a chain edge between SUa and SUb, but only if both AliasAnalysis
- /// and Target fail to deny the dependency.
- void addChainDependency(SUnit *SUa, SUnit *SUb,
- unsigned Latency = 0);
-
- /// Add dependencies as needed from all SUs in list to SU.
- void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) {
- for (auto *su : sus)
- addChainDependency(SU, su, Latency);
- }
-
- /// Add dependencies as needed from all SUs in map, to SU.
- void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
-
- /// Add dependencies as needed to SU, from all SUs mapped to V.
- void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
- ValueType V);
-
- /// Add barrier chain edges from all SUs in map, and then clear
- /// the map. This is equivalent to insertBarrierChain(), but
- /// optimized for the common case where the new BarrierChain (a
- /// global memory object) has a higher NodeNum than all SUs in
- /// map. It is assumed BarrierChain has been set before calling
- /// this.
- void addBarrierChain(Value2SUsMap &map);
-
- /// Insert a barrier chain in a huge region, far below current
- /// SU. Add barrier chain edges from all SUs in map with higher
- /// NodeNums than this new BarrierChain, and remove them from
- /// map. It is assumed BarrierChain has been set before calling
- /// this.
- void insertBarrierChain(Value2SUsMap &map);
-
- /// For an unanalyzable memory access, this Value is used in maps.
- UndefValue *UnknownValue;
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
/// DbgValues - Remember instruction that precedes DBG_VALUE.
/// These are generated by buildSchedGraph but persist so they can be
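The declarations removed above center on Value2SUsMap, a map from an underlying memory value to the list of SUs that access it. A simplified standalone sketch of that bookkeeping (plain C++ stand-ins, not the LLVM types), including the O(1) size counter the real class maintains:

#include <list>
#include <map>
#include <string>

struct SUnitStub { unsigned NodeNum; };

// std::list mirrors the SUList choice above: SmallVector was tried but
// slow, and SparseSet is not applicable.
using SUList = std::list<SUnitStub *>;

class Value2SUsMapSketch {
  std::map<std::string, SUList> Map; // std::string stands in for ValueType
  unsigned NumNodes = 0;             // total SUs across all lists

public:
  void insert(SUnitStub *SU, const std::string &V) {
    Map[V].push_back(SU);
    ++NumNodes;
  }
  // O(1), instead of summing list sizes on every query.
  unsigned size() const { return NumNodes; }
};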
Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Fri Jan 29 11:22:43 2016
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -27,8 +28,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -51,42 +50,12 @@ static cl::opt<bool> EnableAASchedMI("en
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
-// Note: the two options below might be used in tuning compile time vs
-// output quality. Setting HugeRegion so large that it will never be
-// reached means best-effort, but may be slow.
-
-// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
-// together hold this many SUs, a reduction of maps will be done.
-static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
- cl::init(1000), cl::desc("The limit to use while constructing the DAG "
- "prior to scheduling, at which point a trade-off "
- "is made to avoid excessive compile time."));
-
-static cl::opt<unsigned> ReductionSize("dag-maps-reduction-size", cl::Hidden,
- cl::desc("A huge scheduling region will have maps reduced by this many "
- "nodes at a time. Defaults to HugeRegion / 2."));
-
-static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- dbgs() << "{ ";
- for (auto *su : L) {
- dbgs() << "SU(" << su->NodeNum << ")";
- if (su != L.back())
- dbgs() << ", ";
- }
- dbgs() << "}\n";
-#endif
-}
-
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
- TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
- UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction()->getContext()))),
- FirstDbgValue(nullptr) {
+ TrackLaneMasks(false), FirstDbgValue(nullptr) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -152,6 +121,10 @@ static void getUnderlyingObjects(const V
} while (!Working.empty());
}
+typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
+UnderlyingObjectsVector;
+
/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
@@ -571,31 +544,41 @@ static inline bool isUnsafeMemoryObject(
return true;
}
- if ((*MI->memoperands_begin())->getValue() == nullptr)
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
return true;
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(V, Objs, DL);
+ for (Value *V : Objs) {
+ // Does this pointer refer to a distinct and identifiable object?
+ if (!isIdentifiedObject(V))
+ return true;
+ }
+
return false;
}
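The restored check above is a conservative any-fail test: the access is unsafe unless every underlying object of its pointer is distinct and identifiable. A condensed sketch, with hypothetical stand-ins for isIdentifiedObject() and the getUnderlyingObjects() result:

#include <vector>

struct Obj { bool Identified; };

// Returns true if the access must be treated conservatively.
bool isUnsafeAccess(const std::vector<Obj> &UnderlyingObjs) {
  if (UnderlyingObjs.empty())
    return true; // nothing analyzable: be conservative
  for (const Obj &O : UnderlyingObjs)
    if (!O.Identified)
      return true; // any unidentified object makes the access unsafe
  return false;
}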
/// This returns true if the two MIs need a chain edge between them.
-/// This is called on normal stores and loads.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is - could
+/// these two MIs be reordered during scheduling from memory dependency
+/// point of view.
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
const DataLayout &DL, MachineInstr *MIa,
MachineInstr *MIb) {
const MachineFunction *MF = MIa->getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- assert ((MIa->mayStore() || MIb->mayStore()) &&
- "Dependency checked between two loads");
-
- // buildSchedGraph() will clear list of stores if not using AA,
- // which means all stores have to be chained without AA.
- if (!AA && MIa->mayStore() && MIb->mayStore())
- return true;
-
- // Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+  // Cover a trivial case - no edge is needed to itself.
+ if (MIa == MIb)
return false;
+
+ // Let the target decide if memory accesses cannot possibly overlap.
+ if ((MIa->mayLoad() || MIa->mayStore()) &&
+ (MIb->mayLoad() || MIb->mayStore()))
+ if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+ return false;
// FIXME: Need to handle multiple memory operands to support all targets.
if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
@@ -604,6 +587,11 @@ static bool MIsNeedChainEdge(AliasAnalys
if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
return true;
+ // If we are dealing with two "normal" loads, we do not need an edge
+ // between them - they could be reordered.
+ if (!MIa->mayStore() && !MIb->mayStore())
+ return false;
+
// To this point analysis is generic. From here on we do need AA.
if (!AA)
return true;
@@ -646,15 +634,106 @@ static bool MIsNeedChainEdge(AliasAnalys
return (AAResult != NoAlias);
}
-/// Check whether two objects need a chain edge and add it if needed.
-void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
- unsigned Latency) {
- if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
- SUb->getInstr())) {
- SDep Dep(SUa, SDep::MayAliasMem);
- Dep.setLatency(Latency);
+/// This recursive function iterates over chain deps of SUb looking for
+/// "latest" node that needs a chain edge to SUa.
+static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa, SUnit *SUb,
+ SUnit *ExitSU, unsigned *Depth,
+ SmallPtrSetImpl<const SUnit *> &Visited) {
+ if (!SUa || !SUb || SUb == ExitSU)
+ return *Depth;
+
+ // Remember visited nodes.
+ if (!Visited.insert(SUb).second)
+ return *Depth;
+ // If there is _some_ dependency already in place, do not
+ // descend any further.
+ // TODO: Need to make sure that if that dependency got eliminated or ignored
+ // for any reason in the future, we would not violate DAG topology.
+ // Currently it does not happen, but makes an implicit assumption about
+ // future implementation.
+ //
+ // Independently, if we encounter node that is some sort of global
+ // object (like a call) we already have full set of dependencies to it
+ // and we can stop descending.
+ if (SUa->isSucc(SUb) ||
+ isGlobalMemoryObject(AA, SUb->getInstr()))
+ return *Depth;
+
+ // If we do need an edge, or we have exceeded depth budget,
+ // add that edge to the predecessors chain of SUb,
+ // and stop descending.
+ if (*Depth > 200 ||
+ MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
+ SUb->addPred(SDep(SUa, SDep::MayAliasMem));
+ return *Depth;
+ }
+ // Track current depth.
+ (*Depth)++;
+ // Iterate over memory dependencies only.
+ for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+ I != E; ++I)
+ if (I->isNormalMemoryOrBarrier())
+ iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+ return *Depth;
+}
+
+/// This function assumes that the tail/leaf of the already constructed
+/// DAG exists "downward" from SU. It iterates downward and checks
+/// whether SU may alias any node dominated by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
+ std::set<SUnit *> &CheckList,
+ unsigned LatencyToLoad) {
+ if (!SU)
+ return;
+
+ SmallPtrSet<const SUnit*, 16> Visited;
+ unsigned Depth = 0;
+
+ for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+ I != IE; ++I) {
+ if (SU == *I)
+ continue;
+ if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
+ SDep Dep(SU, SDep::MayAliasMem);
+ Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+ (*I)->addPred(Dep);
+ }
+
+ // Iterate recursively over all previously added memory chain
+ // successors. Keep track of visited nodes.
+ for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+ JE = (*I)->Succs.end(); J != JE; ++J)
+ if (J->isNormalMemoryOrBarrier())
+ iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
+ Visited);
+ }
+}
+
+/// Check whether two objects need a chain edge; if so, add it,
+/// otherwise remember the rejected SU.
+static inline void addChainDependency(AliasAnalysis *AA,
+ const MachineFrameInfo *MFI,
+ const DataLayout &DL, SUnit *SUa,
+ SUnit *SUb, std::set<SUnit *> &RejectList,
+ unsigned TrueMemOrderLatency = 0,
+ bool isNormalMemory = false) {
+ // If this is a false dependency,
+ // do not add the edge, but remember the rejected node.
+ if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
+ SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
SUb->addPred(Dep);
}
+ else {
+ // Duplicate entries should be ignored.
+ RejectList.insert(SUb);
+ DEBUG(dbgs() << "\tReject chain dep between SU("
+ << SUa->NodeNum << ") and SU("
+ << SUb->NodeNum << ")\n");
+ }
}
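iterateChainSucc() above restores a depth-limited DFS: walk memory successors, memoize visited nodes, and once the hard-coded budget of 200 is exceeded give up and conservatively add the edge. A stripped-down sketch of that traversal pattern (assumed shape, not the LLVM implementation):

#include <unordered_set>
#include <vector>

struct Node {
  std::vector<Node *> Succs;
  std::vector<Node *> Preds;
};

constexpr unsigned DepthBudget = 200; // mirrors the hard-coded limit above

void visitChain(Node *Src, Node *N, unsigned &Depth,
                std::unordered_set<Node *> &Visited,
                bool (*needsEdge)(Node *, Node *)) {
  if (!N || !Visited.insert(N).second)
    return; // already seen, or end of chain
  if (Depth > DepthBudget || needsEdge(Src, N)) {
    N->Preds.push_back(Src); // conservative chain edge; stop descending
    return;
  }
  ++Depth;
  for (Node *S : N->Succs)
    visitChain(Src, S, Depth, Visited, needsEdge);
}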
/// Create an SUnit for each real instruction, numbered in top-down topological
@@ -753,122 +832,6 @@ void ScheduleDAGInstrs::collectVRegUses(
}
}
-class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
-
- /// Current total number of SUs in map.
- unsigned NumNodes;
-
- /// 1 for loads, 0 for stores. (see comment in SUList)
- unsigned TrueMemOrderLatency;
-public:
-
- Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
-
- /// To keep NumNodes up to date, insert() is used instead of
- /// this operator w/ push_back().
- ValueType &operator[](const SUList &Key) {
- llvm_unreachable("Don't use. Use insert() instead."); };
-
- /// Add SU to the SUList of V. If Map grows huge, reduce its size
- /// by calling reduce().
- void inline insert(SUnit *SU, ValueType V) {
- MapVector::operator[](V).push_back(SU);
- NumNodes++;
- }
-
- /// Clears the list of SUs mapped to V.
- void inline clearList(ValueType V) {
- iterator Itr = find(V);
- if (Itr != end()) {
- assert (NumNodes >= Itr->second.size());
- NumNodes -= Itr->second.size();
-
- Itr->second.clear();
- }
- }
-
- /// Clears map from all contents.
- void clear() {
- MapVector<ValueType, SUList>::clear();
- NumNodes = 0;
- }
-
- unsigned inline size() const { return NumNodes; }
-
- /// Count the number of SUs in this map after a reduction.
- void reComputeSize(void) {
- NumNodes = 0;
- for (auto &I : *this)
- NumNodes += I.second.size();
- }
-
- unsigned inline getTrueMemOrderLatency() const {
- return TrueMemOrderLatency;
- }
-
- void dump();
-};
-
-void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
- Value2SUsMap &Val2SUsMap) {
- for (auto &I : Val2SUsMap)
- addChainDependencies(SU, I.second,
- Val2SUsMap.getTrueMemOrderLatency());
-}
-
-void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
- Value2SUsMap &Val2SUsMap,
- ValueType V) {
- Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
- if (Itr != Val2SUsMap.end())
- addChainDependencies(SU, Itr->second,
- Val2SUsMap.getTrueMemOrderLatency());
-}
-
-void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
-
- for (auto &I : map) {
- SUList &sus = I.second;
- for (auto *SU : sus)
- SU->addPredBarrier(BarrierChain);
- }
- map.clear();
-}
-
-void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
-
- // Go through all lists of SUs.
- for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
- Value2SUsMap::iterator CurrItr = I++;
- SUList &sus = CurrItr->second;
- SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
- for (; SUItr != SUEE; ++SUItr) {
- // Stop on BarrierChain or any instruction above it.
- if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
- break;
-
- (*SUItr)->addPredBarrier(BarrierChain);
- }
-
- // Remove also the BarrierChain from list if present.
- if (*SUItr == BarrierChain)
- SUItr++;
-
- // Remove all SUs that are now successors of BarrierChain.
- if (SUItr != sus.begin())
- sus.erase(sus.begin(), SUItr);
- }
-
- // Remove all entries with empty su lists.
- map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
- return (mapEntry.second.empty()); });
-
- // Recompute the size of the map (NumNodes).
- map.reComputeSize();
-}
-
/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
@@ -880,9 +843,7 @@ void ScheduleDAGInstrs::buildSchedGraph(
const TargetSubtargetInfo &ST = MF.getSubtarget();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
- AAForDep = UseAA ? AA : nullptr;
-
- BarrierChain = nullptr;
+ AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
@@ -894,30 +855,19 @@ void ScheduleDAGInstrs::buildSchedGraph(
if (PDiffs)
PDiffs->init(SUnits.size());
- // We build scheduling units by walking a block's instruction list
- // from bottom to top.
+ // We build scheduling units by walking a block's instruction list from bottom
+ // to top.
+
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
- // Each MIs' memory operand(s) is analyzed to a list of underlying
- // objects. The SU is then inserted in the SUList(s) mapped from
- // that Value(s). Each Value thus gets mapped to a list of SUs
- // depending on it, defs and uses kept separately. Two SUs are
- // non-aliasing to each other if they depend on different Values
- // exclusively.
- Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
-
- // Certain memory accesses are known to not alias any SU in Stores
- // or Loads, and have therefore their own 'NonAlias'
- // domain. E.g. spill / reload instructions never alias LLVM I/R
- // Values. It is assumed that this type of memory accesses always
- // have a proper memory operand modelling, and are therefore never
- // unanalyzable. This means they are non aliasing against all nodes
- // in Stores and Loads, including the unanalyzable ones.
- Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
-
- // Always reduce a huge region with half of the elements, except
- // when user sets this number explicitly.
- if (ReductionSize.getNumOccurrences() == 0)
- ReductionSize = (HugeRegion / 2);
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias
+ MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
+ MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
@@ -1012,123 +962,221 @@ void ScheduleDAGInstrs::buildSchedGraph(
ExitSU.addPred(Dep);
}
- // Add memory dependencies (Note: isStoreToStackSlot and
- // isLoadFromStackSLot are not usable after stack slots are lowered to
- // actual addresses).
-
- // This is a barrier event that acts as a pivotal node in the DAG.
+ // Add chain dependencies.
+ // Chain dependencies used to enforce memory order should have
+ // latency of 0 (except for true dependency of Store followed by
+ // aliased Load... we estimate that with a single cycle of latency
+ // assuming the hardware will bypass)
+  // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
+ // after stack slots are lowered to actual addresses.
+ // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+ // produce more precise dependence information.
+ unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
if (isGlobalMemoryObject(AA, MI)) {
-
- // Become the barrier chain.
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+ I->second[i]->addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+ SDep Dep(SU, SDep::Barrier);
+ Dep.setLatency(TrueMemOrderLatency);
+ I->second[i]->addPred(Dep);
+ }
+ }
+ // Add SU to the barrier chain.
if (BarrierChain)
- BarrierChain->addPredBarrier(SU);
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
BarrierChain = SU;
+ // This is a barrier event that acts as a pivotal node in the DAG,
+ // so it is safe to clear list of exposed nodes.
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ RejectMemNodes.clear();
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references through SU.
+ if (AliasChain) {
+ unsigned ChainLatency = 0;
+ if (AliasChain->getInstr()->mayLoad())
+ ChainLatency = TrueMemOrderLatency;
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+ RejectMemNodes, ChainLatency);
+ }
+ AliasChain = SU;
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
+ }
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, TrueMemOrderLatency);
+ }
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+ // adds to.
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
+ } else if (MI->mayStore()) {
+ // Add dependence on barrier chain, if needed.
+ // There is no point to check aliasing on barrier event. Even if
+ // SU and barrier _could_ be reordered, they should not. In addition,
+ // we have lost all RejectMemNodes below barrier.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
- DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
- << BarrierChain->NodeNum << ").\n";);
-
- // Add dependencies against everything below it and clear maps.
- addBarrierChain(Stores);
- addBarrierChain(Loads);
- addBarrierChain(NonAliasStores);
- addBarrierChain(NonAliasLoads);
-
- continue;
- }
-
- // If it's not a store or a variant load, we're done.
- if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
- continue;
-
- // Always add dependecy edge to BarrierChain if present.
- if (BarrierChain)
- BarrierChain->addPredBarrier(SU);
-
- // Find the underlying objects for MI. The Objs vector is either
- // empty, or filled with the Values of memory locations which this
- // SU depends on. An empty vector means the memory location is
- // unknown, and may alias anything except NonAlias nodes.
- UnderlyingObjectsVector Objs;
- getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+ UnderlyingObjectsVector Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
- if (MI->mayStore()) {
if (Objs.empty()) {
- // An unknown store depends on all stores and loads.
- addChainDependencies(SU, Stores);
- addChainDependencies(SU, NonAliasStores);
- addChainDependencies(SU, Loads);
- addChainDependencies(SU, NonAliasLoads);
-
- // If we're not using AA, clear Stores map since all stores
- // will be chained.
- if (!AAForDep)
- Stores.clear();
-
- // Map this store to 'UnknownValue'.
- Stores.insert(SU, UnknownValue);
- continue;
+ // Treat all other stores conservatively.
+ goto new_alias_chain;
}
- // Add precise dependencies against all previously seen memory
- // accesses mapped to the same Value(s).
- for (auto &underlObj : Objs) {
- ValueType V = underlObj.getPointer();
- bool ThisMayAlias = underlObj.getInt();
-
- Value2SUsMap &stores_ = (ThisMayAlias ? Stores : NonAliasStores);
-
- // Add dependencies to previous stores and loads mapped to V.
- addChainDependencies(SU, stores_, V);
- addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
-
- // If we're not using AA, then we only need one store per object.
- if (!AAForDep)
- stores_.clearList(V);
-
- // Map this store to V.
- stores_.insert(SU, V);
+ bool MayAlias = false;
+ for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
+ K != KE; ++K) {
+ ValueType V = K->getPointer();
+ bool ThisMayAlias = K->getInt();
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A store to a specific PseudoSourceValue. Add precise dependencies.
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
+
+ // If we're not using AA, then we only need one store per object.
+ if (!AAForDep)
+ I->second.clear();
+ I->second.push_back(SU);
+ } else {
+ if (ThisMayAlias) {
+ if (!AAForDep)
+ AliasMemDefs[V].clear();
+ AliasMemDefs[V].push_back(SU);
+ } else {
+ if (!AAForDep)
+ NonAliasMemDefs[V].clear();
+ NonAliasMemDefs[V].push_back(SU);
+ }
+ }
+ // Handle the uses in MemUses, if there are any.
+ MapVector<ValueType, std::vector<SUnit *> >::iterator J =
+ ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
+ ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
+ for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ J->second[i], RejectMemNodes,
+ TrueMemOrderLatency, true);
+ J->second.clear();
+ }
}
- // The store may have dependencies to unanalyzable loads and
- // stores.
- addChainDependencies(SU, Loads, UnknownValue);
- addChainDependencies(SU, Stores, UnknownValue);
- }
- else { // SU is a load.
- if (Objs.empty()) {
- // An unknown load depends on all stores.
- addChainDependencies(SU, Stores);
- addChainDependencies(SU, NonAliasStores);
-
- Loads.insert(SU, UnknownValue);
- continue;
+ if (MayAlias) {
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
+ for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ PendingLoads[k], RejectMemNodes,
+ TrueMemOrderLatency);
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
}
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+ // adds to.
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+ TrueMemOrderLatency);
+ } else if (MI->mayLoad()) {
+ bool MayAlias = true;
+ if (MI->isInvariantLoad(AA)) {
+ // Invariant load, no chain dependencies needed!
+ } else {
+ UnderlyingObjectsVector Objs;
+ getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+
+ if (Objs.empty()) {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ I->second[i], RejectMemNodes);
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
+ } else {
+ MayAlias = false;
+ }
- for (auto &underlObj : Objs) {
- ValueType V = underlObj.getPointer();
- bool ThisMayAlias = underlObj.getInt();
-
- // Add precise dependencies against all previously seen stores
- // mapping to the same Value(s).
- addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
-
- // Map this load to V.
- (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
+ for (UnderlyingObjectsVector::iterator
+ J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+ ValueType V = J->getPointer();
+ bool ThisMayAlias = J->getInt();
+
+ if (ThisMayAlias)
+ MayAlias = true;
+
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+ ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
+ ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+ I->second[i], RejectMemNodes, 0, true);
+ if (ThisMayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ }
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+ RejectMemNodes);
+ if (MayAlias)
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency()
+ // possibly adds to.
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
+ RejectMemNodes, /*Latency=*/0);
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
- // The load may have dependencies to unanalyzable stores.
- addChainDependencies(SU, Stores, UnknownValue);
- }
-
- // Reduce maps if they grow huge.
- if (Stores.size() + Loads.size() >= HugeRegion) {
- DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
- reduceHugeMemNodeMaps(Stores, Loads, ReductionSize);
- }
- if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
- DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
- reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, ReductionSize);
}
}
-
if (DbgMI)
FirstDbgValue = DbgMI;
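At a high level, the restored buildSchedGraph() loop walks the block bottom-up and dispatches each instruction into one of three memory cases. A sketch of that dispatch (assumed shape, with stand-ins for MI->mayStore() etc.):

enum class MemKind { Barrier, Store, Load, Other };

MemKind classify(bool IsBarrier, bool MayStore, bool MayLoad) {
  if (IsBarrier)
    return MemKind::Barrier; // becomes the new BarrierChain
  if (MayStore)
    return MemKind::Store;   // chained against tracked defs and uses
  if (MayLoad)
    return MemKind::Load;    // chained against tracked defs only
  return MemKind::Other;     // no memory chain edges needed
}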
@@ -1136,84 +1184,7 @@ void ScheduleDAGInstrs::buildSchedGraph(
Uses.clear();
CurrentVRegDefs.clear();
CurrentVRegUses.clear();
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
- PSV->printCustom(OS);
- return OS;
-}
-
-void ScheduleDAGInstrs::Value2SUsMap::dump() {
- for (auto &Itr : *this) {
- if (Itr.first.is<const Value*>()) {
- const Value *V = Itr.first.get<const Value*>();
- if (isa<UndefValue>(V))
- dbgs() << "Unknown";
- else
- V->printAsOperand(dbgs());
- }
- else if (Itr.first.is<const PseudoSourceValue*>())
- dbgs() << Itr.first.get<const PseudoSourceValue*>();
- else
- llvm_unreachable("Unknown Value type.");
-
- dbgs() << " : ";
- dumpSUList(Itr.second);
- }
-}
-
-/// Reduce maps in FIFO order, by N SUs. This is better than turning
-/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
-/// it avoids unnecessary edges between seen SUs above the new
-/// BarrierChain, and those below it.
-void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
- Value2SUsMap &loads, unsigned N) {
- DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
- stores.dump();
- dbgs() << "Loading SUnits:\n";
- loads.dump());
-
- // Insert all SU's NodeNums into a vector and sort it.
- std::vector<unsigned> NodeNums;
- NodeNums.reserve(stores.size() + loads.size());
- for (auto &I : stores)
- for (auto *SU : I.second)
- NodeNums.push_back(SU->NodeNum);
- for (auto &I : loads)
- for (auto *SU : I.second)
- NodeNums.push_back(SU->NodeNum);
- std::sort(NodeNums.begin(), NodeNums.end());
-
- // The N last elements in NodeNums will be removed, and the SU with
- // the lowest NodeNum of them will become the new BarrierChain to
- // let the not yet seen SUs have a dependency to the removed SUs.
- assert (N <= NodeNums.size());
- SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
- if (BarrierChain) {
- // The aliasing and non-aliasing maps reduce independently of each
- // other, but share a common BarrierChain. Check if the
- // newBarrierChain is above the former one. If it is not, it may
- // introduce a loop to use newBarrierChain, so keep the old one.
- if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
- BarrierChain->addPredBarrier(newBarrierChain);
- BarrierChain = newBarrierChain;
- DEBUG(dbgs() << "Inserting new barrier chain: SU("
- << BarrierChain->NodeNum << ").\n";);
- }
- else
- DEBUG(dbgs() << "Keeping old barrier chain: SU("
- << BarrierChain->NodeNum << ").\n";);
- }
- else
- BarrierChain = newBarrierChain;
-
- insertBarrierChain(stores);
- insertBarrierChain(loads);
-
- DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
- stores.dump();
- dbgs() << "Loading SUnits:\n";
- loads.dump());
+ PendingLoads.clear();
}
/// \brief Initialize register live-range state for updating kills.
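The removed reduceHugeMemNodeMaps() picks its new barrier by NodeNum order: collect the NodeNums of all tracked SUs, sort them, and the lowest NodeNum among the N highest entries identifies the SU that becomes the new BarrierChain. A reduced sketch of that cutoff selection (plain C++, hypothetical function name):

#include <algorithm>
#include <cassert>
#include <vector>

unsigned pickNewBarrierNodeNum(std::vector<unsigned> NodeNums, unsigned N) {
  assert(N >= 1 && N <= NodeNums.size() && "invalid reduction size");
  std::sort(NodeNums.begin(), NodeNums.end());
  // The N last (highest) NodeNums are cut off; the lowest of them
  // becomes the new barrier so unseen SUs still depend on the removed SUs.
  return *(NodeNums.end() - N);
}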