[llvm] r259213 - Temporarily revert "[ScheduleDAGInstrs::buildSchedGraph()] Handling of memory dependecies rewritten."

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 29 09:22:43 PST 2016


Author: jonpa
Date: Fri Jan 29 11:22:43 2016
New Revision: 259213

URL: http://llvm.org/viewvc/llvm-project?rev=259213&view=rev
Log:
Temporarily revert "[ScheduleDAGInstrs::buildSchedGraph()] Handling of memory dependecies rewritten."

Some buildbot failures need to be debugged.

Modified:
    llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h
    llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
    llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
    llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp

Modified: llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h (original)
+++ llvm/trunk/include/llvm/CodeGen/PseudoSourceValue.h Fri Jan 29 11:22:43 2016
@@ -27,8 +27,6 @@ class MachineMemOperand;
 class raw_ostream;
 
 raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO);
-class PseudoSourceValue;
-raw_ostream &operator<<(raw_ostream &OS, const PseudoSourceValue* PSV);
 
 /// Special value supplied for machine level alias analysis. It indicates that
 /// a memory access references the functions stack frame (e.g., a spill slot),
@@ -47,8 +45,6 @@ public:
 
 private:
   PSVKind Kind;
-  friend raw_ostream &llvm::operator<<(raw_ostream &OS,
-                                       const PseudoSourceValue* PSV);
 
   friend class MachineMemOperand; // For printCustom().
 

Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Fri Jan 29 11:22:43 2016
@@ -396,17 +396,6 @@ namespace llvm {
     /// specified node.
     bool addPred(const SDep &D, bool Required = true);
 
-    /// addPredBarrier - This adds a barrier edge to SU by calling
-    /// addPred(), with latency 0 generally or latency 1 for a store
-    /// followed by a load.
-    bool addPredBarrier(SUnit *SU) {
-      SDep Dep(SU, SDep::Barrier);
-      unsigned TrueMemOrderLatency =
-        ((SU->getInstr()->mayStore() && this->getInstr()->mayLoad()) ? 1 : 0);
-      Dep.setLatency(TrueMemOrderLatency);
-      return addPred(Dep);
-    }
-
     /// removePred - This removes the specified edge as a pred of the current
     /// node if it exists.  It also removes the current node as a successor of
     /// the specified node.

Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAGInstrs.h Fri Jan 29 11:22:43 2016
@@ -15,14 +15,12 @@
 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SparseMultiSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include <list>
 
 namespace llvm {
   class MachineFrameInfo;
@@ -86,10 +84,6 @@ namespace llvm {
   typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
     VReg2SUnitOperIdxMultiMap;
 
-  typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
-  typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
-          UnderlyingObjectsVector;
-
   /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
   /// MachineInstrs.
   class ScheduleDAGInstrs : public ScheduleDAG {
@@ -155,66 +149,10 @@ namespace llvm {
     /// Tracks the last instructions in this region using each virtual register.
     VReg2SUnitOperIdxMultiMap CurrentVRegUses;
 
-    AliasAnalysis *AAForDep;
-
-    /// Remember a generic side-effecting instruction as we proceed.
-    /// No other SU ever gets scheduled around it (except in the special
-    /// case of a huge region that gets reduced).
-    SUnit *BarrierChain;
-
-  public:
-
-    /// A list of SUnits, used in Value2SUsMap, during DAG construction.
-    /// Note: to gain speed it might be worth investigating an optimized
-    /// implementation of this data structure, such as a singly linked list
-    /// with a memory pool (SmallVector was tried but slow and SparseSet is not
-    /// applicable).
-    typedef std::list<SUnit *> SUList;
-  protected:
-    /// A map from ValueType to SUList, used during DAG construction,
-    /// as a means of remembering which SUs depend on which memory
-    /// locations.
-    class Value2SUsMap;
-
-    /// Remove in FIFO order some SUs from huge maps.
-    void reduceHugeMemNodeMaps(Value2SUsMap &stores,
-                               Value2SUsMap &loads, unsigned N);
-
-    /// Add a chain edge between SUa and SUb, but only if both AliasAnalysis
-    /// and Target fail to deny the dependency.
-    void addChainDependency(SUnit *SUa, SUnit *SUb,
-                            unsigned Latency = 0);
-
-    /// Add dependencies as needed from all SUs in list to SU.
-    void addChainDependencies(SUnit *SU, SUList &sus, unsigned Latency) {
-      for (auto *su : sus)
-        addChainDependency(SU, su, Latency);
-    }
-
-    /// Add dependencies as needed from all SUs in map, to SU.
-    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap);
-
-    /// Add dependencies as needed to SU, from all SUs mapped to V.
-    void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap,
-                              ValueType V);
-
-    /// Add barrier chain edges from all SUs in map, and then clear
-    /// the map. This is equivalent to insertBarrierChain(), but
-    /// optimized for the common case where the new BarrierChain (a
-    /// global memory object) has a higher NodeNum than all SUs in
-    /// map. It is assumed BarrierChain has been set before calling
-    /// this.
-    void addBarrierChain(Value2SUsMap &map);
-
-    /// Insert a barrier chain in a huge region, far below current
-    /// SU. Add barrier chain edges from all SUs in map with higher
-    /// NodeNums than this new BarrierChain, and remove them from
-    /// map. It is assumed BarrierChain has been set before calling
-    /// this.
-    void insertBarrierChain(Value2SUsMap &map);
-
-    /// For an unanalyzable memory access, this Value is used in maps.
-    UndefValue *UnknownValue;
+    /// PendingLoads - Remember where unknown loads are after the most recent
+    /// unknown store, as we iterate. As with Defs and Uses, this is here
+    /// to minimize construction/destruction.
+    std::vector<SUnit *> PendingLoads;
 
     /// DbgValues - Remember instruction that precedes DBG_VALUE.
     /// These are generated by buildSchedGraph but persist so they can be

Modified: llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp?rev=259213&r1=259212&r2=259213&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGInstrs.cpp Fri Jan 29 11:22:43 2016
@@ -14,6 +14,7 @@
 
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -27,8 +28,6 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -51,42 +50,12 @@ static cl::opt<bool> EnableAASchedMI("en
 static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
     cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
 
-// Note: the two options below might be used in tuning compile time vs
-// output quality. Setting HugeRegion so large that it will never be
-// reached means best-effort, but may be slow.
-
-// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
-// together hold this many SUs, a reduction of maps will be done.
-static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
-    cl::init(1000), cl::desc("The limit to use while constructing the DAG "
-                             "prior to scheduling, at which point a trade-off "
-                             "is made to avoid excessive compile time."));
-
-static cl::opt<unsigned> ReductionSize("dag-maps-reduction-size", cl::Hidden,
-    cl::desc("A huge scheduling region will have maps reduced by this many "
-	     "nodes at a time. Defaults to HugeRegion / 2."));
-
-static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  dbgs() << "{ ";
-  for (auto *su : L) {
-    dbgs() << "SU(" << su->NodeNum << ")";
-    if (su != L.back())
-      dbgs() << ", ";
-  }
-  dbgs() << "}\n";
-#endif
-}
-
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo *mli,
                                      bool RemoveKillFlags)
     : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
       RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
-      TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
-      UnknownValue(UndefValue::get(
-                     Type::getVoidTy(mf.getFunction()->getContext()))),
-      FirstDbgValue(nullptr) {
+      TrackLaneMasks(false), FirstDbgValue(nullptr) {
   DbgValues.clear();
 
   const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -152,6 +121,10 @@ static void getUnderlyingObjects(const V
   } while (!Working.empty());
 }
 
+typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
+UnderlyingObjectsVector;
+
 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference
 /// information and it can be tracked to a normal reference to a known
 /// object, return the Value for that object.
@@ -571,31 +544,41 @@ static inline bool isUnsafeMemoryObject(
     return true;
   }
 
-  if ((*MI->memoperands_begin())->getValue() == nullptr)
+  const Value *V = (*MI->memoperands_begin())->getValue();
+  if (!V)
     return true;
 
+  SmallVector<Value *, 4> Objs;
+  getUnderlyingObjects(V, Objs, DL);
+  for (Value *V : Objs) {
+    // Does this pointer refer to a distinct and identifiable object?
+    if (!isIdentifiedObject(V))
+      return true;
+  }
+
   return false;
 }
 
 /// This returns true if the two MIs need a chain edge between them.
-/// This is called on normal stores and loads.
+/// If these are not even memory operations, we still may need
+/// chain deps between them. The question really is - could
+/// these two MIs be reordered during scheduling from memory dependency
+/// point of view.
 static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                              const DataLayout &DL, MachineInstr *MIa,
                              MachineInstr *MIb) {
   const MachineFunction *MF = MIa->getParent()->getParent();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
 
-  assert ((MIa->mayStore() || MIb->mayStore()) &&
-          "Dependency checked between two loads");
-
-  // buildSchedGraph() will clear list of stores if not using AA,
-  // which means all stores have to be chained without AA.
-  if (!AA && MIa->mayStore() && MIb->mayStore())
-    return true;
-
-  // Let the target decide if memory accesses cannot possibly overlap.
-  if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+  // Cover a trivial case - no edge is need to itself.
+  if (MIa == MIb)
     return false;
+ 
+  // Let the target decide if memory accesses cannot possibly overlap.
+  if ((MIa->mayLoad() || MIa->mayStore()) &&
+      (MIb->mayLoad() || MIb->mayStore()))
+    if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA))
+      return false;
 
   // FIXME: Need to handle multiple memory operands to support all targets.
   if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
@@ -604,6 +587,11 @@ static bool MIsNeedChainEdge(AliasAnalys
   if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL))
     return true;
 
+  // If we are dealing with two "normal" loads, we do not need an edge
+  // between them - they could be reordered.
+  if (!MIa->mayStore() && !MIb->mayStore())
+    return false;
+
   // To this point analysis is generic. From here on we do need AA.
   if (!AA)
     return true;
@@ -646,15 +634,106 @@ static bool MIsNeedChainEdge(AliasAnalys
   return (AAResult != NoAlias);
 }
 
-/// Check whether two objects need a chain edge and add it if needed.
-void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
-                                            unsigned Latency) {
-  if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
-		       SUb->getInstr())) {
-    SDep Dep(SUa, SDep::MayAliasMem);
-    Dep.setLatency(Latency);
+/// This recursive function iterates over chain deps of SUb looking for
+/// "latest" node that needs a chain edge to SUa.
+static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                                 const DataLayout &DL, SUnit *SUa, SUnit *SUb,
+                                 SUnit *ExitSU, unsigned *Depth,
+                                 SmallPtrSetImpl<const SUnit *> &Visited) {
+  if (!SUa || !SUb || SUb == ExitSU)
+    return *Depth;
+
+  // Remember visited nodes.
+  if (!Visited.insert(SUb).second)
+      return *Depth;
+  // If there is _some_ dependency already in place, do not
+  // descend any further.
+  // TODO: Need to make sure that if that dependency got eliminated or ignored
+  // for any reason in the future, we would not violate DAG topology.
+  // Currently it does not happen, but makes an implicit assumption about
+  // future implementation.
+  //
+  // Independently, if we encounter node that is some sort of global
+  // object (like a call) we already have full set of dependencies to it
+  // and we can stop descending.
+  if (SUa->isSucc(SUb) ||
+      isGlobalMemoryObject(AA, SUb->getInstr()))
+    return *Depth;
+
+  // If we do need an edge, or we have exceeded depth budget,
+  // add that edge to the predecessors chain of SUb,
+  // and stop descending.
+  if (*Depth > 200 ||
+      MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
+    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
+    return *Depth;
+  }
+  // Track current depth.
+  (*Depth)++;
+  // Iterate over memory dependencies only.
+  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
+       I != E; ++I)
+    if (I->isNormalMemoryOrBarrier())
+      iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited);
+  return *Depth;
+}
+
+/// This function assumes that "downward" from SU there exist
+/// tail/leaf of already constructed DAG. It iterates downward and
+/// checks whether SU can be aliasing any node dominated
+/// by it.
+static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
+                            const DataLayout &DL, SUnit *SU, SUnit *ExitSU,
+                            std::set<SUnit *> &CheckList,
+                            unsigned LatencyToLoad) {
+  if (!SU)
+    return;
+
+  SmallPtrSet<const SUnit*, 16> Visited;
+  unsigned Depth = 0;
+
+  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
+       I != IE; ++I) {
+    if (SU == *I)
+      continue;
+    if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) {
+      SDep Dep(SU, SDep::MayAliasMem);
+      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
+      (*I)->addPred(Dep);
+    }
+
+    // Iterate recursively over all previously added memory chain
+    // successors. Keep track of visited nodes.
+    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
+         JE = (*I)->Succs.end(); J != JE; ++J)
+      if (J->isNormalMemoryOrBarrier())
+        iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth,
+                         Visited);
+  }
+}
+
+/// Check whether two objects need a chain edge, if so, add it
+/// otherwise remember the rejected SU.
+static inline void addChainDependency(AliasAnalysis *AA,
+                                      const MachineFrameInfo *MFI,
+                                      const DataLayout &DL, SUnit *SUa,
+                                      SUnit *SUb, std::set<SUnit *> &RejectList,
+                                      unsigned TrueMemOrderLatency = 0,
+                                      bool isNormalMemory = false) {
+  // If this is a false dependency,
+  // do not add the edge, but remember the rejected node.
+  if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) {
+    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
+    Dep.setLatency(TrueMemOrderLatency);
     SUb->addPred(Dep);
   }
+  else {
+    // Duplicate entries should be ignored.
+    RejectList.insert(SUb);
+    DEBUG(dbgs() << "\tReject chain dep between SU("
+          << SUa->NodeNum << ") and SU("
+          << SUb->NodeNum << ")\n");
+  }
 }
 
 /// Create an SUnit for each real instruction, numbered in top-down topological
@@ -753,122 +832,6 @@ void ScheduleDAGInstrs::collectVRegUses(
   }
 }
 
-class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
-
-  /// Current total number of SUs in map.
-  unsigned NumNodes;
-
-  /// 1 for loads, 0 for stores. (see comment in SUList)
-  unsigned TrueMemOrderLatency;
-public:
-
-  Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
-
-  /// To keep NumNodes up to date, insert() is used instead of
-  /// this operator w/ push_back().
-  ValueType &operator[](const SUList &Key) {
-    llvm_unreachable("Don't use. Use insert() instead."); };
-
-  /// Add SU to the SUList of V. If Map grows huge, reduce its size
-  /// by calling reduce().
-  void inline insert(SUnit *SU, ValueType V) {
-    MapVector::operator[](V).push_back(SU);
-    NumNodes++;
-  }
-
-  /// Clears the list of SUs mapped to V.
-  void inline clearList(ValueType V) {
-    iterator Itr = find(V);
-    if (Itr != end()) {
-      assert (NumNodes >= Itr->second.size());
-      NumNodes -= Itr->second.size();
-
-      Itr->second.clear();
-    }
-  }
-
-  /// Clears map from all contents.
-  void clear() {
-    MapVector<ValueType, SUList>::clear();
-    NumNodes = 0;
-  }
-
-  unsigned inline size() const { return NumNodes; }
-
-  /// Count the number of SUs in this map after a reduction.
-  void reComputeSize(void) {
-    NumNodes = 0;
-    for (auto &I : *this)
-      NumNodes += I.second.size();
-  }
-
-  unsigned inline getTrueMemOrderLatency() const {
-    return TrueMemOrderLatency;
-  }
-
-  void dump();
-};
-
-void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
-                                             Value2SUsMap &Val2SUsMap) {
-  for (auto &I : Val2SUsMap)
-    addChainDependencies(SU, I.second,
-                         Val2SUsMap.getTrueMemOrderLatency());
-}
-
-void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
-                                             Value2SUsMap &Val2SUsMap,
-                                             ValueType V) {
-  Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
-  if (Itr != Val2SUsMap.end())
-    addChainDependencies(SU, Itr->second,
-                         Val2SUsMap.getTrueMemOrderLatency());
-}
-
-void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
-  assert (BarrierChain != nullptr);
-
-  for (auto &I : map) {
-    SUList &sus = I.second;
-    for (auto *SU : sus)
-      SU->addPredBarrier(BarrierChain);
-  }
-  map.clear();
-}
-
-void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
-  assert (BarrierChain != nullptr);
-
-  // Go through all lists of SUs.
-  for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
-    Value2SUsMap::iterator CurrItr = I++;
-    SUList &sus = CurrItr->second;
-    SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
-    for (; SUItr != SUEE; ++SUItr) {
-      // Stop on BarrierChain or any instruction above it.
-      if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
-        break;
-
-      (*SUItr)->addPredBarrier(BarrierChain);
-    }
-
-    // Remove also the BarrierChain from list if present.
-    if (*SUItr == BarrierChain)
-      SUItr++;
-
-    // Remove all SUs that are now successors of BarrierChain.
-    if (SUItr != sus.begin())
-      sus.erase(sus.begin(), SUItr);
-  }
-
-  // Remove all entries with empty su lists.
-  map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
-      return (mapEntry.second.empty()); });
-
-  // Recompute the size of the map (NumNodes).
-  map.reComputeSize();
-}
-
 /// If RegPressure is non-null, compute register pressure as a side effect. The
 /// DAG builder is an efficient place to do it because it already visits
 /// operands.
@@ -880,9 +843,7 @@ void ScheduleDAGInstrs::buildSchedGraph(
   const TargetSubtargetInfo &ST = MF.getSubtarget();
   bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
                                                        : ST.useAA();
-  AAForDep = UseAA ? AA : nullptr;
-
-  BarrierChain = nullptr;
+  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
 
   this->TrackLaneMasks = TrackLaneMasks;
   MISUnitMap.clear();
@@ -894,30 +855,19 @@ void ScheduleDAGInstrs::buildSchedGraph(
   if (PDiffs)
     PDiffs->init(SUnits.size());
 
-  // We build scheduling units by walking a block's instruction list
-  // from bottom to top.
+  // We build scheduling units by walking a block's instruction list from bottom
+  // to top.
+
+  // Remember where a generic side-effecting instruction is as we proceed.
+  SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
 
-  // Each MIs' memory operand(s) is analyzed to a list of underlying
-  // objects. The SU is then inserted in the SUList(s) mapped from
-  // that Value(s). Each Value thus gets mapped to a list of SUs
-  // depending on it, defs and uses kept separately. Two SUs are
-  // non-aliasing to each other if they depend on different Values
-  // exclusively.
-  Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
-
-  // Certain memory accesses are known to not alias any SU in Stores
-  // or Loads, and have therefore their own 'NonAlias'
-  // domain. E.g. spill / reload instructions never alias LLVM I/R
-  // Values. It is assumed that this type of memory accesses always
-  // have a proper memory operand modelling, and are therefore never
-  // unanalyzable. This means they are non aliasing against all nodes
-  // in Stores and Loads, including the unanalyzable ones.
-  Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
-
-  // Always reduce a huge region with half of the elements, except
-  // when user sets this number explicitly.
-  if (ReductionSize.getNumOccurrences() == 0)
-    ReductionSize = (HugeRegion / 2);
+  // Memory references to specific known memory locations are tracked
+  // so that they can be given more precise dependencies. We track
+  // separately the known memory locations that may alias and those
+  // that are known not to alias
+  MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
+  MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+  std::set<SUnit*> RejectMemNodes;
 
   // Remove any stale debug info; sometimes BuildSchedGraph is called again
   // without emitting the info from the previous call.
@@ -1012,123 +962,221 @@ void ScheduleDAGInstrs::buildSchedGraph(
       ExitSU.addPred(Dep);
     }
 
-    // Add memory dependencies (Note: isStoreToStackSlot and
-    // isLoadFromStackSLot are not usable after stack slots are lowered to
-    // actual addresses).
-
-    // This is a barrier event that acts as a pivotal node in the DAG.
+    // Add chain dependencies.
+    // Chain dependencies used to enforce memory order should have
+    // latency of 0 (except for true dependency of Store followed by
+    // aliased Load... we estimate that with a single cycle of latency
+    // assuming the hardware will bypass)
+    // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable
+    // after stack slots are lowered to actual addresses.
+    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
+    // produce more precise dependence information.
+    unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0;
     if (isGlobalMemoryObject(AA, MI)) {
-
-      // Become the barrier chain.
+      // Be conservative with these and add dependencies on all memory
+      // references, even those that are known to not alias.
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          I->second[i]->addPred(SDep(SU, SDep::Barrier));
+        }
+      }
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
+          SDep Dep(SU, SDep::Barrier);
+          Dep.setLatency(TrueMemOrderLatency);
+          I->second[i]->addPred(Dep);
+        }
+      }
+      // Add SU to the barrier chain.
       if (BarrierChain)
-        BarrierChain->addPredBarrier(SU);
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
       BarrierChain = SU;
+      // This is a barrier event that acts as a pivotal node in the DAG,
+      // so it is safe to clear list of exposed nodes.
+      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+      RejectMemNodes.clear();
+      NonAliasMemDefs.clear();
+      NonAliasMemUses.clear();
+
+      // fall-through
+    new_alias_chain:
+      // Chain all possibly aliasing memory references through SU.
+      if (AliasChain) {
+        unsigned ChainLatency = 0;
+        if (AliasChain->getInstr()->mayLoad())
+          ChainLatency = TrueMemOrderLatency;
+        addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+                           RejectMemNodes, ChainLatency);
+      }
+      AliasChain = SU;
+      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+        addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                           PendingLoads[k], RejectMemNodes,
+                           TrueMemOrderLatency);
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+           AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                             I->second[i], RejectMemNodes);
+      }
+      for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+           AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
+        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                             I->second[i], RejectMemNodes, TrueMemOrderLatency);
+      }
+      // This call must come after calls to addChainDependency() since it
+      // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+      // adds to.
+      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+      PendingLoads.clear();
+      AliasMemDefs.clear();
+      AliasMemUses.clear();
+    } else if (MI->mayStore()) {
+      // Add dependence on barrier chain, if needed.
+      // There is no point to check aliasing on barrier event. Even if
+      // SU and barrier _could_ be reordered, they should not. In addition,
+      // we have lost all RejectMemNodes below barrier.
+      if (BarrierChain)
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
 
-      DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
-            << BarrierChain->NodeNum << ").\n";);
-
-      // Add dependencies against everything below it and clear maps.
-      addBarrierChain(Stores);
-      addBarrierChain(Loads);
-      addBarrierChain(NonAliasStores);
-      addBarrierChain(NonAliasLoads);
-
-      continue;
-    }
-
-    // If it's not a store or a variant load, we're done.
-    if (!MI->mayStore() && !(MI->mayLoad() && !MI->isInvariantLoad(AA)))
-      continue;
-
-    // Always add dependecy edge to BarrierChain if present.
-    if (BarrierChain)
-      BarrierChain->addPredBarrier(SU);
-
-    // Find the underlying objects for MI. The Objs vector is either
-    // empty, or filled with the Values of memory locations which this
-    // SU depends on. An empty vector means the memory location is
-    // unknown, and may alias anything except NonAlias nodes.
-    UnderlyingObjectsVector Objs;
-    getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+      UnderlyingObjectsVector Objs;
+      getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
 
-    if (MI->mayStore()) {
       if (Objs.empty()) {
-        // An unknown store depends on all stores and loads.
-        addChainDependencies(SU, Stores);
-        addChainDependencies(SU, NonAliasStores);
-        addChainDependencies(SU, Loads);
-        addChainDependencies(SU, NonAliasLoads);
-
-        // If we're not using AA, clear Stores map since all stores
-        // will be chained.
-        if (!AAForDep)
-          Stores.clear();
-
-	// Map this store to 'UnknownValue'.
-	Stores.insert(SU, UnknownValue);
-        continue;
+        // Treat all other stores conservatively.
+        goto new_alias_chain;
       }
 
-      // Add precise dependencies against all previously seen memory
-      // accesses mapped to the same Value(s).
-      for (auto &underlObj : Objs) {
-        ValueType V = underlObj.getPointer();
-        bool ThisMayAlias = underlObj.getInt();
-
-        Value2SUsMap &stores_ = (ThisMayAlias ? Stores : NonAliasStores);
-
-        // Add dependencies to previous stores and loads mapped to V.
-        addChainDependencies(SU, stores_, V);
-        addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
-
-        // If we're not using AA, then we only need one store per object.
-        if (!AAForDep)
-          stores_.clearList(V);
-
-        // Map this store to V.
-        stores_.insert(SU, V);
+      bool MayAlias = false;
+      for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
+           K != KE; ++K) {
+        ValueType V = K->getPointer();
+        bool ThisMayAlias = K->getInt();
+        if (ThisMayAlias)
+          MayAlias = true;
+
+        // A store to a specific PseudoSourceValue. Add precise dependencies.
+        // Record the def in MemDefs, first adding a dep if there is
+        // an existing def.
+        MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+          ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+        MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
+          ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+        if (I != IE) {
+          for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+            addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                               I->second[i], RejectMemNodes, 0, true);
+
+          // If we're not using AA, then we only need one store per object.
+          if (!AAForDep)
+            I->second.clear();
+          I->second.push_back(SU);
+        } else {
+          if (ThisMayAlias) {
+            if (!AAForDep)
+              AliasMemDefs[V].clear();
+            AliasMemDefs[V].push_back(SU);
+          } else {
+            if (!AAForDep)
+              NonAliasMemDefs[V].clear();
+            NonAliasMemDefs[V].push_back(SU);
+          }
+        }
+        // Handle the uses in MemUses, if there are any.
+        MapVector<ValueType, std::vector<SUnit *> >::iterator J =
+          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+        MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
+          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+        if (J != JE) {
+          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
+            addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                               J->second[i], RejectMemNodes,
+                               TrueMemOrderLatency, true);
+          J->second.clear();
+        }
       }
-      // The store may have dependencies to unanalyzable loads and
-      // stores.
-      addChainDependencies(SU, Loads, UnknownValue);
-      addChainDependencies(SU, Stores, UnknownValue);
-    }
-    else { // SU is a load.
-      if (Objs.empty()) {
-        // An unknown load depends on all stores.
-        addChainDependencies(SU, Stores);
-        addChainDependencies(SU, NonAliasStores);
-
-        Loads.insert(SU, UnknownValue);
-        continue;
+      if (MayAlias) {
+        // Add dependencies from all the PendingLoads, i.e. loads
+        // with no underlying object.
+        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                             PendingLoads[k], RejectMemNodes,
+                             TrueMemOrderLatency);
+        // Add dependence on alias chain, if needed.
+        if (AliasChain)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+                             RejectMemNodes);
       }
+      // This call must come after calls to addChainDependency() since it
+      // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+      // adds to.
+      adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
+                      TrueMemOrderLatency);
+    } else if (MI->mayLoad()) {
+      bool MayAlias = true;
+      if (MI->isInvariantLoad(AA)) {
+        // Invariant load, no chain dependencies needed!
+      } else {
+        UnderlyingObjectsVector Objs;
+        getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout());
+
+        if (Objs.empty()) {
+          // A load with no underlying object. Depend on all
+          // potentially aliasing stores.
+          for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+              addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                                 I->second[i], RejectMemNodes);
+
+          PendingLoads.push_back(SU);
+          MayAlias = true;
+        } else {
+          MayAlias = false;
+        }
 
-      for (auto &underlObj : Objs) {
-        ValueType V = underlObj.getPointer();
-        bool ThisMayAlias = underlObj.getInt();
-
-        // Add precise dependencies against all previously seen stores
-        // mapping to the same Value(s).
-        addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
-
-        // Map this load to V.
-        (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
+        for (UnderlyingObjectsVector::iterator
+             J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+          ValueType V = J->getPointer();
+          bool ThisMayAlias = J->getInt();
+
+          if (ThisMayAlias)
+            MayAlias = true;
+
+          // A load from a specific PseudoSourceValue. Add precise dependencies.
+          MapVector<ValueType, std::vector<SUnit *> >::iterator I =
+            ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+          MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
+            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+          if (I != IE)
+            for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+              addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
+                                 I->second[i], RejectMemNodes, 0, true);
+          if (ThisMayAlias)
+            AliasMemUses[V].push_back(SU);
+          else
+            NonAliasMemUses[V].push_back(SU);
+        }
+        // Add dependencies on alias and barrier chains, if needed.
+        if (MayAlias && AliasChain)
+          addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
+                             RejectMemNodes);
+        if (MayAlias)
+          // This call must come after calls to addChainDependency() since it
+          // consumes the 'RejectMemNodes' list that addChainDependency()
+          // possibly adds to.
+          adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
+                          RejectMemNodes, /*Latency=*/0);
+        if (BarrierChain)
+          BarrierChain->addPred(SDep(SU, SDep::Barrier));
       }
-      // The load may have dependencies to unanalyzable stores.
-      addChainDependencies(SU, Stores, UnknownValue);
-    }
-
-    // Reduce maps if they grow huge.
-    if (Stores.size() + Loads.size() >= HugeRegion) {
-      DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
-      reduceHugeMemNodeMaps(Stores, Loads, ReductionSize);
-    }
-    if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
-      DEBUG(dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
-      reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, ReductionSize);
     }
   }
-
   if (DbgMI)
     FirstDbgValue = DbgMI;
 
@@ -1136,84 +1184,7 @@ void ScheduleDAGInstrs::buildSchedGraph(
   Uses.clear();
   CurrentVRegDefs.clear();
   CurrentVRegUses.clear();
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
-  PSV->printCustom(OS);
-  return OS;
-}
-
-void ScheduleDAGInstrs::Value2SUsMap::dump() {
-  for (auto &Itr : *this) {
-    if (Itr.first.is<const Value*>()) {
-      const Value *V = Itr.first.get<const Value*>();
-      if (isa<UndefValue>(V))
-        dbgs() << "Unknown";
-      else
-        V->printAsOperand(dbgs());
-    }
-    else if (Itr.first.is<const PseudoSourceValue*>())
-      dbgs() <<  Itr.first.get<const PseudoSourceValue*>();
-    else
-      llvm_unreachable("Unknown Value type.");
-
-    dbgs() << " : ";
-    dumpSUList(Itr.second);
-  }
-}
-
-/// Reduce maps in FIFO order, by N SUs. This is better than turning
-/// every Nth memory SU into BarrierChain in buildSchedGraph(), since
-/// it avoids unnecessary edges between seen SUs above the new
-/// BarrierChain, and those below it.
-void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
-                                              Value2SUsMap &loads, unsigned N) {
-  DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n";
-        stores.dump();
-        dbgs() << "Loading SUnits:\n";
-        loads.dump());
-
-  // Insert all SU's NodeNums into a vector and sort it.
-  std::vector<unsigned> NodeNums;
-  NodeNums.reserve(stores.size() + loads.size());
-  for (auto &I : stores)
-    for (auto *SU : I.second)
-      NodeNums.push_back(SU->NodeNum);
-  for (auto &I : loads)
-    for (auto *SU : I.second)
-      NodeNums.push_back(SU->NodeNum);
-  std::sort(NodeNums.begin(), NodeNums.end());
-
-  // The N last elements in NodeNums will be removed, and the SU with
-  // the lowest NodeNum of them will become the new BarrierChain to
-  // let the not yet seen SUs have a dependency to the removed SUs.
-  assert (N <= NodeNums.size());
-  SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
-  if (BarrierChain) {
-    // The aliasing and non-aliasing maps reduce independently of each
-    // other, but share a common BarrierChain. Check if the
-    // newBarrierChain is above the former one. If it is not, it may
-    // introduce a loop to use newBarrierChain, so keep the old one.
-    if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
-      BarrierChain->addPredBarrier(newBarrierChain);
-      BarrierChain = newBarrierChain;
-      DEBUG(dbgs() << "Inserting new barrier chain: SU("
-            << BarrierChain->NodeNum << ").\n";);
-    }
-    else
-      DEBUG(dbgs() << "Keeping old barrier chain: SU("
-            << BarrierChain->NodeNum << ").\n";);
-  }
-  else
-    BarrierChain = newBarrierChain;
-
-  insertBarrierChain(stores);
-  insertBarrierChain(loads);
-
-  DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n";
-        stores.dump();
-        dbgs() << "Loading SUnits:\n";
-        loads.dump());
+  PendingLoads.clear();
 }
 
 /// \brief Initialize register live-range state for updating kills.




More information about the llvm-commits mailing list