[llvm] [MachinePipeliner] Add validation for missed dependencies (PR #135148)

via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 21 05:26:47 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-powerpc

Author: Ryotaro Kasuga (kasuga-fj)

<details>
<summary>Changes</summary>

This patch fixes the correctness issue for missed memory dependencies in MachinePipeliner. #<!-- -->121907 addresses the same problem, but it happened performance regressions that cannot be ignored. This patch attempts to fix that in a different way.

---

Patch is 99.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/135148.diff


22 Files Affected:

- (modified) llvm/include/llvm/CodeGen/MachinePipeliner.h (+107-36) 
- (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+633-207) 
- (modified) llvm/test/CodeGen/AArch64/sms-instruction-scheduled-at-correct-cycle.mir (+6-1) 
- (added) llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir (+107) 
- (added) llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions2.mir (+100) 
- (modified) llvm/test/CodeGen/Hexagon/swp-carried-dep1.mir (+14-9) 
- (modified) llvm/test/CodeGen/Hexagon/swp-carried-dep2.mir (+9-13) 
- (modified) llvm/test/CodeGen/Hexagon/swp-carried-dep3.mir (+7-1) 
- (modified) llvm/test/CodeGen/Hexagon/swp-epilog-phi13.ll (+3-3) 
- (modified) llvm/test/CodeGen/Hexagon/swp-epilog-phi9.ll (+4-4) 
- (added) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir (+110) 
- (added) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir (+104) 
- (added) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir (+108) 
- (added) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir (+107) 
- (added) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir (+106) 
- (added) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir (+153) 
- (modified) llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll (+1-1) 
- (added) llvm/test/CodeGen/Hexagon/swp-no-alias.mir (+88) 
- (modified) llvm/test/CodeGen/Hexagon/swp-phi-start.ll (+1-1) 
- (modified) llvm/test/CodeGen/Hexagon/swp-resmii-1.ll (+1-1) 
- (modified) llvm/test/CodeGen/PowerPC/sms-store-dependence.ll (+8-41) 
- (modified) llvm/test/CodeGen/Thumb2/pipeliner-preserve-ties.mir (-1) 


``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index fee6937e7d502..12ca9c8e62b1f 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -42,6 +42,7 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -120,14 +121,17 @@ class SwingSchedulerDDGEdge {
   SUnit *Dst = nullptr;
   SDep Pred;
   unsigned Distance = 0;
+  bool IsValidationOnly = false;
 
 public:
   /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
   /// \p Dep in the original DAG. This pair has no information about the
   /// direction of the edge, so we need to pass an additional argument \p
   /// IsSucc.
-  SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)
-      : Dst(PredOrSucc), Pred(Dep), Distance(0u) {
+  SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
+                        bool IsValidationOnly)
+      : Dst(PredOrSucc), Pred(Dep), Distance(0u),
+        IsValidationOnly(IsValidationOnly) {
     SUnit *Src = Dep.getSUnit();
 
     if (IsSucc) {
@@ -188,19 +192,92 @@ class SwingSchedulerDDGEdge {
   /// functions. We ignore the back-edge recurrence in order to avoid unbounded
   /// recursion in the calculation of the ASAP, ALAP, etc functions.
   bool ignoreDependence(bool IgnoreAnti) const;
+
+  /// Retruns true if this edge is assumed to be used only for validation of a
+  /// schedule. That is, this edge would not be considered when computing a
+  /// schedule.
+  bool isValidationOnly() const { return IsValidationOnly; }
+};
+
+/// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
+/// assume cycle dependencies as the name suggests, such dependencies must be
+/// handled separately. After DAG construction is finished, these dependencies
+/// are added to SwingSchedulerDDG.
+struct LoopCarriedEdges {
+  using OutputDep = SmallDenseMap<Register, SmallSetVector<SUnit *, 4>>;
+  using OrderDep = SmallSetVector<SUnit *, 8>;
+  using OutputDepsType = DenseMap<SUnit *, OutputDep>;
+  using OrderDepsType = DenseMap<SUnit *, OrderDep>;
+
+  OutputDepsType OutputDeps;
+  OrderDepsType OrderDeps;
+
+private:
+  /// Backedges that should be used when searching a schedule.
+  DenseMap<const SUnit *, SmallPtrSet<const SUnit *, 4>> BackEdges;
+
+public:
+  const OutputDep *getOutputDepOrNull(SUnit *Key) const {
+    auto Ite = OutputDeps.find(Key);
+    if (Ite == OutputDeps.end())
+      return nullptr;
+    return &Ite->second;
+  }
+
+  const OrderDep *getOrderDepOrNull(SUnit *Key) const {
+    auto Ite = OrderDeps.find(Key);
+    if (Ite == OrderDeps.end())
+      return nullptr;
+    return &Ite->second;
+  }
+
+  /// Retruns true if the edge from \p From to \p To is a back-edge that should
+  /// be used when scheduling.
+  bool shouldUseWhenScheduling(const SUnit *From, const SUnit *To) const;
+
+  /// Adds some edges to the original DAG that correspond to loop-carried
+  /// dependencies. Historically, loop-carried edges are represented by using
+  /// non-loop-carried edges in the original DAG. This function appends such
+  /// edges to preserve the previous behavior.
+  void modifySUnits(std::vector<SUnit> &SUnits, const TargetInstrInfo *TII);
+
+  void dump(SUnit *SU, const TargetRegisterInfo *TRI,
+            const MachineRegisterInfo *MRI) const;
 };
 
 /// Represents dependencies between instructions. This class is a wrapper of
 /// `SUnits` and its dependencies to manipulate back-edges in a natural way.
-/// Currently it only supports back-edges via PHI, which are expressed as
-/// anti-dependencies in the original DAG.
-/// FIXME: Support any other loop-carried dependencies
 class SwingSchedulerDDG {
-  using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>;
+  class EdgesType {
+    /// The number of loop-carried edges in Underlying.
+    unsigned LoopCarriedOrderDepsCount = 0;
+
+    /// Hold edges. There is a restriction on the order of the edges. Let N be
+    /// the number of edges, then
+    /// - The first #(N - LoopCarriedOrderDepsCount) edges are not
+    ///   loop-carried.
+    /// - The last #LoopCarriedOrderDepsCount edges are loop-carried.
+    SmallVector<SwingSchedulerDDGEdge, 4> Underlying;
+
+  public:
+    /// Add an \p Edge. To satisfy the order restriction on Underlying, once a
+    /// loop-carried edge is added, an edge that is not loop-carried one must
+    /// not be added.
+    void append(const SwingSchedulerDDGEdge &Edge);
+
+    ArrayRef<SwingSchedulerDDGEdge> get(bool UseLoopCarriedEdges) const {
+      ArrayRef<SwingSchedulerDDGEdge> Res = Underlying;
+      if (!UseLoopCarriedEdges)
+        Res = Res.slice(0, Underlying.size() - LoopCarriedOrderDepsCount);
+      return Res;
+    }
+  };
 
   struct SwingSchedulerDDGEdges {
     EdgesType Preds;
     EdgesType Succs;
+
+    SmallVector<SwingSchedulerDDGEdge, 4> ValidationOnlyPreds;
   };
 
   void initEdges(SUnit *SU);
@@ -211,6 +288,7 @@ class SwingSchedulerDDG {
   std::vector<SwingSchedulerDDGEdges> EdgesVec;
   SwingSchedulerDDGEdges EntrySUEdges;
   SwingSchedulerDDGEdges ExitSUEdges;
+  bool UseLoopCarriedEdges = false;
 
   void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
 
@@ -218,11 +296,25 @@ class SwingSchedulerDDG {
   const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
 
 public:
-  SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU);
+  SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
+                    const LoopCarriedEdges &LCE);
 
-  const EdgesType &getInEdges(const SUnit *SU) const;
+  /// Get in-edges for \p SU.
+  ArrayRef<SwingSchedulerDDGEdge> getInEdges(const SUnit *SU) const;
 
-  const EdgesType &getOutEdges(const SUnit *SU) const;
+  /// Get out-edges for \p SU.
+  ArrayRef<SwingSchedulerDDGEdge> getOutEdges(const SUnit *SU) const;
+
+  /// Returns true if \p Schedule doesn't violate the validation-only
+  /// dependencies.
+  bool isValidSchedule(std::vector<SUnit> &SUnits,
+                       const SMSchedule &Schedule) const;
+
+  /// Include loop-carried edeges to the result of getInEdges/getOutEdges.
+  void applyLoopCarriedEdges() { UseLoopCarriedEdges = true; }
+
+  /// Exclude loop-carried edeges from the result of getInEdges/getOutEdges.
+  void removeLoopCarriedEdges() { UseLoopCarriedEdges = false; }
 };
 
 /// This class builds the dependence graph for the instructions in a loop,
@@ -300,7 +392,6 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
     }
     Circuits &operator=(const Circuits &other) = delete;
     Circuits(const Circuits &other) = delete;
-    ~Circuits() { delete Node2Idx; }
 
     /// Reset the data structures used in the circuit algorithm.
     void reset() {
@@ -310,9 +401,9 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
       NumPaths = 0;
     }
 
-    void createAdjacencyStructure(SwingSchedulerDAG *DAG);
+    void createAdjacencyStructure(const SwingSchedulerDDG *DDG);
     bool circuit(int V, int S, NodeSetType &NodeSets,
-                 const SwingSchedulerDAG *DAG, bool HasBackedge = false);
+                 const SwingSchedulerDDG *DDG, bool HasBackedge = false);
     void unblock(int U);
   };
 
@@ -366,8 +457,6 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
     return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight;
   }
 
-  bool isLoopCarriedDep(const SwingSchedulerDDGEdge &Edge) const;
-
   void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
 
   void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
@@ -390,11 +479,11 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
 
   const SwingSchedulerDDG *getDDG() const { return DDG.get(); }
 
-  bool mayOverlapInLaterIter(const MachineInstr *BaseMI,
-                             const MachineInstr *OtherMI) const;
+  AliasResult::Kind mayOverlapInLaterIter(const MachineInstr *BaseMI,
+                                          const MachineInstr *OtherMI) const;
 
 private:
-  void addLoopCarriedDependences(AAResults *AA);
+  LoopCarriedEdges addLoopCarriedDependences(AAResults *AA);
   void updatePhiDependences();
   void changeDependences();
   unsigned calculateResMII();
@@ -440,7 +529,7 @@ class NodeSet {
   using iterator = SetVector<SUnit *>::const_iterator;
 
   NodeSet() = default;
-  NodeSet(iterator S, iterator E, const SwingSchedulerDAG *DAG)
+  NodeSet(iterator S, iterator E, const SwingSchedulerDDG *DDG)
       : Nodes(S, E), HasRecurrence(true) {
     // Calculate the latency of this node set.
     // Example to demonstrate the calculation:
@@ -456,7 +545,6 @@ class NodeSet {
     //
     // Hold a map from each SUnit in the circle to the maximum distance from the
     // source node by only considering the nodes.
-    const SwingSchedulerDDG *DDG = DAG->getDDG();
     DenseMap<SUnit *, unsigned> SUnitToDistance;
     for (auto *Node : Nodes)
       SUnitToDistance[Node] = 0;
@@ -474,23 +562,6 @@ class NodeSet {
           DV = DU + Succ.getLatency();
       }
     }
-    // Handle a back-edge in loop carried dependencies
-    SUnit *FirstNode = Nodes[0];
-    SUnit *LastNode = Nodes[Nodes.size() - 1];
-
-    for (auto &PI : DDG->getInEdges(LastNode)) {
-      // If we have an order dep that is potentially loop carried then a
-      // back-edge exists between the last node and the first node that isn't
-      // modeled in the DAG. Handle it manually by adding 1 to the distance of
-      // the last node.
-      if (PI.getSrc() != FirstNode || !PI.isOrderDep() ||
-          !DAG->isLoopCarriedDep(PI))
-        continue;
-      unsigned &First = SUnitToDistance[FirstNode];
-      unsigned Last = SUnitToDistance[LastNode];
-      First = std::max(First, Last + 1);
-    }
-
     // The latency is the distance from the source node to itself.
     Latency = SUnitToDistance[Nodes.front()];
   }
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6cb0299a30d7a..f06a8ea7792bc 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -562,18 +562,32 @@ void SwingSchedulerDAG::setMAX_II() {
 void SwingSchedulerDAG::schedule() {
   AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
   buildSchedGraph(AA);
-  addLoopCarriedDependences(AA);
+  auto LCE = addLoopCarriedDependences(AA);
+  LCE.modifySUnits(SUnits, TII);
   updatePhiDependences();
   Topo.InitDAGTopologicalSorting();
   changeDependences();
   postProcessDAG();
-  DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU);
-  LLVM_DEBUG(dump());
+
+  LLVM_DEBUG({
+    dump();
+    dbgs() << "Loop Carried Edges:\n";
+    for (SUnit &SU : SUnits)
+      LCE.dump(&SU, TRI, &MRI);
+  });
+
+  DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU, LCE);
+
+  // Consider loop-carried edges when finding circuits.
+  DDG->applyLoopCarriedEdges();
 
   NodeSetType NodeSets;
   findCircuits(NodeSets);
   NodeSetType Circuits = NodeSets;
 
+  // Ignore loop-carried edges when determinating the node order.
+  DDG->removeLoopCarriedEdges();
+
   // Calculate the MII.
   unsigned ResMII = calculateResMII();
   unsigned RecMII = calculateRecMII(NodeSets);
@@ -651,6 +665,8 @@ void SwingSchedulerDAG::schedule() {
   // check for node order issues
   checkValidNodeOrder(Circuits);
 
+  // Take into account loop-carried edges when scheduling.
+  DDG->applyLoopCarriedEdges();
   SMSchedule Schedule(Pass.MF, this);
   Scheduled = schedulePipeline(Schedule);
 
@@ -801,126 +817,25 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
   return false;
 }
 
-/// Return true if the instruction causes a chain between memory
-/// references before and after it.
-static bool isDependenceBarrier(MachineInstr &MI) {
-  return MI.isCall() || MI.mayRaiseFPException() ||
-         MI.hasUnmodeledSideEffects() ||
-         (MI.hasOrderedMemoryRef() &&
-          (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad()));
-}
-
-/// Return the underlying objects for the memory references of an instruction.
+/// Collect the underlying objects for the memory references of an instruction.
 /// This function calls the code in ValueTracking, but first checks that the
 /// instruction has a memory operand.
-static void getUnderlyingObjects(const MachineInstr *MI,
-                                 SmallVectorImpl<const Value *> &Objs) {
+/// Returns false if we cannot find the underlying objects.
+bool getUnderlyingObjects(const MachineInstr *MI,
+                          SmallVectorImpl<const Value *> &Objs,
+                          const Value *&MMOValue, AAMDNodes &AATags) {
   if (!MI->hasOneMemOperand())
-    return;
+    return false;
   MachineMemOperand *MM = *MI->memoperands_begin();
   if (!MM->getValue())
-    return;
-  getUnderlyingObjects(MM->getValue(), Objs);
-  for (const Value *V : Objs) {
-    if (!isIdentifiedObject(V)) {
-      Objs.clear();
-      return;
-    }
-  }
-}
+    return false;
+  MMOValue = MM->getValue();
+  getUnderlyingObjects(MMOValue, Objs);
 
-/// Add a chain edge between a load and store if the store can be an
-/// alias of the load on a subsequent iteration, i.e., a loop carried
-/// dependence. This code is very similar to the code in ScheduleDAGInstrs
-/// but that code doesn't create loop carried dependences.
-void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
-  MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
-  Value *UnknownValue =
-    UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
-  for (auto &SU : SUnits) {
-    MachineInstr &MI = *SU.getInstr();
-    if (isDependenceBarrier(MI))
-      PendingLoads.clear();
-    else if (MI.mayLoad()) {
-      SmallVector<const Value *, 4> Objs;
-      ::getUnderlyingObjects(&MI, Objs);
-      if (Objs.empty())
-        Objs.push_back(UnknownValue);
-      for (const auto *V : Objs) {
-        SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
-        SUs.push_back(&SU);
-      }
-    } else if (MI.mayStore()) {
-      SmallVector<const Value *, 4> Objs;
-      ::getUnderlyingObjects(&MI, Objs);
-      if (Objs.empty())
-        Objs.push_back(UnknownValue);
-      for (const auto *V : Objs) {
-        MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
-            PendingLoads.find(V);
-        if (I == PendingLoads.end())
-          continue;
-        for (auto *Load : I->second) {
-          if (isSuccOrder(Load, &SU))
-            continue;
-          MachineInstr &LdMI = *Load->getInstr();
-          // First, perform the cheaper check that compares the base register.
-          // If they are the same and the load offset is less than the store
-          // offset, then mark the dependence as loop carried potentially.
-          const MachineOperand *BaseOp1, *BaseOp2;
-          int64_t Offset1, Offset2;
-          bool Offset1IsScalable, Offset2IsScalable;
-          if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1,
-                                           Offset1IsScalable, TRI) &&
-              TII->getMemOperandWithOffset(MI, BaseOp2, Offset2,
-                                           Offset2IsScalable, TRI)) {
-            if (BaseOp1->isIdenticalTo(*BaseOp2) &&
-                Offset1IsScalable == Offset2IsScalable &&
-                (int)Offset1 < (int)Offset2) {
-              assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
-                     "What happened to the chain edge?");
-              SDep Dep(Load, SDep::Barrier);
-              Dep.setLatency(1);
-              SU.addPred(Dep);
-              continue;
-            }
-          }
-          // Second, the more expensive check that uses alias analysis on the
-          // base registers. If they alias, and the load offset is less than
-          // the store offset, the mark the dependence as loop carried.
-          if (!AA) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;
-          }
-          MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
-          MachineMemOperand *MMO2 = *MI.memoperands_begin();
-          if (!MMO1->getValue() || !MMO2->getValue()) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;
-          }
-          if (MMO1->getValue() == MMO2->getValue() &&
-              MMO1->getOffset() <= MMO2->getOffset()) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-            continue;
-          }
-          if (!AA->isNoAlias(
-                  MemoryLocation::getAfter(MMO1->getValue(), MMO1->getAAInfo()),
-                  MemoryLocation::getAfter(MMO2->getValue(),
-                                           MMO2->getAAInfo()))) {
-            SDep Dep(Load, SDep::Barrier);
-            Dep.setLatency(1);
-            SU.addPred(Dep);
-          }
-        }
-      }
-    }
-  }
+  // TODO: A no alias scope may be valid only in a single iteration. In this
+  // case we need to peel off it like LoopAccessAnalysis does.
+  AATags = MM->getAAInfo();
+  return true;
 }
 
 /// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
@@ -1545,8 +1460,390 @@ class HighRegisterPressureDetector {
   }
 };
 
+struct SUnitWithMemInfo {
+  SUnit *SU;
+  SmallVector<const Value *, 2> UnderlyingObjs;
+  const Value *MemOpValue = nullptr;
+  AAMDNodes AATags;
+  bool IsAllIdentified = false;
+
+  SUnitWithMemInfo(SUnit *SU);
+
+  bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const;
+
+  bool isUnknown() const { return MemOpValue == nullptr; }
+};
+
+/// Add loop-carried chain dependencies. This class handles the same type of
+/// dependencies added by `ScheduleDAGInstrs::buildSchedGraph`, but takes into
+/// account dependencies across iterations.
+class LoopCarriedOrderDepsTracker {
+  // Type of instruction that is relevant to order-dependencies
+  enum class InstrTag {
+    Barrier = 0,      ///< A barrier event instruction.
+    LoadOrStore = 1,  ///< An instruction that may load or store memory, but is
+                      ///< not a barrier event.
+    FPExceptions = 2, ///< An instruction that does not match above, but may
+                      ///< raise floatin-point exceptions.
+  };
+
+  struct TaggedSUnit : PointerIntPair<SUnit *, 2> {
+    TaggedSUnit(SUnit *SU, InstrTag Tag)
+        : PointerIntPair<SUnit *, 2>(SU, unsigned(Tag)) {}
+
+    InstrTag getTag() const { return InstrTag(getInt()); }
+  };
+
+  /// Holds loads and stores with memory related information.
+  struct LoadStoreChunk {
+    SmallVector<SUnitWithMemInfo, 4> Loads;
+    SmallVector<SUnitWithMemInfo, 4> Stores;
+
+    void append(SUnit *SU);
+  };
+
+  SwingSchedulerDAG *DAG;
+  std::unique_ptr<BatchAAResults> BAA;
+  std::vector<SUnit> &SUnits;
+
+  /// The size of SUnits, for convenience.
+  const unsigned N;
+
+  /// Adjacency matrix consisiting of order dependencies of the original DAG.
+  std::vector<BitVector> AdjMatrix;
+
+  /// Loop-carried Edges.
+  std::vector<BitVector> LoopCarried;
+
+  /// Instructions related to chain dependencies. They are one of the
+  /// following:
+  ///
+  ///  1. Barrier event.
+  ///  2. Load, but neither a barrier event, invariant load, nor may load trap
+  ///     value.
+  ///  3. Store, but not a barrier event.
+  ///  4. None of them, but may raise floating-point exceptions.
+  ///
+  /// This is used when analyzing loop-carried dependencies that access global
+  /// barrier instructions.
+  std::vector<TaggedSUnit> TaggedSUnits;
+
+  const TargetInstrInfo *TII = nullptr;
+
+public:
+  LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, AAResults *AA,
+                              const TargetInstrInfo *TII);
+
+  /// The main function to compute loop-carried order-dependencies.
+  void computeDependencies();
+
+  const BitVector &getLoopCarried(unsigned Idx) const {
+    return LoopCarried[Idx];
+  }
+
+private:
+  /// Calculate reachability induced by the original DAG.
+  void initAdjMatrix();
+
+  /// Tags to \p SU if the instruction may affect the order-dependencies.
+  ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/135148


More information about the llvm-commits mailing list