[llvm] [MachinePipeliner] Add validation for missed dependencies (PR #135148)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 21 05:25:10 PDT 2025


================
@@ -1545,8 +1460,390 @@ class HighRegisterPressureDetector {
   }
 };
 
+/// An SUnit bundled with the memory-access information collected for its
+/// instruction by the constructor.
+struct SUnitWithMemInfo {
+  /// The scheduling unit this record describes.
+  SUnit *SU;
+
+  /// Underlying objects of the memory operand, filled in by the constructor's
+  /// call to getUnderlyingObjects().
+  SmallVector<const Value *, 2> UnderlyingObjs;
+
+  /// Value of the memory operand. Null until getUnderlyingObjects() supplies
+  /// one; a null value is what isUnknown() reports.
+  const Value *MemOpValue = nullptr;
+
+  /// Alias-analysis metadata, filled in together with MemOpValue.
+  AAMDNodes AATags;
+
+  /// Intended to be true only when every entry of UnderlyingObjs is an
+  /// identified object. Consulted by isTriviallyDisjoint().
+  bool IsAllIdentified = false;
+
+  SUnitWithMemInfo(SUnit *SU);
+
+  /// Returns true only if both this and \p Other are flagged IsAllIdentified
+  /// and no underlying object is shared between them.
+  bool isTriviallyDisjoint(const SUnitWithMemInfo &Other) const;
+
+  /// Returns true when no memory operand value is known for this SUnit.
+  bool isUnknown() const { return !MemOpValue; }
+};
+
+/// Add loop-carried chain dependencies. This class handles the same type of
+/// dependencies added by `ScheduleDAGInstrs::buildSchedGraph`, but takes into
+/// account dependencies across iterations.
+class LoopCarriedOrderDepsTracker {
+  // Type of instruction that is relevant to order-dependencies
+  enum class InstrTag {
+    Barrier = 0,      ///< A barrier event instruction.
+    LoadOrStore = 1,  ///< An instruction that may load or store memory, but is
+                      ///< not a barrier event.
+    FPExceptions = 2, ///< An instruction that does not match above, but may
+                      ///< raise floating-point exceptions.
+  };
+
+  struct TaggedSUnit : PointerIntPair<SUnit *, 2> { // SUnit pointer with its InstrTag kept in the pair's 2-bit integer.
+    TaggedSUnit(SUnit *SU, InstrTag Tag)
+        : PointerIntPair<SUnit *, 2>(SU, unsigned(Tag)) {}
+
+    InstrTag getTag() const { return InstrTag(getInt()); }
+  };
+
+  /// Holds loads and stores with memory related information.
+  struct LoadStoreChunk {
+    SmallVector<SUnitWithMemInfo, 4> Loads;
+    SmallVector<SUnitWithMemInfo, 4> Stores;
+
+    void append(SUnit *SU); // Files SU under Stores if it may store, else under Loads; ignores SUs that do neither.
+  };
+
+  SwingSchedulerDAG *DAG;
+  std::unique_ptr<BatchAAResults> BAA; // Stays null when the constructor receives no AAResults.
+  std::vector<SUnit> &SUnits; // Bound to DAG->SUnits by the constructor.
+
+  /// The size of SUnits, for convenience.
+  const unsigned N; // Keep declared before AdjMatrix and LoopCarried: their initializers use it.
+
+  /// Adjacency matrix consisting of order dependencies of the original DAG.
+  std::vector<BitVector> AdjMatrix;
+
+  /// Loop-carried Edges.
+  std::vector<BitVector> LoopCarried; // Row Idx is what getLoopCarried(Idx) returns.
+
+  /// Instructions related to chain dependencies. They are one of the
+  /// following:
+  ///
+  ///  1. Barrier event.
+  ///  2. Load, but neither a barrier event, invariant load, nor may load trap
+  ///     value.
+  ///  3. Store, but not a barrier event.
+  ///  4. None of them, but may raise floating-point exceptions.
+  ///
+  /// This is used when analyzing loop-carried dependencies that access global
+  /// barrier instructions.
+  std::vector<TaggedSUnit> TaggedSUnits;
+
+  const TargetInstrInfo *TII = nullptr; // Queried by checkInstrType() via isGlobalMemoryObject().
+
+public:
+  LoopCarriedOrderDepsTracker(SwingSchedulerDAG *SSD, AAResults *AA,
+                              const TargetInstrInfo *TII);
+
+  /// The main function to compute loop-carried order-dependencies.
+  void computeDependencies();
+
+  const BitVector &getLoopCarried(unsigned Idx) const {
+    return LoopCarried[Idx];
+  }
+
+private:
+  /// Fill AdjMatrix with the direct memory/barrier order edges of the original DAG.
+  void initAdjMatrix();
+
+  /// Tags \p SU if its instruction may affect the order-dependencies; returns std::nullopt otherwise.
+  std::optional<TaggedSUnit> checkInstrType(SUnit *SU) const;
+
+  /// Returns true if there is a loop-carried dependence between \p Src and \p
+  /// Dst.
+  bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src,
+                            const SUnitWithMemInfo &Dst) const;
+
+  /// Add a loop-carried dependency between \p From and \p To if it exists.
+  void addDependencesBetweenSUs(const SUnitWithMemInfo &From,
+                                const SUnitWithMemInfo &To);
+
+  /// Add loop-carried dependencies between nodes in \p From and \p To.
+  void addDependencesBetweenChunks(const LoadStoreChunk &From,
+                                   const LoadStoreChunk &To);
+
+  void computeDependenciesAux(); // Called by computeDependencies() once TaggedSUnits has been populated.
+};
+
 } // end anonymous namespace
 
+/// Gathers the memory information of \p SU's instruction.
+///
+/// When getUnderlyingObjects() fails, the record keeps its defaults and
+/// IsAllIdentified remains false. Otherwise IsAllIdentified becomes true
+/// exactly when the object list is non-empty and every entry satisfies
+/// isIdentifiedObject().
+SUnitWithMemInfo::SUnitWithMemInfo(SUnit *SU) : SU(SU) {
+  if (!getUnderlyingObjects(SU->getInstr(), UnderlyingObjs, MemOpValue, AATags))
+    return;
+
+  // Start from "all identified" and clear the flag at the first object that
+  // is not. The member defaults to false, so without this assignment it could
+  // never become true and isTriviallyDisjoint() would always answer false.
+  // An empty object list proves nothing, so stay conservative in that case.
+  IsAllIdentified = !UnderlyingObjs.empty();
+  for (const Value *Obj : UnderlyingObjs)
+    if (!isIdentifiedObject(Obj)) {
+      IsAllIdentified = false;
+      break;
+    }
+}
+
+/// Returns true when the two accesses can be told apart from their underlying
+/// objects alone: both sides must be flagged IsAllIdentified and they must not
+/// have any underlying object in common. Any other situation yields false.
+bool SUnitWithMemInfo::isTriviallyDisjoint(
+    const SUnitWithMemInfo &Other) const {
+  const bool BothFullyIdentified = IsAllIdentified && Other.IsAllIdentified;
+  if (!BothFullyIdentified)
+    return false;
+
+  // A single shared object is enough to rule out disjointness.
+  for (const Value *Mine : UnderlyingObjs) {
+    const bool SharedWithOther = llvm::is_contained(Other.UnderlyingObjs, Mine);
+    if (SharedWithOther)
+      return false;
+  }
+
+  return true;
+}
+
+/// Records \p SU in this chunk. Instructions that may store go to Stores (even
+/// if they may also load); the remaining memory instructions go to Loads.
+/// Instructions that neither load nor store are ignored.
+void LoopCarriedOrderDepsTracker::LoadStoreChunk::append(SUnit *SU) {
+  const MachineInstr *Instr = SU->getInstr();
+  if (!Instr->mayLoadOrStore())
+    return;
+
+  if (Instr->mayStore())
+    Stores.emplace_back(SU);
+  else
+    Loads.emplace_back(SU);
+}
+
+/// Binds the tracker to \p SSD and its SUnits, sizes both N x N bit matrices,
+/// and records the original DAG's order edges in AdjMatrix.
+///
+/// \p AA may be null; in that case BAA stays null. Otherwise a BatchAAResults
+/// is created from it and switched to cross-iteration mode.
+LoopCarriedOrderDepsTracker::LoopCarriedOrderDepsTracker(
+    SwingSchedulerDAG *SSD, AAResults *AA, const TargetInstrInfo *TII)
+    : DAG(SSD), SUnits(DAG->SUnits), N(SUnits.size()),
+      AdjMatrix(N, BitVector(N)), LoopCarried(N, BitVector(N)), TII(TII) {
+  // BAA is default-constructed (null) and only populated when AA is given.
+  if (AA != nullptr) {
+    BAA = std::make_unique<BatchAAResults>(*AA);
+    BAA->enableCrossIterationMode();
+  }
+
+  initAdjMatrix();
+}
+
+/// Entry point of the analysis: collects the relevant instructions, lets
+/// computeDependenciesAux() do its work, then cleans up every row of
+/// LoopCarried.
+void LoopCarriedOrderDepsTracker::computeDependencies() {
+  // Keep only the instructions we are targeting; checkInstrType() yields no
+  // value for instructions without loop-carried order-dependencies.
+  for (SUnit &Unit : SUnits)
+    if (std::optional<TaggedSUnit> MaybeTagged = checkInstrType(&Unit))
+      TaggedSUnits.push_back(*MaybeTagged);
+
+  computeDependenciesAux();
+
+  // Finalize the results row by row.
+  for (unsigned Row = 0; Row < N; ++Row) {
+    BitVector &Edges = LoopCarried[Row];
+
+    // A dependence that already exists in the original DAG is a stricter
+    // constraint than the loop-carried one, so the latter is redundant.
+    Edges.reset(AdjMatrix[Row]);
+
+    // Self-loops are noisy.
+    Edges.reset(Row);
+  }
+}
+
+/// Fills AdjMatrix with the direct order edges of the original DAG: bit J of
+/// row I is set when SUnits[I] has a successor edge for which
+/// isNormalMemoryOrBarrier() holds and whose target has NodeNum J. Edges into
+/// boundary nodes are skipped.
+void LoopCarriedOrderDepsTracker::initAdjMatrix() {
+  // Walk the SUnits from the last one back to the first.
+  for (unsigned Back = 1; Back <= N; ++Back) {
+    const int Idx = SUnits.size() - Back;
+    for (const auto &Edge : SUnits[Idx].Succs) {
+      if (!Edge.isNormalMemoryOrBarrier())
+        continue;
+
+      SUnit *Target = Edge.getSUnit();
+      if (Target->isBoundaryNode())
+        continue;
+
+      AdjMatrix[Idx].set(Target->NodeNum);
+    }
+  }
+}
+
+/// Classifies the instruction of \p SU. The checks run in priority order:
+/// barrier first, then memory access, then floating-point exceptions. Returns
+/// std::nullopt when none of them applies.
+std::optional<LoopCarriedOrderDepsTracker::TaggedSUnit>
+LoopCarriedOrderDepsTracker::checkInstrType(SUnit *SU) const {
+  MachineInstr *Instr = SU->getInstr();
+
+  // A global memory object is tagged as a barrier regardless of anything else.
+  if (TII->isGlobalMemoryObject(Instr))
+    return TaggedSUnit(SU, InstrTag::Barrier);
+
+  // Every instruction that may store is a memory access.
+  if (Instr->mayStore())
+    return TaggedSUnit(SU, InstrTag::LoadOrStore);
+
+  // A load is one too, unless it is a dereferenceable invariant load.
+  if (Instr->mayLoad() && !Instr->isDereferenceableInvariantLoad())
+    return TaggedSUnit(SU, InstrTag::LoadOrStore);
+
+  if (Instr->mayRaiseFPException())
+    return TaggedSUnit(SU, InstrTag::FPExceptions);
+
+  return std::nullopt;
+}
+
+bool LoopCarriedOrderDepsTracker::hasLoopCarriedMemDep(
+    const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst) const {
+  if (!SwpPruneLoopCarried)
+    return true;
+
+  if (Src.isTriviallyDisjoint(Dst))
+    return false;
+
+  // First, check the dependence by comparing base register, offset, and
+  // step value of the loop.
+  switch (DAG->mayOverlapInLaterIter(Src.SU->getInstr(), Dst.SU->getInstr())) {
+  case AliasResult::Kind::MustAlias:
+    return true;
+  case AliasResult::Kind::NoAlias:
+    return false;
+  case AliasResult::Kind::MayAlias:
+    break;
+  default:
+    llvm_unreachable("Unexpected alias");
+  }
+
+  // If we cannot determine the dependence by previouse check, then
+  // check by using alias analysis.
+
+  if (!BAA || Src.isUnknown() || Dst.isUnknown())
+    return true;
+
+  // Query AliasAnalysis by using the value of the memory operand.
+  if (Src.MemOpValue && Dst.MemOpValue) {
+    const auto SrcLoc =
+        MemoryLocation::getBeforeOrAfter(Src.MemOpValue, Src.AATags);
+    const auto DstLoc =
+        MemoryLocation::getBeforeOrAfter(Dst.MemOpValue, Dst.AATags);
+    if (BAA->isNoAlias(SrcLoc, DstLoc))
+      return false;
+  }
+
+  // Try all combinations of the underlying objects.
+  for (const Value *SrcObj : Src.UnderlyingObjs)
+    for (const Value *DstObj : Dst.UnderlyingObjs) {
+      const auto SrcLoc = MemoryLocation::getBeforeOrAfter(SrcObj, Src.AATags);
+      const auto DstLoc = MemoryLocation::getBeforeOrAfter(DstObj, Dst.AATags);
+      if (!BAA->isNoAlias(SrcLoc, DstLoc))
+        return true;
+    }
----------------
kasuga-fj wrote:

I am not too sure if I am using the alias analysis correctly.

https://github.com/llvm/llvm-project/pull/135148


More information about the llvm-commits mailing list