[llvm] [MachinePipeliner] Add validation for missed loop-carried memory deps (PR #145878)

Thu Jul 10 03:23:49 PDT 2025

https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/145878

>From fa87f4d35423aff6598b3bef63dfeb161630260b Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 25 Jun 2025 13:13:13 +0000
Subject: [PATCH 1/2] [MachinePipeliner] Add missed loop-carried memory deps
 for validation

---
 llvm/include/llvm/CodeGen/MachinePipeliner.h  |  38 ++--
 llvm/lib/CodeGen/MachinePipeliner.cpp         | 194 ++++++++++++++----
 .../Hexagon/swp-loop-carried-order-dep1.mir   |  14 +-
 .../Hexagon/swp-loop-carried-order-dep2.mir   |   8 +-
 .../Hexagon/swp-loop-carried-order-dep3.mir   |   8 +-
 .../Hexagon/swp-loop-carried-order-dep4.mir   |   6 +-
 .../Hexagon/swp-loop-carried-order-dep5.mir   |  22 +-
 7 files changed, 204 insertions(+), 86 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index e4e794c434adb..3a3b1fc353146 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -120,14 +120,17 @@ class SwingSchedulerDDGEdge {
   SUnit *Dst = nullptr;
   SDep Pred;
   unsigned Distance = 0;
+  bool IsValidationOnly = false;
 
 public:
   /// Creates an edge corresponding to an edge represented by \p PredOrSucc and
   /// \p Dep in the original DAG. This pair has no information about the
   /// direction of the edge, so we need to pass an additional argument \p
   /// IsSucc.
-  SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc)
-      : Dst(PredOrSucc), Pred(Dep), Distance(0u) {
+  SwingSchedulerDDGEdge(SUnit *PredOrSucc, const SDep &Dep, bool IsSucc,
+                        bool IsValidationOnly)
+      : Dst(PredOrSucc), Pred(Dep), Distance(0u),
+        IsValidationOnly(IsValidationOnly) {
     SUnit *Src = Dep.getSUnit();
 
     if (IsSucc) {
@@ -188,6 +191,10 @@ class SwingSchedulerDDGEdge {
   /// functions. We ignore the back-edge recurrence in order to avoid unbounded
   /// recursion in the calculation of the ASAP, ALAP, etc functions.
   bool ignoreDependence(bool IgnoreAnti) const;
+
+  /// Returns true if this edge is intended to be used only for validating the
+  /// schedule.
+  bool isValidationOnly() const { return IsValidationOnly; }
 };
 
 /// Represents loop-carried dependencies. Because SwingSchedulerDAG doesn't
@@ -208,25 +215,21 @@ struct LoopCarriedEdges {
     return &Ite->second;
   }
 
-  /// Retruns true if the edge from \p From to \p To is a back-edge that should
-  /// be used when scheduling.
-  bool shouldUseWhenScheduling(const SUnit *From, const SUnit *To) const;
-
   /// Adds some edges to the original DAG that correspond to loop-carried
   /// dependencies. Historically, loop-carried edges are represented by using
   /// non-loop-carried edges in the original DAG. This function appends such
   /// edges to preserve the previous behavior.
-  void modifySUnits(std::vector<SUnit> &SUnits);
+  void modifySUnits(std::vector<SUnit> &SUnits, const TargetInstrInfo *TII);
 
   void dump(SUnit *SU, const TargetRegisterInfo *TRI,
             const MachineRegisterInfo *MRI) const;
 };
 
-/// Represents dependencies between instructions. This class is a wrapper of
-/// `SUnits` and its dependencies to manipulate back-edges in a natural way.
-/// Currently it only supports back-edges via PHI, which are expressed as
-/// anti-dependencies in the original DAG.
-/// FIXME: Support any other loop-carried dependencies
+/// This class provides APIs to retrieve edges from/to an SUnit node, with a
+/// particular focus on loop-carried dependencies. Since SUnit is not designed
+/// to represent such edges, handling them directly using its APIs has required
+/// non-trivial logic in the past. This class serves as a wrapper around SUnit,
+/// offering a simpler interface for managing these dependencies.
 class SwingSchedulerDDG {
   using EdgesType = SmallVector<SwingSchedulerDDGEdge, 4>;
 
@@ -244,17 +247,26 @@ class SwingSchedulerDDG {
   SwingSchedulerDDGEdges EntrySUEdges;
   SwingSchedulerDDGEdges ExitSUEdges;
 
+  /// Edges that are used only when validating the schedule. These edges are
+  /// not considered to drive the optimization heuristics.
+  SmallVector<SwingSchedulerDDGEdge, 8> ValidationOnlyEdges;
+
+  /// Adds a NON-validation-only edge to the DDG. Assumes to be called only by
+  /// the cotr.
   void addEdge(const SUnit *SU, const SwingSchedulerDDGEdge &Edge);
 
   SwingSchedulerDDGEdges &getEdges(const SUnit *SU);
   const SwingSchedulerDDGEdges &getEdges(const SUnit *SU) const;
 
 public:
-  SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU);
+  SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU, SUnit *ExitSU,
+                    const LoopCarriedEdges &LCE);
 
   const EdgesType &getInEdges(const SUnit *SU) const;
 
   const EdgesType &getOutEdges(const SUnit *SU) const;
+
+  bool isValidSchedule(const SMSchedule &Schedule) const;
 };
 
 /// This class builds the dependence graph for the instructions in a loop,
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index d2c79f64afe64..c224654c062c5 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -338,6 +338,17 @@ class LoopCarriedOrderDepsTracker {
   void addLoopCarriedDepenenciesForChunks(const LoadStoreChunk &From,
                                           const LoadStoreChunk &To);
 
+  /// Add a loop-carried order dependency between \p Src and \p Dst if we
+  /// cannot prove tye are independent. When \p PerformCheapCheck is true, a
+  /// lightweight dependency test (referred to as "cheap check" below) is
+  /// performed at first. Note that the cheap check is retained to maintain the
+  /// existing behavior and not expected to be used anymore.
+  ///
+  /// TODO: Remove \p PerformCheapCheck and the corresponding cheap check.
+  void addDependenciesBetweenSUs(const SUnitWithMemInfo &Src,
+                                 const SUnitWithMemInfo &Dst,
+                                 bool PerformCheapCheck = false);
+
   void computeDependenciesAux();
 };
 
@@ -673,7 +684,7 @@ void SwingSchedulerDAG::schedule() {
   Topo.InitDAGTopologicalSorting();
   changeDependences();
   postProcessDAG();
-  DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU);
+  DDG = std::make_unique<SwingSchedulerDDG>(SUnits, &EntrySU, &ExitSU, LCE);
   LLVM_DEBUG({
     dump();
     dbgs() << "===== Loop Carried Edges Begin =====\n";
@@ -958,11 +969,11 @@ bool SUnitWithMemInfo::getUnderlyingObjects() {
 
 /// Returns true if there is a loop-carried order dependency from \p Src to \p
 /// Dst.
-static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src,
-                                 const SUnitWithMemInfo &Dst,
-                                 BatchAAResults &BAA,
-                                 const TargetInstrInfo *TII,
-                                 const TargetRegisterInfo *TRI) {
+static bool
+hasLoopCarriedMemDep(const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
+                     BatchAAResults &BAA, const TargetInstrInfo *TII,
+                     const TargetRegisterInfo *TRI,
+                     const SwingSchedulerDAG *SSD, bool PerformCheapCheck) {
   if (Src.isTriviallyDisjoint(Dst))
     return false;
   if (isSuccOrder(Src.SU, Dst.SU))
@@ -970,24 +981,32 @@ static bool hasLoopCarriedMemDep(const SUnitWithMemInfo &Src,
 
   MachineInstr &SrcMI = *Src.SU->getInstr();
   MachineInstr &DstMI = *Dst.SU->getInstr();
-  // First, perform the cheaper check that compares the base register.
-  // If they are the same and the load offset is less than the store
-  // offset, then mark the dependence as loop carried potentially.
-  const MachineOperand *BaseOp1, *BaseOp2;
-  int64_t Offset1, Offset2;
-  bool Offset1IsScalable, Offset2IsScalable;
-  if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
-                                   TRI) &&
-      TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
-                                   TRI)) {
-    if (BaseOp1->isIdenticalTo(*BaseOp2) &&
-        Offset1IsScalable == Offset2IsScalable && (int)Offset1 < (int)Offset2) {
-      assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
-             "What happened to the chain edge?");
-      return true;
+  if (PerformCheapCheck) {
+    // First, perform the cheaper check that compares the base register.
+    // If they are the same and the load offset is less than the store
+    // offset, then mark the dependence as loop carried potentially.
+    //
+    // TODO: This check will be removed.
+    const MachineOperand *BaseOp1, *BaseOp2;
+    int64_t Offset1, Offset2;
+    bool Offset1IsScalable, Offset2IsScalable;
+    if (TII->getMemOperandWithOffset(SrcMI, BaseOp1, Offset1, Offset1IsScalable,
+                                     TRI) &&
+        TII->getMemOperandWithOffset(DstMI, BaseOp2, Offset2, Offset2IsScalable,
+                                     TRI)) {
+      if (BaseOp1->isIdenticalTo(*BaseOp2) &&
+          Offset1IsScalable == Offset2IsScalable &&
+          (int)Offset1 < (int)Offset2) {
+        assert(TII->areMemAccessesTriviallyDisjoint(SrcMI, DstMI) &&
+               "What happened to the chain edge?");
+        return true;
+      }
     }
   }
 
+  if (!SSD->mayOverlapInLaterIter(&SrcMI, &DstMI))
+    return false;
+
   // Second, the more expensive check that uses alias analysis on the
   // base registers. If they alias, and the load offset is less than
   // the store offset, the mark the dependence as loop carried.
@@ -1056,20 +1075,34 @@ LoopCarriedOrderDepsTracker::getInstrTag(SUnit *SU) const {
   return std::nullopt;
 }
 
+void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
+    const SUnitWithMemInfo &Src, const SUnitWithMemInfo &Dst,
+    bool PerformCheapCheck) {
+  // Avoid self-dependencies.
+  if (Src.SU == Dst.SU)
+    return;
+
+  if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG, PerformCheapCheck))
+    LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
+}
+
 void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
     const LoadStoreChunk &From, const LoadStoreChunk &To) {
-  // Add dependencies for load-to-store (WAR) from top to bottom.
+  // Add load-to-store dependencies (WAR).
   for (const SUnitWithMemInfo &Src : From.Loads)
     for (const SUnitWithMemInfo &Dst : To.Stores)
-      if (Src.SU->NodeNum < Dst.SU->NodeNum &&
-          hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI))
-        LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
+      // Perform a cheap check first if this is a forward dependency.
+      addDependenciesBetweenSUs(Src, Dst, Src.SU->NodeNum < Dst.SU->NodeNum);
 
-  // TODO: The following dependencies are missed.
-  //
-  // - Dependencies for load-to-store from bottom to top.
-  // - Dependencies for store-to-load (RAW).
-  // - Dependencies for store-to-store (WAW).
+  // Add store-to-load dependencies (RAW).
+  for (const SUnitWithMemInfo &Src : From.Stores)
+    for (const SUnitWithMemInfo &Dst : To.Loads)
+      addDependenciesBetweenSUs(Src, Dst);
+
+  // Add store-to-store dependencies (WAW).
+  for (const SUnitWithMemInfo &Src : From.Stores)
+    for (const SUnitWithMemInfo &Dst : To.Stores)
+      addDependenciesBetweenSUs(Src, Dst);
 }
 
 void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
@@ -1116,7 +1149,7 @@ LoopCarriedEdges SwingSchedulerDAG::addLoopCarriedDependences() {
     for (const int Succ : LCODTracker.getLoopCarried(I).set_bits())
       LCE.OrderDeps[&SUnits[I]].insert(&SUnits[Succ]);
 
-  LCE.modifySUnits(SUnits);
+  LCE.modifySUnits(SUnits, TII);
   return LCE;
 }
 
@@ -2676,6 +2709,11 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
       });
     } while (++NI != NE && scheduleFound);
 
+    // If a schedule is found, validate it against the validation-only
+    // dependencies.
+    if (scheduleFound)
+      scheduleFound = DDG->isValidSchedule(Schedule);
+
     // If a schedule is found, ensure non-pipelined instructions are in stage 0
     if (scheduleFound)
       scheduleFound =
@@ -4118,6 +4156,8 @@ SwingSchedulerDDG::getEdges(const SUnit *SU) const {
 
 void SwingSchedulerDDG::addEdge(const SUnit *SU,
                                 const SwingSchedulerDDGEdge &Edge) {
+  assert(!Edge.isValidationOnly() &&
+         "Validation-only edges are not expected here.");
   auto &Edges = getEdges(SU);
   if (Edge.getSrc() == SU)
     Edges.Succs.push_back(Edge);
@@ -4127,25 +4167,43 @@ void SwingSchedulerDDG::addEdge(const SUnit *SU,
 
 void SwingSchedulerDDG::initEdges(SUnit *SU) {
   for (const auto &PI : SU->Preds) {
-    SwingSchedulerDDGEdge Edge(SU, PI, false);
+    SwingSchedulerDDGEdge Edge(SU, PI, /*IsSucc=*/false,
+                               /*IsValidationOnly=*/false);
     addEdge(SU, Edge);
   }
 
   for (const auto &SI : SU->Succs) {
-    SwingSchedulerDDGEdge Edge(SU, SI, true);
+    SwingSchedulerDDGEdge Edge(SU, SI, /*IsSucc=*/true,
+                               /*IsValidationOnly=*/false);
     addEdge(SU, Edge);
   }
 }
 
 SwingSchedulerDDG::SwingSchedulerDDG(std::vector<SUnit> &SUnits, SUnit *EntrySU,
-                                     SUnit *ExitSU)
+                                     SUnit *ExitSU, const LoopCarriedEdges &LCE)
     : EntrySU(EntrySU), ExitSU(ExitSU) {
   EdgesVec.resize(SUnits.size());
 
+  // Add non-loop-carried edges based on the DAG.
   initEdges(EntrySU);
   initEdges(ExitSU);
   for (auto &SU : SUnits)
     initEdges(&SU);
+
+  // Add loop-carried edges, which are not represented in the DAG.
+  for (SUnit &SU : SUnits) {
+    SUnit *Src = &SU;
+    if (const LoopCarriedEdges::OrderDep *OD = LCE.getOrderDepOrNull(Src)) {
+      SDep Base(Src, SDep::Barrier);
+      Base.setLatency(1);
+      for (SUnit *Dst : *OD) {
+        SwingSchedulerDDGEdge Edge(Dst, Base, /*IsSucc=*/false,
+                                   /*IsValidationOnly=*/true);
+        Edge.setDistance(1);
+        ValidationOnlyEdges.push_back(Edge);
+      }
+    }
+  }
 }
 
 const SwingSchedulerDDG::EdgesType &
@@ -4158,17 +4216,73 @@ SwingSchedulerDDG::getOutEdges(const SUnit *SU) const {
   return getEdges(SU).Succs;
 }
 
-void LoopCarriedEdges::modifySUnits(std::vector<SUnit> &SUnits) {
-  // Currently this function simply adds all dependencies represented by this
-  // object. After we properly handle missed dependencies, the logic here will
-  // be more complex, as currently missed edges should not be added to the DAG.
+/// Check if \p Schedule doesn't violate the validation-only dependencies.
+bool SwingSchedulerDDG::isValidSchedule(const SMSchedule &Schedule) const {
+  unsigned II = Schedule.getInitiationInterval();
+
+  auto ExpandCycle = [&](SUnit *SU) {
+    int Stage = Schedule.stageScheduled(SU);
+    int Cycle = Schedule.cycleScheduled(SU);
+    return Cycle + (Stage * II);
+  };
+
+  for (const SwingSchedulerDDGEdge &Edge : ValidationOnlyEdges) {
+    SUnit *Src = Edge.getSrc();
+    SUnit *Dst = Edge.getDst();
+    if (!Src->isInstr() || !Dst->isInstr())
+      continue;
+    int CycleSrc = ExpandCycle(Src);
+    int CycleDst = ExpandCycle(Dst);
+    int MaxLateStart = CycleDst + Edge.getDistance() * II - Edge.getLatency();
+    if (CycleSrc > MaxLateStart) {
+      LLVM_DEBUG({
+        dbgs() << "Validation failed for edge from " << Src->NodeNum << " to "
+               << Dst->NodeNum << "\n";
+      });
+      return false;
+    }
+  }
+  return true;
+}
+
+void LoopCarriedEdges::modifySUnits(std::vector<SUnit> &SUnits,
+                                    const TargetInstrInfo *TII) {
   for (SUnit &SU : SUnits) {
     SUnit *Src = &SU;
     if (auto *OrderDep = getOrderDepOrNull(Src)) {
       SDep Dep(Src, SDep::Barrier);
       Dep.setLatency(1);
-      for (SUnit *Dst : *OrderDep)
-        Dst->addPred(Dep);
+      for (SUnit *Dst : *OrderDep) {
+        SUnit *From = Src;
+        SUnit *To = Dst;
+        if (From->NodeNum > To->NodeNum)
+          std::swap(From, To);
+
+        // Add a forward edge if the following conditions are met:
+        //
+        // - The instruction of the source node (FromMI) may read memory.
+        // - The instruction of the target node (ToMI) may modify memory, but
+        //   does not read it.
+        // - Neither instruction is a global barrier.
+        // - The load appears before the store in the original basic block.
+        // - There are no barrier or store instructions between the two nodes.
+        // - The target node is unreachable from the source node in the current
+        //   DAG.
+        //
+        // TODO: These conditions are inherited from a previous implementation,
+        // and some may no longer be necessary. For now, we conservatively
+        // retain all of them to avoid regressions, but the logic could
+        // potentially be simplified
+        MachineInstr *FromMI = From->getInstr();
+        MachineInstr *ToMI = To->getInstr();
+        if (FromMI->mayLoad() && !ToMI->mayLoad() && ToMI->mayStore() &&
+            !TII->isGlobalMemoryObject(FromMI) &&
+            !TII->isGlobalMemoryObject(ToMI) && !isSuccOrder(From, To)) {
+          SDep Pred = Dep;
+          Pred.setSUnit(Src);
+          Dst->addPred(Pred);
+        }
+      }
     }
   }
 }
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir
index 17ee07f49324a..7182e0a112560 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep1.mir
@@ -14,16 +14,14 @@
 # ```
 # 
 # Loop-carried dependencies exist from store for a[i+1] to load/store for a[i], but not vice versa.
-# FIXME: Currently the following dependencies are missed.
-#
-# Loop carried edges from SU(6)
-#   Order
-#     SU(4)
-# Loop carried edges from SU(8)
-#   Order
-#     SU(4)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
+# CHECK-NEXT:   Loop carried edges from SU(6)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(4)
+# CHECK-NEXT:   Loop carried edges from SU(8)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(4)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir
index 850e602c9146f..56485e04ad35c 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep2.mir
@@ -14,16 +14,14 @@
 # ```
 # 
 # Loop-carried dependencies exist from load/store for a[i] to store for a[i-1], but not vice versa.
-# FIXME: Currently the following dependencies are missed.
-#
-#  Loop carried edges from SU(5)
-#    Order
-#      SU(7)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
 # CHECK-NEXT:   Loop carried edges from SU(3)
 # CHECK-NEXT:     Order
 # CHECK-NEXT:       SU(7)
+# CHECK-NEXT:   Loop carried edges from SU(5)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(7)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir
index ca59b97dd11e9..69f56fa7934f2 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep3.mir
@@ -14,13 +14,11 @@
 # ```
 # 
 # Loop-carried dependencies exist from load for a[i+1] to store for a[i].
-# FIXME: Currently the following dependencies are missed.
-#
-# Loop carried edges from SU(7)
-#   Order
-#     SU(5)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
+# CHECK-NEXT:   Loop carried edges from SU(7)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(5)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir
index 4bc4b48735947..cc4e9e1d67c5c 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep4.mir
@@ -20,15 +20,15 @@
 #
 # FIXME: Currently the following dependencies are missed.
 #
-# Loop carried edges from SU(4)
-#   Order
-#     SU(3)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
 # CHECK-NEXT:   Loop carried edges from SU(2)
 # CHECK-NEXT:     Order
 # CHECK-NEXT:       SU(3)
 # CHECK-NEXT:       SU(4)
+# CHECK-NEXT:   Loop carried edges from SU(4)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(3)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir
index 77c3d569db181..3c2e0c40680c8 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep5.mir
@@ -23,20 +23,18 @@
 # Note that if there is already a dependency between two instructions, we don't
 # add loop-carried on between them since non-loop-carried one imposes stronger
 # constraint than loop-carried one.
-#
-# FIXME: Currently the following dependencies are missed.
-#  Loop carried edges from SU(5)
-#    Order
-#      SU(2)
-#  Loop carried edges from SU(6)
-#    Order
-#      SU(5)
-#  Loop carried edges from SU(8)
-#    Order
-#      SU(3)
-#      SU(5)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
+# CHECK-NEXT:   Loop carried edges from SU(5)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(2)
+# CHECK-NEXT:   Loop carried edges from SU(6)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(5)
+# CHECK-NEXT:   Loop carried edges from SU(8)
+# CHECK-NEXT:     Order
+# CHECK-NEXT:       SU(3)
+# CHECK-NEXT:       SU(5)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |

>From 46c21dd8869842d5f12392c1c292ef41804c8b21 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Thu, 10 Jul 2025 19:23:41 +0900
Subject: [PATCH 2/2] Update llvm/lib/CodeGen/MachinePipeliner.cpp

Co-authored-by: aankit-ca <quic_aankit at quicinc.com>
---
 llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index c224654c062c5..b38a4d1c55af9 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -339,7 +339,7 @@ class LoopCarriedOrderDepsTracker {
                                           const LoadStoreChunk &To);
 
   /// Add a loop-carried order dependency between \p Src and \p Dst if we
-  /// cannot prove tye are independent. When \p PerformCheapCheck is true, a
+  /// cannot prove they are independent. When \p PerformCheapCheck is true, a
   /// lightweight dependency test (referred to as "cheap check" below) is
   /// performed at first. Note that the cheap check is retained to maintain the
   /// existing behavior and not expected to be used anymore.