[llvm-branch-commits] [llvm] [MachinePipelner] Add loop-carried dependencies for global barriers (PR #174391)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jan 5 04:40:14 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Ryotaro Kasuga (kasuga-fj)

<details>
<summary>Changes</summary>

The loads/stores must not be reordered across barrier instructions. However, in MachinePipeliner, it potentially could happen since loop-carried dependencies from loads/stores to a barrier instruction were not considered. The same problem exists for barrier-to-barrier dependencies. This patch adds the handling for those cases. The implementation is based on that of `ScheduleDAGInstrs::buildSchedGraph`.

Split off from https://github.com/llvm/llvm-project/pull/135148

---
Full diff: https://github.com/llvm/llvm-project/pull/174391.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/MachinePipeliner.cpp (+51-4) 
- (modified) llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir (+10-2) 
- (modified) llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir (+25-15) 


``````````diff
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 6b022783f4bb8..4c8ef0aaa3d66 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -349,6 +349,10 @@ class LoopCarriedOrderDepsTracker {
                                  const SUnitWithMemInfo &Dst);
 
   void computeDependenciesAux();
+
+  void setLoopCarriedDep(const SUnit *Src, const SUnit *Dst) {
+    LoopCarried[Src->NodeNum].set(Dst->NodeNum);
+  }
 };
 
 } // end anonymous namespace
@@ -1137,7 +1141,7 @@ void LoopCarriedOrderDepsTracker::addDependenciesBetweenSUs(
     return;
 
   if (hasLoopCarriedMemDep(Src, Dst, *BAA, TII, TRI, DAG))
-    LoopCarried[Src.SU->NodeNum].set(Dst.SU->NodeNum);
+    setLoopCarriedDep(Src.SU, Dst.SU);
 }
 
 void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
@@ -1160,11 +1164,16 @@ void LoopCarriedOrderDepsTracker::addLoopCarriedDepenenciesForChunks(
 
 void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
   SmallVector<LoadStoreChunk, 2> Chunks(1);
+  SUnit *FirstBarrier = nullptr;
+  SUnit *LastBarrier = nullptr;
   for (const auto &TSU : TaggedSUnits) {
     InstrTag Tag = TSU.getTag();
     SUnit *SU = TSU.getPointer();
     switch (Tag) {
     case InstrTag::Barrier:
+      if (!FirstBarrier)
+        FirstBarrier = SU;
+      LastBarrier = SU;
       Chunks.emplace_back();
       break;
     case InstrTag::LoadOrStore:
@@ -1182,9 +1191,47 @@ void LoopCarriedOrderDepsTracker::computeDependenciesAux() {
   for (const LoadStoreChunk &Chunk : Chunks)
     addLoopCarriedDepenenciesForChunks(Chunk, Chunk);
 
-  // TODO: If there are multiple barrier instructions, dependencies from the
-  // last barrier instruction (or load/store below it) to the first barrier
-  // instruction (or load/store above it).
+  // There is no barrier instruction between load/store instructions in the same
+  // LoadStoreChunk. If there are one or more barrier instructions, the
+  // instructions sequence is as follows:
+  //
+  //   Loads/Stores (Chunks.front())
+  //   Barrier (FirstBarrier)
+  //   Loads/Stores
+  //   Barrier
+  //   ...
+  //   Loads/Stores
+  //   Barrier (LastBarrier)
+  //   Loads/Stores (Chunks.back())
+  //
+  // Since loads/stores must not be reordered across barrier instructions, and
+  // the order of barrier instructions must be preserved, add the following
+  // loop-carried dependences:
+  //
+  //       Loads/Stores (Chunks.front()) <-----+
+  //  +--> Barrier (FirstBarrier) <---------+  |
+  //  |    Loads/Stores                     |  |
+  //  |    Barrier                          |  |
+  //  |    ...                              |  |
+  //  |    Loads/Stores                     |  |
+  //  |    Barrier (LastBarrier) -----------+--+
+  //  +--- Loads/Stores (Chunks.back())
+  //
+  if (FirstBarrier) {
+    assert(LastBarrier && "Both barriers should be set.");
+    for (const SUnitWithMemInfo &Dst : Chunks.front().Loads)
+      setLoopCarriedDep(LastBarrier, Dst.SU);
+    for (const SUnitWithMemInfo &Dst : Chunks.front().Stores)
+      setLoopCarriedDep(LastBarrier, Dst.SU);
+
+    for (const SUnitWithMemInfo &Src : Chunks.back().Loads)
+      setLoopCarriedDep(Src.SU, FirstBarrier);
+    for (const SUnitWithMemInfo &Src : Chunks.back().Stores)
+      setLoopCarriedDep(Src.SU, FirstBarrier);
+
+    if (FirstBarrier != LastBarrier)
+      setLoopCarriedDep(LastBarrier, FirstBarrier);
+  }
 }
 
 /// Add a chain edge between a load and store if the store can be an
diff --git a/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir b/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
index bcc6a3ea9b285..214f7e245030a 100644
--- a/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
+++ b/llvm/test/CodeGen/AArch64/sms-loop-carried-fp-exceptions1.mir
@@ -5,16 +5,24 @@
 # floating-point exception, and there is an instruction for barrier event. In
 # this case the order of them must not change.
 #
+# SU(2): May raise FP exception
+# SU(3): May raise FP exception
+# SU(4): Store
+# SU(5): Barrier
+# SU(7): Barrier
+#
 # FIXME: Currently the following dependencies are missed.
 #
 # Loop carried edges from SU(7)
 #   Order
 #     SU(2)
 #     SU(3)
-#     SU(4)
-#     SU(5)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
+# CHECK-NEXT: Loop carried edges from SU(7)
+# CHECK-NEXT:   Order
+# CHECK-NEXT:     SU(4)
+# CHECK-NEXT:     SU(5)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
index 4281d15377141..8fe3b3d83aa94 100644
--- a/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-order-dep6.mir
@@ -19,23 +19,33 @@
 # }
 # ```
 #
-# FIXME: Currently the following dependencies are missed.
-# Loop carried edges from SU(16)
-#   Order
-#     SU(6)
-#     SU(8)
-#     SU(10)
-#     SU(11)
-# Loop carried edges from SU(17)
-#   Order
-#     SU(10)
-#     SU(11)
-# Loop carried edges from SU(19)
-#   Order
-#     SU(10)
-#     SU(11)
+# SU(6): Load
+# SU(8): Store
+# SU(10): Store
+# SU(11): Barrier
+# SU(16): Barrier
+# SU(17): Load
+# SU(19): Load
+#
+# As the order between load/store and barrier must be preserved, the following
+# loop-carried dependnecies need to be added:
+#   - SU(16) -> SU(6), SU(8), SU(10)
+#   - SU(17), SU(19) -> SU(11)
+#   - SU(16) -> SU(11) (barrier to barrier)
 
 # CHECK:      ===== Loop Carried Edges Begin =====
+# CHECK-NEXT: Loop carried edges from SU(16)
+# CHECK-NEXT:   Order
+# CHECK-NEXT:     SU(6)
+# CHECK-NEXT:     SU(8)
+# CHECK-NEXT:     SU(10)
+# CHECK-NEXT:     SU(11)
+# CHECK-NEXT: Loop carried edges from SU(17)
+# CHECK-NEXT:   Order
+# CHECK-NEXT:     SU(11)
+# CHECK-NEXT: Loop carried edges from SU(19)
+# CHECK-NEXT:   Order
+# CHECK-NEXT:     SU(11)
 # CHECK-NEXT: ===== Loop Carried Edges End =====
 
 --- |

``````````

</details>


https://github.com/llvm/llvm-project/pull/174391


More information about the llvm-branch-commits mailing list