[llvm] [MachinePipeliner] Fix store-store dependences (#72508) (PR #72575)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 27 19:46:32 PST 2023


https://github.com/bcahoon updated https://github.com/llvm/llvm-project/pull/72575

>From 23f5f59c284f5dac9cafe52e5cec0f41c36251f9 Mon Sep 17 00:00:00 2001
From: Brendon Cahoon <brendon.cahoon at amd.com>
Date: Thu, 16 Nov 2023 15:32:10 -0600
Subject: [PATCH 1/2] [MachinePipeliner] Fix store-store dependences (#72508)

The pipeliner needs to mark store-store order dependences as
loop carried dependences. Otherwise, the stores may be scheduled
further apart than the MII. The order dependence implies that
the first instance of the dependent store is scheduled before the
second instance of the source store instruction.
---
 llvm/lib/CodeGen/MachinePipeliner.cpp         |  8 +-
 .../CodeGen/PowerPC/sms-store-dependence.ll   | 84 +++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/sms-store-dependence.ll

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 81b7fdcc5961d68..ea96aae500992d9 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -2251,7 +2251,13 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
       SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
     return true;
 
-  // Only chain dependences between a load and store can be loop carried.
+  // Dependences between stores are loop carried to ensure that the dependent
+  // store is not scheduled after the source store on the next iteration.
+  if (Dep.isNormalMemory() && DI->mayStore() && SI->mayStore())
+    return true;
+
+  // The rest of this function handles chain dependences between a load and
+  // store can be loop carried.
   if (!DI->mayStore() || !SI->mayLoad())
     return false;
 
diff --git a/llvm/test/CodeGen/PowerPC/sms-store-dependence.ll b/llvm/test/CodeGen/PowerPC/sms-store-dependence.ll
new file mode 100644
index 000000000000000..d1ec320d55680f9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sms-store-dependence.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
+; RUN:       -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s
+
+; Test that the pipeliner schedules the store instructions correctly. Since
+; there is a dependence between the stores, they cannot be scheduled further than
+; MII cycles/instructions apart. That is, the first store cannot occur multiple
+; times before the second store in the schedule.
+define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
+; CHECK-LABEL: comp_method:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    extsw 7, 8
+; CHECK-NEXT:    extsw 8, 9
+; CHECK-NEXT:    clrldi 9, 6, 32
+; CHECK-NEXT:    addi 6, 3, -1
+; CHECK-NEXT:    mtctr 9
+; CHECK-NEXT:    li 11, 0
+; CHECK-NEXT:    sradi 12, 11, 2
+; CHECK-NEXT:    add 5, 5, 8
+; CHECK-NEXT:    li 8, 2
+; CHECK-NEXT:    li 3, 8
+; CHECK-NEXT:    addi 11, 7, 0
+; CHECK-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lbzu 9, 1(6)
+; CHECK-NEXT:    add 12, 12, 10
+; CHECK-NEXT:    extsb 9, 9
+; CHECK-NEXT:    stbx 8, 4, 9
+; CHECK-NEXT:    add 9, 9, 12
+; CHECK-NEXT:    bdz .LBB0_2
+; CHECK-NEXT:    .p2align 4
+; CHECK-NEXT:  .LBB0_1:
+; CHECK-NEXT:    lbzu 0, 1(6)
+; CHECK-NEXT:    sradi 12, 11, 2
+; CHECK-NEXT:    add 11, 11, 7
+; CHECK-NEXT:    add 12, 12, 10
+; CHECK-NEXT:    sldi 30, 9, 2
+; CHECK-NEXT:    add 9, 9, 30
+; CHECK-NEXT:    extsb 0, 0
+; CHECK-NEXT:    stbx 3, 5, 9
+; CHECK-NEXT:    add 9, 0, 12
+; CHECK-NEXT:    stbx 8, 4, 0
+; CHECK-NEXT:    bdnz .LBB0_1
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    sldi 4, 9, 2
+; CHECK-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT:    add 4, 9, 4
+; CHECK-NEXT:    stbx 3, 5, 4
+; CHECK-NEXT:    blr
+  %8 = icmp sgt i32 %3, 64
+  tail call void @llvm.assume(i1 %8)
+  %9 = and i32 %3, 1
+  %10 = icmp eq i32 %9, 0
+  tail call void @llvm.assume(i1 %10)
+  %11 = sext i32 %5 to i64
+  %12 = sext i32 %6 to i64
+  %13 = zext nneg i32 %3 to i64
+  %14 = getelementptr i8, ptr %2, i64 %12
+  br label %16
+
+15:
+  ret void
+
+16:
+  %17 = phi i64 [ 0, %7 ], [ %24, %16 ]
+  %18 = getelementptr inbounds i8, ptr %0, i64 %17
+  %19 = load i8, ptr %18, align 1
+  %20 = sext i8 %19 to i64
+  %21 = getelementptr inbounds i8, ptr %1, i64 %20
+  store i8 2, ptr %21, align 1
+  %22 = mul nsw i64 %17, %11
+  %a1 = ashr i64 %22, 2
+  %a2 = add i64 %a1, %v1
+  %a3 = add i64 %20, %a2
+  %a4 = mul nsw i64 %a3, 5
+  %23 = getelementptr i8, ptr %14, i64 %a4
+  store i8 8, ptr %23, align 1
+  %24 = add nuw nsw i64 %17, 1
+  %25 = icmp eq i64 %24, %13
+  br i1 %25, label %15, label %16
+}
+
+declare void @llvm.assume(i1 noundef) #1
+
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }

>From 3b821d2226f5477af453683e9a70a165e952ccf0 Mon Sep 17 00:00:00 2001
From: Brendon Cahoon <brendon.cahoon at amd.com>
Date: Mon, 27 Nov 2023 21:40:51 -0600
Subject: [PATCH 2/2] Change isLoopCarriedDep to handle dependences with stores

Previously we handled the case when there is a store-load loop-independent
dependence. This change handles all memory-memory
dependences. Unless it can be proven otherwise, the function assumes
that the dependence is loop carried.
---
 llvm/lib/CodeGen/MachinePipeliner.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index ea96aae500992d9..8cd7f4ebe88d96a 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -2225,7 +2225,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
 }
 
 /// Return true for an order or output dependence that is loop carried
-/// potentially. A dependence is loop carried if the destination defines a valu
+/// potentially. A dependence is loop carried if the destination defines a value
 /// that may be used or defined by the source in a subsequent iteration.
 bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
                                          bool isSucc) {
@@ -2251,16 +2251,12 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
       SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
     return true;
 
-  // Dependences between stores are loop carried to ensure that the dependent
-  // store is not scheduled after the source store on the next iteration.
-  if (Dep.isNormalMemory() && DI->mayStore() && SI->mayStore())
-    return true;
-
-  // The rest of this function handles chain dependences between a load and
-  // store can be loop carried.
-  if (!DI->mayStore() || !SI->mayLoad())
+  if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
     return false;
 
+  // The conservative assumption is that a dependence between memory operations
+  // may be loop carried. The following code checks when it can be proved that
+  // there is no loop carried dependence.
   unsigned DeltaS, DeltaD;
   if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
     return true;



More information about the llvm-commits mailing list