[llvm] a6e9f52 - [Scheduling] Improve group algorithm for store cluster

QingShan Zhang via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 26 19:05:38 PDT 2020


Author: QingShan Zhang
Date: 2020-07-27T02:02:40Z
New Revision: a6e9f5264c855dfa5dda3c5940ffdaaf22d7e693

URL: https://github.com/llvm/llvm-project/commit/a6e9f5264c855dfa5dda3c5940ffdaaf22d7e693
DIFF: https://github.com/llvm/llvm-project/commit/a6e9f5264c855dfa5dda3c5940ffdaaf22d7e693.diff

LOG: [Scheduling] Improve group algorithm for store cluster

Store Addr and Store Addr+8 are a clusterable pair, but they have memory (ctrl) dependencies on different loads.
The current implementation puts these two stores into different groups and therefore fails to cluster them.

Reviewed By: evandro

Differential Revision: https://reviews.llvm.org/D84139

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineScheduler.cpp
    llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index d9d0a783f8a2..cec7a0c031eb 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1653,7 +1653,13 @@ void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
 
     unsigned ChainPredID = DAG->SUnits.size();
     for (const SDep &Pred : SU.Preds) {
-      if (Pred.isCtrl() && !Pred.isArtificial()) {
+      // We only want to cluster the mem ops that have the same ctrl(non-data)
+      // pred so that they didn't have ctrl dependency for each other. But for
+      // store instrs, we can still cluster them if the pred is load instr.
+      if ((Pred.isCtrl() &&
+           (IsLoad ||
+            (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
+          !Pred.isArtificial()) {
         ChainPredID = Pred.getSUnit()->NodeNum;
         break;
       }

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
index 5f75b4ef944b..bfe7e4941da8 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
@@ -147,3 +147,50 @@ entry:
   ret i64 %v
 }
 
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i64_with_ld:%bb.0
+; CHECK:Cluster ld/st SU(5) - SU(10)
+; CHECK:Cluster ld/st SU(15) - SU(20)
+; CHECK:SU(5):   STRXui %7:gpr64, %0:gpr64common, 0 ::
+; CHECK:SU(10):   STRXui %12:gpr64, %0:gpr64common, 1 ::
+; CHECK:SU(15):   STRXui %17:gpr64, %0:gpr64common, 2 ::
+; CHECK:SU(20):   STRXui %22:gpr64, %0:gpr64common, 3 ::
+define void @stp_i64_with_ld(i64* noalias nocapture %a, i64* noalias nocapture readnone %b, i64* noalias nocapture readnone %c) {
+entry:
+  %arrayidx = getelementptr inbounds i64, i64* %a, i64 8
+  %0 = load i64, i64* %arrayidx, align 8
+  %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 16
+  %1 = load i64, i64* %arrayidx3, align 8
+  %mul = mul nsw i64 %1, %0
+  %2 = load i64, i64* %a, align 8
+  %add6 = add nsw i64 %2, %mul
+  store i64 %add6, i64* %a, align 8
+  %arrayidx.1 = getelementptr inbounds i64, i64* %a, i64 9
+  %3 = load i64, i64* %arrayidx.1, align 8
+  %arrayidx3.1 = getelementptr inbounds i64, i64* %a, i64 17
+  %4 = load i64, i64* %arrayidx3.1, align 8
+  %mul.1 = mul nsw i64 %4, %3
+  %arrayidx5.1 = getelementptr inbounds i64, i64* %a, i64 1
+  %5 = load i64, i64* %arrayidx5.1, align 8
+  %add6.1 = add nsw i64 %5, %mul.1
+  store i64 %add6.1, i64* %arrayidx5.1, align 8
+  %arrayidx.2 = getelementptr inbounds i64, i64* %a, i64 10
+  %6 = load i64, i64* %arrayidx.2, align 8
+  %arrayidx3.2 = getelementptr inbounds i64, i64* %a, i64 18
+  %7 = load i64, i64* %arrayidx3.2, align 8
+  %mul.2 = mul nsw i64 %7, %6
+  %arrayidx5.2 = getelementptr inbounds i64, i64* %a, i64 2
+  %8 = load i64, i64* %arrayidx5.2, align 8
+  %add6.2 = add nsw i64 %8, %mul.2
+  store i64 %add6.2, i64* %arrayidx5.2, align 8
+  %arrayidx.3 = getelementptr inbounds i64, i64* %a, i64 11
+  %9 = load i64, i64* %arrayidx.3, align 8
+  %arrayidx3.3 = getelementptr inbounds i64, i64* %a, i64 19
+  %10 = load i64, i64* %arrayidx3.3, align 8
+  %mul.3 = mul nsw i64 %10, %9
+  %arrayidx5.3 = getelementptr inbounds i64, i64* %a, i64 3
+  %11 = load i64, i64* %arrayidx5.3, align 8
+  %add6.3 = add nsw i64 %11, %mul.3
+  store i64 %add6.3, i64* %arrayidx5.3, align 8
+  ret void
+}


        


More information about the llvm-commits mailing list