[PATCH] D84139: [Scheduling] Improve group algorithm for store cluster
Qing Shan Zhang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 23 03:04:58 PDT 2020
steven.zhang updated this revision to Diff 280055.
steven.zhang added a comment.
Address comments.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D84139/new/
https://reviews.llvm.org/D84139
Files:
llvm/lib/CodeGen/MachineScheduler.cpp
llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
Index: llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
===================================================================
--- llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
+++ llvm/test/CodeGen/AArch64/aarch64-stp-cluster.ll
@@ -147,3 +147,50 @@
ret i64 %v
}
+; CHECK: ********** MI Scheduling **********
+; CHECK-LABEL: stp_i64_with_ld:%bb.0
+; CHECK:Cluster ld/st SU(5) - SU(10)
+; CHECK:Cluster ld/st SU(15) - SU(20)
+; CHECK:SU(5): STRXui %7:gpr64, %0:gpr64common, 0 ::
+; CHECK:SU(10): STRXui %12:gpr64, %0:gpr64common, 1 ::
+; CHECK:SU(15): STRXui %17:gpr64, %0:gpr64common, 2 ::
+; CHECK:SU(20): STRXui %22:gpr64, %0:gpr64common, 3 ::
+define void @stp_i64_with_ld(i64* noalias nocapture %a, i64* noalias nocapture readnone %b, i64* noalias nocapture readnone %c) {
+entry:
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 8
+ %0 = load i64, i64* %arrayidx, align 8
+ %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 16
+ %1 = load i64, i64* %arrayidx3, align 8
+ %mul = mul nsw i64 %1, %0
+ %2 = load i64, i64* %a, align 8
+ %add6 = add nsw i64 %2, %mul
+ store i64 %add6, i64* %a, align 8
+ %arrayidx.1 = getelementptr inbounds i64, i64* %a, i64 9
+ %3 = load i64, i64* %arrayidx.1, align 8
+ %arrayidx3.1 = getelementptr inbounds i64, i64* %a, i64 17
+ %4 = load i64, i64* %arrayidx3.1, align 8
+ %mul.1 = mul nsw i64 %4, %3
+ %arrayidx5.1 = getelementptr inbounds i64, i64* %a, i64 1
+ %5 = load i64, i64* %arrayidx5.1, align 8
+ %add6.1 = add nsw i64 %5, %mul.1
+ store i64 %add6.1, i64* %arrayidx5.1, align 8
+ %arrayidx.2 = getelementptr inbounds i64, i64* %a, i64 10
+ %6 = load i64, i64* %arrayidx.2, align 8
+ %arrayidx3.2 = getelementptr inbounds i64, i64* %a, i64 18
+ %7 = load i64, i64* %arrayidx3.2, align 8
+ %mul.2 = mul nsw i64 %7, %6
+ %arrayidx5.2 = getelementptr inbounds i64, i64* %a, i64 2
+ %8 = load i64, i64* %arrayidx5.2, align 8
+ %add6.2 = add nsw i64 %8, %mul.2
+ store i64 %add6.2, i64* %arrayidx5.2, align 8
+ %arrayidx.3 = getelementptr inbounds i64, i64* %a, i64 11
+ %9 = load i64, i64* %arrayidx.3, align 8
+ %arrayidx3.3 = getelementptr inbounds i64, i64* %a, i64 19
+ %10 = load i64, i64* %arrayidx3.3, align 8
+ %mul.3 = mul nsw i64 %10, %9
+ %arrayidx5.3 = getelementptr inbounds i64, i64* %a, i64 3
+ %11 = load i64, i64* %arrayidx5.3, align 8
+ %add6.3 = add nsw i64 %11, %mul.3
+ store i64 %add6.3, i64* %arrayidx5.3, align 8
+ ret void
+}
Index: llvm/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/lib/CodeGen/MachineScheduler.cpp
+++ llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1653,7 +1653,13 @@
unsigned ChainPredID = DAG->SUnits.size();
for (const SDep &Pred : SU.Preds) {
- if (Pred.isCtrl() && !Pred.isArtificial()) {
+ // We only want to cluster the mem ops that have the same ctrl(non-data)
+ // pred so that they have no ctrl dependency on each other. But for
+ // store instrs, we can still cluster them if the pred is a load instr.
+ if ((Pred.isCtrl() &&
+ (IsLoad ||
+ (Pred.getSUnit() && Pred.getSUnit()->getInstr()->mayStore()))) &&
+ !Pred.isArtificial()) {
ChainPredID = Pred.getSUnit()->NodeNum;
break;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D84139.280055.patch
Type: text/x-patch
Size: 3296 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200723/8c2e0144/attachment.bin>
More information about the llvm-commits
mailing list