[llvm] [RISCV] Change heuristic used for load clustering (PR #75341)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 13 05:56:19 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Alex Bradbury (asb)
<details>
<summary>Changes</summary>
Split out from #<!-- -->73789, so as to leave that PR just for flipping load clustering to on by default. Clusters if the operations are within a cache line of each other (as AMDGPU does in shouldScheduleLoadsNear). X86 does something similar, but does `((Offset2 - Offset1) / 8 > 64)`. I'm not sure if that's intentionally set to 512 bytes or if the division is in error.
Adopts the suggestion from @<!-- -->wangpc-pp to query the cache line size and
use it if available.
---
Stacks on top of #<!-- -->75338 (though could be rebased).
---
Full diff: https://github.com/llvm/llvm-project/pull/75341.diff
5 Files Affected:
- (modified) llvm/include/llvm/CodeGen/MachineScheduler.h (+8-2)
- (modified) llvm/lib/CodeGen/MachineScheduler.cpp (+20-11)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfo.cpp (+7-3)
- (modified) llvm/lib/Target/RISCV/RISCVTargetMachine.cpp (+1-1)
- (modified) llvm/test/CodeGen/RISCV/misched-load-clustering.ll (+3-3)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 9f16cf5d5bc387..47127a6c29603b 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1347,13 +1347,19 @@ ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);
/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
+/// If ReorderWhileClustering is set to true, no attempt will be made to
+/// reduce reordering due to store clustering.
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI);
+ const TargetRegisterInfo *TRI,
+ bool ReorderWhileClustering = false);
+/// If ReorderWhileClustering is set to true, no attempt will be made to
+/// reduce reordering due to store clustering.
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI);
+ const TargetRegisterInfo *TRI,
+ bool ReorderWhileClustering = false);
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 886137d86f87de..554776783eff6f 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1743,11 +1743,14 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
bool IsLoad;
+ bool ReorderWhileClustering;
public:
BaseMemOpClusterMutation(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri, bool IsLoad)
- : TII(tii), TRI(tri), IsLoad(IsLoad) {}
+ const TargetRegisterInfo *tri, bool IsLoad,
+ bool ReorderWhileClustering)
+ : TII(tii), TRI(tri), IsLoad(IsLoad),
+ ReorderWhileClustering(ReorderWhileClustering) {}
void apply(ScheduleDAGInstrs *DAGInstrs) override;
@@ -1763,14 +1766,16 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
class StoreClusterMutation : public BaseMemOpClusterMutation {
public:
StoreClusterMutation(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : BaseMemOpClusterMutation(tii, tri, false) {}
+ const TargetRegisterInfo *tri,
+ bool ReorderWhileClustering)
+ : BaseMemOpClusterMutation(tii, tri, false, ReorderWhileClustering) {}
};
class LoadClusterMutation : public BaseMemOpClusterMutation {
public:
- LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
- : BaseMemOpClusterMutation(tii, tri, true) {}
+ LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri,
+ bool ReorderWhileClustering)
+ : BaseMemOpClusterMutation(tii, tri, true, ReorderWhileClustering) {}
};
} // end anonymous namespace
@@ -1779,15 +1784,19 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
+ const TargetRegisterInfo *TRI,
+ bool ReorderWhileClustering) {
+ return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(
+ TII, TRI, ReorderWhileClustering)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
+ const TargetRegisterInfo *TRI,
+ bool ReorderWhileClustering) {
+ return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(
+ TII, TRI, ReorderWhileClustering)
: nullptr;
}
@@ -1840,7 +1849,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
SUnit *SUa = MemOpa.SU;
SUnit *SUb = MemOpb.SU;
- if (SUa->NodeNum > SUb->NodeNum)
+ if (!ReorderWhileClustering && SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
// FIXME: Is this check really required?
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 1dcff7eb563e20..b8960fd9571c9b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2282,9 +2282,13 @@ bool RISCVInstrInfo::shouldClusterMemOps(
return false;
}
- // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
- // indicate they likely share a cache line.
- return ClusterSize <= 4;
+ unsigned CacheLineSize =
+ BaseOps1.front()->getParent()->getMF()->getSubtarget().getCacheLineSize();
+ // Assume a cache line size of 64 bytes if no size is set in RISCVSubtarget.
+ CacheLineSize = CacheLineSize ? CacheLineSize : 64;
+ // Cluster if the memory operations are on the same or a neighbouring cache
+ // line.
+ return std::abs(Offset1 - Offset2) < CacheLineSize;
}
// Set BaseReg (the base register operand), Offset (the byte offset being
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 3abdb6003659fa..ae10a72e889239 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -348,7 +348,7 @@ class RISCVPassConfig : public TargetPassConfig {
ScheduleDAGMILive *DAG = nullptr;
if (EnableMISchedLoadClustering) {
DAG = createGenericSchedLive(C);
- DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI, true));
}
if (ST.hasMacroFusion()) {
DAG = DAG ? DAG : createGenericSchedLive(C);
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 4eb969a357a9ee..d046aaf98f5edd 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -23,10 +23,10 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
-; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
-; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
-; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
%val0 = load i32, i32* %arrayidx0
``````````
</details>
https://github.com/llvm/llvm-project/pull/75341
More information about the llvm-commits
mailing list