[PATCH] D73292: [AMDGPU] Correct NumLoads in clustering
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 12:46:03 PST 2020
rampitec updated this revision to Diff 240272.
rampitec added a comment.
Added operand description to TargetInstrInfo::shouldClusterMemOps()
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D73292/new/
https://reviews.llvm.org/D73292
Files:
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/MachineScheduler.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -457,7 +457,7 @@
if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
(isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
(isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
- const unsigned MaxGlobalLoadCluster = 6;
+ const unsigned MaxGlobalLoadCluster = 7;
if (NumLoads > MaxGlobalLoadCluster)
return false;
@@ -497,7 +497,11 @@
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
- return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
+ // FIXME: NumLoads should not be subtracted 1. This is to match behavior
+ // of clusterNeighboringMemOps which was previosly passing cluster length
+ // less 1. LoadClusterThreshold should be tuned instead.
+ return ((NumLoads - 1) * (RI.getRegSizeInBits(*DstRC) / 8)) <=
+ LoadClusterThreshold;
}
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2422,7 +2422,7 @@
return false;
// Only cluster up to a single pair.
- if (NumLoads > 1)
+ if (NumLoads > 2)
return false;
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
Index: llvm/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/lib/CodeGen/MachineScheduler.cpp
+++ llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1584,7 +1584,7 @@
SUnit *SUb = MemOpRecords[Idx+1].SU;
if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
MemOpRecords[Idx + 1].BaseOps,
- ClusterLength)) {
+ ClusterLength + 1)) {
if (SUa->NodeNum > SUb->NodeNum)
std::swap(SUa, SUb);
if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1276,6 +1276,10 @@
/// or
/// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
/// to TargetPassConfig::createMachineScheduler() to have an effect.
+ ///
+ /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
+ /// \p NumLoads is the number of loads that will be in the cluster if this
+ /// hook returns true.
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
unsigned NumLoads) const {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D73292.240272.patch
Type: text/x-patch
Size: 3043 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200124/43e8307d/attachment-0001.bin>
More information about the llvm-commits
mailing list