[PATCH] D73292: [AMDGPU] Correct NumLoads in clustering

Fri Jan 24 12:46:16 PST 2020

This revision was automatically updated to reflect the committed changes.
Closed by commit rGbe8e38cbd978: Correct NumLoads in clustering (authored by rampitec).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D73292/new/

https://reviews.llvm.org/D73292

Files:
  llvm/include/llvm/CodeGen/TargetInstrInfo.h
  llvm/lib/CodeGen/MachineScheduler.cpp
  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp


Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================

--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -457,7 +457,7 @@
   if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
       (isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt)) ||
       (isFLAT(FirstLdSt) && isFLAT(SecondLdSt))) {
-    const unsigned MaxGlobalLoadCluster = 6;
+    const unsigned MaxGlobalLoadCluster = 7;
     if (NumLoads > MaxGlobalLoadCluster)
       return false;
 
@@ -497,7 +497,11 @@
                                          ? MRI.getRegClass(Reg)
                                          : RI.getPhysRegClass(Reg);
 
-  return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
+  // FIXME: NumLoads should not be subtracted 1. This is to match behavior
+  // of clusterNeighboringMemOps which was previosly passing cluster length
+  // less 1. LoadClusterThreshold should be tuned instead.
+  return ((NumLoads - 1) * (RI.getRegSizeInBits(*DstRC) / 8)) <=
+         LoadClusterThreshold;
 }
 
 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2422,7 +2422,7 @@
     return false;
 
   // Only cluster up to a single pair.
-  if (NumLoads > 1)
+  if (NumLoads > 2)
     return false;
 
   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
Index: llvm/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/lib/CodeGen/MachineScheduler.cpp
+++ llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1584,7 +1584,7 @@
     SUnit *SUb = MemOpRecords[Idx+1].SU;
     if (TII->shouldClusterMemOps(MemOpRecords[Idx].BaseOps,
                                  MemOpRecords[Idx + 1].BaseOps,
-                                 ClusterLength)) {
+                                 ClusterLength + 1)) {
       if (SUa->NodeNum > SUb->NodeNum)
         std::swap(SUa, SUb);
       if (DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1276,6 +1276,10 @@
   /// or
   ///   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
   /// to TargetPassConfig::createMachineScheduler() to have an effect.
+  ///
+  /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
+  /// \p NumLoads is the number of loads that will be in the cluster if this
+  /// hook returns true.
   virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                    ArrayRef<const MachineOperand *> BaseOps2,
                                    unsigned NumLoads) const {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D73292.240274.patch
Type: text/x-patch
Size: 3043 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200124/76ba4d0e/attachment-0001.bin>