[llvm-branch-commits] [llvm-branch] r227596 - R600/SI: Define a schedule model and enable the generic machine scheduler

Tom Stellard thomas.stellard at amd.com
Fri Jan 30 10:55:30 PST 2015


Author: tstellar
Date: Fri Jan 30 12:55:30 2015
New Revision: 227596

URL: http://llvm.org/viewvc/llvm-project?rev=227596&view=rev
Log:
R600/SI: Define a schedule model and enable the generic machine scheduler

The schedule model is not complete yet, and could be improved.

This is a partial merge of r227461.  The difference is that it
does not enable the machine scheduler by default.

Modified:
    llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp
    llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h
    llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp
    llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h
    llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll
    llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll

Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp Fri Jan 30 12:55:30 2015
@@ -20,6 +20,7 @@
 #include "SIInstrInfo.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineScheduler.h"
 
 using namespace llvm;
 
@@ -120,3 +121,21 @@ bool AMDGPUSubtarget::isVGPRSpillingEnab
                                        const SIMachineFunctionInfo *MFI) const {
   return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
 }
+
+void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+                                          MachineInstr *begin,
+                                          MachineInstr *end,
+                                          unsigned NumRegionInstrs) const {
+  if (getGeneration() >= SOUTHERN_ISLANDS) {
+
+    // Track register pressure so the scheduler can try to decrease
+    // pressure once register usage is above the threshold defined by
+    // SIRegisterInfo::getRegPressureSetLimit()
+    Policy.ShouldTrackPressure = true;
+
+    // Enabling both top down and bottom up scheduling seems to give us less
+    // register spills than just using one of these approaches on its own.
+    Policy.OnlyTopDown = false;
+    Policy.OnlyBottomUp = false;
+  }
+}

Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h Fri Jan 30 12:55:30 2015
@@ -209,6 +209,10 @@ public:
     return getGeneration() <= NORTHERN_ISLANDS;
   }
 
+  void overrideSchedPolicy(MachineSchedPolicy &Policy,
+                           MachineInstr *begin, MachineInstr *end,
+                           unsigned NumRegionInstrs) const override;
+
   // Helper functions to simplify if statements
   bool isTargetELF() const {
     return false;
@@ -228,6 +232,14 @@ public:
     return TargetTriple.getOS() == Triple::AMDHSA;
   }
   bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+
+  unsigned getMaxWavesPerCU() const {
+    if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+      return 10;
+
+    // FIXME: Not sure what this is for other subtagets.
+    llvm_unreachable("do not know max waves per CU for this subtarget.");
+  }
 };
 
 } // End namespace llvm

Modified: llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp Fri Jan 30 12:55:30 2015
@@ -51,9 +51,32 @@ BitVector SIRegisterInfo::getReservedReg
   return Reserved;
 }
 
-unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
-                                             MachineFunction &MF) const {
-  return RC->getNumRegs();
+unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+
+  // FIXME: We should adjust the max number of waves based on LDS size.
+  unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
+  unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+
+  for (regclass_iterator I = regclass_begin(), E = regclass_end();
+       I != E; ++I) {
+
+    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+    unsigned Limit;
+
+    if (isSGPRClass(*I)) {
+      Limit = SGPRLimit / NumSubRegs;
+    } else {
+      Limit = VGPRLimit / NumSubRegs;
+    }
+
+    const int *Sets = getRegClassPressureSets(*I);
+    assert(Sets);
+    for (unsigned i = 0; Sets[i] != -1; ++i) {
+	    if (Sets[i] == (int)Idx)
+        return Limit;
+    }
+  }
+  return 256;
 }
 
 bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@@ -455,3 +478,29 @@ unsigned SIRegisterInfo::findUnusedRegis
   return AMDGPU::NoRegister;
 }
 
+unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
+  switch(WaveCount) {
+    case 10: return 24;
+    case 9:  return 28;
+    case 8:  return 32;
+    case 7:  return 36;
+    case 6:  return 40;
+    case 5:  return 48;
+    case 4:  return 64;
+    case 3:  return 84;
+    case 2:  return 128;
+    default: return 256;
+  }
+}
+
+unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
+  switch(WaveCount) {
+    case 10: return 48;
+    case 9:  return 56;
+    case 8:  return 64;
+    case 7:  return 72;
+    case 6:  return 80;
+    case 5:  return 96;
+    default: return 103;
+  }
+}

Modified: llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h (original)
+++ llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h Fri Jan 30 12:55:30 2015
@@ -17,6 +17,7 @@
 #define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
 
 #include "AMDGPURegisterInfo.h"
+#include "llvm/Support/Debug.h"
 
 namespace llvm {
 
@@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPUReg
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
 
-  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
-                               MachineFunction &MF) const override;
+  unsigned getRegPressureSetLimit(unsigned Idx) const override;
 
   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
 
@@ -105,6 +105,14 @@ struct SIRegisterInfo : public AMDGPUReg
   unsigned getPreloadedValue(const MachineFunction &MF,
                              enum PreloadedValue Value) const;
 
+  /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
+  ///        concurrent waves.
+  unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
+
+  /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
+  ///        concurrent waves.
+  unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+
   unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
                               const TargetRegisterClass *RC) const;
 

Modified: llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll Fri Jan 30 12:55:30 2015
@@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_of
 
 ; SI-LABEL: @simple_read2st64_f32_over_max_offset
 ; SI-NOT: ds_read2st64_b32
-; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
 ; SI: s_endpgm
 define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_of
 
 ; SI-LABEL: @simple_read2st64_f64_over_max_offset
 ; SI-NOT: ds_read2st64_b64
-; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
 ; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
 ; SI: s_endpgm
 define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {

Modified: llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll Fri Jan 30 12:55:30 2015
@@ -51,8 +51,8 @@ define void @no_reorder_local_load_volat
 
 ; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI: buffer_store_dword
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
 define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
   %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 





More information about the llvm-branch-commits mailing list