[llvm-branch-commits] [llvm-branch] r227596 - R600/SI: Define a schedule model and enable the generic machine scheduler
Tom Stellard
thomas.stellard at amd.com
Fri Jan 30 10:55:30 PST 2015
Author: tstellar
Date: Fri Jan 30 12:55:30 2015
New Revision: 227596
URL: http://llvm.org/viewvc/llvm-project?rev=227596&view=rev
Log:
R600/SI: Define a schedule model and enable the generic machine scheduler
The schedule model is not complete yet, and could be improved.
This is a partial merge of r227461. The difference is that it
does not enable the machine scheduler by default.
Modified:
llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp
llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h
llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp
llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h
llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll
llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.cpp Fri Jan 30 12:55:30 2015
@@ -20,6 +20,7 @@
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;
@@ -120,3 +121,21 @@ bool AMDGPUSubtarget::isVGPRSpillingEnab
const SIMachineFunctionInfo *MFI) const {
return MFI->getShaderType() == ShaderType::COMPUTE || EnableVGPRSpilling;
}
+
+void AMDGPUSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (getGeneration() >= SOUTHERN_ISLANDS) {
+ // Other generations keep the incoming Policy unchanged; begin, end and NumRegionInstrs are not read.
+ // Track register pressure so the scheduler can try to decrease
+ // pressure once register usage is above the threshold defined by
+ // SIRegisterInfo::getRegPressureSetLimit().
+ Policy.ShouldTrackPressure = true;
+
+ // Enabling both top-down and bottom-up scheduling seems to give us fewer
+ // register spills than just using one of these approaches on its own.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+}
Modified: llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h (original)
+++ llvm/branches/release_36/lib/Target/R600/AMDGPUSubtarget.h Fri Jan 30 12:55:30 2015
@@ -209,6 +209,10 @@ public:
return getGeneration() <= NORTHERN_ISLANDS;
}
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin, MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
+
// Helper functions to simplify if statements
bool isTargetELF() const {
return false;
@@ -228,6 +232,14 @@ public:
return TargetTriple.getOS() == Triple::AMDHSA;
}
bool isVGPRSpillingEnabled(const SIMachineFunctionInfo *MFI) const;
+
+ unsigned getMaxWavesPerCU() const {
+ if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 10;
+ // Only generations >= SOUTHERN_ISLANDS have a known value (10); any other generation is unhandled.
+ // FIXME: Not sure what this is for other subtargets.
+ llvm_unreachable("do not know max waves per CU for this subtarget.");
+ }
};
} // End namespace llvm
Modified: llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp (original)
+++ llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.cpp Fri Jan 30 12:55:30 2015
@@ -51,9 +51,32 @@ BitVector SIRegisterInfo::getReservedReg
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- return RC->getNumRegs();
+unsigned SIRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
+ // Returns the limit for pressure set Idx; falls back to 256 when no register class lists Idx.
+ // FIXME: We should adjust the max number of waves based on LDS size.
+ unsigned SGPRLimit = getNumSGPRsAllowed(ST.getMaxWavesPerCU());
+ unsigned VGPRLimit = getNumVGPRsAllowed(ST.getMaxWavesPerCU());
+ // Scan the register classes and return the limit of the first one whose pressure sets contain Idx.
+ for (regclass_iterator I = regclass_begin(), E = regclass_end();
+ I != E; ++I) {
+ // Divide the budget by getSize() / 4, clamped to at least 1 (presumably 4-byte sub-registers -- confirm).
+ unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
+ unsigned Limit;
+ // Every class that is not an SGPR class is budgeted against the VGPR limit.
+ if (isSGPRClass(*I)) {
+ Limit = SGPRLimit / NumSubRegs;
+ } else {
+ Limit = VGPRLimit / NumSubRegs;
+ }
+ // The pressure-set list of a class is terminated by -1.
+ const int *Sets = getRegClassPressureSets(*I);
+ assert(Sets);
+ for (unsigned i = 0; Sets[i] != -1; ++i) {
+ if (Sets[i] == (int)Idx)
+ return Limit;
+ }
+ }
+ return 256;
}
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
@@ -455,3 +478,29 @@ unsigned SIRegisterInfo::findUnusedRegis
return AMDGPU::NoRegister;
}
+unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
+ switch(WaveCount) { // Fixed table: WaveCount -> VGPR limit; fewer waves allow more VGPRs.
+ case 10: return 24;
+ case 9: return 28;
+ case 8: return 32;
+ case 7: return 36;
+ case 6: return 40;
+ case 5: return 48;
+ case 4: return 64;
+ case 3: return 84;
+ case 2: return 128;
+ default: return 256; // Any WaveCount outside 2..10 (including 0 and 1).
+ }
+}
+
+unsigned SIRegisterInfo::getNumSGPRsAllowed(unsigned WaveCount) const {
+ switch(WaveCount) { // Fixed table: WaveCount -> SGPR limit; fewer waves allow more SGPRs.
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return 103; // Any WaveCount outside 5..10.
+ }
+}
Modified: llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h (original)
+++ llvm/branches/release_36/lib/Target/R600/SIRegisterInfo.h Fri Jan 30 12:55:30 2015
@@ -17,6 +17,7 @@
#define LLVM_LIB_TARGET_R600_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "llvm/Support/Debug.h"
namespace llvm {
@@ -26,8 +27,7 @@ struct SIRegisterInfo : public AMDGPUReg
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const override;
+ unsigned getRegPressureSetLimit(unsigned Idx) const override;
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
@@ -105,6 +105,14 @@ struct SIRegisterInfo : public AMDGPUReg
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;
+ /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
+ /// concurrent waves.
+ unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
+
+ /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
+ /// concurrent waves.
+ unsigned getNumSGPRsAllowed(unsigned WaveCount) const;
+
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
Modified: llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/ds_read2st64.ll Fri Jan 30 12:55:30 2015
@@ -65,8 +65,8 @@ define void @simple_read2st64_f32_max_of
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
-; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
@@ -197,8 +197,8 @@ define void @simple_read2st64_f64_max_of
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
-; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
Modified: llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll?rev=227596&r1=227595&r2=227596&view=diff
==============================================================================
--- llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll (original)
+++ llvm/branches/release_36/test/CodeGen/R600/si-triv-disjoint-mem-access.ll Fri Jan 30 12:55:30 2015
@@ -51,8 +51,8 @@ define void @no_reorder_local_load_volat
; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
-; CI: buffer_store_dword
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
%ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
More information about the llvm-branch-commits
mailing list