[llvm] r266336 - AMDGPU/SI: Use the correct scratch wave offset register for shaders.
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 14 09:27:04 PDT 2016
Author: tstellar
Date: Thu Apr 14 11:27:03 2016
New Revision: 266336
URL: http://llvm.org/viewvc/llvm-project?rev=266336&view=rev
Log:
AMDGPU/SI: Use the correct scratch wave offset register for shaders.
Summary:
The code previously always used s1 as it was using the user + system SGPR
information for compute kernels. This is incorrect for Mesa shaders though,
The register should be the next SGPR after all user and system SGPR's.
We use that Mesa adds arguments for all input and system SGPR's and
take the next available SGPR for the scratch wave offset register.
Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Reviewers: mareko, arsenm, nhaehnle, tstellarAMD
Subscribers: qcolombet, arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D18941
Patch By: Bas Nieuwenhuizen
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=266336&r1=266335&r2=266336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Apr 14 11:27:03 2016
@@ -37,6 +37,16 @@
using namespace llvm;
+static unsigned findFirstFreeSGPR(CCState &CCInfo) {
+ unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
+ if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
+ return AMDGPU::SGPR0 + Reg;
+ }
+ }
+ llvm_unreachable("Cannot allocate sgpr");
+}
+
SITargetLowering::SITargetLowering(TargetMachine &TM,
const AMDGPUSubtarget &STI)
: AMDGPUTargetLowering(TM, STI) {
@@ -712,6 +722,15 @@ SDValue SITargetLowering::LowerFormalArg
if (!AMDGPU::isShader(CallConv)) {
getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
Splits);
+
+ assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
+ } else {
+ assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+ !Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
+ !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
@@ -834,8 +853,7 @@ SDValue SITargetLowering::LowerFormalArg
unsigned Reg = Info->addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
CCInfo.AllocateReg(Reg);
- } else
- llvm_unreachable("work group id x is always enabled");
+ }
if (Info->hasWorkGroupIDY()) {
unsigned Reg = Info->addWorkGroupIDY();
@@ -857,8 +875,13 @@ SDValue SITargetLowering::LowerFormalArg
if (Info->hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
- unsigned PrivateSegmentWaveByteOffsetReg
- = Info->addPrivateSegmentWaveByteOffset();
+ unsigned PrivateSegmentWaveByteOffsetReg;
+
+ if (AMDGPU::isShader(CallConv)) {
+ PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
+ Info->setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
+ } else
+ PrivateSegmentWaveByteOffsetReg = Info->addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
@@ -923,8 +946,7 @@ SDValue SITargetLowering::LowerFormalArg
unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X);
MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
CCInfo.AllocateReg(Reg);
- } else
- llvm_unreachable("workitem id x should always be enabled");
+ }
if (Info->hasWorkItemIDY()) {
unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y);
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=266336&r1=266335&r2=266336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Thu Apr 14 11:27:03 2016
@@ -65,12 +65,12 @@ SIMachineFunctionInfo::SIMachineFunction
GridWorkgroupCountX(false),
GridWorkgroupCountY(false),
GridWorkgroupCountZ(false),
- WorkGroupIDX(true),
+ WorkGroupIDX(false),
WorkGroupIDY(false),
WorkGroupIDZ(false),
WorkGroupInfo(false),
PrivateSegmentWaveByteOffset(false),
- WorkItemIDX(true),
+ WorkItemIDX(false),
WorkItemIDY(false),
WorkItemIDZ(false) {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
@@ -80,8 +80,11 @@ SIMachineFunctionInfo::SIMachineFunction
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- if (!AMDGPU::isShader(F->getCallingConv()))
+ if (!AMDGPU::isShader(F->getCallingConv())) {
KernargSegmentPtr = true;
+ WorkGroupIDX = true;
+ WorkItemIDX = true;
+ }
if (F->hasFnAttribute("amdgpu-work-group-id-y"))
WorkGroupIDY = true;
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=266336&r1=266335&r2=266336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Thu Apr 14 11:27:03 2016
@@ -164,6 +164,10 @@ public:
return PrivateSegmentWaveByteOffsetSystemSGPR;
}
+ void setPrivateSegmentWaveByteOffset(unsigned Reg) {
+ PrivateSegmentWaveByteOffsetSystemSGPR = Reg;
+ }
+
bool hasPrivateSegmentBuffer() const {
return PrivateSegmentBuffer;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll?rev=266336&r1=266335&r2=266336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/large-alloca-graphics.ll Thu Apr 14 11:27:03 2016
@@ -8,8 +8,8 @@
; CI: s_mov_b32 s11, 0x98f000
; VI: s_mov_b32 s11, 0x980000
-; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
@@ -29,8 +29,8 @@ define amdgpu_ps void @large_alloca_pixe
; CI: s_mov_b32 s11, 0x98f000
; VI: s_mov_b32 s11, 0x980000
-; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
-; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
+; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
; ALL: ; ScratchSize: 32772
define amdgpu_ps void @large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #0 {
Modified: llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=266336&r1=266335&r2=266336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Thu Apr 14 11:27:03 2016
@@ -11,6 +11,7 @@
; GCN-LABEL: {{^}}main:
+; GCN: s_mov_b32 s11, s12
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s14, -1
More information about the llvm-commits
mailing list