[llvm] r364099 - AMDGPU: Fix not using s33 for scratch wave offset in kernels
Matt Arsenault via llvm-commits
llvm-commits@lists.llvm.org
Fri Jun 21 13:04:03 PDT 2019
Author: arsenm
Date: Fri Jun 21 13:04:02 2019
New Revision: 364099
URL: http://llvm.org/viewvc/llvm-project?rev=364099&view=rev
Log:
AMDGPU: Fix not using s33 for scratch wave offset in kernels
Fixes missing piece from r363990.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
llvm/trunk/test/CodeGen/AMDGPU/ipra.ll
llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=364099&r1=364098&r2=364099&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jun 21 13:04:02 2019
@@ -1829,11 +1829,8 @@ static void reservePrivateMemoryRegs(con
Info.setScratchRSrcReg(ReservedBufferReg);
}
- // This should be accurate for kernels even before the frame is finalized.
- const bool HasFP = ST.getFrameLowering()->hasFP(MF);
- if (HasFP) {
- unsigned ReservedOffsetReg =
- TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ // hasFP should be accurate for kernels even before the frame is finalized.
+ if (ST.getFrameLowering()->hasFP(MF)) {
MachineRegisterInfo &MRI = MF.getRegInfo();
// Try to use s32 as the SP, but move it if it would interfere with input
@@ -1860,8 +1857,15 @@ static void reservePrivateMemoryRegs(con
report_fatal_error("failed to find register for SP");
}
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- Info.setFrameOffsetReg(ReservedOffsetReg);
+ if (MFI.hasCalls()) {
+ Info.setScratchWaveOffsetReg(AMDGPU::SGPR33);
+ Info.setFrameOffsetReg(AMDGPU::SGPR33);
+ } else {
+ unsigned ReservedOffsetReg =
+ TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setFrameOffsetReg(ReservedOffsetReg);
+ }
} else if (RequiresStackAccess) {
assert(!MFI.hasCalls());
// We know there are accesses and they will be done relative to SP, so just
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll?rev=364099&r1=364098&r2=364099&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll Fri Jun 21 13:04:02 2019
@@ -104,9 +104,9 @@ define amdgpu_kernel void @test_call_voi
}
; GCN-LABEL: {{^}}test_call_void_func_void_mayclobber_s31:
-; GCN: s_mov_b32 s33, s31
+; GCN: s_mov_b32 s34, s31
; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s31, s33
+; GCN-NEXT: s_mov_b32 s31, s34
define amdgpu_kernel void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1)* %out) #0 {
%s31 = call i32 asm sideeffect "; def $0", "={s31}"()
call void @external_void_func_void()
@@ -128,15 +128,14 @@ define amdgpu_kernel void @test_call_voi
; FIXME: What is the expected behavior for reserved registers here?
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
-; GCN: s_mov_b32 s34, s9
-; GCN-NOT: s33
+; GCN: s_mov_b32 s33, s9
+; GCN: s_mov_b32 s32, s33
; GCN: #ASMSTART
; GCN-NEXT: ; def s33
; GCN-NEXT: #ASMEND
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
-; GCN: s_mov_b32 s32, s34
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
Modified: llvm/trunk/test/CodeGen/AMDGPU/ipra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ipra.ll?rev=364099&r1=364098&r2=364099&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ipra.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ipra.ll Fri Jun 21 13:04:02 2019
@@ -30,7 +30,7 @@ define hidden void @func() #1 {
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
-; GCN: ; NumSgprs: 37
+; GCN: ; NumSgprs: 38
; GCN: ; NumVgprs: 9
define amdgpu_kernel void @kernel_call() #0 {
%vgpr = load volatile i32, i32 addrspace(1)* undef
Modified: llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll?rev=364099&r1=364098&r2=364099&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/stack-realign.ll Fri Jun 21 13:04:02 2019
@@ -91,7 +91,8 @@ define void @force_realign4(i32 %idx) #1
}
; GCN-LABEL: {{^}}kernel_call_align16_from_8:
-; GCN: s_add_u32 s32, s8, 0x400{{$}}
+; GCN: s_mov_b32 s33, s7{{$}}
+; GCN-NEXT: s_add_u32 s32, s33, 0x400{{$}}
; GCN-NOT: s32
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_align16_from_8() #0 {
@@ -103,7 +104,8 @@ define amdgpu_kernel void @kernel_call_a
; The call sequence should keep the stack on call aligned to 4
; GCN-LABEL: {{^}}kernel_call_align16_from_5:
-; GCN: s_add_u32 s32, s8, 0x400
+; GCN: s_mov_b32 s33, s7{{$}}
+; GCN-NEXT: s_add_u32 s32, s33, 0x400
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_align16_from_5() {
%alloca0 = alloca i8, align 1, addrspace(5)
@@ -114,7 +116,8 @@ define amdgpu_kernel void @kernel_call_a
}
; GCN-LABEL: {{^}}kernel_call_align4_from_5:
-; GCN: s_add_u32 s32, s8, 0x400
+; GCN: s_mov_b32 s33, s7{{$}}
+; GCN: s_add_u32 s32, s33, 0x400
; GCN: s_swappc_b64
define amdgpu_kernel void @kernel_call_align4_from_5() {
%alloca0 = alloca i8, align 1, addrspace(5)
More information about the llvm-commits mailing list