[llvm] r313274 - AMDGPU: Make frame register caller preserved
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 10:14:57 PDT 2017
Author: arsenm
Date: Thu Sep 14 10:14:57 2017
New Revision: 313274
URL: http://llvm.org/viewvc/llvm-project?rev=313274&view=rev
Log:
AMDGPU: Make frame register caller preserved
Using SplitCSR for the frame register was very broken. The copies
in the prolog and epilog were often optimized out, and they were
also inserted after the true prolog, by which point the FP had
already been clobbered.
I have a hacky but working solution that continues to use
split CSR, but for now this approach is simpler and gets us to
working programs.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
llvm/trunk/test/CodeGen/AMDGPU/ipra.ll
llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp Thu Sep 14 10:14:57 2017
@@ -59,16 +59,7 @@ const MCPhysReg *SIRegisterInfo::getCall
const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- // FIXME
- static MCPhysReg Regs[2];
-
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- assert(!MFI->isEntryFunction());
-
- Regs[0] = MFI->getFrameOffsetReg();
- Regs[1] = AMDGPU::NoRegister;
-
- return Regs;
+ return nullptr;
}
const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Sep 14 10:14:57 2017
@@ -2148,6 +2148,8 @@ SDValue SITargetLowering::LowerCall(Call
MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SDValue CallerSavedFP;
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall) {
@@ -2164,6 +2166,13 @@ SDValue SITargetLowering::LowerCall(Call
SDValue ScratchWaveOffsetReg
= DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
+
+ if (!Info->isEntryFunction()) {
+ // Avoid clobbering this function's FP value. In the current convention
+ // callee will overwrite this, so do save/restore around the call site.
+ CallerSavedFP = DAG.getCopyFromReg(Chain, DL,
+ Info->getFrameOffsetReg(), MVT::i32);
+ }
}
// Stack pointer relative accesses are done by changing the offset SGPR. This
@@ -2344,6 +2353,12 @@ SDValue SITargetLowering::LowerCall(Call
Chain = Call.getValue(0);
InFlag = Call.getValue(1);
+ if (CallerSavedFP) {
+ SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32);
+ Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
uint64_t CalleePopBytes = 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll Thu Sep 14 10:14:57 2017
@@ -32,11 +32,13 @@ define amdgpu_kernel void @test_kernel_c
; GCN: v_writelane_b32 v32, s37, 4
; GCN: s_mov_b32 s33, s5
-; GCN: s_swappc_b64
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN: v_readlane_b32 s37, v32, 4
; GCN: v_readlane_b32 s36, v32, 3
; GCN: v_readlane_b32 s35, v32, 2
@@ -49,6 +51,20 @@ define void @test_func_call_external_voi
call void @external_void_func_void()
ret void
}
+
+; FIXME: Avoid extra restore of FP in between calls.
+; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
+; GCN: s_mov_b32 s33, s5
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
+; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
+define void @test_func_call_external_void_funcx2() #0 {
+ call void @external_void_func_void()
+ call void @external_void_func_void()
+ ret void
+}
; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
; GCN: s_waitcnt
Modified: llvm/trunk/test/CodeGen/AMDGPU/ipra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ipra.ll?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ipra.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ipra.ll Thu Sep 14 10:14:57 2017
@@ -90,5 +90,19 @@ define void @func_call_tail_call() #1 {
ret void
}
+define void @void_func_void() noinline {
+ ret void
+}
+
+; Make sure we don't get save/restore of FP between calls.
+; GCN-LABEL: {{^}}test_funcx2:
+; GCN-NOT: s5
+; GCN-NOT: s32
+define void @test_funcx2() #0 {
+ call void @void_func_void()
+ call void @void_func_void()
+ ret void
+}
+
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }
Modified: llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll Thu Sep 14 10:14:57 2017
@@ -13,8 +13,8 @@ define fastcc i32 @i32_fastcc_i32_i32(i3
; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_mov_b32 s5, s32
; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64
More information about the llvm-commits mailing list