[llvm] r313274 - AMDGPU: Make frame register caller preserved

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 10:14:57 PDT 2017


Author: arsenm
Date: Thu Sep 14 10:14:57 2017
New Revision: 313274

URL: http://llvm.org/viewvc/llvm-project?rev=313274&view=rev
Log:
AMDGPU: Make frame register caller preserved

Using SplitCSR for the frame register was very broken. The copies in
the prolog and epilog were often optimized out, and they were also
inserted after the true prolog, by which point the FP had already
been clobbered.

I have a hacky but working solution that continues to use
split CSR, but for now this approach is simpler and will get us to
working programs.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
    llvm/trunk/test/CodeGen/AMDGPU/ipra.ll
    llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp Thu Sep 14 10:14:57 2017
@@ -59,16 +59,7 @@ const MCPhysReg *SIRegisterInfo::getCall
 
 const MCPhysReg *
 SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
-  // FIXME
-  static MCPhysReg Regs[2];
-
-  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
-  assert(!MFI->isEntryFunction());
-
-  Regs[0] = MFI->getFrameOffsetReg();
-  Regs[1] = AMDGPU::NoRegister;
-
-  return Regs;
+  return nullptr;
 }
 
 const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Sep 14 10:14:57 2017
@@ -2148,6 +2148,8 @@ SDValue SITargetLowering::LowerCall(Call
   MachineFrameInfo &MFI = MF.getFrameInfo();
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
 
+  SDValue CallerSavedFP;
+
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
   if (!IsSibCall) {
@@ -2164,6 +2166,13 @@ SDValue SITargetLowering::LowerCall(Call
     SDValue ScratchWaveOffsetReg
       = DAG.getCopyFromReg(Chain, DL, OffsetReg, MVT::i32);
     RegsToPass.emplace_back(AMDGPU::SGPR4, ScratchWaveOffsetReg);
+
+    if (!Info->isEntryFunction()) {
+      // Avoid clobbering this function's FP value. In the current convention
+      // callee will overwrite this, so do save/restore around the call site.
+      CallerSavedFP = DAG.getCopyFromReg(Chain, DL,
+                                         Info->getFrameOffsetReg(), MVT::i32);
+    }
   }
 
   // Stack pointer relative accesses are done by changing the offset SGPR. This
@@ -2344,6 +2353,12 @@ SDValue SITargetLowering::LowerCall(Call
   Chain = Call.getValue(0);
   InFlag = Call.getValue(1);
 
+  if (CallerSavedFP) {
+    SDValue FPReg = DAG.getRegister(Info->getFrameOffsetReg(), MVT::i32);
+    Chain = DAG.getCopyToReg(Chain, DL, FPReg, CallerSavedFP, InFlag);
+    InFlag = Chain.getValue(1);
+  }
+
   uint64_t CalleePopBytes = 0;
   Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32),
                              DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),

Modified: llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-preserved-registers.ll Thu Sep 14 10:14:57 2017
@@ -32,11 +32,13 @@ define amdgpu_kernel void @test_kernel_c
 ; GCN: v_writelane_b32 v32, s37, 4
 
 ; GCN: s_mov_b32 s33, s5
-; GCN: s_swappc_b64
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
 ; GCN-NEXT: ;;#ASMSTART
 ; GCN-NEXT: ;;#ASMEND
+; GCN-NEXT: s_mov_b32 s33, s5
 ; GCN-NEXT: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN-NEXT: s_mov_b32 s5, s33
 ; GCN: v_readlane_b32 s37, v32, 4
 ; GCN: v_readlane_b32 s36, v32, 3
 ; GCN: v_readlane_b32 s35, v32, 2
@@ -49,6 +51,20 @@ define void @test_func_call_external_voi
   call void @external_void_func_void()
   ret void
 }
+
+; FIXME: Avoid extra restore of FP in between calls.
+; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
+; GCN: s_mov_b32 s33, s5
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
+; GCN-NEXT: s_mov_b32 s33, s5
+; GCN-NEXT: s_swappc_b64
+; GCN-NEXT: s_mov_b32 s5, s33
+define void @test_func_call_external_void_funcx2() #0 {
+  call void @external_void_func_void()
+  call void @external_void_func_void()
+  ret void
+}
 
 ; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31:
 ; GCN: s_waitcnt

Modified: llvm/trunk/test/CodeGen/AMDGPU/ipra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/ipra.ll?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/ipra.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/ipra.ll Thu Sep 14 10:14:57 2017
@@ -90,5 +90,19 @@ define void @func_call_tail_call() #1 {
   ret void
 }
 
+define void @void_func_void() noinline {
+  ret void
+}
+
+; Make sure we don't get save/restore of FP between calls.
+; GCN-LABEL: {{^}}test_funcx2:
+; GCN-NOT: s5
+; GCN-NOT: s32
+define void @test_funcx2() #0 {
+  call void @void_func_void()
+  call void @void_func_void()
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind noinline }

Modified: llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll?rev=313274&r1=313273&r2=313274&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/sibling-call.ll Thu Sep 14 10:14:57 2017
@@ -13,8 +13,8 @@ define fastcc i32 @i32_fastcc_i32_i32(i3
 
 ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
 ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: s_mov_b32 s5, s32
 ; GCN: v_add_i32_e32 v0, vcc, v1, v
+; GCN: s_mov_b32 s5, s32
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
 ; GCN: s_waitcnt vmcnt(0)
 ; GCN: s_setpc_b64




More information about the llvm-commits mailing list