[llvm] r363949 - AMDGPU: Fix ignoring DisableFramePointerElim in leaf functions

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 10:03:23 PDT 2019


Author: arsenm
Date: Thu Jun 20 10:03:23 2019
New Revision: 363949

URL: http://llvm.org/viewvc/llvm-project?rev=363949&view=rev
Log:
AMDGPU: Fix ignoring DisableFramePointerElim in leaf functions

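The "frame-pointer" function attribute can request that the frame
pointer be kept in all functions or only in non-leaf functions, so
DisableFramePointerElim should always be consulted, even for leaf
functions. I copied this bug from AArch64, which probably should also
be fixed.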

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=363949&r1=363948&r2=363949&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Thu Jun 20 10:03:23 2019
@@ -707,12 +707,12 @@ void SIFrameLowering::emitEpilogue(Machi
       .addReg(ScratchExecCopy);
   }
 
-  if (hasFP(MF)) {
-    const MachineFrameInfo &MFI = MF.getFrameInfo();
-    uint32_t NumBytes = MFI.getStackSize();
-    uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
-      NumBytes + MFI.getMaxAlignment() : NumBytes;
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  uint32_t NumBytes = MFI.getStackSize();
+  uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
+    NumBytes + MFI.getMaxAlignment() : NumBytes;
 
+  if (RoundedSize != 0 && hasFP(MF)) {
     const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
     BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
       .addReg(StackPtrReg)
@@ -863,14 +863,10 @@ bool SIFrameLowering::hasFP(const Machin
     // API SP if there are calls.
     if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
       return true;
-
-    // Retain behavior of always omitting the FP for leaf functions when
-    // possible.
-    if (MF.getTarget().Options.DisableFramePointerElim(MF))
-      return true;
   }
 
   return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
     MFI.hasStackMap() || MFI.hasPatchPoint() ||
-    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF);
+    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF) ||
+    MF.getTarget().Options.DisableFramePointerElim(MF);
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll?rev=363949&r1=363948&r2=363949&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-frame-setup.ll Thu Jun 20 10:03:23 2019
@@ -9,15 +9,22 @@ define void @callee_no_stack() #0 {
   ret void
 }
 
-; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim:
+; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_mov_b32 s5, s32
 ; GCN-NEXT: s_setpc_b64
-define void @callee_no_stack_no_fp_elim() #1 {
+define void @callee_no_stack_no_fp_elim_all() #1 {
   ret void
 }
 
-; Requires frame pointer for access to local regular object.
+; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
+  ret void
+}
 
 ; GCN-LABEL: {{^}}callee_with_stack:
 ; GCN: ; %bb.0:
@@ -32,6 +39,35 @@ define void @callee_with_stack() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_mov_b32 s5, s32
+; GCN-NEXT: s_add_u32 s32, s32, 0x200
+; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
+; GCN-NEXT: s_sub_u32 s32, s32, 0x200
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define void @callee_with_stack_no_fp_elim_all() #1 {
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  ret void
+}
+
+; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
+  %alloca = alloca i32, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  ret void
+}
+
 ; GCN-LABEL: {{^}}callee_with_stack_and_call:
 ; GCN: ; %bb.0:
 ; GCN-NEXT: s_waitcnt
@@ -151,4 +187,5 @@ define void @spill_only_csr_sgpr() {
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind "frame-pointer"="all" }
+attributes #2 = { nounwind "frame-pointer"="non-leaf" }




More information about the llvm-commits mailing list