[llvm] r313279 - AMDGPU: Stop modifying SP in call sequences

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 14 10:37:40 PDT 2017


Author: arsenm
Date: Thu Sep 14 10:37:40 2017
New Revision: 313279

URL: http://llvm.org/viewvc/llvm-project?rev=313279&view=rev
Log:
AMDGPU: Stop modifying SP in call sequences

Because the stack grows in the same direction in which it is
addressed, modifying SP at the beginning of the call sequence
was incorrect. If we had a stack-passed argument, we would end up
skipping that number of bytes before pushing arguments, leaving
unused/inconsistent space.

The callee creates fixed stack objects in its frame, so
the space necessary for these is already logically allocated
in the callee. We therefore just let the callee increment SP
if it really requires it.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
    llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
    llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=313279&r1=313278&r2=313279&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Sep 14 10:37:40 2017
@@ -2153,7 +2153,7 @@ SDValue SITargetLowering::LowerCall(Call
   // Adjust the stack pointer for the new arguments...
   // These operations are automatically eliminated by the prolog/epilog pass
   if (!IsSibCall) {
-    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
+    Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
 
     unsigned OffsetReg = Info->getScratchWaveOffsetReg();
 
@@ -2359,8 +2359,8 @@ SDValue SITargetLowering::LowerCall(Call
     InFlag = Chain.getValue(1);
   }
 
-  uint64_t CalleePopBytes = 0;
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32),
+  uint64_t CalleePopBytes = NumBytes;
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32),
                              DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
                              InFlag, DL);
   if (!Ins.empty())

Modified: llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll?rev=313279&r1=313278&r2=313279&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/byval-frame-setup.ll Thu Sep 14 10:37:40 2017
@@ -74,7 +74,6 @@ entry:
 ; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
 ; GCN-DAG: v_writelane_b32
 
-; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
 ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
 
@@ -86,6 +85,7 @@ entry:
 ; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
 ; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
 
+; GCN-NOT: s_add_u32 s32, s32, 0x800
 
 ; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
 ; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
@@ -107,8 +107,9 @@ entry:
 ; GCN: v_readlane_b32
 ; GCN-NOT: v_readlane_b32 s32
 
-; GCN: s_sub_u32 s32, s32, 0x800{{$}}
-; GCN-NEXT: s_sub_u32 s32, s32, 0xc00{{$}}
+; GCN-NOT: s_sub_u32 s32, s32, 0x800
+
+; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @call_void_func_byval_struct_func() #0 {
@@ -138,7 +139,7 @@ entry:
 ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
 ; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
 
-; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
+; GCN-NOT: s_add_u32 s32, s32, 0x800
 
 ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
 ; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
@@ -162,9 +163,8 @@ entry:
 
 
 ; GCN: s_swappc_b64
-; FIXME: Dead SP modfication
-; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}
-; GCN-NEXT: s_endpgm
+; GCN-NOT: s_sub_u32 s32
+; GCN: s_endpgm
 define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
 entry:
   %arg0 = alloca %struct.ByValStruct, align 4

Modified: llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll?rev=313279&r1=313278&r2=313279&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-argument-types.ll Thu Sep 14 10:37:40 2017
@@ -385,10 +385,10 @@ define amdgpu_kernel void @test_call_ext
 
 ; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32:
 ; HSA-DAG: s_mov_b32 s33, s9
-; HSA-DAG: s_add_u32 [[SP_REG:s[0-9]+]], s33, 0x100{{$}}
+; HSA-NOT: s_add_u32 s32
 
 ; MESA-DAG: s_mov_b32 s33, s3{{$}}
-; MESA-DAG: s_add_u32 [[SP_REG:s[0-9]+]], s33, 0x100{{$}}
+; MESA-NOT: s_add_u32 s32
 
 ; GCN-DAG: buffer_load_dword [[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 ; GCN-DAG: buffer_load_dwordx4 v[0:3], off
@@ -400,7 +400,7 @@ define amdgpu_kernel void @test_call_ext
 ; GCN-DAG: buffer_load_dwordx4 v[24:27], off
 ; GCN-DAG: buffer_load_dwordx4 v[28:31], off
 
-; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]] offset:4{{$}}
+; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32 offset:4{{$}}
 ; GCN: s_waitcnt
 ; GCN-NEXT: s_swappc_b64
 ; GCN-NEXT: s_endpgm
@@ -447,7 +447,7 @@ define amdgpu_kernel void @test_call_ext
 ; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], s33 offset:8
 ; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], s33 offset:12
 
-; GCN: s_add_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: s_add_u32 [[SP]],
 
 ; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
 ; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
@@ -463,7 +463,7 @@ define amdgpu_kernel void @test_call_ext
 ; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
 
 ; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: [[SP]]
 define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
   %val = alloca { i8, i32 }, align 4
   %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 0
@@ -486,13 +486,13 @@ define amdgpu_kernel void @test_call_ext
 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8
 ; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
 
-; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: s_add_u32 [[SP]]
 ; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
 ; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
 ; GCN-NEXT: s_swappc_b64
 ; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
 ; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
-; GCN: s_sub_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: s_sub_u32 [[SP]]
 
 ; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
 ; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off

Modified: llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll?rev=313279&r1=313278&r2=313279&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll Thu Sep 14 10:37:40 2017
@@ -425,7 +425,7 @@ define void @too_many_args_use_workitem_
 ; GCN: s_mov_b32 s33, s7
 ; GCN: s_add_u32 s32, s33, 0x200{{$}}
 
-; GCN-DAG: s_add_u32 s32, s32, 0x100{{$}}
+; GCN-NOT: s32
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12




More information about the llvm-commits mailing list