[PATCH] D59840: AMDGPU: Enable the scavenger for large frames
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 13:28:12 PDT 2019
arsenm created this revision.
arsenm added a reviewer: rampitec.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, qcolombet.
arsenm added a parent revision: D59836: PEI: Delay checking requiresFrameIndexReplacementScavenging.
Another test is needed for the case where the scavenge fails, but there's a separate issue with that which needs an additional fix first.
https://reviews.llvm.org/D59840
Files:
lib/Target/AMDGPU/SIRegisterInfo.cpp
test/CodeGen/AMDGPU/spill-offset-calculation.ll
Index: test/CodeGen/AMDGPU/spill-offset-calculation.ll
===================================================================
--- test/CodeGen/AMDGPU/spill-offset-calculation.ll
+++ test/CodeGen/AMDGPU/spill-offset-calculation.ll
@@ -35,9 +35,8 @@
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
; 0x40000 / 64 = 4096 (for wave64)
- ; CHECK: s_add_u32 s7, s7, 0x40000
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
- ; CHECK: s_sub_u32 s7, s7, 0x40000
+ ; CHECK: s_add_u32 s6, s7, 0x40000
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
%a = load volatile i32, i32 addrspace(5)* %aptr
; Force %a to spill
@@ -88,10 +87,9 @@
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
; 0x3ff00 / 64 = 4092 (for wave64)
- ; CHECK: s_add_u32 s7, s7, 0x3ff00
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 ; 4-byte Folded Spill
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s7 offset:4 ; 4-byte Folded Spill
- ; CHECK: s_sub_u32 s7, s7, 0x3ff00
+ ; CHECK: s_add_u32 s6, s7, 0x3ff00
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
@@ -139,9 +137,8 @@
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
; 0x40000 / 64 = 4096 (for wave64)
- ; CHECK: s_add_u32 s5, s5, 0x40000
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
- ; CHECK: s_sub_u32 s5, s5, 0x40000
+ ; CHECK: s_add_u32 s6, s5, 0x40000
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
%a = load volatile i32, i32 addrspace(5)* %aptr
; Force %a to spill
@@ -192,10 +189,9 @@
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
; 0x3ff00 / 64 = 4092 (for wave64)
- ; CHECK: s_add_u32 s5, s5, 0x3ff00
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 ; 4-byte Folded Spill
- ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s5 offset:4 ; 4-byte Folded Spill
- ; CHECK: s_sub_u32 s5, s5, 0x3ff00
+ ; CHECK: s_add_u32 s6, s5, 0x3ff00
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
+ ; CHECK: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
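(Aside on the arithmetic in the CHECK comments above: the SGPR soffset applied to buffer_store_dword is a byte offset into the per-wave scratch backing store, so a per-lane frame offset is scaled by the wavefront size, 64 lanes for wave64. A minimal compile-time C++ sketch of that scaling; the names here are hypothetical, not LLVM's:

  #include <cstdint>

  constexpr uint32_t WavefrontSize = 64; // wave64: 64 lanes per wave

  // Per-lane frame offset in bytes -> byte offset into scratch memory.
  constexpr uint32_t scratchOffset(uint32_t PerLaneOffset) {
    return PerLaneOffset * WavefrontSize;
  }

  // 4096 bytes per lane scales to 0x40000, far past the 12-bit MUBUF
  // immediate offset field, so the offset has to be materialized in an
  // SGPR (the s6 checked above) rather than folded into the instruction.
  static_assert(scratchOffset(4096) == 0x40000, "first test above");
  static_assert(scratchOffset(4092) == 0x3ff00, "second test above");

)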
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -257,11 +257,20 @@
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
const MachineFunction &MF) const {
- // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
- // create a virtual register for it during frame index elimination, so the
- // scavenger is directly needed.
- return MF.getFrameInfo().hasStackObjects() &&
- MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasStackObjects())
+ return false;
+
+ // The scavenger is used for large frames which may require finding a free
+ // register for large offsets.
+ if (!isUInt<12>(MFI.getStackSize()))
+ return true;
+
+ // If using scalar stores, for spills, m0 is needed for the scalar store
+ // offset (pre-GFX9). m0 is unallocatable, so we can't create a virtual
+ // register for it during frame index elimination, so the scavenger is
+ // directly needed.
+ return MF.getSubtarget<GCNSubtarget>().hasScalarStores() &&
MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}
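For reference, a minimal standalone C++ sketch of the decision the patched requiresFrameIndexReplacementScavenging makes. FrameSummary and needsScavenger are hypothetical stand-ins for the MachineFrameInfo / GCNSubtarget / SIMachineFunctionInfo queries in the real function, and isUInt12 mirrors llvm::isUInt<12>:

  #include <cstdint>

  // Mirrors llvm::isUInt<12>: does the value fit in MUBUF's 12-bit
  // unsigned immediate offset field?
  constexpr bool isUInt12(uint64_t X) { return X < (1u << 12); }

  // Hypothetical stand-in for the frame/subtarget state the real
  // function reads.
  struct FrameSummary {
    bool HasStackObjects;
    uint64_t StackSize;    // total frame size in bytes
    bool HasScalarStores;  // pre-GFX9 scalar-store spill path
    bool HasSpilledSGPRs;
  };

  bool needsScavenger(const FrameSummary &F) {
    if (!F.HasStackObjects)
      return false;
    // Large frames may need a scavenged free register to materialize
    // offsets that don't fit the 12-bit immediate.
    if (!isUInt12(F.StackSize))
      return true;
    // Scalar-store spills need m0 for the store offset; m0 is
    // unallocatable, so the scavenger must supply it directly.
    return F.HasScalarStores && F.HasSpilledSGPRs;
  }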