[PATCH] D24405: AMDGPU/SI: Make sure llvm.amdgcn.implicitarg.ptr() is 8-byte aligned for HSA
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 9 09:55:10 PDT 2016
tstellarAMD created this revision.
tstellarAMD added a reviewer: arsenm.
tstellarAMD added a subscriber: llvm-commits.
Herald added subscribers: nhaehnle, wdng, arsenm.
https://reviews.llvm.org/D24405
Files:
lib/Target/AMDGPU/AMDGPUISelLowering.cpp
lib/Target/AMDGPU/AMDGPUSubtarget.h
lib/Target/AMDGPU/GCNSchedStrategy.cpp
test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
Index: test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -29,7 +29,7 @@
; ALL-LABEL: {{^}}test_implicit_alignment
; MESA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
-; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
+; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
Index: lib/Target/AMDGPU/GCNSchedStrategy.cpp
===================================================================
--- lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -34,6 +34,7 @@
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
ST.getOccupancyWithNumVGPRs(VGPRs));
+ return MinRegOccupancy;
return std::min(MinRegOccupancy,
ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
}
Index: lib/Target/AMDGPU/AMDGPUSubtarget.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -276,6 +276,10 @@
return isAmdHsaOS() ? 0 : 36;
}
+ unsigned getAlignmentForImplicitArgPtr() const {
+ return isAmdHsaOS() ? 8 : 4;
+ }
+
unsigned getStackAlignment() const {
// Scratch is allocated in 256 dword per wave blocks.
return 4 * 256 / getWavefrontSize();
Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2690,7 +2690,8 @@
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
- uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), 4);
+ unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
+ uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
switch (Param) {
case GRID_DIM:
return ArgOffset;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D24405.70853.patch
Type: text/x-patch
Size: 2343 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160909/f3109745/attachment.bin>
More information about the llvm-commits
mailing list