[llvm] r281080 - AMDGPU/SI: Make sure llvm.amdgcn.implicitarg.ptr() is 8-byte aligned for HSA

Tom Stellard via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 9 12:28:01 PDT 2016


Author: tstellar
Date: Fri Sep  9 14:28:00 2016
New Revision: 281080

URL: http://llvm.org/viewvc/llvm-project?rev=281080&view=rev
Log:
AMDGPU/SI: Make sure llvm.amdgcn.implicitarg.ptr() is 8-byte aligned for HSA

Reviewers: arsenm

Subscribers: arsenm, wdng, nhaehnle, llvm-commits

Differential Revision: https://reviews.llvm.org/D24405

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=281080&r1=281079&r2=281080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Sep  9 14:28:00 2016
@@ -2690,7 +2690,8 @@ SDValue AMDGPUTargetLowering::CreateLive
 
 uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
     const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
-  uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), 4);
+  unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
+  uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
   switch (Param) {
   case GRID_DIM:
     return ArgOffset;

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=281080&r1=281079&r2=281080&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Sep  9 14:28:00 2016
@@ -276,6 +276,10 @@ public:
     return isAmdHsaOS() ? 0 : 36;
   }
 
+  unsigned getAlignmentForImplicitArgPtr() const {
+    return isAmdHsaOS() ? 8 : 4;
+  }
+
   unsigned getStackAlignment() const {
     // Scratch is allocated in 256 dword per wave blocks.
     return 4 * 256 / getWavefrontSize();

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=281080&r1=281079&r2=281080&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Fri Sep  9 14:28:00 2016
@@ -29,7 +29,7 @@ define void @test_implicit(i32 addrspace
 
 ; ALL-LABEL: {{^}}test_implicit_alignment
 ; MESA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
-; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
+; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
 ; MESA: buffer_store_dword [[V_VAL]]
 ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]




More information about the llvm-commits mailing list