[llvm] r333456 - AMDGPU: Round up kernel argument allocation size

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue May 29 12:35:00 PDT 2018


Author: arsenm
Date: Tue May 29 12:35:00 2018
New Revision: 333456

URL: http://llvm.org/viewvc/llvm-project?rev=333456&view=rev
Log:
AMDGPU: Round up kernel argument allocation size

AFAIK the driver's allocation will actually have to round this
up anyway. It is useful to track the rounded-up size so that
the end of the kernel argument segment is known to be
dereferenceable, which allows a wider s_load_dword to be used
for a short argument at the end of the segment.

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h?rev=333456&r1=333455&r2=333456&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMachineFunction.h Tue May 29 12:35:00 2018
@@ -20,6 +20,7 @@ class AMDGPUMachineFunction : public Mac
   /// local memory space.
   SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
 
+protected:
   uint64_t KernArgSize;
   unsigned MaxKernArgAlign;
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=333456&r1=333455&r2=333456&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue May 29 12:35:00 2018
@@ -414,12 +414,16 @@ bool SISubtarget::isVGPRSpillingEnabled(
 
 unsigned SISubtarget::getKernArgSegmentSize(const Function &F,
                                             unsigned ExplicitArgBytes) const {
+  uint64_t TotalSize = ExplicitArgBytes;
   unsigned ImplicitBytes = getImplicitArgNumBytes(F);
-  if (ImplicitBytes == 0)
-    return ExplicitArgBytes;
 
-  unsigned Alignment = getAlignmentForImplicitArgPtr();
-  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+  if (ImplicitBytes != 0) {
+    unsigned Alignment = getAlignmentForImplicitArgPtr();
+    TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+  }
+
+  // Being able to dereference past the end is useful for emitting scalar loads.
+  return alignTo(TotalSize, 4);
 }
 
 unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=333456&r1=333455&r2=333456&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Tue May 29 12:35:00 2018
@@ -71,8 +71,11 @@ SIMachineFunctionInfo::SIMachineFunction
     if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
       ImplicitArgPtr = true;
   } else {
-    if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+    if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
       KernargSegmentPtr = true;
+      assert(MaxKernArgAlign == 0);
+      MaxKernArgAlign =  ST.getAlignmentForImplicitArgPtr();
+    }
   }
 
   CallingConv::ID CC = F.getCallingConv();

Modified: llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll?rev=333456&r1=333455&r2=333456&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/kernel-args.ll Tue May 29 12:35:00 2018
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck -enable-var-scope --check-prefix=EG --check-prefix=FUNC %s
 
 ; FUNC-LABEL: {{^}}i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
@@ -25,6 +26,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}i8_zext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
@@ -44,6 +46,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}i8_sext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
@@ -63,7 +66,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
@@ -83,7 +88,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}i16_zext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -102,7 +109,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}i16_sext_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -121,7 +130,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
@@ -133,6 +144,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
@@ -145,7 +157,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v2i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; MESA-GCN: buffer_load_ubyte
@@ -159,7 +173,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v2i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 
@@ -174,7 +190,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v2i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
@@ -187,7 +205,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v2f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
 ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
@@ -200,7 +220,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v3i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
@@ -217,7 +239,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v3i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
 ; HSA-VI: kernarg_segment_alignment = 4
+
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
@@ -233,6 +257,7 @@ entry:
   ret void
 }
 ; FUNC-LABEL: {{^}}v3i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -247,6 +272,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v3f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -261,6 +287,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v4i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
@@ -281,6 +308,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v4i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
@@ -301,6 +329,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v4i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -317,6 +346,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v4f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -332,6 +362,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v8i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 16
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
@@ -363,6 +394,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v8i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
@@ -393,6 +425,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v8i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 64
 ; HSA-VI: kernarg_segment_alignment = 5
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
@@ -412,6 +445,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v8f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 64
 ; HSA-VI: kernarg_segment_alignment = 5
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
@@ -429,6 +463,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v16i8_arg:
+; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
@@ -485,6 +520,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v16i16_arg:
+; HSA-VI: kernarg_segment_byte_size = 64
 ; HSA-VI: kernarg_segment_alignment = 5
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
@@ -535,6 +571,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v16i32_arg:
+; HSA-VI: kernarg_segment_byte_size = 128
 ; HSA-VI: kernarg_segment_alignment = 6
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
@@ -562,6 +599,7 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}v16f32_arg:
+; HSA-VI: kernarg_segment_byte_size = 128
 ; HSA-VI: kernarg_segment_alignment = 6
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
@@ -621,6 +659,9 @@ entry:
 ; }
 
 ; FUNC-LABEL: {{^}}i1_arg:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
 ; SI: buffer_load_ubyte
 ; SI: v_and_b32_e32
 ; SI: buffer_store_byte
@@ -631,6 +672,9 @@ define amdgpu_kernel void @i1_arg(i1 add
 }
 
 ; FUNC-LABEL: {{^}}i1_arg_zext_i32:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
 ; SI: buffer_load_ubyte
 ; SI: buffer_store_dword
 ; SI: s_endpgm
@@ -641,6 +685,9 @@ define amdgpu_kernel void @i1_arg_zext_i
 }
 
 ; FUNC-LABEL: {{^}}i1_arg_zext_i64:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
 ; SI: buffer_load_ubyte
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
@@ -651,6 +698,9 @@ define amdgpu_kernel void @i1_arg_zext_i
 }
 
 ; FUNC-LABEL: {{^}}i1_arg_sext_i32:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
 ; SI: buffer_load_ubyte
 ; SI: buffer_store_dword
 ; SI: s_endpgm
@@ -661,6 +711,9 @@ define amdgpu_kernel void @i1_arg_sext_i
 }
 
 ; FUNC-LABEL: {{^}}i1_arg_sext_i64:
+; HSA-VI: kernarg_segment_byte_size = 12
+; HSA-VI: kernarg_segment_alignment = 4
+
 ; SI: buffer_load_ubyte
 ; SI: v_bfe_i32
 ; SI: v_ashrrev_i32

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=333456&r1=333455&r2=333456&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Tue May 29 12:35:00 2018
@@ -4,6 +4,9 @@
 
 ; ALL-LABEL: {{^}}test:
 ; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 8
+; HSA: kernarg_segment_alignment = 4
+
 ; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
 
 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
@@ -17,6 +20,10 @@ define amdgpu_kernel void @test(i32 addr
 }
 
 ; ALL-LABEL: {{^}}test_implicit:
+; HSA: kernarg_segment_byte_size = 8
+; OS-MESA3D: kernarg_segment_byte_size = 24
+; CO-V2: kernarg_segment_alignment = 4
+
 ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15
 define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
@@ -28,9 +35,12 @@ define amdgpu_kernel void @test_implicit
   ret void
 }
 
-; ALL-LABEL: {{^}}test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 10
+; ALL-LABEL: {{^}}test_implicit_alignment:
+; HSA: kernarg_segment_byte_size = 12
 ; OS-MESA3D: kernarg_segment_byte_size = 28
+; CO-V2: kernarg_segment_alignment = 4
+
+
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
 ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
@@ -48,6 +58,9 @@ define amdgpu_kernel void @test_implicit
 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment
 ; HSA: kernarg_segment_byte_size = 64
 ; OS-MESA3D: kernarg_segment_byte_size = 28
+; CO-V2: kernarg_segment_alignment = 4
+
+
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
 ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
@@ -63,7 +76,11 @@ define amdgpu_kernel void @opencl_test_i
 }
 
 ; ALL-LABEL: {{^}}test_no_kernargs:
-; HSA: enable_sgpr_kernarg_segment_ptr = 1
+; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 0
+; OS-MESA3D: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 32
+
 ; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
 define amdgpu_kernel void @test_no_kernargs() #1 {
   %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
@@ -74,9 +91,34 @@ define amdgpu_kernel void @test_no_kerna
   ret void
 }
 
+; GCN-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
+; HSA: kernarg_segment_byte_size = 48
+; OS-MESA3d: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
+  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+  %val = load volatile i32, i32 addrspace(4)* %arg.ptr
+  store volatile i32 %val, i32 addrspace(1)* null
+  ret void
+}
+
+; GCN-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
+; HSA: kernarg_segment_byte_size = 40
+; OS-MESA3D: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
+  %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+  %val = load volatile i32, i32 addrspace(4)* %arg.ptr
+  store volatile i32 %val, i32 addrspace(1)* null
+  ret void
+}
+
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
+attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }




More information about the llvm-commits mailing list