[llvm] r328351 - [AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU

Tony Tye via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 23 11:58:47 PDT 2018


Author: t-tye
Date: Fri Mar 23 11:58:47 2018
New Revision: 328351

URL: http://llvm.org/viewvc/llvm-project?rev=328351&view=rev
Log:
[AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU

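Add two additional implicit kernel arguments for OpenCL on the AMDGPU target when using the AMDHSA runtime, to support device enqueue: the address of the virtual queue and the address of the AqlWrap struct used by enqueue_kernel. This grows the OpenCL implicit argument area from 32 to 48 bytes.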

Differential Revision: https://reviews.llvm.org/D44697

Modified:
    llvm/trunk/docs/AMDGPUUsage.rst
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
    llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll

Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=328351&r1=328350&r2=328351&view=diff
==============================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Fri Mar 23 11:58:47 2018
@@ -3801,10 +3801,14 @@ When the language is OpenCL the followin
      Position Byte Byte      Description
               Size Alignment
      ======== ==== ========= ===========================================
-     0        8    8         OpenCL Global Offset X
-     1        8    8         OpenCL Global Offset Y
-     2        8    8         OpenCL Global Offset Z
-     3        8    8         OpenCL printf buffer
+     1        8    8         OpenCL Global Offset X
+     2        8    8         OpenCL Global Offset Y
+     3        8    8         OpenCL Global Offset Z
+     4        8    8         OpenCL address of printf buffer
+     5        8    8         OpenCL address of virtual queue used by
+                             enqueue_kernel.
+     6        8    8         OpenCL address of AqlWrap struct used by
+                             enqueue_kernel.
      ======== ==== ========= ===========================================
 
 .. _amdgpu-hcc:

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll?rev=328351&r1=328350&r2=328351&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll Fri Mar 23 11:58:47 2018
@@ -18,7 +18,7 @@ define amdgpu_kernel void @kernel_implic
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 
 ; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@ define amdgpu_kernel void @kernel_implic
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 
 ; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@ define amdgpu_kernel void @kernel_call_i
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; GCN: s_mov_b64 s[6:7], s[4:5]
 ; GCN: s_swappc_b64
@@ -132,7 +132,7 @@ define amdgpu_kernel void @kernel_call_i
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 
 ; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@ declare i8 addrspace(4)* @llvm.amdgcn.im
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
 
 attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #2 = { nounwind readnone speculatable }

Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=328351&r1=328350&r2=328351&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Fri Mar 23 11:58:47 2018
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_implicit
 }
 
 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
 ; OS-MESA3D: kernarg_segment_byte_size = 28
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@ declare i8 addrspace(4)* @llvm.amdgcn.im
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }




More information about the llvm-commits mailing list