[PATCH] D44697: [AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU

Tony Tye via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 20 12:10:12 PDT 2018


t-tye created this revision.
t-tye added reviewers: arsenm, yaxunl.
Herald added subscribers: Anastasia, tpr, dstuttard, nhaehnle, wdng.
t-tye added a dependency: D43736: [AMDGPU] Remove use of OpenCL triple environment and replace with function attribute for AMDGPU.

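Add two additional implicit arguments for OpenCL kernels on the AMDGPU target when using the AMDHSA runtime, to support device enqueue: the address of the virtual queue and the address of the AqlWrap struct used by enqueue_kernel. This increases the implicit argument size ("amdgpu-implicitarg-num-bytes") from 32 to 48 bytes.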


https://reviews.llvm.org/D44697

Files:
  docs/AMDGPUUsage.rst
  test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
  test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll


Index: test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -46,7 +46,7 @@
 }
 
 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
 ; OS-MESA3D: kernarg_segment_byte_size = 28
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
Index: test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -18,7 +18,7 @@
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 
 ; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 
 ; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; GCN: s_mov_b64 s[6:7], s[4:5]
 ; GCN: s_swappc_b64
@@ -132,7 +132,7 @@
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 
 ; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
 
 attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #2 = { nounwind readnone speculatable }
Index: docs/AMDGPUUsage.rst
===================================================================
--- docs/AMDGPUUsage.rst
+++ docs/AMDGPUUsage.rst
@@ -3801,10 +3801,14 @@
      Position Byte Byte      Description
               Size Alignment
      ======== ==== ========= ===========================================
-     0        8    8         OpenCL Global Offset X
-     1        8    8         OpenCL Global Offset Y
-     2        8    8         OpenCL Global Offset Z
-     3        8    8         OpenCL printf buffer
+     1        8    8         OpenCL Global Offset X
+     2        8    8         OpenCL Global Offset Y
+     3        8    8         OpenCL Global Offset Z
+     4        8    8         OpenCL address of printf buffer
+     5        8    8         OpenCL address of virtual queue used by
+                             enqueue_kernel.
+     6        8    8         OpenCL address of AqlWrap struct used by
+                             enqueue_kernel.
      ======== ==== ========= ===========================================
 
 .. _amdgpu-hcc:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44697.139177.patch
Type: text/x-patch
Size: 3572 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180320/f1c7d5c5/attachment.bin>


More information about the llvm-commits mailing list