[PATCH] D44697: [AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU

Tony Tye via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 23 12:02:09 PDT 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rL328351: [AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU (authored by t-tye).

Changed prior to commit:
  https://reviews.llvm.org/D44697?vs=139177&id=139632#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D44697

Files:
  llvm/trunk/docs/AMDGPUUsage.rst
  llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
  llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll


Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -18,7 +18,7 @@
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 
 ; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 
 ; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
 ; MESA: kernarg_segment_byte_size = 16
 ; GCN: s_mov_b64 s[6:7], s[4:5]
 ; GCN: s_swappc_b64
@@ -132,7 +132,7 @@
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
 ; MESA: kernarg_segment_byte_size = 464
 
 ; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
 
 attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #2 = { nounwind readnone speculatable }
Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -46,7 +46,7 @@
 }
 
 ; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
 ; OS-MESA3D: kernarg_segment_byte_size = 28
 ; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
 ; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
Index: llvm/trunk/docs/AMDGPUUsage.rst
===================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst
+++ llvm/trunk/docs/AMDGPUUsage.rst
@@ -3801,10 +3801,14 @@
      Position Byte Byte      Description
               Size Alignment
      ======== ==== ========= ===========================================
-     0        8    8         OpenCL Global Offset X
-     1        8    8         OpenCL Global Offset Y
-     2        8    8         OpenCL Global Offset Z
-     3        8    8         OpenCL printf buffer
+     1        8    8         OpenCL Global Offset X
+     2        8    8         OpenCL Global Offset Y
+     3        8    8         OpenCL Global Offset Z
+     4        8    8         OpenCL address of printf buffer
+     5        8    8         OpenCL address of virtual queue used by
+                             enqueue_kernel.
+     6        8    8         OpenCL address of AqlWrap struct used by
+                             enqueue_kernel.
      ======== ==== ========= ===========================================
 
 .. _amdgpu-hcc:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44697.139632.patch
Type: text/x-patch
Size: 3671 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180323/7c0c2411/attachment.bin>


More information about the llvm-commits mailing list