[llvm] r328351 - [AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU
Tony Tye via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 23 11:58:47 PDT 2018
Author: t-tye
Date: Fri Mar 23 11:58:47 2018
New Revision: 328351
URL: http://llvm.org/viewvc/llvm-project?rev=328351&view=rev
Log:
[AMDGPU] Update OpenCL to use 48 bytes of implicit arguments for AMDGPU
Add two additional implicit arguments for OpenCL for the AMDGPU target using the AMDHSA runtime to support device enqueue.
Differential Revision: https://reviews.llvm.org/D44697
Modified:
llvm/trunk/docs/AMDGPUUsage.rst
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=328351&r1=328350&r2=328351&view=diff
==============================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Fri Mar 23 11:58:47 2018
@@ -3801,10 +3801,14 @@ When the language is OpenCL the followin
Position Byte Byte Description
Size Alignment
======== ==== ========= ===========================================
- 0 8 8 OpenCL Global Offset X
- 1 8 8 OpenCL Global Offset Y
- 2 8 8 OpenCL Global Offset Z
- 3 8 8 OpenCL printf buffer
+ 1 8 8 OpenCL Global Offset X
+ 2 8 8 OpenCL Global Offset Y
+ 3 8 8 OpenCL Global Offset Z
+ 4 8 8 OpenCL address of printf buffer
+ 5 8 8 OpenCL address of virtual queue used by
+ enqueue_kernel.
+ 6 8 8 OpenCL address of AqlWrap struct used by
+ enqueue_kernel.
======== ==== ========= ===========================================
.. _amdgpu-hcc:
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll?rev=328351&r1=328350&r2=328351&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll Fri Mar 23 11:58:47 2018
@@ -18,7 +18,7 @@ define amdgpu_kernel void @kernel_implic
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
; MESA: kernarg_segment_byte_size = 16
; HSA: s_load_dword s0, s[4:5], 0x0
@@ -46,7 +46,7 @@ define amdgpu_kernel void @kernel_implic
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
; MESA: kernarg_segment_byte_size = 464
; HSA: s_load_dword s0, s[4:5], 0x1c
@@ -106,7 +106,7 @@ define amdgpu_kernel void @kernel_call_i
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 32
+; HSA: kernarg_segment_byte_size = 48
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
@@ -132,7 +132,7 @@ define amdgpu_kernel void @kernel_call_i
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
-; HSA: kernarg_segment_byte_size = 144
+; HSA: kernarg_segment_byte_size = 160
; MESA: kernarg_segment_byte_size = 464
; HSA: s_add_u32 s6, s4, 0x70
@@ -232,5 +232,5 @@ declare i8 addrspace(4)* @llvm.amdgcn.im
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
attributes #0 = { nounwind noinline }
-attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="32" }
+attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #2 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll?rev=328351&r1=328350&r2=328351&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll Fri Mar 23 11:58:47 2018
@@ -46,7 +46,7 @@ define amdgpu_kernel void @test_implicit
}
; ALL-LABEL: {{^}}opencl_test_implicit_alignment
-; HSA: kernarg_segment_byte_size = 48
+; HSA: kernarg_segment_byte_size = 64
; OS-MESA3D: kernarg_segment_byte_size = 28
; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
@@ -79,4 +79,4 @@ declare i8 addrspace(4)* @llvm.amdgcn.im
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
-attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="32" }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
More information about the llvm-commits mailing list