[llvm] 6733590 - AMDGPU: Set implicit kernarg size to be of 256 bytes for code object version 5

Changpeng Fang via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 7 08:36:03 PDT 2022


Author: Changpeng Fang
Date: 2022-04-07T08:35:23-07:00
New Revision: 6733590db284ac5ba530cc83e8fde699b44c4863

URL: https://github.com/llvm/llvm-project/commit/6733590db284ac5ba530cc83e8fde699b44c4863
DIFF: https://github.com/llvm/llvm-project/commit/6733590db284ac5ba530cc83e8fde699b44c4863.diff

LOG: AMDGPU: Set implicit kernarg size to be of 256 bytes for code object version 5

Summary:
  If the implicitarg_ptr intrinsic is not used, set the implicit kernarg size to 0;
otherwise, set it to 256 bytes for code object version 5 (and beyond).

Reviewers: arsenm

Differential Revision: https://reviews.llvm.org/D123262

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 572df05d0c4ff..4810a6c43fd73 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -652,7 +652,8 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
     return 16;
 
   // Assume all implicit inputs are used by default
-  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
+  unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
+  return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes);
 }
 
 uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 581bb7233e505..92261d700446b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,COV5 %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
 
 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
@@ -11,6 +12,8 @@
 ; MESA: kernarg_segment_alignment = 4
 
 ; HSA: s_load_dword s0, s[4:5], 0x0
+
+; COV5: .amdhsa_kernarg_size 256
 define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -31,6 +34,8 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
 ; HSA: s_load_dword s0, [[NULL]], 0x0
 
 ; MESA: s_load_dword s0, s[4:5], 0x0
+
+; COV5: .amdhsa_kernarg_size 0
 define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -39,15 +44,18 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
 }
 
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
 ; HSA: kernarg_segment_byte_size = 48
 ; HSA: kernarg_segment_alignment = 4
 
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
 ; MESA: kernarg_segment_byte_size = 16
 ; MESA: kernarg_segment_alignment = 4
 
 ; HSA: s_load_dword s0, s[4:5], 0x0
+
+; COV5: .amdhsa_kernarg_size 48
 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -56,15 +64,18 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
 }
 
 ; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
 ; HSA: kernarg_segment_byte_size = 168
 ; HSA: kernarg_segment_alignment = 4
 
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
 ; MESA: kernarg_segment_byte_size = 128
 ; MESA: kernarg_segment_alignment = 4
 
 ; HSA: s_load_dword s0, s[4:5], 0x1c
+
+; COV5: .amdhsa_kernarg_size 368
 define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -73,15 +84,18 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
 }
 
 ; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
 
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
 ; HSA: kernarg_segment_byte_size = 160
 ; HSA: kernarg_segment_alignment = 4
 
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
 ; MESA: kernarg_segment_byte_size = 128
 ; MESA: kernarg_segment_alignment = 4
 
 ; HSA: s_load_dword s0, s[4:5], 0x1c
+
+; COV5: .amdhsa_kernarg_size 160
 define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -124,6 +138,8 @@ define void @opencl_func_implicitarg_ptr() #0 {
 
 ; GCN: s_mov_b64 s[8:9], s[4:5]
 ; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 256
 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
   call void @func_implicitarg_ptr()
   ret void
@@ -141,30 +157,36 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
 ; HSA: s_mov_b64 s[8:9], 0{{$}}
 ; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
 ; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 0
 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
   call void @func_implicitarg_ptr()
   ret void
 }
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
 ; HSA: kernarg_segment_byte_size = 48
 ; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
 ; MESA: kernarg_segment_byte_size = 16
 ; GCN: s_mov_b64 s[8:9], s[4:5]
 ; GCN-NOT: s4
 ; GCN-NOT: s5
 ; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 48
 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
   call void @func_implicitarg_ptr()
   ret void
 }
 
 ; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
 ; HSA: kernarg_segment_byte_size = 168
 ; HSA: kernarg_segment_alignment = 4
 
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
 ; MESA: kernarg_segment_byte_size = 128
 ; MESA: kernarg_segment_alignment = 4
 
@@ -173,21 +195,26 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
 
 ; GCN: s_addc_u32 s9, s5, 0{{$}}
 ; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 368
 define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
   call void @func_implicitarg_ptr()
   ret void
 }
 
 ; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
 ; HSA: kernarg_segment_byte_size = 160
 ; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
 ; MESA: kernarg_segment_byte_size = 128
 ; MESA: kernarg_segment_alignment = 4
 
 ; GCN: s_add_u32 s8, s4, 0x70
 ; GCN: s_addc_u32 s9, s5, 0{{$}}
 ; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 160
 define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
   call void @func_implicitarg_ptr()
   ret void
@@ -258,8 +285,11 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
 
 ; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
 ; HSA: kernarg_segment_byte_size = 120
+; HSA: kernarg_segment_alignment = 6
 ; MESA: kernarg_segment_byte_size = 84
-; GCN: kernarg_segment_alignment = 6
+; MESA: kernarg_segment_alignment = 6
+
+; COV5: .amdhsa_kernarg_size 120
 define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
   %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
   %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -267,7 +297,6 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
   ret void
 }
 
-
 ; HSA-LABEL: Kernels:
 ; HSA-LABEL: - Name:            kernel_implicitarg_ptr_empty
 ; HSA: CodeProps:
@@ -318,6 +347,55 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
 ; HSA: KernargSegmentSize: 120
 ; HSA: KernargSegmentAlign: 64
 
+; COV5-LABEL:   amdhsa.kernels:
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 256
+; COV5-LABEL:   .name:           kernel_implicitarg_ptr_empty
+
+; COV5:         .kernarg_segment_align: 4
+; COV5-NEXT:    .kernarg_segment_size: 0
+; COV5-LABEL:   .name:           kernel_implicitarg_ptr_empty_0implicit
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 48
+; COV5-LABEL:   .name:           opencl_kernel_implicitarg_ptr_empty
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 368
+; COV5-LABEL:   .name:           kernel_implicitarg_ptr
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 160
+; COV5-LABEL:   .name:           opencl_kernel_implicitarg_ptr
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 256
+; COV5-LABEL:   .name:           kernel_call_implicitarg_ptr_func_empty
+
+; COV5:         .kernarg_segment_align: 4
+; COV5-NEXT:    .kernarg_segment_size: 0
+; COV5-LABEL:   .name:           kernel_call_implicitarg_ptr_func_empty_implicit0
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 48
+; COV5-LABEL:   .name:           opencl_kernel_call_implicitarg_ptr_func_empty
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 368
+; COV5-LABEL:   .name:           kernel_call_implicitarg_ptr_func
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 160
+; COV5-LABEL:   .name:           opencl_kernel_call_implicitarg_ptr_func
+
+; COV5:         .kernarg_segment_align: 8
+; COV5-NEXT:    .kernarg_segment_size: 368
+; COV5-LABEL:   .name:           kernel_call_kernarg_implicitarg_ptr_func
+
+; COV5:         .kernarg_segment_align: 64
+; COV5-NEXT:    .kernarg_segment_size: 120
+; COV5-LABEL:   .name:           kernel_implicitarg_no_struct_align_padding
+
 declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
 


        


More information about the llvm-commits mailing list