[llvm] 6733590 - AMDGPU: Set implicit kernarg size to be of 256 bytes for code object version 5
Changpeng Fang via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 7 08:36:03 PDT 2022
Author: Changpeng Fang
Date: 2022-04-07T08:35:23-07:00
New Revision: 6733590db284ac5ba530cc83e8fde699b44c4863
URL: https://github.com/llvm/llvm-project/commit/6733590db284ac5ba530cc83e8fde699b44c4863
DIFF: https://github.com/llvm/llvm-project/commit/6733590db284ac5ba530cc83e8fde699b44c4863.diff
LOG: AMDGPU: Set implicit kernarg size to be of 256 bytes for code object version 5
Summary:
If the implicitarg_ptr intrinsic is not used, set the implicit kernarg size to 0;
otherwise, set it to 256 bytes for code object version 5 and later.
Reviewers: arsenm
Differential Revision: https://reviews.llvm.org/D123262
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 572df05d0c4ff..4810a6c43fd73 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -652,7 +652,8 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
return 16;
// Assume all implicit inputs are used by default
- return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", 56);
+ unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
+ return AMDGPU::getIntegerAttribute(F, "amdgpu-implicitarg-num-bytes", NBytes);
}
uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 581bb7233e505..92261d700446b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=5 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,COV5 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
@@ -11,6 +12,8 @@
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x0
+
+; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -31,6 +34,8 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; HSA: s_load_dword s0, [[NULL]], 0x0
; MESA: s_load_dword s0, s[4:5], 0x0
+
+; COV5: .amdhsa_kernarg_size 0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -39,15 +44,18 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
}
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr_empty:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x0
+
+; COV5: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -56,15 +64,18 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
}
; GCN-LABEL: {{^}}kernel_implicitarg_ptr:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x1c
+
+; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -73,15 +84,18 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
}
; GCN-LABEL: {{^}}opencl_kernel_implicitarg_ptr:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
; HSA: s_load_dword s0, s[4:5], 0x1c
+
+; COV5: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -124,6 +138,8 @@ define void @opencl_func_implicitarg_ptr() #0 {
; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
call void @func_implicitarg_ptr()
ret void
@@ -141,30 +157,36 @@ define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty() #0 {
; HSA: s_mov_b64 s[8:9], 0{{$}}
; MESA: s_mov_b64 s[8:9], s[4:5]{{$}}
; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 0
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func_empty_implicit0() #3 {
call void @func_implicitarg_ptr()
ret void
}
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func_empty:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 48
; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 16
; GCN: s_mov_b64 s[8:9], s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
call void @func_implicitarg_ptr()
ret void
}
; GCN-LABEL: {{^}}kernel_call_implicitarg_ptr_func:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 168
; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
@@ -173,21 +195,26 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func_empty() #1 {
; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_call_implicitarg_ptr_func([112 x i8]) #0 {
call void @func_implicitarg_ptr()
ret void
}
; GCN-LABEL: {{^}}opencl_kernel_call_implicitarg_ptr_func:
-; GCN: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: kernarg_segment_byte_size = 160
; HSA: kernarg_segment_alignment = 4
+; MESA: enable_sgpr_kernarg_segment_ptr = 1
; MESA: kernarg_segment_byte_size = 128
; MESA: kernarg_segment_alignment = 4
; GCN: s_add_u32 s8, s4, 0x70
; GCN: s_addc_u32 s9, s5, 0{{$}}
; GCN: s_swappc_b64
+
+; COV5: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) #1 {
call void @func_implicitarg_ptr()
ret void
@@ -258,8 +285,11 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
; GCN-LABEL: {{^}}kernel_implicitarg_no_struct_align_padding:
; HSA: kernarg_segment_byte_size = 120
+; HSA: kernarg_segment_alignment = 6
; MESA: kernarg_segment_byte_size = 84
-; GCN: kernarg_segment_alignment = 6
+; MESA: kernarg_segment_alignment = 6
+
+; COV5: .amdhsa_kernarg_size 120
define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
%cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
@@ -267,7 +297,6 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
ret void
}
-
; HSA-LABEL: Kernels:
; HSA-LABEL: - Name: kernel_implicitarg_ptr_empty
; HSA: CodeProps:
@@ -318,6 +347,55 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
; HSA: KernargSegmentSize: 120
; HSA: KernargSegmentAlign: 64
+; COV5-LABEL: amdhsa.kernels:
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 256
+; COV5-LABEL: .name: kernel_implicitarg_ptr_empty
+
+; COV5: .kernarg_segment_align: 4
+; COV5-NEXT: .kernarg_segment_size: 0
+; COV5-LABEL: .name: kernel_implicitarg_ptr_empty_0implicit
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 48
+; COV5-LABEL: .name: opencl_kernel_implicitarg_ptr_empty
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 368
+; COV5-LABEL: .name: kernel_implicitarg_ptr
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 160
+; COV5-LABEL: .name: opencl_kernel_implicitarg_ptr
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 256
+; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func_empty
+
+; COV5: .kernarg_segment_align: 4
+; COV5-NEXT: .kernarg_segment_size: 0
+; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func_empty_implicit0
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 48
+; COV5-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func_empty
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 368
+; COV5-LABEL: .name: kernel_call_implicitarg_ptr_func
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 160
+; COV5-LABEL: .name: opencl_kernel_call_implicitarg_ptr_func
+
+; COV5: .kernarg_segment_align: 8
+; COV5-NEXT: .kernarg_segment_size: 368
+; COV5-LABEL: .name: kernel_call_kernarg_implicitarg_ptr_func
+
+; COV5: .kernarg_segment_align: 64
+; COV5-NEXT: .kernarg_segment_size: 120
+; COV5-LABEL: .name: kernel_implicitarg_no_struct_align_padding
+
declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
More information about the llvm-commits mailing list