[llvm] 47554a0 - AMDGPU: Use more accurate IR type for block handle
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 6 18:31:06 PST 2023
Author: Matt Arsenault
Date: 2023-01-06T21:23:28-05:00
New Revision: 47554a0c73ee703362d913d3a149f104819c8365
URL: https://github.com/llvm/llvm-project/commit/47554a0c73ee703362d913d3a149f104819c8365
DIFF: https://github.com/llvm/llvm-project/commit/47554a0c73ee703362d913d3a149f104819c8365.diff
LOG: AMDGPU: Use more accurate IR type for block handle
The device library uses this as a struct with a pointer-sized integer
and two ints.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 971be5764913..f242cbc494e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -70,6 +70,10 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() {
bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
auto &C = M.getContext();
bool Changed = false;
+
+ // ptr kernel_object, i32 private_segment_size, i32 group_segment_size
+ StructType *HandleTy = nullptr;
+
for (auto &F : M.functions()) {
if (F.hasFnAttribute("enqueued-block")) {
if (!F.hasName()) {
@@ -80,11 +84,17 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
}
LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
- auto T = ArrayType::get(Type::getInt64Ty(C), 2);
+ if (!HandleTy) {
+ Type *Int32 = Type::getInt32Ty(C);
+ HandleTy = StructType::create(
+ C, {Type::getInt8Ty(C)->getPointerTo(0), Int32, Int32},
+ "block.runtime.handle.t");
+ }
+
auto *GV = new GlobalVariable(
- M, T,
+ M, HandleTy,
/*isConstant=*/false, GlobalValue::ExternalLinkage,
- /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
+ /*Initializer=*/Constant::getNullValue(HandleTy), RuntimeHandle,
/*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
AMDGPUAS::GLOBAL_ADDRESS,
/*isExternallyInitialized=*/false);
diff --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
index 13ed69597064..c4549286753e 100644
--- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -109,11 +109,11 @@ define internal amdgpu_kernel void @1(<{ i32, i32, ptr addrspace(1), i8 }> %arg)
attributes #0 = { "enqueued-block" }
;.
; CHECK: @[[KERNEL_ADDRESS_USER:[a-zA-Z0-9_$"\\.-]+]] = global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @block_has_used_kernel_address.runtime_handle to ptr)]
-; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
;.
; CHECK-LABEL: define {{[^@]+}}@non_caller
; CHECK-SAME: (ptr addrspace(1) [[A:%.*]], i8 [[B:%.*]], ptr addrspace(1) [[C:%.*]], i64 [[D:%.*]]) {
More information about the llvm-commits mailing list