[PATCH] D140642: AMDGPU: Use more accurate IR type for block handle
Matt Arsenault via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 23 15:23:21 PST 2022
arsenm created this revision.
arsenm added reviewers: yaxunl, rampitec, AMDGPU.
Herald added subscribers: kosarev, foad, kerbowa, hiraditya, tpr, dstuttard, jvesely, kzhuravl.
Herald added a project: All.
arsenm requested review of this revision.
Herald added a subscriber: wdng.
Herald added a project: LLVM.
The device library treats the block runtime handle as a struct containing a
pointer-sized integer followed by two 32-bit ints.
https://reviews.llvm.org/D140642
Files:
llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
Index: llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -109,11 +109,11 @@
attributes #0 = { "enqueued-block" }
;.
; CHECK: @[[KERNEL_ADDRESS_USER:[a-zA-Z0-9_$"\\.-]+]] = global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @block_has_used_kernel_address.runtime_handle to ptr)]
-; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
;.
; CHECK-LABEL: define {{[^@]+}}@non_caller
; CHECK-SAME: (ptr addrspace(1) [[A:%.*]], i8 [[B:%.*]], ptr addrspace(1) [[C:%.*]], i64 [[D:%.*]]) {
Index: llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -70,6 +70,10 @@
bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
auto &C = M.getContext();
bool Changed = false;
+
+ // ptr kernel_object, i32 private_segment_size, i32 group_segment_size
+ StructType *HandleTy = nullptr;
+
for (auto &F : M.functions()) {
if (F.hasFnAttribute("enqueued-block")) {
if (!F.hasName()) {
@@ -80,11 +84,17 @@
}
LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
- auto T = ArrayType::get(Type::getInt64Ty(C), 2);
+ if (!HandleTy) {
+ Type *Int32 = Type::getInt32Ty(C);
+ HandleTy = StructType::create(
+ C, {Type::getInt8Ty(C)->getPointerTo(0), Int32, Int32},
+ "block.runtime.handle.t");
+ }
+
auto *GV = new GlobalVariable(
- M, T,
+ M, HandleTy,
/*isConstant=*/false, GlobalValue::ExternalLinkage,
- /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
+ /*Initializer=*/Constant::getNullValue(HandleTy), RuntimeHandle,
/*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
AMDGPUAS::GLOBAL_ADDRESS,
/*isExternallyInitialized=*/false);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D140642.485168.patch
Type: text/x-patch
Size: 3479 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221223/2ef150d5/attachment.bin>
More information about the llvm-commits mailing list