[llvm] 47554a0 - AMDGPU: Use more accurate IR type for block handle

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 6 18:31:06 PST 2023


Author: Matt Arsenault
Date: 2023-01-06T21:23:28-05:00
New Revision: 47554a0c73ee703362d913d3a149f104819c8365

URL: https://github.com/llvm/llvm-project/commit/47554a0c73ee703362d913d3a149f104819c8365
DIFF: https://github.com/llvm/llvm-project/commit/47554a0c73ee703362d913d3a149f104819c8365.diff

LOG: AMDGPU: Use more accurate IR type for block handle

The device library uses this as a struct with a pointer-sized integer
and two ints.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
    llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 971be5764913..f242cbc494e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -70,6 +70,10 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() {
 bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
   auto &C = M.getContext();
   bool Changed = false;
+
+  // ptr kernel_object, i32 private_segment_size, i32 group_segment_size
+  StructType *HandleTy = nullptr;
+
   for (auto &F : M.functions()) {
     if (F.hasFnAttribute("enqueued-block")) {
       if (!F.hasName()) {
@@ -80,11 +84,17 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
       }
       LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
       auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
-      auto T = ArrayType::get(Type::getInt64Ty(C), 2);
+      if (!HandleTy) {
+        Type *Int32 = Type::getInt32Ty(C);
+        HandleTy = StructType::create(
+            C, {Type::getInt8Ty(C)->getPointerTo(0), Int32, Int32},
+            "block.runtime.handle.t");
+      }
+
       auto *GV = new GlobalVariable(
-          M, T,
+          M, HandleTy,
           /*isConstant=*/false, GlobalValue::ExternalLinkage,
-          /*Initializer=*/Constant::getNullValue(T), RuntimeHandle,
+          /*Initializer=*/Constant::getNullValue(HandleTy), RuntimeHandle,
           /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
           AMDGPUAS::GLOBAL_ADDRESS,
           /*isExternallyInitialized=*/false);

diff  --git a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
index 13ed69597064..c4549286753e 100644
--- a/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -109,11 +109,11 @@ define internal amdgpu_kernel void @1(<{ i32, i32, ptr addrspace(1), i8 }> %arg)
 attributes #0 = { "enqueued-block" }
 ;.
 ; CHECK: @[[KERNEL_ADDRESS_USER:[a-zA-Z0-9_$"\\.-]+]] = global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @block_has_used_kernel_address.runtime_handle to ptr)]
-; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
-; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__TEST_BLOCK_INVOKE_2_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[BLOCK_HAS_USED_KERNEL_ADDRESS_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
+; CHECK: @[[__AMDGPU_ENQUEUED_KERNEL_1_RUNTIME_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = addrspace(1) global [[BLOCK_RUNTIME_HANDLE_T:%.*]] zeroinitializer
 ;.
 ; CHECK-LABEL: define {{[^@]+}}@non_caller
 ; CHECK-SAME: (ptr addrspace(1) [[A:%.*]], i8 [[B:%.*]], ptr addrspace(1) [[C:%.*]], i64 [[D:%.*]]) {


        


More information about the llvm-commits mailing list