[PATCH] D48094: [AMDGPU] Change enqueue kernel handle type

Yaxun Liu via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 12 14:04:43 PDT 2018


yaxunl created this revision.
yaxunl added a reviewer: brian.
Herald added subscribers: t-tye, tpr, dstuttard, nhaehnle, wdng, kzhuravl, arsenm.

Currently the handle type is a global pointer, which holds 8 bytes.
We need a larger type that holds 16 bytes, so change it
to [2 x i64].


https://reviews.llvm.org/D48094

Files:
  lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
  test/CodeGen/AMDGPU/enqueue-kernel.ll


Index: test/CodeGen/AMDGPU/enqueue-kernel.ll
===================================================================
--- test/CodeGen/AMDGPU/enqueue-kernel.ll
+++ test/CodeGen/AMDGPU/enqueue-kernel.ll
@@ -1,9 +1,9 @@
 ; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s
 
-; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
-; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
-; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global i8 addrspace(1)* null
-; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global i8 addrspace(1)* null
+; CHECK: @__test_block_invoke_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @__test_block_invoke_2_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
+; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = addrspace(1) global [2 x i64] zeroinitializer
 
 %struct.ndrange_t = type { i32 }
 %opencl.queue_t = type opaque
@@ -84,7 +84,7 @@
 ; CHECK-LABEL: define amdgpu_kernel void @inlined_caller
 ; CHECK-SAME: #[[AT_CALLER]]
 ; CHECK-NOT: @__test_block_invoke_kernel
-; CHECK: load i64, i64 addrspace(1)* bitcast (i8 addrspace(1)* addrspace(1)* @__test_block_invoke_kernel.runtime_handle to i64 addrspace(1)*)
+; CHECK: load i64, i64 addrspace(1)* getelementptr inbounds ([2 x i64], [2 x i64] addrspace(1)* @__test_block_invoke_kernel.runtime_handle, i32 0, i32 0)
 define amdgpu_kernel void @inlined_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
   !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
 entry:
Index: lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -36,6 +36,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
@@ -116,7 +117,7 @@
       }
       LLVM_DEBUG(dbgs() << "found enqueued kernel: " << F.getName() << '\n');
       auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
-      auto T = Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS);
+      auto T = ArrayType::get(Type::getInt64Ty(C), 2);
       auto *GV = new GlobalVariable(
           M, T,
           /*IsConstant=*/false, GlobalValue::ExternalLinkage,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D48094.151022.patch
Type: text/x-patch
Size: 2778 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180612/fff0137f/attachment.bin>


More information about the llvm-commits mailing list