[llvm] r327291 - [AMDGPU] Fix lowering enqueue kernel when kernel has no name

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 12 09:34:06 PDT 2018


Author: yaxunl
Date: Mon Mar 12 09:34:06 2018
New Revision: 327291

URL: http://llvm.org/viewvc/llvm-project?rev=327291&view=rev
Log:
[AMDGPU] Fix lowering enqueue kernel when kernel has no name

Since the enqueued kernels have internal linkage, their names may be dropped.
In that case, give each unnamed kernel a unique name: __amdgpu_enqueued_kernel
for the first one and __amdgpu_enqueued_kernel.n for later ones, where n is a
sequential number starting from 1.

Differential Revision: https://reviews.llvm.org/D44322

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp?rev=327291&r1=327290&r2=327291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp Mon Mar 12 09:34:06 2018
@@ -37,6 +37,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/User.h"
 #include "llvm/Pass.h"
@@ -94,18 +95,25 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::
   bool Changed = false;
   for (auto &F : M.functions()) {
     if (F.hasFnAttribute("enqueued-block")) {
+      if (!F.hasName()) {
+        SmallString<64> Name;
+        Mangler::getNameWithPrefix(Name, "__amdgpu_enqueued_kernel",
+                                   M.getDataLayout());
+        F.setName(Name);
+      }
+      auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
+      auto *GV = new GlobalVariable(
+          M, Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS),
+          /*IsConstant=*/true, GlobalValue::ExternalLinkage,
+          /*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr,
+          GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS,
+          /*IsExternallyInitialized=*/true);
+      DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
+
       for (auto U : F.users()) {
         if (!isa<ConstantExpr>(&*U))
           continue;
         auto *BitCast = cast<ConstantExpr>(&*U);
-        auto RuntimeHandle = (F.getName() + "_runtime_handle").str();
-        auto *GV = new GlobalVariable(
-            M, Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS),
-            /*IsConstant=*/true, GlobalValue::ExternalLinkage,
-            /*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr,
-            GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS,
-            /*IsExternallyInitialized=*/true);
-        DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
         auto *NewPtr = ConstantExpr::getPointerCast(GV, BitCast->getType());
         BitCast->replaceAllUsesWith(NewPtr);
         F.addFnAttr("runtime-handle", RuntimeHandle);

Modified: llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll?rev=327291&r1=327290&r2=327291&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll Mon Mar 12 09:34:06 2018
@@ -1,25 +1,41 @@
 ; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s
 
-; CHECK: @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
-; CHECK: @__test_block_invoke_2_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
+; CHECK: @__test_block_invoke_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
+; CHECK: @__test_block_invoke_2_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
+; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
+; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
 
 %struct.ndrange_t = type { i32 }
 %opencl.queue_t = type opaque
 
-; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
+; CHECK-LABEL: define amdgpu_kernel void @non_caller
+; CHECK-NOT: #{{[0-9]+}}
 define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
   !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
+; CHECK-LABEL: define amdgpu_kernel void @caller_indirect
+; CHECK-SAME: #[[AT_CALLER:[0-9]+]]
 define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
   !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
   call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
   ret void
 }
 
-; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
+; CHECK-LABEL: define amdgpu_kernel void @caller
+; CHECK-SAME: #[[AT_CALLER]]
+; CHECK-NOT: @__test_block_invoke_kernel
+; CHECK-NOT: @__test_block_invoke_2_kernel
+; CHECK-NOT: @__amdgpu_enqueued_kernel
+; CHECK-NOT: @__amdgpu_enqueued_kernel.1
+; CHECK-NOT: @0
+; CHECK-NOT: @1
+; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
+; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
+; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.runtime_handle
+; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.1.runtime_handle
+; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_2_kernel.runtime_handle
 define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
   !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
 entry:
@@ -41,6 +57,10 @@ entry:
     i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
   %tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
     i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
+  %tmp11 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
+    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @0 to i8*), i8* nonnull %tmp4) #2
+  %tmp12 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
+    i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @1 to i8*), i8* nonnull %tmp4) #2
   %block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0
   store i32 41, i32 addrspace(5)* %block.size4, align 8
   %block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1
@@ -60,7 +80,8 @@ entry:
   ret void
 }
 
-; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]]
+; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel
+; CHECK-SAME: #[[AT1:[0-9]+]]
 define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
   !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
 entry:
@@ -72,7 +93,8 @@ entry:
 
 declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr
 
-; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]]
+; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel
+; CHECK-SAME: #[[AT2:[0-9]+]]
 define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*,
   i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15
   !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
@@ -86,9 +108,25 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel
+; CHECK-SAME: #[[AT3:[0-9]+]]
+define internal amdgpu_kernel void @0(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
+  !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
+  ret void
+}
+
+; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel.1
+; CHECK-SAME: #[[AT4:[0-9]+]]
+define internal amdgpu_kernel void @1(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
+  !kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
+  ret void
+}
+
 ; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
-; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
-; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"
+; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel.runtime_handle"
+; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel.runtime_handle"
+; CHECK: attributes #[[AT3]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle"
+; CHECK: attributes #[[AT4]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle"
 
 attributes #0 = { "enqueued-block" }
 




More information about the llvm-commits mailing list