[llvm] r316907 - [AMDGPU] Emit metadata for hidden arguments for kernel enqueue

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 30 07:30:28 PDT 2017


Author: yaxunl
Date: Mon Oct 30 07:30:28 2017
New Revision: 316907

URL: http://llvm.org/viewvc/llvm-project?rev=316907&view=rev
Log:
[AMDGPU] Emit metadata for hidden arguments for kernel enqueue

Identify kernels which perform device-side kernel enqueues and emit
metadata for the associated hidden kernel arguments. Such kernels are
marked with the "calls-enqueue-kernel" function attribute by the
AMDGPUOpenCLEnqueuedBlockLowering pass, and later on the hidden kernel
argument metadata HiddenDefaultQueue and HiddenCompletionAction is
emitted for them.

Differential Revision: https://reviews.llvm.org/D39255

Added:
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
Modified:
    llvm/trunk/docs/AMDGPUUsage.rst
    llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
    llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll

Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Mon Oct 30 07:30:28 2017
@@ -1039,10 +1039,10 @@ non-AMD key names should be prefixed by
                                                   passed in the kernarg.
 
                                                 "HiddenCompletionAction"
-                                                  *TBD*
-
-                                                  .. TODO
-                                                     Add description.
+                                                  A global address space pointer
+                                                  to help link enqueued kernels into
+                                                  the ancestor tree for determining
+                                                  when the parent kernel has finished.
 
      "ValueType"       string         Required  Kernel argument value type. Only
                                                 present if "ValueKind" is

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp Mon Oct 30 07:30:28 2017
@@ -25,12 +25,20 @@
 // linkage does not work since optimization passes will try to replace loads
 // of the global variable with its initialization value.
 //
+// It also identifies the kernels that directly or indirectly enqueue kernels
+// and adds the "calls-enqueue-kernel" function attribute to them, which will
+// be used to determine whether to emit runtime metadata for the kernel
+// enqueue related hidden kernel arguments.
+//
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/User.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -66,7 +74,22 @@ ModulePass* llvm::createAMDGPUOpenCLEnqu
   return new AMDGPUOpenCLEnqueuedBlockLowering();
 }
 
+/// Collect direct or indirect callers of \p F and save them
+/// to \p Callers.
+static void collectCallers(Function *F, DenseSet<Function *> &Callers) {
+  for (auto U : F->users()) {
+    if (auto *CI = dyn_cast<CallInst>(&*U)) {
+      auto *Caller = CI->getParent()->getParent();
+      if (Callers.count(Caller))
+        continue;
+      Callers.insert(Caller);
+      collectCallers(Caller, Callers);
+    }
+  }
+}
+
 bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
+  DenseSet<Function *> Callers;
   auto &C = M.getContext();
   auto AS = AMDGPU::getAMDGPUAS(M);
   bool Changed = false;
@@ -91,8 +114,23 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::
       AddrCast->replaceAllUsesWith(NewPtr);
       F.addFnAttr("runtime-handle", RuntimeHandle);
       F.setLinkage(GlobalValue::ExternalLinkage);
+
+      // Collect direct or indirect callers of enqueue_kernel.
+      for (auto U : NewPtr->users()) {
+        if (auto *I = dyn_cast<Instruction>(&*U)) {
+          auto *F = I->getParent()->getParent();
+          Callers.insert(F);
+          collectCallers(F, Callers);
+        }
+      }
       Changed = true;
     }
   }
+
+  for (auto F : Callers) {
+    if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
+      continue;
+    F->addFnAttr("calls-enqueue-kernel");
+  }
   return Changed;
 }

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp Mon Oct 30 07:30:28 2017
@@ -266,12 +266,21 @@ void MetadataStreamer::emitKernelArgs(co
   emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
   emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
 
-  if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
-    return;
-
   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
                                       AMDGPUASI.GLOBAL_ADDRESS);
-  emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+  auto CallsPrintf = Func.getParent()->getNamedMetadata("llvm.printf.fmts");
+  if (CallsPrintf)
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+  if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+    if (!CallsPrintf) {
+      // Emit a dummy argument so that the remaining hidden arguments
+      // have a fixed position relative to the first hidden argument.
+      // This makes it easier for library code to access hidden arguments.
+      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+    }
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+  }
 }
 
 void MetadataStreamer::emitKernelArg(const Argument &Arg) {

Modified: llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll Mon Oct 30 07:30:28 2017
@@ -9,7 +9,21 @@ target triple = "amdgcn-amdhsa-amd-openc
 %struct.ndrange_t = type { i32 }
 %opencl.queue_t = type opaque
 
-define amdgpu_kernel void @test(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
+define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+  !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
+define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+  !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+  call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
+  ret void
+}
+
+; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
+define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
   !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
 entry:
   %block = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8
@@ -77,6 +91,7 @@ entry:
   ret void
 }
 
+; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
 ; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
 ; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"
 

Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll?rev=316907&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll Mon Oct 30 07:30:28 2017
@@ -0,0 +1,96 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK:  Version: [ 1, 0 ]
+; CHECK-NOT:  Printf:
+; CHECK:  Kernels:
+
+; CHECK:      - Name:            test_non_enqueue_kernel_caller
+; CHECK-NEXT:   SymbolName:      'test_non_enqueue_kernel_caller@kd'
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - TypeName:      char
+; CHECK-NEXT:       Size:          1
+; CHECK-NEXT:       Align:         1
+; CHECK-NEXT:       ValueKind:     ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NOT:        ValueKind:     HiddenNone
+; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
+; CHECK-NOT:        ValueKind:     HiddenCompletionAction
+define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_enqueue_kernel_caller
+; CHECK-NEXT:   SymbolName:      'test_enqueue_kernel_caller@kd'
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - TypeName:      char
+; CHECK-NEXT:       Size:          1
+; CHECK-NEXT:       Align:         1
+; CHECK-NEXT:       ValueKind:     ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenNone
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenDefaultQueue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenCompletionAction
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+attributes #0 = { "calls-enqueue-kernel" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll Mon Oct 30 07:30:28 2017
@@ -51,6 +51,8 @@
 ; CHECK-NEXT:       ValueKind:     HiddenPrintfBuffer
 ; CHECK-NEXT:       ValueType:     I8
 ; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
+; CHECK-NOT:        ValueKind:     HiddenCompletionAction
 define amdgpu_kernel void @test_char(i8 %a)
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
     !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
@@ -1267,7 +1269,52 @@ define amdgpu_kernel void @__test_block_
   ret void
 }
 
+; CHECK:      - Name:            test_enqueue_kernel_caller
+; CHECK-NEXT:   SymbolName:      'test_enqueue_kernel_caller@kd'
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - TypeName:      char
+; CHECK-NEXT:       Size:          1
+; CHECK-NEXT:       Align:         1
+; CHECK-NEXT:       ValueKind:     ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenDefaultQueue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenCompletionAction
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
+    !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+  ret void
+}
+
 attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
+attributes #1 = { "calls-enqueue-kernel" }
 
 !llvm.printf.fmts = !{!100, !101}
 




More information about the llvm-commits mailing list