[llvm] r316907 - [AMDGPU] Emit metadata for hidden arguments for kernel enqueue
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 30 07:30:28 PDT 2017
Author: yaxunl
Date: Mon Oct 30 07:30:28 2017
New Revision: 316907
URL: http://llvm.org/viewvc/llvm-project?rev=316907&view=rev
Log:
[AMDGPU] Emit metadata for hidden arguments for kernel enqueue
Identifies kernels which performs device side kernel enqueues and emit
metadata for the associated hidden kernel arguments. Such kernels are
marked with calls-enqueue-kernel function attribute by
AMDGPUOpenCLEnqueueKernelLowering pass and later on
hidden kernel arguments metadata HiddenDefaultQueue and
HiddenCompletionAction are emitted for them.
Differential Revision: https://reviews.llvm.org/D39255
Added:
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
Modified:
llvm/trunk/docs/AMDGPUUsage.rst
llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Mon Oct 30 07:30:28 2017
@@ -1039,10 +1039,10 @@ non-AMD key names should be prefixed by
passed in the kernarg.
"HiddenCompletionAction"
- *TBD*
-
- .. TODO
- Add description.
+ A global address space pointer
+ to help link enqueued kernels into
+ the ancestor tree for determining
+ when the parent kernel has finished.
"ValueType" string Required Kernel argument value type. Only
present if "ValueKind" is
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp Mon Oct 30 07:30:28 2017
@@ -25,12 +25,20 @@
// linkage does not work since optimization passes will try to replace loads
// of the global variable with its initialization value.
//
+// It also identifies the kernels directly or indirectly enqueues kernels
+// and adds "calls-enqueue-kernel" function attribute to them, which will
+// be used to determine whether to emit runtime metadata for the kernel
+// enqueue related hidden kernel arguments.
+//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/User.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,7 +74,22 @@ ModulePass* llvm::createAMDGPUOpenCLEnqu
return new AMDGPUOpenCLEnqueuedBlockLowering();
}
+/// Collect direct or indrect callers of \p F and save them
+/// to \p Callers.
+static void collectCallers(Function *F, DenseSet<Function *> &Callers) {
+ for (auto U : F->users()) {
+ if (auto *CI = dyn_cast<CallInst>(&*U)) {
+ auto *Caller = CI->getParent()->getParent();
+ if (Callers.count(Caller))
+ continue;
+ Callers.insert(Caller);
+ collectCallers(Caller, Callers);
+ }
+ }
+}
+
bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
+ DenseSet<Function *> Callers;
auto &C = M.getContext();
auto AS = AMDGPU::getAMDGPUAS(M);
bool Changed = false;
@@ -91,8 +114,23 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::
AddrCast->replaceAllUsesWith(NewPtr);
F.addFnAttr("runtime-handle", RuntimeHandle);
F.setLinkage(GlobalValue::ExternalLinkage);
+
+ // Collect direct or indirect callers of enqueue_kernel.
+ for (auto U : NewPtr->users()) {
+ if (auto *I = dyn_cast<Instruction>(&*U)) {
+ auto *F = I->getParent()->getParent();
+ Callers.insert(F);
+ collectCallers(F, Callers);
+ }
+ }
Changed = true;
}
}
+
+ for (auto F : Callers) {
+ if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
+ continue;
+ F->addFnAttr("calls-enqueue-kernel");
+ }
return Changed;
}
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp Mon Oct 30 07:30:28 2017
@@ -266,12 +266,21 @@ void MetadataStreamer::emitKernelArgs(co
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
- if (!Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
- return;
-
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
AMDGPUASI.GLOBAL_ADDRESS);
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+ auto CallsPrintf = Func.getParent()->getNamedMetadata("llvm.printf.fmts");
+ if (CallsPrintf)
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ if (!CallsPrintf) {
+ // Emit a dummy argument so that the remaining hidden arguments
+ // have a fixed position relative to the first hidden argument.
+ // This is to facilitate library code to access hidden arguments.
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ }
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+ }
}
void MetadataStreamer::emitKernelArg(const Argument &Arg) {
Modified: llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/enqueue-kernel.ll Mon Oct 30 07:30:28 2017
@@ -9,7 +9,21 @@ target triple = "amdgcn-amdhsa-amd-openc
%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque
-define amdgpu_kernel void @test(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
+define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+ !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
+define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
+ !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
+ call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
+ ret void
+}
+
+; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
+define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
%block = alloca <{ i32, i32, i8 addrspace(4)*, i8 addrspace(1)*, i8 }>, align 8
@@ -77,6 +91,7 @@ entry:
ret void
}
+; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll?rev=316907&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll Mon Oct 30 07:30:28 2017
@@ -0,0 +1,96 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK-NOT: Printf:
+; CHECK: Kernels:
+
+; CHECK: - Name: test_non_enqueue_kernel_caller
+; CHECK-NEXT: SymbolName: 'test_non_enqueue_kernel_caller at kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NOT: ValueKind: HiddenNone
+; CHECK-NOT: ValueKind: HiddenDefaultQueue
+; CHECK-NOT: ValueKind: HiddenCompletionAction
+define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: - Name: test_enqueue_kernel_caller
+; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller at kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenDefaultQueue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenCompletionAction
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+attributes #0 = { "calls-enqueue-kernel" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll?rev=316907&r1=316906&r2=316907&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll Mon Oct 30 07:30:28 2017
@@ -51,6 +51,8 @@
; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
; CHECK-NEXT: ValueType: I8
; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NOT: ValueKind: HiddenDefaultQueue
+; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_char(i8 %a)
!kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
!kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
@@ -1267,7 +1269,52 @@ define amdgpu_kernel void @__test_block_
ret void
}
+; CHECK: - Name: test_enqueue_kernel_caller
+; CHECK-NEXT: SymbolName: 'test_enqueue_kernel_caller at kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenDefaultQueue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenCompletionAction
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
+ !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+ ret void
+}
+
attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
+attributes #1 = { "calls-enqueue-kernel" }
!llvm.printf.fmts = !{!100, !101}
More information about the llvm-commits
mailing list