[llvm] r329341 - AMDGPU/Metadata: Always report a fixed number of hidden arguments
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 5 13:46:04 PDT 2018
Author: kzhuravl
Date: Thu Apr 5 13:46:04 2018
New Revision: 329341
URL: http://llvm.org/viewvc/llvm-project?rev=329341&view=rev
Log:
AMDGPU/Metadata: Always report a fixed number of hidden arguments
The number of hidden arguments is currently 6. If the "feature" that needs
a hidden argument is not used, report a dummy hidden argument in its place.
Otherwise the reported arguments do not match the kernarg size reported in
the kernel header.
Differential Revision: https://reviews.llvm.org/D45129
Added:
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp?rev=329341&r1=329340&r2=329341&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUHSAMetadataStreamer.cpp Thu Apr 5 13:46:04 2018
@@ -268,18 +268,22 @@ void MetadataStreamer::emitKernelArgs(co
auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
AMDGPUASI.GLOBAL_ADDRESS);
- auto CallsPrintf = Func.getParent()->getNamedMetadata("llvm.printf.fmts");
- if (CallsPrintf)
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+ else
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
if (Func.hasFnAttribute("calls-enqueue-kernel")) {
- if (!CallsPrintf) {
- // Emit a dummy argument so that the remaining hidden arguments
- // have a fixed position relative to the first hidden argument.
- // This is to facilitate library code to access hidden arguments.
- emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
- }
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
+ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
}
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll?rev=329341&r1=329340&r2=329341&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel.ll Thu Apr 5 13:46:04 2018
@@ -30,7 +30,6 @@
; CHECK-NEXT: Align: 8
; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
; CHECK-NEXT: ValueType: I64
-; CHECK-NOT: ValueKind: HiddenNone
; CHECK-NOT: ValueKind: HiddenDefaultQueue
; CHECK-NOT: ValueKind: HiddenCompletionAction
define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll?rev=329341&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll Thu Apr 5 13:46:04 2018
@@ -0,0 +1,70 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Kernels:
+
+; CHECK: - Name: test
+; CHECK: SymbolName: 'test@kd'
+; CHECK: Args:
+; CHECK-NEXT: - Name: r
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Name: b
+; CHECK-NEXT: Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: GlobalBuffer
+; CHECK-NEXT: ValueType: F16
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenNone
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+define amdgpu_kernel void @test(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+!opencl.ocl.version = !{!0}
+!0 = !{i32 2, i32 0}
More information about the llvm-commits
mailing list