[llvm] r348963 - [AMDGPU] Emit MessagePack HSA Metadata for v3 code object
Scott Linder via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 12 11:39:27 PST 2018
Author: scott.linder
Date: Wed Dec 12 11:39:27 2018
New Revision: 348963
URL: http://llvm.org/viewvc/llvm-project?rev=348963&view=rev
Log:
[AMDGPU] Emit MessagePack HSA Metadata for v3 code object
Continue to present HSA metadata as YAML in ASM and when output by tools
(e.g. llvm-readobj), but encode it in Messagepack in the code object.
Differential Revision: https://reviews.llvm.org/D48179
Added:
llvm/trunk/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
llvm/trunk/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
Modified:
llvm/trunk/include/llvm/BinaryFormat/ELF.h
llvm/trunk/include/llvm/Support/AMDGPUMetadata.h
llvm/trunk/lib/BinaryFormat/CMakeLists.txt
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/LLVMBuild.txt
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll
llvm/trunk/test/MC/AMDGPU/hsa-v3.s
llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
Added: llvm/trunk/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h?rev=348963&view=auto
==============================================================================
--- llvm/trunk/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h (added)
+++ llvm/trunk/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h Wed Dec 12 11:39:27 2018
@@ -0,0 +1,70 @@
+//===- AMDGPUMetadataVerifier.h - MsgPack Types -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This is a verifier for AMDGPU HSA metadata, which can verify both
+/// well-typed metadata and untyped metadata. When verifying in the non-strict
+/// mode, untyped metadata is coerced into the correct type if possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
+#define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
+
+#include "llvm/BinaryFormat/MsgPackTypes.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAMD {
+namespace V3 {
+
+/// Verifier for AMDGPU HSA metadata.
+///
+/// Operates in two modes:
+///
+/// In strict mode, metadata must already be well-typed.
+///
+/// In non-strict mode, metadata is coerced into expected types when possible.
+class MetadataVerifier {
+ bool Strict;
+
+ bool verifyScalar(msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
+ bool verifyInteger(msgpack::Node &Node);
+ bool verifyArray(msgpack::Node &Node,
+ function_ref<bool(msgpack::Node &)> verifyNode,
+ Optional<size_t> Size = None);
+ bool verifyEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::Node &)> verifyNode);
+ bool
+ verifyScalarEntry(msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue = {});
+ bool verifyIntegerEntry(msgpack::MapNode &MapNode, StringRef Key,
+ bool Required);
+ bool verifyKernelArgs(msgpack::Node &Node);
+ bool verifyKernel(msgpack::Node &Node);
+
+public:
+ /// Construct a MetadataVerifier, specifying whether it will operate in \p
+ /// Strict mode.
+ MetadataVerifier(bool Strict) : Strict(Strict) {}
+
+ /// Verify given HSA metadata.
+ ///
+ /// \returns True when successful, false when metadata is invalid.
+ bool verify(msgpack::Node &HSAMetadataRoot);
+};
+
+} // end namespace V3
+} // end namespace HSAMD
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
Modified: llvm/trunk/include/llvm/BinaryFormat/ELF.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/BinaryFormat/ELF.h?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/include/llvm/BinaryFormat/ELF.h (original)
+++ llvm/trunk/include/llvm/BinaryFormat/ELF.h Wed Dec 12 11:39:27 2018
@@ -1361,7 +1361,7 @@ enum {
GNU_PROPERTY_X86_FEATURE_1_SHSTK = 1 << 1
};
-// AMDGPU specific notes.
+// AMD specific notes. (Code Object V2)
enum {
// Note types with values between 0 and 9 (inclusive) are reserved.
NT_AMD_AMDGPU_HSA_METADATA = 10,
@@ -1369,6 +1369,12 @@ enum {
NT_AMD_AMDGPU_PAL_METADATA = 12
};
+// AMDGPU specific notes. (Code Object V3)
+enum {
+ // Note types with values between 0 and 31 (inclusive) are reserved.
+ NT_AMDGPU_METADATA = 32
+};
+
enum {
GNU_ABI_TAG_LINUX = 0,
GNU_ABI_TAG_HURD = 1,
Modified: llvm/trunk/include/llvm/Support/AMDGPUMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/AMDGPUMetadata.h?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/AMDGPUMetadata.h (original)
+++ llvm/trunk/include/llvm/Support/AMDGPUMetadata.h Wed Dec 12 11:39:27 2018
@@ -431,6 +431,21 @@ std::error_code fromString(std::string S
/// Converts \p HSAMetadata to \p String.
std::error_code toString(Metadata HSAMetadata, std::string &String);
+//===----------------------------------------------------------------------===//
+// HSA metadata for v3 code object.
+//===----------------------------------------------------------------------===//
+namespace V3 {
+/// HSA metadata major version.
+constexpr uint32_t VersionMajor = 1;
+/// HSA metadata minor version.
+constexpr uint32_t VersionMinor = 0;
+
+/// HSA metadata beginning assembler directive.
+constexpr char AssemblerDirectiveBegin[] = ".amdgpu_metadata";
+/// HSA metadata ending assembler directive.
+constexpr char AssemblerDirectiveEnd[] = ".end_amdgpu_metadata";
+} // end namespace V3
+
} // end namespace HSAMD
//===----------------------------------------------------------------------===//
Added: llvm/trunk/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp?rev=348963&view=auto
==============================================================================
--- llvm/trunk/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp (added)
+++ llvm/trunk/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp Wed Dec 12 11:39:27 2018
@@ -0,0 +1,324 @@
+//===- AMDGPUMetadataVerifier.cpp - MsgPack Types ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implements a verifier for AMDGPU HSA metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/Support/AMDGPUMetadata.h"
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAMD {
+namespace V3 {
+
+bool MetadataVerifier::verifyScalar(
+ msgpack::Node &Node, msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
+ auto ScalarPtr = dyn_cast<msgpack::ScalarNode>(&Node);
+ if (!ScalarPtr)
+ return false;
+ auto &Scalar = *ScalarPtr;
+ // Do not output extraneous tags for types we know from the spec.
+ Scalar.IgnoreTag = true;
+ if (Scalar.getScalarKind() != SKind) {
+ if (Strict)
+ return false;
+ // If we are not strict, we interpret string values as "implicitly typed"
+ // and attempt to coerce them to the expected type here.
+ if (Scalar.getScalarKind() != msgpack::ScalarNode::SK_String)
+ return false;
+ std::string StringValue = Scalar.getString();
+ Scalar.setScalarKind(SKind);
+ if (Scalar.inputYAML(StringValue) != StringRef())
+ return false;
+ }
+ if (verifyValue)
+ return verifyValue(Scalar);
+ return true;
+}
+
+bool MetadataVerifier::verifyInteger(msgpack::Node &Node) {
+ if (!verifyScalar(Node, msgpack::ScalarNode::SK_UInt))
+ if (!verifyScalar(Node, msgpack::ScalarNode::SK_Int))
+ return false;
+ return true;
+}
+
+bool MetadataVerifier::verifyArray(
+ msgpack::Node &Node, function_ref<bool(msgpack::Node &)> verifyNode,
+ Optional<size_t> Size) {
+ auto ArrayPtr = dyn_cast<msgpack::ArrayNode>(&Node);
+ if (!ArrayPtr)
+ return false;
+ auto &Array = *ArrayPtr;
+ if (Size && Array.size() != *Size)
+ return false;
+ for (auto &Item : Array)
+ if (!verifyNode(*Item.get()))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verifyEntry(
+ msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ function_ref<bool(msgpack::Node &)> verifyNode) {
+ auto Entry = MapNode.find(Key);
+ if (Entry == MapNode.end())
+ return !Required;
+ return verifyNode(*Entry->second.get());
+}
+
+bool MetadataVerifier::verifyScalarEntry(
+ msgpack::MapNode &MapNode, StringRef Key, bool Required,
+ msgpack::ScalarNode::ScalarKind SKind,
+ function_ref<bool(msgpack::ScalarNode &)> verifyValue) {
+ return verifyEntry(MapNode, Key, Required, [=](msgpack::Node &Node) {
+ return verifyScalar(Node, SKind, verifyValue);
+ });
+}
+
+bool MetadataVerifier::verifyIntegerEntry(msgpack::MapNode &MapNode,
+ StringRef Key, bool Required) {
+ return verifyEntry(MapNode, Key, Required, [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ });
+}
+
+bool MetadataVerifier::verifyKernelArgs(msgpack::Node &Node) {
+ auto ArgsMapPtr = dyn_cast<msgpack::MapNode>(&Node);
+ if (!ArgsMapPtr)
+ return false;
+ auto &ArgsMap = *ArgsMapPtr;
+
+ if (!verifyScalarEntry(ArgsMap, ".name", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".type_name", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".size", true))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".offset", true))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".value_kind", true,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("by_value", true)
+ .Case("global_buffer", true)
+ .Case("dynamic_shared_pointer", true)
+ .Case("sampler", true)
+ .Case("image", true)
+ .Case("pipe", true)
+ .Case("queue", true)
+ .Case("hidden_global_offset_x", true)
+ .Case("hidden_global_offset_y", true)
+ .Case("hidden_global_offset_z", true)
+ .Case("hidden_none", true)
+ .Case("hidden_printf_buffer", true)
+ .Case("hidden_default_queue", true)
+ .Case("hidden_completion_action", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".value_type", true,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("struct", true)
+ .Case("i8", true)
+ .Case("u8", true)
+ .Case("i16", true)
+ .Case("u16", true)
+ .Case("f16", true)
+ .Case("i32", true)
+ .Case("u32", true)
+ .Case("f32", true)
+ .Case("i64", true)
+ .Case("u64", true)
+ .Case("f64", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyIntegerEntry(ArgsMap, ".pointee_align", false))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".address_space", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("private", true)
+ .Case("global", true)
+ .Case("constant", true)
+ .Case("local", true)
+ .Case("generic", true)
+ .Case("region", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".access", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("read_only", true)
+ .Case("write_only", true)
+ .Case("read_write", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".actual_access", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("read_only", true)
+ .Case("write_only", true)
+ .Case("read_write", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_const", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_restrict", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_volatile", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+ if (!verifyScalarEntry(ArgsMap, ".is_pipe", false,
+ msgpack::ScalarNode::SK_Boolean))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verifyKernel(msgpack::Node &Node) {
+ auto KernelMapPtr = dyn_cast<msgpack::MapNode>(&Node);
+ if (!KernelMapPtr)
+ return false;
+ auto &KernelMap = *KernelMapPtr;
+
+ if (!verifyScalarEntry(KernelMap, ".name", true,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".symbol", true,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".language", false,
+ msgpack::ScalarNode::SK_String,
+ [](msgpack::ScalarNode &SNode) {
+ return StringSwitch<bool>(SNode.getString())
+ .Case("OpenCL C", true)
+ .Case("OpenCL C++", true)
+ .Case("HCC", true)
+ .Case("HIP", true)
+ .Case("OpenMP", true)
+ .Case("Assembler", true)
+ .Default(false);
+ }))
+ return false;
+ if (!verifyEntry(
+ KernelMap, ".language_version", false, [this](msgpack::Node &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".args", false, [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyKernelArgs(Node);
+ });
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".reqd_workgroup_size", false,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node,
+ [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ },
+ 3);
+ }))
+ return false;
+ if (!verifyEntry(KernelMap, ".workgroup_size_hint", false,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node,
+ [this](msgpack::Node &Node) {
+ return verifyInteger(Node);
+ },
+ 3);
+ }))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".vec_type_hint", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyScalarEntry(KernelMap, ".device_enqueue_symbol", false,
+ msgpack::ScalarNode::SK_String))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".group_segment_fixed_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".private_segment_fixed_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".kernarg_segment_align", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".wavefront_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".sgpr_count", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".vgpr_count", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".max_flat_workgroup_size", true))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".sgpr_spill_count", false))
+ return false;
+ if (!verifyIntegerEntry(KernelMap, ".vgpr_spill_count", false))
+ return false;
+
+ return true;
+}
+
+bool MetadataVerifier::verify(msgpack::Node &HSAMetadataRoot) {
+ auto RootMapPtr = dyn_cast<msgpack::MapNode>(&HSAMetadataRoot);
+ if (!RootMapPtr)
+ return false;
+ auto &RootMap = *RootMapPtr;
+
+ if (!verifyEntry(
+ RootMap, "amdhsa.version", true, [this](msgpack::Node &Node) {
+ return verifyArray(
+ Node,
+ [this](msgpack::Node &Node) { return verifyInteger(Node); }, 2);
+ }))
+ return false;
+ if (!verifyEntry(
+ RootMap, "amdhsa.printf", false, [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyScalar(Node, msgpack::ScalarNode::SK_String);
+ });
+ }))
+ return false;
+ if (!verifyEntry(RootMap, "amdhsa.kernels", true,
+ [this](msgpack::Node &Node) {
+ return verifyArray(Node, [this](msgpack::Node &Node) {
+ return verifyKernel(Node);
+ });
+ }))
+ return false;
+
+ return true;
+}
+
+} // end namespace V3
+} // end namespace HSAMD
+} // end namespace AMDGPU
+} // end namespace llvm
Modified: llvm/trunk/lib/BinaryFormat/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/BinaryFormat/CMakeLists.txt?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/BinaryFormat/CMakeLists.txt (original)
+++ llvm/trunk/lib/BinaryFormat/CMakeLists.txt Wed Dec 12 11:39:27 2018
@@ -1,4 +1,5 @@
add_llvm_library(LLVMBinaryFormat
+ AMDGPUMetadataVerifier.cpp
Dwarf.cpp
Magic.cpp
MsgPackReader.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Dec 12 11:39:27 2018
@@ -46,6 +46,7 @@
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
@@ -99,6 +100,10 @@ extern "C" void LLVMInitializeAMDGPUAsmP
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ HSAMetadataStream.reset(new MetadataStreamerV3());
+ else
+ HSAMetadataStream.reset(new MetadataStreamerV2());
}
StringRef AMDGPUAsmPrinter::getPassName() const {
@@ -122,9 +127,6 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
IsaInfo::streamIsaVersion(getSTI(), ExpectedTargetOS);
getTargetStreamer()->EmitDirectiveAMDGCNTarget(ExpectedTarget);
-
- if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
}
if (TM.getTargetTriple().getOS() != Triple::AMDHSA &&
@@ -132,11 +134,14 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
return;
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
- HSAMetadataStream.begin(M);
+ HSAMetadataStream->begin(M);
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
readPALMetadata(M);
+ if (IsaInfo::hasCodeObjectV3(getSTI()))
+ return;
+
// HSA emits NT_AMDGPU_HSA_CODE_OBJECT_VERSION for code objects v2.
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
@@ -148,37 +153,38 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
- // TODO: Add metadata to code object v3.
- if (IsaInfo::hasCodeObjectV3(getSTI()) &&
- TM.getTargetTriple().getOS() == Triple::AMDHSA)
- return;
-
// Following code requires TargetStreamer to be present.
if (!getTargetStreamer())
return;
- // Emit ISA Version (NT_AMD_AMDGPU_ISA).
- std::string ISAVersionString;
- raw_string_ostream ISAVersionStream(ISAVersionString);
- IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
- getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit ISA Version (NT_AMD_AMDGPU_ISA).
+ std::string ISAVersionString;
+ raw_string_ostream ISAVersionStream(ISAVersionString);
+ IsaInfo::streamIsaVersion(getSTI(), ISAVersionStream);
+ getTargetStreamer()->EmitISAVersion(ISAVersionStream.str());
+ }
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
- HSAMetadataStream.end();
- getTargetStreamer()->EmitHSAMetadata(HSAMetadataStream.getHSAMetadata());
+ HSAMetadataStream->end();
+ bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
+ (void)Success;
+ assert(Success && "Malformed HSA Metadata");
}
- // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
- if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
- // Copy the PAL metadata from the map where we collected it into a vector,
- // then write it as a .note.
- PALMD::Metadata PALMetadataVector;
- for (auto i : PALMetadataMap) {
- PALMetadataVector.push_back(i.first);
- PALMetadataVector.push_back(i.second);
+ if (!IsaInfo::hasCodeObjectV3(getSTI())) {
+ // Emit PAL Metadata (NT_AMD_AMDGPU_PAL_METADATA).
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
+ // Copy the PAL metadata from the map where we collected it into a vector,
+ // then write it as a .note.
+ PALMD::Metadata PALMetadataVector;
+ for (auto i : PALMetadataMap) {
+ PALMetadataVector.push_back(i.first);
+ PALMetadataVector.push_back(i.second);
+ }
+ getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
- getTargetStreamer()->EmitPALMetadata(PALMetadataVector);
}
}
@@ -211,11 +217,8 @@ void AMDGPUAsmPrinter::EmitFunctionBodyS
getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);
}
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
- return;
-
- if (!STM.hasCodeObjectV3() && STM.isAmdHsaOS())
- HSAMetadataStream.emitKernel(*MF, CurrentProgramInfo);
+ if (STM.isAmdHsaOS())
+ HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
void AMDGPUAsmPrinter::EmitFunctionBodyEnd() {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Wed Dec 12 11:39:27 2018
@@ -56,7 +56,7 @@ private:
SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
- AMDGPU::HSAMD::MetadataStreamer HSAMetadataStream;
+ std::unique_ptr<AMDGPU::HSAMD::MetadataStreamer> HSAMetadataStream;
std::map<uint32_t, uint32_t> PALMetadataMap;
uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp Wed Dec 12 11:39:27 2018
@@ -16,6 +16,7 @@
#include "AMDGPUHSAMetadataStreamer.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
+#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIMachineFunctionInfo.h"
#include "SIProgramInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
@@ -36,11 +37,14 @@ static cl::opt<bool> VerifyHSAMetadata(
namespace AMDGPU {
namespace HSAMD {
-void MetadataStreamer::dump(StringRef HSAMetadataString) const {
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV2
+//===----------------------------------------------------------------------===//
+void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
-void MetadataStreamer::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
HSAMD::Metadata FromHSAMetadataString;
@@ -63,7 +67,8 @@ void MetadataStreamer::verify(StringRef
}
}
-AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+AccessQualifier
+MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
if (AccQual.empty())
return AccessQualifier::Unknown;
@@ -74,7 +79,8 @@ AccessQualifier MetadataStreamer::getAcc
.Default(AccessQualifier::Default);
}
-AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+AddressSpaceQualifier
+MetadataStreamerV2::getAddressSpaceQualifier(
unsigned AddressSpace) const {
switch (AddressSpace) {
case AMDGPUAS::PRIVATE_ADDRESS:
@@ -94,8 +100,8 @@ AddressSpaceQualifier MetadataStreamer::
}
}
-ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
- StringRef BaseTypeName) const {
+ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
if (TypeQual.find("pipe") != StringRef::npos)
return ValueKind::Pipe;
@@ -122,7 +128,7 @@ ValueKind MetadataStreamer::getValueKind
ValueKind::ByValue);
}
-ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
auto Signed = !TypeName.startswith("u");
@@ -154,7 +160,7 @@ ValueType MetadataStreamer::getValueType
}
}
-std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
if (!Signed)
@@ -191,8 +197,8 @@ std::string MetadataStreamer::getTypeNam
}
}
-std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
- MDNode *Node) const {
+std::vector<uint32_t>
+MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
std::vector<uint32_t> Dims;
if (Node->getNumOperands() != 3)
return Dims;
@@ -202,9 +208,9 @@ std::vector<uint32_t> MetadataStreamer::
return Dims;
}
-Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::CodeProps::Metadata
+MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
@@ -231,9 +237,9 @@ Kernel::CodeProps::Metadata MetadataStre
return HSACodeProps;
}
-Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
- const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const {
+Kernel::DebugProps::Metadata
+MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
@@ -253,14 +259,14 @@ Kernel::DebugProps::Metadata MetadataStr
return HSADebugProps;
}
-void MetadataStreamer::emitVersion() {
+void MetadataStreamerV2::emitVersion() {
auto &Version = HSAMetadata.mVersion;
Version.push_back(VersionMajor);
Version.push_back(VersionMinor);
}
-void MetadataStreamer::emitPrintf(const Module &Mod) {
+void MetadataStreamerV2::emitPrintf(const Module &Mod) {
auto &Printf = HSAMetadata.mPrintf;
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
@@ -272,7 +278,7 @@ void MetadataStreamer::emitPrintf(const
Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
}
-void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
auto &Kernel = HSAMetadata.mKernels.back();
// TODO: What about other languages?
@@ -290,7 +296,7 @@ void MetadataStreamer::emitKernelLanguag
mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
}
-void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -308,14 +314,14 @@ void MetadataStreamer::emitKernelAttrs(c
}
}
-void MetadataStreamer::emitKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
for (auto &Arg : Func.args())
emitKernelArg(Arg);
emitHiddenKernelArgs(Func);
}
-void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
auto Func = Arg.getParent();
auto ArgNo = Arg.getArgNo();
const MDNode *Node;
@@ -368,12 +374,12 @@ void MetadataStreamer::emitKernelArg(con
PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
}
-void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
- ValueKind ValueKind,
- unsigned PointeeAlign,
- StringRef Name,
- StringRef TypeName, StringRef BaseTypeName,
- StringRef AccQual, StringRef TypeQual) {
+void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
+ ValueKind ValueKind,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
@@ -386,7 +392,7 @@ void MetadataStreamer::emitKernelArg(con
Arg.mPointeeAlign = PointeeAlign;
if (auto PtrTy = dyn_cast<PointerType>(Ty))
- Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
+ Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
Arg.mAccQual = getAccessQualifier(AccQual);
@@ -406,7 +412,7 @@ void MetadataStreamer::emitKernelArg(con
}
}
-void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
+void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
int HiddenArgNumBytes =
getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
@@ -448,12 +454,16 @@ void MetadataStreamer::emitHiddenKernelA
}
}
-void MetadataStreamer::begin(const Module &Mod) {
+bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
+}
+
+void MetadataStreamerV2::begin(const Module &Mod) {
emitVersion();
emitPrintf(Mod);
}
-void MetadataStreamer::end() {
+void MetadataStreamerV2::end() {
std::string HSAMetadataString;
if (toString(HSAMetadata, HSAMetadataString))
return;
@@ -464,7 +474,8 @@ void MetadataStreamer::end() {
verify(HSAMetadataString);
}
-void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
+void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
return;
@@ -484,6 +495,505 @@ void MetadataStreamer::emitKernel(const
HSAMetadata.mKernels.back().mDebugProps = DebugProps;
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV3
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
+}
+
+void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
+ errs() << "AMDGPU HSA Metadata Parser Test: ";
+
+ std::shared_ptr<msgpack::Node> FromHSAMetadataString =
+ std::make_shared<msgpack::MapNode>();
+
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> FromHSAMetadataString;
+ if (YIn.error()) {
+ errs() << "FAIL\n";
+ return;
+ }
+
+ std::string ToHSAMetadataString;
+ raw_string_ostream StrOS(ToHSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << FromHSAMetadataString;
+
+ errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
+ if (HSAMetadataString != ToHSAMetadataString) {
+ errs() << "Original input: " << HSAMetadataString << '\n'
+ << "Produced output: " << StrOS.str() << '\n';
+ }
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
+ return StringSwitch<Optional<StringRef>>(AccQual)
+ .Case("read_only", StringRef("read_only"))
+ .Case("write_only", StringRef("write_only"))
+ .Case("read_write", StringRef("read_write"))
+ .Default(None);
+}
+
+Optional<StringRef>
+MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
+ switch (AddressSpace) {
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ return StringRef("private");
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ return StringRef("global");
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ return StringRef("constant");
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return StringRef("local");
+ case AMDGPUAS::FLAT_ADDRESS:
+ return StringRef("generic");
+ case AMDGPUAS::REGION_ADDRESS:
+ return StringRef("region");
+ default:
+ return None;
+ }
+}
+
+StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const {
+ if (TypeQual.find("pipe") != StringRef::npos)
+ return "pipe";
+
+ return StringSwitch<StringRef>(BaseTypeName)
+ .Case("image1d_t", "image")
+ .Case("image1d_array_t", "image")
+ .Case("image1d_buffer_t", "image")
+ .Case("image2d_t", "image")
+ .Case("image2d_array_t", "image")
+ .Case("image2d_array_depth_t", "image")
+ .Case("image2d_array_msaa_t", "image")
+ .Case("image2d_array_msaa_depth_t", "image")
+ .Case("image2d_depth_t", "image")
+ .Case("image2d_msaa_t", "image")
+ .Case("image2d_msaa_depth_t", "image")
+ .Case("image3d_t", "image")
+ .Case("sampler_t", "sampler")
+ .Case("queue_t", "queue")
+ .Default(isa<PointerType>(Ty)
+ ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
+ ? "dynamic_shared_pointer"
+ : "global_buffer")
+ : "by_value");
+}
+
+StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ auto Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? "i8" : "u8";
+ case 16:
+ return Signed ? "i16" : "u16";
+ case 32:
+ return Signed ? "i32" : "u32";
+ case 64:
+ return Signed ? "i64" : "u64";
+ default:
+ return "struct";
+ }
+ }
+ case Type::HalfTyID:
+ return "f16";
+ case Type::FloatTyID:
+ return "f32";
+ case Type::DoubleTyID:
+ return "f64";
+ case Type::PointerTyID:
+ return getValueType(Ty->getPointerElementType(), TypeName);
+ case Type::VectorTyID:
+ return getValueType(Ty->getVectorElementType(), TypeName);
+ default:
+ return "struct";
+ }
+}
+
+std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
+ switch (Ty->getTypeID()) {
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getTypeName(Ty, true)).str();
+
+ auto BitWidth = Ty->getIntegerBitWidth();
+ switch (BitWidth) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BitWidth)).str();
+ }
+ }
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::VectorTyID: {
+ auto VecTy = cast<VectorType>(Ty);
+ auto ElTy = VecTy->getElementType();
+ auto NumElements = VecTy->getVectorNumElements();
+ return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
+
+std::shared_ptr<msgpack::ArrayNode>
+MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
+ auto Dims = std::make_shared<msgpack::ArrayNode>();
+ if (Node->getNumOperands() != 3)
+ return Dims;
+
+ for (auto &Op : Node->operands())
+ Dims->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op)->getZExtValue()));
+ return Dims;
+}
+
+void MetadataStreamerV3::emitVersion() {
+ auto Version = std::make_shared<msgpack::ArrayNode>();
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
+ Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
+ getRootMetadata("amdhsa.version") = std::move(Version);
+}
+
+void MetadataStreamerV3::emitPrintf(const Module &Mod) {
+ auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
+ if (!Node)
+ return;
+
+ auto Printf = std::make_shared<msgpack::ArrayNode>();
+ for (auto Op : Node->operands())
+ if (Op->getNumOperands())
+ Printf->push_back(std::make_shared<msgpack::ScalarNode>(
+ cast<MDString>(Op->getOperand(0))->getString()));
+ getRootMetadata("amdhsa.printf") = std::move(Printf);
+}
+
+void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
+ msgpack::MapNode &Kern) {
+ // TODO: What about other languages?
+ auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+ if (!Node || !Node->getNumOperands())
+ return;
+ auto Op0 = Node->getOperand(0);
+ if (Op0->getNumOperands() <= 1)
+ return;
+
+ Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
+ auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
+ LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
+ mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
+ Kern[".language_version"] = std::move(LanguageVersion);
+}
+
+void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
+ msgpack::MapNode &Kern) {
+
+ if (auto Node = Func.getMetadata("reqd_work_group_size"))
+ Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("work_group_size_hint"))
+ Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
+ if (auto Node = Func.getMetadata("vec_type_hint")) {
+ Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
+ cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
+ mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
+ }
+ if (Func.hasFnAttribute("runtime-handle")) {
+ Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
+ Func.getFnAttribute("runtime-handle").getValueAsString().str());
+ }
+}
+
+void MetadataStreamerV3::emitKernelArgs(const Function &Func,
+ msgpack::MapNode &Kern) {
+ unsigned Offset = 0;
+ auto Args = std::make_shared<msgpack::ArrayNode>();
+ for (auto &Arg : Func.args())
+ emitKernelArg(Arg, Offset, *Args);
+
+ emitHiddenKernelArgs(Func, Offset, *Args);
+
+ // TODO: What about other languages?
+ if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel
+ // is used, otherwise emit dummy "none" arguments.
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
+ }
+ }
+
+ Kern[".args"] = std::move(Args);
+}
+
+void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ auto Func = Arg.getParent();
+ auto ArgNo = Arg.getArgNo();
+ const MDNode *Node;
+
+ StringRef Name;
+ Node = Func->getMetadata("kernel_arg_name");
+ if (Node && ArgNo < Node->getNumOperands())
+ Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ else if (Arg.hasName())
+ Name = Arg.getName();
+
+ StringRef TypeName;
+ Node = Func->getMetadata("kernel_arg_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef BaseTypeName;
+ Node = Func->getMetadata("kernel_arg_base_type");
+ if (Node && ArgNo < Node->getNumOperands())
+ BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ StringRef AccQual;
+ if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
+ Arg.hasNoAliasAttr()) {
+ AccQual = "read_only";
+ } else {
+ Node = Func->getMetadata("kernel_arg_access_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+ }
+
+ StringRef TypeQual;
+ Node = Func->getMetadata("kernel_arg_type_qual");
+ if (Node && ArgNo < Node->getNumOperands())
+ TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
+
+ Type *Ty = Arg.getType();
+ const DataLayout &DL = Func->getParent()->getDataLayout();
+
+ unsigned PointeeAlign = 0;
+ if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+ if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ PointeeAlign = Arg.getParamAlignment();
+ if (PointeeAlign == 0)
+ PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
+ }
+ }
+
+ emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+ getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
+ Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
+ TypeQual);
+}
+
+void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
+ StringRef ValueKind, unsigned &Offset,
+ msgpack::ArrayNode &Args,
+ unsigned PointeeAlign, StringRef Name,
+ StringRef TypeName,
+ StringRef BaseTypeName,
+ StringRef AccQual, StringRef TypeQual) {
+ auto ArgPtr = std::make_shared<msgpack::MapNode>();
+ auto &Arg = *ArgPtr;
+
+ if (!Name.empty())
+ Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
+ if (!TypeName.empty())
+ Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
+ auto Size = DL.getTypeAllocSize(Ty);
+ auto Align = DL.getABITypeAlignment(Ty);
+ Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
+ Offset = alignTo(Offset, Align);
+ Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
+ Offset += Size;
+ Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
+ Arg[".value_type"] =
+ std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
+ if (PointeeAlign)
+ Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
+
+ if (auto PtrTy = dyn_cast<PointerType>(Ty))
+ if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
+ Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
+
+ if (auto AQ = getAccessQualifier(AccQual))
+ Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
+
+ // TODO: Emit Arg[".actual_access"].
+
+ SmallVector<StringRef, 1> SplitTypeQuals;
+ TypeQual.split(SplitTypeQuals, " ", -1, false);
+ for (StringRef Key : SplitTypeQuals) {
+ if (Key == "const")
+ Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "restrict")
+ Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "volatile")
+ Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
+ else if (Key == "pipe")
+ Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
+ }
+
+ Args.push_back(std::move(ArgPtr));
+}
+
+void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
+ unsigned &Offset,
+ msgpack::ArrayNode &Args) {
+ int HiddenArgNumBytes =
+ getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
+
+ if (!HiddenArgNumBytes)
+ return;
+
+ auto &DL = Func.getParent()->getDataLayout();
+ auto Int64Ty = Type::getInt64Ty(Func.getContext());
+
+ if (HiddenArgNumBytes >= 8)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
+ if (HiddenArgNumBytes >= 16)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
+ if (HiddenArgNumBytes >= 24)
+ emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
+
+ auto Int8PtrTy =
+ Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
+
+ // Emit "printf buffer" argument if printf is used, otherwise emit dummy
+ // "none" argument.
+ if (HiddenArgNumBytes >= 32) {
+ if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
+ else
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+
+ // Emit "default queue" and "completion action" arguments if enqueue kernel is
+ // used, otherwise emit dummy "none" arguments.
+ if (HiddenArgNumBytes >= 48) {
+ if (Func.hasFnAttribute("calls-enqueue-kernel")) {
+ emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
+ } else {
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
+ }
+ }
+}
+
+std::shared_ptr<msgpack::MapNode>
+MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const {
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ const Function &F = MF.getFunction();
+
+ auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
+ auto &Kern = *HSAKernelProps;
+
+ unsigned MaxKernArgAlign;
+ Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
+ STM.getKernArgSegmentSize(F, MaxKernArgAlign));
+ Kern[".group_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
+ Kern[".private_segment_fixed_size"] =
+ std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
+ Kern[".kernarg_segment_align"] =
+ std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
+ Kern[".wavefront_size"] =
+ std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
+ Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
+ Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
+ Kern[".max_flat_workgroup_size"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
+ Kern[".sgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
+ Kern[".vgpr_spill_count"] =
+ std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
+
+ return HSAKernelProps;
+}
+
+bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+ return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
+}
+
+void MetadataStreamerV3::begin(const Module &Mod) {
+ emitVersion();
+ emitPrintf(Mod);
+ getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
+}
+
+void MetadataStreamerV3::end() {
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ if (DumpHSAMetadata)
+ dump(StrOS.str());
+ if (VerifyHSAMetadata)
+ verify(StrOS.str());
+}
+
+void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) {
+ auto &Func = MF.getFunction();
+ auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
+
+ assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ Func.getCallingConv() == CallingConv::SPIR_KERNEL);
+
+ auto &KernelsNode = getRootMetadata("amdhsa.kernels");
+ auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
+
+ {
+ auto &Kern = *KernelProps;
+ Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
+ Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
+ (Twine(Func.getName()) + Twine(".kd")).str());
+ emitKernelLanguage(Func, Kern);
+ emitKernelAttrs(Func, Kern);
+ emitKernelArgs(Func, Kern);
+ }
+
+ Kernels->push_back(std::move(KernelProps));
+}
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h Wed Dec 12 11:39:27 2018
@@ -19,10 +19,12 @@
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/Support/AMDGPUMetadata.h"
namespace llvm {
+class AMDGPUTargetStreamer;
class Argument;
class DataLayout;
class Function;
@@ -34,7 +36,92 @@ class Type;
namespace AMDGPU {
namespace HSAMD {
-class MetadataStreamer final {
+class MetadataStreamer {
+public:
+ virtual ~MetadataStreamer(){};
+
+ virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
+
+ virtual void begin(const Module &Mod) = 0;
+
+ virtual void end() = 0;
+
+ virtual void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) = 0;
+};
+
+class MetadataStreamerV3 final : public MetadataStreamer {
+private:
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot =
+ std::make_shared<msgpack::MapNode>();
+
+ void dump(StringRef HSAMetadataString) const;
+
+ void verify(StringRef HSAMetadataString) const;
+
+ Optional<StringRef> getAccessQualifier(StringRef AccQual) const;
+
+ Optional<StringRef> getAddressSpaceQualifier(unsigned AddressSpace) const;
+
+ StringRef getValueKind(Type *Ty, StringRef TypeQual,
+ StringRef BaseTypeName) const;
+
+ StringRef getValueType(Type *Ty, StringRef TypeName) const;
+
+ std::string getTypeName(Type *Ty, bool Signed) const;
+
+ std::shared_ptr<msgpack::ArrayNode>
+ getWorkGroupDimensions(MDNode *Node) const;
+
+ std::shared_ptr<msgpack::MapNode>
+ getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) const;
+
+ void emitVersion();
+
+ void emitPrintf(const Module &Mod);
+
+ void emitKernelLanguage(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelAttrs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArgs(const Function &Func, msgpack::MapNode &Kern);
+
+ void emitKernelArg(const Argument &Arg, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ void emitKernelArg(const DataLayout &DL, Type *Ty, StringRef ValueKind,
+ unsigned &Offset, msgpack::ArrayNode &Args,
+ unsigned PointeeAlign = 0, StringRef Name = "",
+ StringRef TypeName = "", StringRef BaseTypeName = "",
+ StringRef AccQual = "", StringRef TypeQual = "");
+
+ void emitHiddenKernelArgs(const Function &Func, unsigned &Offset,
+ msgpack::ArrayNode &Args);
+
+ std::shared_ptr<msgpack::Node> &getRootMetadata(StringRef Key) {
+ return (*cast<msgpack::MapNode>(HSAMetadataRoot.get()))[Key];
+ }
+
+ std::shared_ptr<msgpack::Node> &getHSAMetadataRoot() {
+ return HSAMetadataRoot;
+ }
+
+public:
+ MetadataStreamerV3() = default;
+ ~MetadataStreamerV3() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
+
+ void end() override;
+
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
+};
+
+class MetadataStreamerV2 final : public MetadataStreamer {
private:
Metadata HSAMetadata;
@@ -44,7 +131,7 @@ private:
AccessQualifier getAccessQualifier(StringRef AccQual) const;
- AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+ AddressSpaceQualifier getAddressSpaceQualifier(unsigned AddressSpace) const;
ValueKind getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const;
@@ -82,19 +169,22 @@ private:
void emitHiddenKernelArgs(const Function &Func);
-public:
- MetadataStreamer() = default;
- ~MetadataStreamer() = default;
-
const Metadata &getHSAMetadata() const {
return HSAMetadata;
}
- void begin(const Module &Mod);
+public:
+ MetadataStreamerV2() = default;
+ ~MetadataStreamerV2() = default;
+
+ bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
+
+ void begin(const Module &Mod) override;
- void end();
+ void end() override;
- void emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo);
+ void emitKernel(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo) override;
};
} // end namespace HSAMD
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h Wed Dec 12 11:39:27 2018
@@ -23,7 +23,8 @@ namespace ElfNote {
const char SectionName[] = ".note";
-const char NoteName[] = "AMD";
+const char NoteNameV2[] = "AMD";
+const char NoteNameV3[] = "AMDGPU";
// TODO: Remove this file once we drop code object v2.
enum NoteType{
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Dec 12 11:39:27 2018
@@ -3065,9 +3065,18 @@ bool AMDGPUAsmParser::ParseDirectiveISAV
}
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
+ const char *AssemblerDirectiveBegin;
+ const char *AssemblerDirectiveEnd;
+ std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
+ AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
+ ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd)
+ : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
+ HSAMD::AssemblerDirectiveEnd);
+
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getParser().getTok().getLoc(),
- (Twine(HSAMD::AssemblerDirectiveBegin) + Twine(" directive is "
+ (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
"not available on non-amdhsa OSes")).str());
}
@@ -3085,7 +3094,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAM
if (getLexer().is(AsmToken::Identifier)) {
StringRef ID = getLexer().getTok().getIdentifier();
- if (ID == AMDGPU::HSAMD::AssemblerDirectiveEnd) {
+ if (ID == AssemblerDirectiveEnd) {
Lex();
FoundEnd = true;
break;
@@ -3107,8 +3116,13 @@ bool AMDGPUAsmParser::ParseDirectiveHSAM
YamlStream.flush();
- if (!getTargetStreamer().EmitHSAMetadata(HSAMetadataString))
- return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ if (IsaInfo::hasCodeObjectV3(&getSTI())) {
+ if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ } else {
+ if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
+ return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
+ }
return false;
}
@@ -3145,6 +3159,10 @@ bool AMDGPUAsmParser::ParseDirective(Asm
if (IDVal == ".amdhsa_kernel")
return ParseDirectiveAMDHSAKernel();
+
+ // TODO: Restructure/combine with PAL metadata directive.
+ if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
} else {
if (IDVal == ".hsa_code_object_version")
return ParseDirectiveHSACodeObjectVersion();
@@ -3160,10 +3178,10 @@ bool AMDGPUAsmParser::ParseDirective(Asm
if (IDVal == ".amd_amdgpu_isa")
return ParseDirectiveISAVersion();
- }
- if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
- return ParseDirectiveHSAMetadata();
+ if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
+ return ParseDirectiveHSAMetadata();
+ }
if (IDVal == PALMD::AssemblerDirective)
return ParseDirectivePALMetadata();
Modified: llvm/trunk/lib/Target/AMDGPU/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/LLVMBuild.txt?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/LLVMBuild.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/LLVMBuild.txt Wed Dec 12 11:39:27 2018
@@ -30,5 +30,5 @@ has_disassembler = 1
type = Library
name = AMDGPUCodeGen
parent = AMDGPU
-required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel
+required_libraries = Analysis AsmPrinter CodeGen Core IPO MC AMDGPUAsmPrinter AMDGPUDesc AMDGPUInfo AMDGPUUtils Scalar SelectionDAG Support Target TransformUtils Vectorize GlobalISel BinaryFormat
add_to_library_groups = AMDGPU
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Dec 12 11:39:27 2018
@@ -17,7 +17,9 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -35,12 +37,13 @@ namespace llvm {
using namespace llvm;
using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::HSAMD;
//===----------------------------------------------------------------------===//
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
-bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
+bool AMDGPUTargetStreamer::EmitHSAMetadataV2(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
return false;
@@ -48,6 +51,15 @@ bool AMDGPUTargetStreamer::EmitHSAMetada
return EmitHSAMetadata(HSAMetadata);
}
+bool AMDGPUTargetStreamer::EmitHSAMetadataV3(StringRef HSAMetadataString) {
+ std::shared_ptr<msgpack::Node> HSAMetadataRoot;
+ yaml::Input YIn(HSAMetadataString);
+ YIn >> HSAMetadataRoot;
+ if (YIn.error())
+ return false;
+ return EmitHSAMetadata(HSAMetadataRoot, false);
+}
+
StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
AMDGPU::GPUKind AK;
@@ -195,9 +207,26 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMet
if (HSAMD::toString(HSAMetadata, HSAMetadataString))
return false;
- OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
+ OS << '\t' << AssemblerDirectiveBegin << '\n';
OS << HSAMetadataString << '\n';
- OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
+ OS << '\t' << AssemblerDirectiveEnd << '\n';
+ return true;
+}
+
+bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << HSAMetadataRoot;
+
+ OS << '\t' << V3::AssemblerDirectiveBegin << '\n';
+ OS << StrOS.str() << '\n';
+ OS << '\t' << V3::AssemblerDirectiveEnd << '\n';
return true;
}
@@ -358,13 +387,13 @@ MCELFStreamer &AMDGPUTargetELFStreamer::
return static_cast<MCELFStreamer &>(Streamer);
}
-void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
- const MCExpr *DescSZ, unsigned NoteType,
+void AMDGPUTargetELFStreamer::EmitNote(
+ StringRef Name, const MCExpr *DescSZ, unsigned NoteType,
function_ref<void(MCELFStreamer &)> EmitDesc) {
auto &S = getStreamer();
auto &Context = S.getContext();
- auto NameSZ = sizeof(ElfNote::NoteName);
+ auto NameSZ = Name.size() + 1;
S.PushSection();
S.SwitchSection(Context.getELFSection(
@@ -372,7 +401,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPU
S.EmitIntValue(NameSZ, 4); // namesz
S.EmitValue(DescSZ, 4); // descz
S.EmitIntValue(NoteType, 4); // type
- S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ)); // name
+ S.EmitBytes(Name); // name
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
EmitDesc(S); // desc
S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
@@ -384,14 +413,11 @@ void AMDGPUTargetELFStreamer::EmitDirect
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
uint32_t Major, uint32_t Minor) {
- EmitAMDGPUNote(
- MCConstantExpr::create(8, getContext()),
- ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
- [&](MCELFStreamer &OS){
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(8, getContext()),
+ ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ });
}
void
@@ -407,21 +433,18 @@ AMDGPUTargetELFStreamer::EmitDirectiveHS
sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
VendorNameSize + ArchNameSize;
- EmitAMDGPUNote(
- MCConstantExpr::create(DescSZ, getContext()),
- ElfNote::NT_AMDGPU_HSA_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitIntValue(VendorNameSize, 2);
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
- }
- );
+ EmitNote(ElfNote::NoteNameV2, MCConstantExpr::create(DescSZ, getContext()),
+ ElfNote::NT_AMDGPU_HSA_ISA, [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(VendorNameSize, 2);
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+ OS.EmitIntValue(0, 1); // NULL terminte ArchName
+ });
}
void
@@ -450,15 +473,41 @@ bool AMDGPUTargetELFStreamer::EmitISAVer
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_ISA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(IsaVersionString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_ISA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(IsaVersionString);
+ OS.EmitLabel(DescEnd);
+ });
+ return true;
+}
+
+bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
+ std::shared_ptr<msgpack::Node> &HSAMetadataRoot, bool Strict) {
+ V3::MetadataVerifier Verifier(Strict);
+ if (!Verifier.verify(*HSAMetadataRoot))
+ return false;
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ msgpack::Writer MPWriter(StrOS);
+ HSAMetadataRoot->write(MPWriter);
+
+ // Create two labels to mark the beginning and end of the desc field
+ // and a MCExpr to calculate the size of the desc field.
+ auto &Context = getContext();
+ auto *DescBegin = Context.createTempSymbol();
+ auto *DescEnd = Context.createTempSymbol();
+ auto *DescSZ = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(DescEnd, Context),
+ MCSymbolRefExpr::create(DescBegin, Context), Context);
+
+ EmitNote(ElfNote::NoteNameV3, DescSZ, ELF::NT_AMDGPU_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(StrOS.str());
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
@@ -477,28 +526,24 @@ bool AMDGPUTargetELFStreamer::EmitHSAMet
MCSymbolRefExpr::create(DescEnd, Context),
MCSymbolRefExpr::create(DescBegin, Context), Context);
- EmitAMDGPUNote(
- DescSZ,
- ELF::NT_AMD_AMDGPU_HSA_METADATA,
- [&](MCELFStreamer &OS) {
- OS.EmitLabel(DescBegin);
- OS.EmitBytes(HSAMetadataString);
- OS.EmitLabel(DescEnd);
- }
- );
+ EmitNote(ElfNote::NoteNameV2, DescSZ, ELF::NT_AMD_AMDGPU_HSA_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(HSAMetadataString);
+ OS.EmitLabel(DescEnd);
+ });
return true;
}
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
const PALMD::Metadata &PALMetadata) {
- EmitAMDGPUNote(
- MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
- ELF::NT_AMD_AMDGPU_PAL_METADATA,
- [&](MCELFStreamer &OS){
- for (auto I : PALMetadata)
- OS.EmitIntValue(I, sizeof(uint32_t));
- }
- );
+ EmitNote(ElfNote::NoteNameV2,
+ MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t),
+ getContext()),
+ ELF::NT_AMD_AMDGPU_PAL_METADATA, [&](MCELFStreamer &OS) {
+ for (auto I : PALMetadata)
+ OS.EmitIntValue(I, sizeof(uint32_t));
+ });
return true;
}
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Dec 12 11:39:27 2018
@@ -11,6 +11,7 @@
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
#include "AMDKernelCodeT.h"
+#include "llvm/BinaryFormat/MsgPackTypes.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDGPUMetadata.h"
@@ -52,7 +53,20 @@ public:
virtual bool EmitISAVersion(StringRef IsaVersionString) = 0;
/// \returns True on success, false on failure.
- virtual bool EmitHSAMetadata(StringRef HSAMetadataString);
+ virtual bool EmitHSAMetadataV2(StringRef HSAMetadataString);
+
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadataV3(StringRef HSAMetadataString);
+
+ /// Emit HSA Metadata
+ ///
+ /// When \p Strict is true, known metadata elements must already be
+ /// well-typed. When \p Strict is false, known types are inferred and
+ /// the \p HSAMetadata structure is updated with the correct types.
+ ///
+ /// \returns True on success, false on failure.
+ virtual bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) = 0;
/// \returns True on success, false on failure.
virtual bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) = 0;
@@ -92,6 +106,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
@@ -107,8 +125,8 @@ public:
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
- void EmitAMDGPUNote(const MCExpr *DescSize, unsigned NoteType,
- function_ref<void(MCELFStreamer &)> EmitDesc);
+ void EmitNote(StringRef Name, const MCExpr *DescSize, unsigned NoteType,
+ function_ref<void(MCELFStreamer &)> EmitDesc);
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
@@ -132,6 +150,10 @@ public:
bool EmitISAVersion(StringRef IsaVersionString) override;
/// \returns True on success, false on failure.
+ bool EmitHSAMetadata(std::shared_ptr<msgpack::Node> &HSAMetadata,
+ bool Strict) override;
+
+ /// \returns True on success, false on failure.
bool EmitHSAMetadata(const AMDGPU::HSAMD::Metadata &HSAMetadata) override;
/// \returns True on success, false on failure.
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt Wed Dec 12 11:39:27 2018
@@ -19,5 +19,5 @@
type = Library
name = AMDGPUDesc
parent = AMDGPU
-required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support
+required_libraries = Core MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support BinaryFormat
add_to_library_groups = AMDGPU
Added: llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,145 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -amdgpu-verify-hsa-metadata -filetype=obj -mattr=+code-object-v3 -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK-LABEL: {{^}}min_64_max_64:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define amdgpu_kernel void @min_64_max_64() #0 {
+entry:
+ ret void
+}
+attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
+
+; CHECK-LABEL: {{^}}min_64_max_128:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define amdgpu_kernel void @min_64_max_128() #1 {
+entry:
+ ret void
+}
+attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
+
+; CHECK-LABEL: {{^}}min_128_max_128:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define amdgpu_kernel void @min_128_max_128() #2 {
+entry:
+ ret void
+}
+attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
+
+; CHECK-LABEL: {{^}}min_1024_max_2048
+; CHECK: SGPRBlocks: 1
+; CHECK: VGPRBlocks: 7
+; CHECK: NumSGPRsForWavesPerEU: 12
+; CHECK: NumVGPRsForWavesPerEU: 32
+ at var = addrspace(1) global float 0.0
+define amdgpu_kernel void @min_1024_max_2048() #3 {
+ %val0 = load volatile float, float addrspace(1)* @var
+ %val1 = load volatile float, float addrspace(1)* @var
+ %val2 = load volatile float, float addrspace(1)* @var
+ %val3 = load volatile float, float addrspace(1)* @var
+ %val4 = load volatile float, float addrspace(1)* @var
+ %val5 = load volatile float, float addrspace(1)* @var
+ %val6 = load volatile float, float addrspace(1)* @var
+ %val7 = load volatile float, float addrspace(1)* @var
+ %val8 = load volatile float, float addrspace(1)* @var
+ %val9 = load volatile float, float addrspace(1)* @var
+ %val10 = load volatile float, float addrspace(1)* @var
+ %val11 = load volatile float, float addrspace(1)* @var
+ %val12 = load volatile float, float addrspace(1)* @var
+ %val13 = load volatile float, float addrspace(1)* @var
+ %val14 = load volatile float, float addrspace(1)* @var
+ %val15 = load volatile float, float addrspace(1)* @var
+ %val16 = load volatile float, float addrspace(1)* @var
+ %val17 = load volatile float, float addrspace(1)* @var
+ %val18 = load volatile float, float addrspace(1)* @var
+ %val19 = load volatile float, float addrspace(1)* @var
+ %val20 = load volatile float, float addrspace(1)* @var
+ %val21 = load volatile float, float addrspace(1)* @var
+ %val22 = load volatile float, float addrspace(1)* @var
+ %val23 = load volatile float, float addrspace(1)* @var
+ %val24 = load volatile float, float addrspace(1)* @var
+ %val25 = load volatile float, float addrspace(1)* @var
+ %val26 = load volatile float, float addrspace(1)* @var
+ %val27 = load volatile float, float addrspace(1)* @var
+ %val28 = load volatile float, float addrspace(1)* @var
+ %val29 = load volatile float, float addrspace(1)* @var
+ %val30 = load volatile float, float addrspace(1)* @var
+ %val31 = load volatile float, float addrspace(1)* @var
+ %val32 = load volatile float, float addrspace(1)* @var
+ %val33 = load volatile float, float addrspace(1)* @var
+ %val34 = load volatile float, float addrspace(1)* @var
+ %val35 = load volatile float, float addrspace(1)* @var
+ %val36 = load volatile float, float addrspace(1)* @var
+ %val37 = load volatile float, float addrspace(1)* @var
+ %val38 = load volatile float, float addrspace(1)* @var
+ %val39 = load volatile float, float addrspace(1)* @var
+ %val40 = load volatile float, float addrspace(1)* @var
+
+ store volatile float %val0, float addrspace(1)* @var
+ store volatile float %val1, float addrspace(1)* @var
+ store volatile float %val2, float addrspace(1)* @var
+ store volatile float %val3, float addrspace(1)* @var
+ store volatile float %val4, float addrspace(1)* @var
+ store volatile float %val5, float addrspace(1)* @var
+ store volatile float %val6, float addrspace(1)* @var
+ store volatile float %val7, float addrspace(1)* @var
+ store volatile float %val8, float addrspace(1)* @var
+ store volatile float %val9, float addrspace(1)* @var
+ store volatile float %val10, float addrspace(1)* @var
+ store volatile float %val11, float addrspace(1)* @var
+ store volatile float %val12, float addrspace(1)* @var
+ store volatile float %val13, float addrspace(1)* @var
+ store volatile float %val14, float addrspace(1)* @var
+ store volatile float %val15, float addrspace(1)* @var
+ store volatile float %val16, float addrspace(1)* @var
+ store volatile float %val17, float addrspace(1)* @var
+ store volatile float %val18, float addrspace(1)* @var
+ store volatile float %val19, float addrspace(1)* @var
+ store volatile float %val20, float addrspace(1)* @var
+ store volatile float %val21, float addrspace(1)* @var
+ store volatile float %val22, float addrspace(1)* @var
+ store volatile float %val23, float addrspace(1)* @var
+ store volatile float %val24, float addrspace(1)* @var
+ store volatile float %val25, float addrspace(1)* @var
+ store volatile float %val26, float addrspace(1)* @var
+ store volatile float %val27, float addrspace(1)* @var
+ store volatile float %val28, float addrspace(1)* @var
+ store volatile float %val29, float addrspace(1)* @var
+ store volatile float %val30, float addrspace(1)* @var
+ store volatile float %val31, float addrspace(1)* @var
+ store volatile float %val32, float addrspace(1)* @var
+ store volatile float %val33, float addrspace(1)* @var
+ store volatile float %val34, float addrspace(1)* @var
+ store volatile float %val35, float addrspace(1)* @var
+ store volatile float %val36, float addrspace(1)* @var
+ store volatile float %val37, float addrspace(1)* @var
+ store volatile float %val38, float addrspace(1)* @var
+ store volatile float %val39, float addrspace(1)* @var
+ store volatile float %val40, float addrspace(1)* @var
+
+ ret void
+}
+attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
+
+; CHECK: amdhsa.kernels:
+; CHECK: .max_flat_workgroup_size: 64
+; CHECK: .name: min_64_max_64
+; CHECK: .max_flat_workgroup_size: 128
+; CHECK: .name: min_64_max_128
+; CHECK: .max_flat_workgroup_size: 128
+; CHECK: .name: min_128_max_128
+; CHECK: .max_flat_workgroup_size: 2048
+; CHECK: .name: min_1024_max_2048
+; CHECK: amdhsa.version:
+; CHECK: - 1
+; CHECK: - 0
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Modified: llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-v3.ll Wed Dec 12 11:39:27 2018
@@ -3,6 +3,8 @@
; ALL-ASM-LABEL: {{^}}fadd:
+; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_version
+; OSABI-AMDHSA-ASM-NOT: .hsa_code_object_isa
; OSABI-AMDHSA-ASM-NOT: .amdgpu_hsa_kernel
; OSABI-AMDHSA-ASM-NOT: .amd_kernel_code_t
@@ -57,7 +59,8 @@
; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fadd.kd
; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000040 64 OBJECT GLOBAL DEFAULT {{[0-9]+}} fsub.kd
-; OSABI-AMDHSA-ELF-NOT: Displaying notes found
+; OSABI-AMDHSA-ELF: Displaying notes found at file offset
+; OSABI-AMDHSA-ELF: AMDGPU 0x{{[0-9a-f]+}} NT_AMDGPU_METADATA (AMDGPU Metadata)
define amdgpu_kernel void @fadd(
float addrspace(1)* %r,
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-deduce-ro-arg-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,33 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
+
+; CHECK: .symbol: test_ro_arg.kd
+; CHECK: .name: test_ro_arg
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'float*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: in
+; CHECK-NEXT: .access: read_only
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .is_const: true
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .is_restrict: true
+; CHECK-NEXT: .value_type: f32
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: 'float*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: out
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: f32
+; CHECK-NEXT: .address_space: global
+
+define amdgpu_kernel void @test_ro_arg(float addrspace(1)* noalias readonly %in, float addrspace(1)* %out)
+ !kernel_arg_addr_space !0 !kernel_arg_access_qual !1 !kernel_arg_type !2
+ !kernel_arg_base_type !2 !kernel_arg_type_qual !3 {
+ ret void
+}
+
+!0 = !{i32 1, i32 1}
+!1 = !{!"none", !"none"}
+!2 = !{!"float*", !"float*"}
+!3 = !{!"const restrict", !""}
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,101 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: .symbol: test_non_enqueue_kernel_caller.kd
+; CHECK: .name: test_non_enqueue_kernel_caller
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: char
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 1
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NOT: .value_kind: hidden_default_queue
+; CHECK-NOT: .value_kind: hidden_completion_action
+define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_enqueue_kernel_caller.kd
+; CHECK: .name: test_enqueue_kernel_caller
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: char
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 1
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_none
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_default_queue
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_completion_action
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+; CHECK-NOT: amdhsa.printf:
+
+attributes #0 = { "calls-enqueue-kernel" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,1453 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+%struct.A = type { i8, float }
+%opencl.image1d_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+%opencl.queue_t = type opaque
+%opencl.pipe_t = type opaque
+%struct.B = type { i32 addrspace(1)*}
+%opencl.clk_event_t = type opaque
+
+ at __test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: .symbol: test_char.kd
+; CHECK: .name: test_char
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: char
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 1
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NOT: .value_kind: hidden_default_queue
+; CHECK-NOT: .value_kind: hidden_completion_action
+define amdgpu_kernel void @test_char(i8 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
+ !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_ushort2.kd
+; CHECK: .name: test_ushort2
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: ushort2
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: u16
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10
+ !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_int3.kd
+; CHECK: .name: test_int3
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int3
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 16
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_int3(<3 x i32> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11
+ !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_ulong4.kd
+; CHECK: .name: test_ulong4
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: ulong4
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 32
+; CHECK-NEXT: .value_type: u64
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 56
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12
+ !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_half8.kd
+; CHECK: .name: test_half8
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: half8
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 16
+; CHECK-NEXT: .value_type: f16
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_half8(<8 x half> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13
+ !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_float16.kd
+; CHECK: .name: test_float16
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: float16
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 64
+; CHECK-NEXT: .value_type: f32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 64
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 72
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 80
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 88
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_float16(<16 x float> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14
+ !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_double16.kd
+; CHECK: .name: test_double16
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: double16
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 128
+; CHECK-NEXT: .value_type: f64
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 128
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 136
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 144
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 152
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_double16(<16 x double> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15
+ !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_pointer.kd
+; CHECK: .name: test_pointer
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16
+ !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_image.kd
+; CHECK: .name: test_image
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: image2d_t
+; CHECK-NEXT: .value_kind: image
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17
+ !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_sampler.kd
+; CHECK: .name: test_sampler
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: sampler_t
+; CHECK-NEXT: .value_kind: sampler
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_sampler(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18
+ !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_queue.kd
+; CHECK: .name: test_queue
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: queue_t
+; CHECK-NEXT: .value_kind: queue
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19
+ !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_struct.kd
+; CHECK: .name: test_struct
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: struct A
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: private
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
+ !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_i128.kd
+; CHECK: .name: test_i128
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: i128
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 16
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_i128(i128 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21
+ !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_multi_arg.kd
+; CHECK: .name: test_multi_arg
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .type_name: short2
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 4
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i16
+; CHECK-NEXT: .name: b
+; CHECK-NEXT: - .type_name: char3
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .name: c
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
+ !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24
+ !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
+ ret void
+}
+
+; CHECK: .symbol: test_addr_space.kd
+; CHECK: .name: test_addr_space
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: g
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: c
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .address_space: constant
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: l
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .pointee_align: 4
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
+ i32 addrspace(4)* %c,
+ i32 addrspace(3)* %l)
+ !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
+ !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
+ ret void
+}
+
+; CHECK: .symbol: test_type_qual.kd
+; CHECK: .name: test_type_qual
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .is_volatile: true
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: b
+; CHECK-NEXT: .is_const: true
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .is_restrict: true
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: 'int addrspace(5)*'
+; CHECK-NEXT: .value_kind: pipe
+; CHECK-NEXT: .name: c
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .is_pipe: true
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a,
+ i32 addrspace(1)* %b,
+ %opencl.pipe_t addrspace(1)* %c)
+ !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51
+ !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
+ ret void
+}
+
+; CHECK: .symbol: test_access_qual.kd
+; CHECK: .name: test_access_qual
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: image1d_t
+; CHECK-NEXT: .value_kind: image
+; CHECK-NEXT: .name: ro
+; CHECK-NEXT: .access: read_only
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: image2d_t
+; CHECK-NEXT: .value_kind: image
+; CHECK-NEXT: .name: wo
+; CHECK-NEXT: .access: write_only
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: image3d_t
+; CHECK-NEXT: .value_kind: image
+; CHECK-NEXT: .name: rw
+; CHECK-NEXT: .access: read_write
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro,
+ %opencl.image2d_t addrspace(1)* %wo,
+ %opencl.image3d_t addrspace(1)* %rw)
+ !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62
+ !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_half.kd
+; CHECK: .name: test_vec_type_hint_half
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: half
+define amdgpu_kernel void @test_vec_type_hint_half(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_float.kd
+; CHECK: .name: test_vec_type_hint_float
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: float
+define amdgpu_kernel void @test_vec_type_hint_float(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_double.kd
+; CHECK: .name: test_vec_type_hint_double
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: double
+define amdgpu_kernel void @test_vec_type_hint_double(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_char.kd
+; CHECK: .name: test_vec_type_hint_char
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: char
+define amdgpu_kernel void @test_vec_type_hint_char(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_short.kd
+; CHECK: .name: test_vec_type_hint_short
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: short
+define amdgpu_kernel void @test_vec_type_hint_short(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_long.kd
+; CHECK: .name: test_vec_type_hint_long
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: long
+define amdgpu_kernel void @test_vec_type_hint_long(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
+ ret void
+}
+
+; CHECK: .symbol: test_vec_type_hint_unknown.kd
+; CHECK: .name: test_vec_type_hint_unknown
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: unknown
+define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
+ ret void
+}
+
+; CHECK: .reqd_workgroup_size:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 4
+; CHECK: .symbol: test_reqd_wgs_vec_type_hint.kd
+; CHECK: .name: test_reqd_wgs_vec_type_hint
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: int
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5
+ !reqd_work_group_size !6 {
+ ret void
+}
+
+; CHECK: .symbol: test_wgs_hint_vec_type_hint.kd
+; CHECK: .workgroup_size_hint:
+; CHECK-NEXT: - 8
+; CHECK-NEXT: - 16
+; CHECK-NEXT: - 32
+; CHECK: .name: test_wgs_hint_vec_type_hint
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: int
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK: .vec_type_hint: uint4
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+ !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7
+ !work_group_size_hint !8 {
+ ret void
+}
+
+; CHECK: .symbol: test_arg_ptr_to_ptr.kd
+; CHECK: .name: test_arg_ptr_to_ptr
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'int addrspace(5)* addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a)
+ !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
+ !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_arg_struct_contains_ptr.kd
+; CHECK: .name: test_arg_struct_contains_ptr
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: struct B
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: private
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
+ !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_arg_vector_of_ptr.kd
+; CHECK: .name: test_arg_vector_of_ptr
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'global int addrspace(5)* __attribute__((ext_vector_type(2)))'
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 16
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a)
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83
+ !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_arg_unknown_builtin_type.kd
+; CHECK: .name: test_arg_unknown_builtin_type
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: clk_event_t
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_arg_unknown_builtin_type(
+ %opencl.clk_event_t addrspace(1)* %a)
+ !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84
+ !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_pointee_align.kd
+; CHECK: .name: test_pointee_align
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: 'long addrspace(5)*'
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .type_name: 'char addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: b
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .pointee_align: 1
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .type_name: 'char2 addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: c
+; CHECK-NEXT: .offset: 12
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .pointee_align: 2
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .type_name: 'char3 addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: d
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .pointee_align: 4
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .type_name: 'char4 addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: e
+; CHECK-NEXT: .offset: 20
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .pointee_align: 4
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .type_name: 'char8 addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: f
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .pointee_align: 8
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .type_name: 'char16 addrspace(5)*'
+; CHECK-NEXT: .value_kind: dynamic_shared_pointer
+; CHECK-NEXT: .name: g
+; CHECK-NEXT: .offset: 28
+; CHECK-NEXT: .size: 4
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .pointee_align: 16
+; CHECK-NEXT: .address_space: local
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 56
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
+ i8 addrspace(3)* %b,
+ <2 x i8> addrspace(3)* %c,
+ <3 x i8> addrspace(3)* %d,
+ <4 x i8> addrspace(3)* %e,
+ <8 x i8> addrspace(3)* %f,
+ <16 x i8> addrspace(3)* %g)
+ !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
+ !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
+ ret void
+}
+
+; CHECK: .symbol: __test_block_invoke_kernel.kd
+; CHECK: .device_enqueue_symbol: __test_block_invoke_kernel_runtime_handle
+; CHECK: .name: __test_block_invoke_kernel
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: __block_literal
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 25
+; CHECK-NEXT: .value_type: struct
+; CHECK-NEXT: .name: arg
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 56
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @__test_block_invoke_kernel(
+ <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110
+ !kernel_arg_base_type !110 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: test_enqueue_kernel_caller.kd
+; CHECK: .name: test_enqueue_kernel_caller
+; CHECK: .language: OpenCL C
+; CHECK: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .args:
+; CHECK-NEXT: - .type_name: char
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 1
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_printf_buffer
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_default_queue
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_completion_action
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+ !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
+ !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK: .symbol: unknown_addrspace_kernarg.kd
+; CHECK: .name: unknown_addrspace_kernarg
+; CHECK: .args:
+; CHECK-NEXT: .value_kind: global_buffer
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i32
+; CHECK-NEXT: .name: ptr
+define amdgpu_kernel void @unknown_addrspace_kernarg(i32 addrspace(12345)* %ptr) #0 {
+ ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+; CHECK: amdhsa.printf:
+; CHECK-NEXT: - '1:1:4:%d\n'
+; CHECK-NEXT: - '2:1:8:%g\n'
+
+attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
+attributes #1 = { "calls-enqueue-kernel" }
+
+!llvm.printf.fmts = !{!100, !101}
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"int"}
+!4 = !{!""}
+!5 = !{i32 undef, i32 1}
+!6 = !{i32 1, i32 2, i32 4}
+!7 = !{<4 x i32> undef, i32 0}
+!8 = !{i32 8, i32 16, i32 32}
+!9 = !{!"char"}
+!10 = !{!"ushort2"}
+!11 = !{!"int3"}
+!12 = !{!"ulong4"}
+!13 = !{!"half8"}
+!14 = !{!"float16"}
+!15 = !{!"double16"}
+!16 = !{!"int addrspace(5)*"}
+!17 = !{!"image2d_t"}
+!18 = !{!"sampler_t"}
+!19 = !{!"queue_t"}
+!20 = !{!"struct A"}
+!21 = !{!"i128"}
+!22 = !{i32 0, i32 0, i32 0}
+!23 = !{!"none", !"none", !"none"}
+!24 = !{!"int", !"short2", !"char3"}
+!25 = !{!"", !"", !""}
+!26 = !{half undef, i32 1}
+!27 = !{float undef, i32 1}
+!28 = !{double undef, i32 1}
+!29 = !{i8 undef, i32 1}
+!30 = !{i16 undef, i32 1}
+!31 = !{i64 undef, i32 1}
+!32 = !{i32 addrspace(5)*undef, i32 1}
+!50 = !{i32 1, i32 2, i32 3}
+!51 = !{!"int addrspace(5)*", !"int addrspace(5)*", !"int addrspace(5)*"}
+!60 = !{i32 1, i32 1, i32 1}
+!61 = !{!"read_only", !"write_only", !"read_write"}
+!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
+!70 = !{!"volatile", !"const restrict", !"pipe"}
+!80 = !{!"int addrspace(5)* addrspace(5)*"}
+!81 = !{i32 1}
+!82 = !{!"struct B"}
+!83 = !{!"global int addrspace(5)* __attribute__((ext_vector_type(2)))"}
+!84 = !{!"clk_event_t"}
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
+!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+!93 = !{!"long addrspace(5)*", !"char addrspace(5)*", !"char2 addrspace(5)*", !"char3 addrspace(5)*", !"char4 addrspace(5)*", !"char8 addrspace(5)*", !"char16 addrspace(5)*"}
+!94 = !{!"", !"", !"", !"", !"", !"", !""}
+!100 = !{!"1:1:4:%d\5Cn"}
+!101 = !{!"2:1:8:%g\5Cn"}
+!110 = !{!"__block_literal"}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,72 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: .symbol: test.kd
+; CHECK: .name: test
+; CHECK: .args:
+; CHECK-NEXT: - .value_kind: global_buffer
+; CHECK-NEXT: .name: r
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: f16
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: global_buffer
+; CHECK-NEXT: .name: a
+; CHECK-NEXT: .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: f16
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: global_buffer
+; CHECK-NEXT: .name: b
+; CHECK-NEXT: .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: f16
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .offset: 40
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .value_kind: hidden_none
+; CHECK-NEXT: .offset: 48
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_none
+; CHECK-NEXT: .offset: 56
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+; CHECK-NEXT: - .value_kind: hidden_none
+; CHECK-NEXT: .offset: 64
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: .address_space: global
+define amdgpu_kernel void @test(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+!opencl.ocl.version = !{!0}
+!0 = !{i32 2, i32 0}
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,95 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+%opencl.image1d_t = type opaque
+%opencl.image1d_array_t = type opaque
+%opencl.image1d_buffer_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image2d_array_t = type opaque
+%opencl.image2d_array_depth_t = type opaque
+%opencl.image2d_array_msaa_t = type opaque
+%opencl.image2d_array_msaa_depth_t = type opaque
+%opencl.image2d_depth_t = type opaque
+%opencl.image2d_msaa_t = type opaque
+%opencl.image2d_msaa_depth_t = type opaque
+%opencl.image3d_t = type opaque
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: .symbol: test.kd
+; CHECK: .name: test
+; CHECK: .args:
+; CHECK: - .type_name: image1d_t
+; CHECK: .value_kind: image
+; CHECK: .name: a
+; CHECK: .size: 8
+; CHECK: - .type_name: image1d_array_t
+; CHECK: .value_kind: image
+; CHECK: .name: b
+; CHECK: .size: 8
+; CHECK: - .type_name: image1d_buffer_t
+; CHECK: .value_kind: image
+; CHECK: .name: c
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_t
+; CHECK: .value_kind: image
+; CHECK: .name: d
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_array_t
+; CHECK: .value_kind: image
+; CHECK: .name: e
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_array_depth_t
+; CHECK: .value_kind: image
+; CHECK: .name: f
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_array_msaa_t
+; CHECK: .value_kind: image
+; CHECK: .name: g
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_array_msaa_depth_t
+; CHECK: .value_kind: image
+; CHECK: .name: h
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_depth_t
+; CHECK: .value_kind: image
+; CHECK: .name: i
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_msaa_t
+; CHECK: .value_kind: image
+; CHECK: .name: j
+; CHECK: .size: 8
+; CHECK: - .type_name: image2d_msaa_depth_t
+; CHECK: .value_kind: image
+; CHECK: .name: k
+; CHECK: .size: 8
+; CHECK: - .type_name: image3d_t
+; CHECK: .value_kind: image
+; CHECK: .name: l
+; CHECK: .size: 8
+define amdgpu_kernel void @test(%opencl.image1d_t addrspace(1)* %a,
+ %opencl.image1d_array_t addrspace(1)* %b,
+ %opencl.image1d_buffer_t addrspace(1)* %c,
+ %opencl.image2d_t addrspace(1)* %d,
+ %opencl.image2d_array_t addrspace(1)* %e,
+ %opencl.image2d_array_depth_t addrspace(1)* %f,
+ %opencl.image2d_array_msaa_t addrspace(1)* %g,
+ %opencl.image2d_array_msaa_depth_t addrspace(1)* %h,
+ %opencl.image2d_depth_t addrspace(1)* %i,
+ %opencl.image2d_msaa_t addrspace(1)* %j,
+ %opencl.image2d_msaa_depth_t addrspace(1)* %k,
+ %opencl.image3d_t addrspace(1)* %l)
+ !kernel_arg_type !1 !kernel_arg_base_type !1 {
+ ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t",
+ !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t",
+ !"image2d_array_msaa_t", !"image2d_array_msaa_depth_t",
+ !"image2d_depth_t", !"image2d_msaa_t", !"image2d_msaa_depth_t",
+ !"image3d_t"}
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,11 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
+
+; Make sure llc does not crash for invalid opencl version metadata.
+
+; CHECK: ---
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+; CHECK: ...
+
+!opencl.ocl.version = !{}
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-2-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,12 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
+
+; Make sure llc does not crash for invalid opencl version metadata.
+
+; CHECK: ---
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+; CHECK: ...
+
+!opencl.ocl.version = !{!0}
+!0 = !{}
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,12 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck %s
+
+; Make sure llc does not crash for invalid opencl version metadata.
+
+; CHECK: ---
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+; CHECK: ...
+
+!opencl.ocl.version = !{!0}
+!0 = !{i32 1}
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll Wed Dec 12 11:39:27 2018
@@ -0,0 +1,146 @@
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+
+ at var = addrspace(1) global float 0.0
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+
+; CHECK: - .max_flat_workgroup_size: 256
+; CHECK: .kernarg_segment_size: 24
+; CHECK: .private_segment_fixed_size: 0
+; CHECK: .wavefront_size: 64
+; CHECK: .symbol: test.kd
+; CHECK: .name: test
+; CHECK: .sgpr_count: 8
+; CHECK: .kernarg_segment_align: 8
+; CHECK: .vgpr_count: 6
+; CHECK: .group_segment_fixed_size: 0
+define amdgpu_kernel void @test(
+ half addrspace(1)* %r,
+ half addrspace(1)* %a,
+ half addrspace(1)* %b) {
+entry:
+ %a.val = load half, half addrspace(1)* %a
+ %b.val = load half, half addrspace(1)* %b
+ %r.val = fadd half %a.val, %b.val
+ store half %r.val, half addrspace(1)* %r
+ ret void
+}
+
+; CHECK: .symbol: num_spilled_sgprs.kd
+; CHECK: .name: num_spilled_sgprs
+; GFX700: .sgpr_spill_count: 40
+; GFX803: .sgpr_spill_count: 24
+; GFX900: .sgpr_spill_count: 24
+define amdgpu_kernel void @num_spilled_sgprs(
+ i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32],
+ i32 addrspace(1)* %out2, i32 addrspace(1)* %out3, [8 x i32],
+ i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, [8 x i32],
+ i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, [8 x i32],
+ i32 addrspace(1)* %out8, i32 addrspace(1)* %out9, [8 x i32],
+ i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, [8 x i32],
+ i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, [8 x i32],
+ i32 addrspace(1)* %oute, i32 addrspace(1)* %outf, [8 x i32],
+ i32 %in0, i32 %in1, i32 %in2, i32 %in3, [8 x i32],
+ i32 %in4, i32 %in5, i32 %in6, i32 %in7, [8 x i32],
+ i32 %in8, i32 %in9, i32 %ina, i32 %inb, [8 x i32],
+ i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
+entry:
+ store i32 %in0, i32 addrspace(1)* %out0
+ store i32 %in1, i32 addrspace(1)* %out1
+ store i32 %in2, i32 addrspace(1)* %out2
+ store i32 %in3, i32 addrspace(1)* %out3
+ store i32 %in4, i32 addrspace(1)* %out4
+ store i32 %in5, i32 addrspace(1)* %out5
+ store i32 %in6, i32 addrspace(1)* %out6
+ store i32 %in7, i32 addrspace(1)* %out7
+ store i32 %in8, i32 addrspace(1)* %out8
+ store i32 %in9, i32 addrspace(1)* %out9
+ store i32 %ina, i32 addrspace(1)* %outa
+ store i32 %inb, i32 addrspace(1)* %outb
+ store i32 %inc, i32 addrspace(1)* %outc
+ store i32 %ind, i32 addrspace(1)* %outd
+ store i32 %ine, i32 addrspace(1)* %oute
+ store i32 %inf, i32 addrspace(1)* %outf
+ ret void
+}
+
+; CHECK: .symbol: num_spilled_vgprs.kd
+; CHECK: .name: num_spilled_vgprs
+; CHECK: .vgpr_spill_count: 14
+define amdgpu_kernel void @num_spilled_vgprs() #1 {
+ %val0 = load volatile float, float addrspace(1)* @var
+ %val1 = load volatile float, float addrspace(1)* @var
+ %val2 = load volatile float, float addrspace(1)* @var
+ %val3 = load volatile float, float addrspace(1)* @var
+ %val4 = load volatile float, float addrspace(1)* @var
+ %val5 = load volatile float, float addrspace(1)* @var
+ %val6 = load volatile float, float addrspace(1)* @var
+ %val7 = load volatile float, float addrspace(1)* @var
+ %val8 = load volatile float, float addrspace(1)* @var
+ %val9 = load volatile float, float addrspace(1)* @var
+ %val10 = load volatile float, float addrspace(1)* @var
+ %val11 = load volatile float, float addrspace(1)* @var
+ %val12 = load volatile float, float addrspace(1)* @var
+ %val13 = load volatile float, float addrspace(1)* @var
+ %val14 = load volatile float, float addrspace(1)* @var
+ %val15 = load volatile float, float addrspace(1)* @var
+ %val16 = load volatile float, float addrspace(1)* @var
+ %val17 = load volatile float, float addrspace(1)* @var
+ %val18 = load volatile float, float addrspace(1)* @var
+ %val19 = load volatile float, float addrspace(1)* @var
+ %val20 = load volatile float, float addrspace(1)* @var
+ %val21 = load volatile float, float addrspace(1)* @var
+ %val22 = load volatile float, float addrspace(1)* @var
+ %val23 = load volatile float, float addrspace(1)* @var
+ %val24 = load volatile float, float addrspace(1)* @var
+ %val25 = load volatile float, float addrspace(1)* @var
+ %val26 = load volatile float, float addrspace(1)* @var
+ %val27 = load volatile float, float addrspace(1)* @var
+ %val28 = load volatile float, float addrspace(1)* @var
+ %val29 = load volatile float, float addrspace(1)* @var
+ %val30 = load volatile float, float addrspace(1)* @var
+
+ store volatile float %val0, float addrspace(1)* @var
+ store volatile float %val1, float addrspace(1)* @var
+ store volatile float %val2, float addrspace(1)* @var
+ store volatile float %val3, float addrspace(1)* @var
+ store volatile float %val4, float addrspace(1)* @var
+ store volatile float %val5, float addrspace(1)* @var
+ store volatile float %val6, float addrspace(1)* @var
+ store volatile float %val7, float addrspace(1)* @var
+ store volatile float %val8, float addrspace(1)* @var
+ store volatile float %val9, float addrspace(1)* @var
+ store volatile float %val10, float addrspace(1)* @var
+ store volatile float %val11, float addrspace(1)* @var
+ store volatile float %val12, float addrspace(1)* @var
+ store volatile float %val13, float addrspace(1)* @var
+ store volatile float %val14, float addrspace(1)* @var
+ store volatile float %val15, float addrspace(1)* @var
+ store volatile float %val16, float addrspace(1)* @var
+ store volatile float %val17, float addrspace(1)* @var
+ store volatile float %val18, float addrspace(1)* @var
+ store volatile float %val19, float addrspace(1)* @var
+ store volatile float %val20, float addrspace(1)* @var
+ store volatile float %val21, float addrspace(1)* @var
+ store volatile float %val22, float addrspace(1)* @var
+ store volatile float %val23, float addrspace(1)* @var
+ store volatile float %val24, float addrspace(1)* @var
+ store volatile float %val25, float addrspace(1)* @var
+ store volatile float %val26, float addrspace(1)* @var
+ store volatile float %val27, float addrspace(1)* @var
+ store volatile float %val28, float addrspace(1)* @var
+ store volatile float %val29, float addrspace(1)* @var
+ store volatile float %val30, float addrspace(1)* @var
+
+ ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+attributes #0 = { "amdgpu-num-sgpr"="14" }
+attributes #1 = { "amdgpu-num-vgpr"="20" }
Added: llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-args-v3.s Wed Dec 12 11:39:27 2018
@@ -0,0 +1,96 @@
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
+
+// CHECK: .amdgpu_metadata
+// CHECK: amdhsa.kernels:
+// CHECK-NEXT: - .max_flat_workgroup_size: 256
+// CHECK-NEXT: .wavefront_size: 128
+// CHECK-NEXT: .symbol: 'test_kernel at kd'
+// CHECK-NEXT: .kernarg_segment_size: 8
+// CHECK-NEXT: .private_segment_fixed_size: 32
+// CHECK-NEXT: .name: test_kernel
+// CHECK-NEXT: .language: OpenCL C
+// CHECK-NEXT: .sgpr_count: 14
+// CHECK-NEXT: .kernarg_segment_align: 64
+// CHECK-NEXT: .vgpr_count: 40
+// CHECK-NEXT: .group_segment_fixed_size: 16
+// CHECK-NEXT: .language_version:
+// CHECK-NEXT: - 2
+// CHECK-NEXT: - 0
+// CHECK-NEXT: .args:
+// CHECK-NEXT: - .type_name: char
+// CHECK-NEXT: .value_kind: by_value
+// CHECK-NEXT: .offset: 1
+// CHECK-NEXT: .size: 1
+// CHECK-NEXT: .value_type: i8
+// CHECK-NEXT: - .value_kind: hidden_global_offset_x
+// CHECK-NEXT: .offset: 8
+// CHECK-NEXT: .size: 8
+// CHECK-NEXT: .value_type: i64
+// CHECK-NEXT: - .value_kind: hidden_global_offset_y
+// CHECK-NEXT: .offset: 8
+// CHECK-NEXT: .size: 8
+// CHECK-NEXT: .value_type: i64
+// CHECK-NEXT: - .value_kind: hidden_global_offset_z
+// CHECK-NEXT: .offset: 8
+// CHECK-NEXT: .size: 8
+// CHECK-NEXT: .value_type: i64
+// CHECK-NEXT: - .value_kind: hidden_printf_buffer
+// CHECK-NEXT: .offset: 8
+// CHECK-NEXT: .size: 8
+// CHECK-NEXT: .value_type: i8
+// CHECK-NEXT: .address_space: global
+// CHECK: amdhsa.version:
+// CHECK-NEXT: - 1
+// CHECK-NEXT: - 0
+// CHECK: amdhsa.printf:
+// CHECK-NEXT: - '1:1:4:%d\n'
+// CHECK-NEXt: - '2:1:8:%g\n'
+// CHECK: .end_amdgpu_metadata
+.amdgpu_metadata
+ amdhsa.version:
+ - 1
+ - 0
+ amdhsa.printf:
+ - '1:1:4:%d\n'
+ - '2:1:8:%g\n'
+ amdhsa.kernels:
+ - .name: test_kernel
+ .symbol: test_kernel at kd
+ .language: OpenCL C
+ .language_version:
+ - 2
+ - 0
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+ .args:
+ - .type_name: char
+ .size: 1
+ .offset: 1
+ .value_kind: by_value
+ .value_type: i8
+ - .size: 8
+ .offset: 8
+ .value_kind: hidden_global_offset_x
+ .value_type: i64
+ - .size: 8
+ .offset: 8
+ .value_kind: hidden_global_offset_y
+ .value_type: i64
+ - .size: 8
+ .offset: 8
+ .value_kind: hidden_global_offset_z
+ .value_type: i64
+ - .size: 8
+ .offset: 8
+ .value_kind: hidden_printf_buffer
+ .value_type: i8
+ .address_space: global
+.end_amdgpu_metadata
Added: llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-attrs-v3.s Wed Dec 12 11:39:27 2018
@@ -0,0 +1,67 @@
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
+
+// CHECK: .amdgpu_metadata
+// CHECK: amdhsa.kernels:
+// CHECK: - .max_flat_workgroup_size: 256
+// CHECK: .wavefront_size: 128
+// CHECK: .symbol: 'test_kernel at kd'
+// CHECK: .reqd_workgroup_size:
+// CHECK-NEXT: - 1
+// CHECK-NEXT: - 2
+// CHECK-NEXT: - 4
+// CHECK: .kernarg_segment_size: 8
+// CHECK: .private_segment_fixed_size: 32
+// CHECK: .workgroup_size_hint:
+// CHECK-NEXT: - 8
+// CHECK-NEXT: - 16
+// CHECK-NEXT: - 32
+// CHECK: .name: test_kernel
+// CHECK: .language: OpenCL C
+// CHECK: .sgpr_count: 14
+// CHECK: .kernarg_segment_align: 64
+// CHECK: .vgpr_count: 40
+// CHECK: .language_version:
+// CHECK-NEXT: - 2
+// CHECK-NEXT: - 0
+// CHECK: .vec_type_hint: int
+// CHECK: amdhsa.version:
+// CHECK-NEXT: - 1
+// CHECK-NEXT: - 0
+// CHECK: amdhsa.printf:
+// CHECK: - '1:1:4:%d\n'
+// CHECK: - '2:1:8:%g\n'
+// CHECK: .end_amdgpu_metadata
+.amdgpu_metadata
+ amdhsa.version:
+ - 1
+ - 0
+ amdhsa.printf:
+ - '1:1:4:%d\n'
+ - '2:1:8:%g\n'
+ amdhsa.kernels:
+ - .name: test_kernel
+ .symbol: test_kernel at kd
+ .language: OpenCL C
+ .language_version:
+ - 2
+ - 0
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+ .reqd_workgroup_size:
+ - 1
+ - 2
+ - 4
+ .workgroup_size_hint:
+ - 8
+ - 16
+ - 32
+ .vec_type_hint: int
+.end_amdgpu_metadata
Added: llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s?rev=348963&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-metadata-kernel-code-props-v3.s Wed Dec 12 11:39:27 2018
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
+
+// CHECK: .amdgpu_metadata
+// CHECK: amdhsa.kernels:
+// CHECK: - .sgpr_count: 40
+// CHECK: .max_flat_workgroup_size: 256
+// CHECK: .symbol: 'test_kernel at kd'
+// CHECK: .kernarg_segment_size: 24
+// CHECK: .group_segment_fixed_size: 24
+// CHECK: .private_segment_fixed_size: 16
+// CHECK: .vgpr_count: 14
+// CHECK: .vgpr_spill_count: 1
+// CHECK: .kernarg_segment_align: 16
+// CHECK: .sgpr_spill_count: 1
+// CHECK: .wavefront_size: 64
+// CHECK: .name: test_kernel
+// CHECK: amdhsa.version:
+// CHECK-NEXT: - 1
+// CHECK-NEXT: - 0
+.amdgpu_metadata
+ amdhsa.version:
+ - 1
+ - 0
+ amdhsa.printf:
+ - '1:1:4:%d\n'
+ - '2:1:8:%g\n'
+ amdhsa.kernels:
+ - .name: test_kernel
+ .symbol: test_kernel at kd
+ .kernarg_segment_size: 24
+ .group_segment_fixed_size: 24
+ .private_segment_fixed_size: 16
+ .kernarg_segment_align: 16
+ .wavefront_size: 64
+ .max_flat_workgroup_size: 256
+ .sgpr_count: 40
+ .vgpr_count: 14
+ .sgpr_spill_count: 1
+ .vgpr_spill_count: 1
+.end_amdgpu_metadata
Modified: llvm/trunk/test/MC/AMDGPU/hsa-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-v3.s?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-v3.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa-v3.s Wed Dec 12 11:39:27 2018
@@ -213,3 +213,59 @@ v_mov_b32_e32 v16, s3
// ASM: .byte 17
.byte .amdgcn.next_free_sgpr
// ASM: .byte 4
+
+// Metadata
+
+.amdgpu_metadata
+ amdhsa.version:
+ - 3
+ - 0
+ amdhsa.kernels:
+ - .name: amd_kernel_code_t_test_all
+ .symbol: amd_kernel_code_t_test_all at kd
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+ - .name: amd_kernel_code_t_minimal
+ .symbol: amd_kernel_code_t_minimal at kd
+ .kernarg_segment_size: 8
+ .group_segment_fixed_size: 16
+ .private_segment_fixed_size: 32
+ .kernarg_segment_align: 64
+ .wavefront_size: 128
+ .sgpr_count: 14
+ .vgpr_count: 40
+ .max_flat_workgroup_size: 256
+.end_amdgpu_metadata
+
+// ASM: .amdgpu_metadata
+// ASM: amdhsa.kernels:
+// ASM: - .sgpr_count: 14
+// ASM: .max_flat_workgroup_size: 256
+// ASM: .symbol: 'amd_kernel_code_t_test_all at kd'
+// ASM: .kernarg_segment_size: 8
+// ASM: .group_segment_fixed_size: 16
+// ASM: .private_segment_fixed_size: 32
+// ASM: .vgpr_count: 40
+// ASM: .kernarg_segment_align: 64
+// ASM: .wavefront_size: 128
+// ASM: .name: amd_kernel_code_t_test_all
+// ASM: - .sgpr_count: 14
+// ASM: .max_flat_workgroup_size: 256
+// ASM: .symbol: 'amd_kernel_code_t_minimal at kd'
+// ASM: .kernarg_segment_size: 8
+// ASM: .group_segment_fixed_size: 16
+// ASM: .private_segment_fixed_size: 32
+// ASM: .vgpr_count: 40
+// ASM: .kernarg_segment_align: 64
+// ASM: .wavefront_size: 128
+// ASM: .name: amd_kernel_code_t_minimal
+// ASM: amdhsa.version:
+// ASM-NEXT: - 3
+// ASM-NEXT: - 0
+// ASM: .end_amdgpu_metadata
Modified: llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/ELFDumper.cpp?rev=348963&r1=348962&r2=348963&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/ELFDumper.cpp (original)
+++ llvm/trunk/tools/llvm-readobj/ELFDumper.cpp Wed Dec 12 11:39:27 2018
@@ -28,6 +28,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -3628,7 +3629,7 @@ static std::string getFreeBSDNoteTypeNam
return OS.str();
}
-static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
+static std::string getAMDNoteTypeName(const uint32_t NT) {
static const struct {
uint32_t ID;
const char *Name;
@@ -3651,6 +3652,16 @@ static std::string getAMDGPUNoteTypeName
return OS.str();
}
+static std::string getAMDGPUNoteTypeName(const uint32_t NT) {
+ if (NT == ELF::NT_AMDGPU_METADATA)
+ return std::string("NT_AMDGPU_METADATA (AMDGPU Metadata)");
+
+ std::string string;
+ raw_string_ostream OS(string);
+ OS << format("Unknown note type (0x%08x)", NT);
+ return OS.str();
+}
+
template <typename ELFT>
static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
ArrayRef<uint8_t> Data) {
@@ -3808,14 +3819,13 @@ static void printGNUNote(raw_ostream &OS
OS << '\n';
}
-struct AMDGPUNote {
- std::string type;
- std::string value;
+struct AMDNote {
+ std::string Type;
+ std::string Value;
};
template <typename ELFT>
-static AMDGPUNote getAMDGPUNote(uint32_t NoteType,
- ArrayRef<uint8_t> Desc) {
+static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return {"", ""};
@@ -3841,6 +3851,41 @@ static AMDGPUNote getAMDGPUNote(uint32_t
}
}
+struct AMDGPUNote {
+ std::string Type;
+ std::string Value;
+};
+
+template <typename ELFT>
+static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
+ switch (NoteType) {
+ default:
+ return {"", ""};
+ case ELF::NT_AMDGPU_METADATA:
+ auto MsgPackString =
+ StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+ msgpack::Reader MsgPackReader(MsgPackString);
+ auto OptMsgPackNodeOrErr = msgpack::Node::read(MsgPackReader);
+ if (errorToBool(OptMsgPackNodeOrErr.takeError()))
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ auto &OptMsgPackNode = *OptMsgPackNodeOrErr;
+ if (!OptMsgPackNode)
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+ auto &MsgPackNode = *OptMsgPackNode;
+
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(*MsgPackNode))
+ return {"AMDGPU Metadata", "Invalid AMDGPU Metadata"};
+
+ std::string HSAMetadataString;
+ raw_string_ostream StrOS(HSAMetadataString);
+ yaml::Output YOut(StrOS);
+ YOut << MsgPackNode;
+
+ return {"AMDGPU Metadata", StrOS.str()};
+ }
+}
+
template <class ELFT>
void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
const Elf_Ehdr *e = Obj->getHeader();
@@ -3867,10 +3912,15 @@ void GNUStyle<ELFT>::printNotes(const EL
} else if (Name == "FreeBSD") {
OS << getFreeBSDNoteTypeName(Type) << '\n';
} else if (Name == "AMD") {
+ OS << getAMDNoteTypeName(Type) << '\n';
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ OS << " " << N.Type << ":\n " << N.Value << '\n';
+ } else if (Name == "AMDGPU") {
OS << getAMDGPUNoteTypeName(Type) << '\n';
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
- if (!N.type.empty())
- OS << " " << N.type << ":\n " << N.value << '\n';
+ if (!N.Type.empty())
+ OS << " " << N.Type << ":\n " << N.Value << '\n';
} else {
OS << "Unknown note type: (" << format_hex(Type, 10) << ')';
}
@@ -4533,10 +4583,15 @@ void LLVMStyle<ELFT>::printNotes(const E
} else if (Name == "FreeBSD") {
W.printString("Type", getFreeBSDNoteTypeName(Type));
} else if (Name == "AMD") {
+ W.printString("Type", getAMDNoteTypeName(Type));
+ const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
+ } else if (Name == "AMDGPU") {
W.printString("Type", getAMDGPUNoteTypeName(Type));
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
- if (!N.type.empty())
- W.printString(N.type, N.value);
+ if (!N.Type.empty())
+ W.printString(N.Type, N.Value);
} else {
W.getOStream() << "Unknown note type: (" << format_hex(Type, 10) << ')';
}
More information about the llvm-commits
mailing list