[llvm] r289674 - AMDGPU: Emit runtime metadata version 2 as YAML

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 14 09:16:52 PST 2016


Author: yaxunl
Date: Wed Dec 14 11:16:52 2016
New Revision: 289674

URL: http://llvm.org/viewvc/llvm-project?rev=289674&view=rev
Log:
AMDGPU: Emit runtime metadata version 2 as YAML

Differential Revision: https://reviews.llvm.org/D25046

Added:
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
    llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
    llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
    llvm/trunk/tools/llvm-readobj/ObjDumper.h
    llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Dec 14 11:16:52 2016
@@ -119,7 +119,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
                                     "AMD", "AMDGPU");
 
   // Emit runtime metadata.
-  TS->emitRuntimeMetadataAsNoteElement(M);
+  TS->emitRuntimeMetadata(M);
 }
 
 bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -824,4 +824,3 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(c
                    *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
   return false;
 }
-

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Wed Dec 14 11:16:52 2016
@@ -14,17 +14,12 @@
 /// Runtime requests certain information (metadata) about kernels to be able
 /// to execute the kernels and answer the queries about the kernels.
 /// The metadata is represented as a note element in the .note ELF section of a
-/// binary (code object). The desc field of the note element consists of
-/// key-value pairs. Each key is an 8 bit unsigned integer. Each value can be
-/// an integer, a string, or a stream of key-value pairs. There are 3 levels of
-/// key-value pair streams. At the beginning of the ELF section is the top level
-/// key-value pair stream. A kernel-level key-value pair stream starts after
-/// encountering KeyKernelBegin and ends immediately before encountering
-/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
-/// after encountering KeyArgBegin and ends immediately before encountering
-/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
-/// level key-value pair stream. A kernel-argument-level key-value pair stream
-/// can only appear in a kernel-level key-value pair stream.
+/// binary (code object). The desc field of the note element is a YAML string
+/// consisting of key-value pairs. Each key is a string. Each value can be
+/// an integer, a string, or a YAML sequence. There are 3 levels of YAML maps.
+/// At the beginning of the YAML string is the module-level YAML map. A
+/// kernel-level YAML map is in the amd.Kernels sequence. A
+/// kernel-argument-level map is in the amd.Args sequence.
 ///
 /// The format should be kept backward compatible. New enum values and bit
 /// fields should be appended at the end. It is suggested to bump up the
@@ -37,64 +32,46 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
 
+#include <cstdint>
+#include <vector>
+#include <string>
+
 namespace AMDGPU {
 
 namespace RuntimeMD {
 
   // Version and revision of runtime metadata
-  const unsigned char MDVersion   = 1;
+  const unsigned char MDVersion   = 2;
   const unsigned char MDRevision  = 0;
 
-  // Enumeration values of keys in runtime metadata.
-  enum Key {
-    KeyNull                     = 0, // Place holder. Ignored when encountered
-    KeyMDVersion                = 1, // Runtime metadata version
-    KeyLanguage                 = 2, // Language
-    KeyLanguageVersion          = 3, // Language version
-    KeyKernelBegin              = 4, // Beginning of kernel-level stream
-    KeyKernelEnd                = 5, // End of kernel-level stream
-    KeyKernelName               = 6, // Kernel name
-    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
-    KeyArgEnd                   = 8, // End of kernel-arg-level stream
-    KeyArgSize                  = 9, // Kernel arg size
-    KeyArgAlign                 = 10, // Kernel arg alignment
-    KeyArgTypeName              = 11, // Kernel type name
-    KeyArgName                  = 12, // Kernel name
-    KeyArgKind                  = 13, // Kernel argument kind
-    KeyArgValueType             = 14, // Kernel argument value type
-    KeyArgAddrQual              = 15, // Kernel argument address qualifier
-    KeyArgAccQual               = 16, // Kernel argument access qualifier
-    KeyArgIsConst               = 17, // Kernel argument is const qualified
-    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
-    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
-    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
-    KeyReqdWorkGroupSize        = 21, // Required work group size
-    KeyWorkGroupSizeHint        = 22, // Work group size hint
-    KeyVecTypeHint              = 23, // Vector type hint
-    KeyKernelIndex              = 24, // Kernel index for device enqueue
-    KeyMinWavesPerSIMD          = 25, // Minimum number of waves per SIMD
-    KeyMaxWavesPerSIMD          = 26, // Maximum number of waves per SIMD
-    KeyFlatWorkGroupSizeLimits  = 27, // Flat work group size limits
-    KeyMaxWorkGroupSize         = 28, // Maximum work group size
-    KeyNoPartialWorkGroups      = 29, // No partial work groups
-    KeyPrintfInfo               = 30, // Prinf function call information
-    KeyArgActualAcc             = 31, // The actual kernel argument access qualifier
-    KeyArgPointeeAlign          = 32, // Alignment of pointee type
-  };
-
-  enum Language : uint8_t {
-    OpenCL_C      = 0,
-    HCC           = 1,
-    OpenMP        = 2,
-    OpenCL_CPP    = 3,
-};
-
-  enum LanguageVersion : uint16_t {
-    V100          = 100,
-    V110          = 110,
-    V120          = 120,
-    V200          = 200,
-    V210          = 210,
+  // Names of keys for runtime metadata.
+  namespace KeyName {
+    const char MDVersion[]                = "amd.MDVersion";            // Runtime metadata version
+    const char Language[]                 = "amd.Language";             // Language
+    const char LanguageVersion[]          = "amd.LanguageVersion";      // Language version
+    const char Kernels[]                  = "amd.Kernels";              // Kernels
+    const char KernelName[]               = "amd.KernelName";           // Kernel name
+    const char Args[]                     = "amd.Args";                 // Kernel arguments
+    const char ArgSize[]                  = "amd.ArgSize";              // Kernel arg size
+    const char ArgAlign[]                 = "amd.ArgAlign";             // Kernel arg alignment
+    const char ArgTypeName[]              = "amd.ArgTypeName";          // Kernel arg type name
+    const char ArgName[]                  = "amd.ArgName";              // Kernel arg name
+    const char ArgKind[]                  = "amd.ArgKind";              // Kernel argument kind
+    const char ArgValueType[]             = "amd.ArgValueType";         // Kernel argument value type
+    const char ArgAddrQual[]              = "amd.ArgAddrQual";          // Kernel argument address qualifier
+    const char ArgAccQual[]               = "amd.ArgAccQual";           // Kernel argument access qualifier
+    const char ArgIsConst[]               = "amd.ArgIsConst";           // Kernel argument is const qualified
+    const char ArgIsRestrict[]            = "amd.ArgIsRestrict";        // Kernel argument is restrict qualified
+    const char ArgIsVolatile[]            = "amd.ArgIsVolatile";        // Kernel argument is volatile qualified
+    const char ArgIsPipe[]                = "amd.ArgIsPipe";            // Kernel argument is pipe qualified
+    const char ReqdWorkGroupSize[]        = "amd.ReqdWorkGroupSize";    // Required work group size
+    const char WorkGroupSizeHint[]        = "amd.WorkGroupSizeHint";    // Work group size hint
+    const char VecTypeHint[]              = "amd.VecTypeHint";          // Vector type hint
+    const char KernelIndex[]              = "amd.KernelIndex";          // Kernel index for device enqueue
+    const char NoPartialWorkGroups[]      = "amd.NoPartialWorkGroups";  // No partial work groups
+    const char PrintfInfo[]               = "amd.PrintfInfo";           // Printf function call information
+    const char ArgActualAcc[]             = "amd.ArgActualAcc";         // The actual kernel argument access qualifier
+    const char ArgPointeeAlign[]          = "amd.ArgPointeeAlign";      // Alignment of pointee type
   };
 
   namespace KernelArg {
@@ -130,8 +107,9 @@ namespace RuntimeMD {
       F64     = 11,
     };
 
+    // Avoid using 'None' since it conflicts with a macro in X11 header file.
     enum AccessQualifer : uint8_t {
-      None       = 0,
+      AccNone    = 0,
       ReadOnly   = 1,
       WriteOnly  = 2,
       ReadWrite  = 3,
@@ -146,6 +124,69 @@ namespace RuntimeMD {
       Region     = 5,
     };
   } // namespace KernelArg
+
+  // Invalid values are used to indicate an optional key should not be emitted.
+  const uint8_t INVALID_ADDR_QUAL     = 0xff;
+  const uint8_t INVALID_ACC_QUAL      = 0xff;
+  const uint32_t INVALID_KERNEL_INDEX = ~0U;
+
+  namespace KernelArg {
+    // In-memory representation of kernel argument information.
+    struct Metadata {
+      uint32_t Size;
+      uint32_t Align;
+      uint32_t PointeeAlign;
+      uint8_t Kind;
+      uint16_t ValueType;
+      std::string TypeName;
+      std::string Name;
+      uint8_t AddrQual;
+      uint8_t AccQual;
+      uint8_t IsVolatile;
+      uint8_t IsConst;
+      uint8_t IsRestrict;
+      uint8_t IsPipe;
+      Metadata() : Size(0), Align(0), PointeeAlign(0), Kind(0), ValueType(0),
+          AddrQual(INVALID_ADDR_QUAL), AccQual(INVALID_ACC_QUAL), IsVolatile(0),
+          IsConst(0), IsRestrict(0), IsPipe(0) {}
+    };
+  }
+
+  namespace Kernel {
+    // In-memory representation of kernel information.
+    struct Metadata {
+      std::string Name;
+      std::string Language;
+      std::vector<uint8_t> LanguageVersion;
+      std::vector<uint32_t> ReqdWorkGroupSize;
+      std::vector<uint32_t> WorkGroupSizeHint;
+      std::string VecTypeHint;
+      uint32_t KernelIndex;
+      uint8_t NoPartialWorkGroups;
+      std::vector<KernelArg::Metadata> Args;
+      Metadata() : KernelIndex(INVALID_KERNEL_INDEX), NoPartialWorkGroups(0) {}
+    };
+  }
+
+  namespace Program {
+    // In-memory representation of program information.
+    struct Metadata {
+      std::vector<uint8_t> MDVersionSeq;
+      std::vector<std::string> PrintfInfo;
+      std::vector<Kernel::Metadata> Kernels;
+
+      explicit Metadata(){}
+
+      // Construct from a YAML string.
+      explicit Metadata(const std::string &YAML);
+
+      // Convert to YAML string.
+      std::string toYAML();
+
+      // Convert from YAML string.
+      static Metadata fromYAML(const std::string &S);
+    };
+  }
 } // namespace RuntimeMD
 } // namespace AMDGPU
 

Added: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp?rev=289674&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp Wed Dec 14 11:16:52 2016
@@ -0,0 +1,408 @@
+//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Generates AMDGPU runtime metadata for YAML mapping.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPURuntimeMetadata.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+#include "AMDGPURuntimeMD.h"
+
+using namespace llvm;
+using namespace ::AMDGPU::RuntimeMD;
+
+static cl::opt<bool>
+DumpRuntimeMD("amdgpu-dump-rtmd",
+              cl::desc("Dump AMDGPU runtime metadata"));
+
+static cl::opt<bool>
+CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
+                     cl::desc("Check AMDGPU runtime metadata YAML parser"));
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<KernelArg::Metadata> {
+  static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
+    YamlIO.mapRequired(KeyName::ArgSize, A.Size);
+    YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
+    YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
+    YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
+    YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
+    YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
+    YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
+    YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
+    YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
+    YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
+    YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
+    YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
+    YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
+  }
+  static const bool flow = true;
+};
+
+template <> struct MappingTraits<Kernel::Metadata> {
+  static void mapping(IO &YamlIO, Kernel::Metadata &K) {
+    YamlIO.mapRequired(KeyName::KernelName, K.Name);
+    YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
+    YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
+    YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
+    YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
+    YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
+    YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
+        INVALID_KERNEL_INDEX);
+    YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
+        uint8_t(0));
+    YamlIO.mapRequired(KeyName::Args, K.Args);
+  }
+  static const bool flow = true;
+};
+
+template <> struct MappingTraits<Program::Metadata> {
+  static void mapping(IO &YamlIO, Program::Metadata &Prog) {
+    YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
+    YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
+    YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
+  }
+  static const bool flow = true;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+// Get the three integer operands of MDNode \p Node as a vector of uint32_t.
+static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
+  assert(Node->getNumOperands() == 3);
+  std::vector<uint32_t> V;
+  for (const MDOperand &Op : Node->operands()) {
+    const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
+    V.push_back(CI->getZExtValue());
+  }
+  return V;
+}
+
+static std::string getOCLTypeName(Type *Ty, bool Signed) {
+  switch (Ty->getTypeID()) {
+  case Type::HalfTyID:
+    return "half";
+  case Type::FloatTyID:
+    return "float";
+  case Type::DoubleTyID:
+    return "double";
+  case Type::IntegerTyID: {
+    if (!Signed)
+      return (Twine('u') + getOCLTypeName(Ty, true)).str();
+    unsigned BW = Ty->getIntegerBitWidth();
+    switch (BW) {
+    case 8:
+      return "char";
+    case 16:
+      return "short";
+    case 32:
+      return "int";
+    case 64:
+      return "long";
+    default:
+      return (Twine('i') + Twine(BW)).str();
+    }
+  }
+  case Type::VectorTyID: {
+    VectorType *VecTy = cast<VectorType>(Ty);
+    Type *EleTy = VecTy->getElementType();
+    unsigned Size = VecTy->getVectorNumElements();
+    return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
+  }
+  default:
+    return "unknown";
+  }
+}
+
+static KernelArg::ValueType getRuntimeMDValueType(
+  Type *Ty, StringRef TypeName) {
+  switch (Ty->getTypeID()) {
+  case Type::HalfTyID:
+    return KernelArg::F16;
+  case Type::FloatTyID:
+    return KernelArg::F32;
+  case Type::DoubleTyID:
+    return KernelArg::F64;
+  case Type::IntegerTyID: {
+    bool Signed = !TypeName.startswith("u");
+    switch (Ty->getIntegerBitWidth()) {
+    case 8:
+      return Signed ? KernelArg::I8 : KernelArg::U8;
+    case 16:
+      return Signed ? KernelArg::I16 : KernelArg::U16;
+    case 32:
+      return Signed ? KernelArg::I32 : KernelArg::U32;
+    case 64:
+      return Signed ? KernelArg::I64 : KernelArg::U64;
+    default:
+      // Runtime does not recognize other integer types. Report as struct type.
+      return KernelArg::Struct;
+    }
+  }
+  case Type::VectorTyID:
+    return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
+  case Type::PointerTyID:
+    return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
+  default:
+    return KernelArg::Struct;
+  }
+}
+
+static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
+    AMDGPUAS::AddressSpaces A) {
+  switch (A) {
+  case AMDGPUAS::GLOBAL_ADDRESS:
+    return KernelArg::Global;
+  case AMDGPUAS::CONSTANT_ADDRESS:
+    return KernelArg::Constant;
+  case AMDGPUAS::LOCAL_ADDRESS:
+    return KernelArg::Local;
+  case AMDGPUAS::FLAT_ADDRESS:
+    return KernelArg::Generic;
+  case AMDGPUAS::REGION_ADDRESS:
+    return KernelArg::Region;
+  default:
+    return KernelArg::Private;
+  }
+}
+
+static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
+    Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
+    StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
+    StringRef AccQual = "") {
+
+  KernelArg::Metadata Arg;
+
+  // Set ArgSize and ArgAlign.
+  Arg.Size = DL.getTypeAllocSize(T);
+  Arg.Align = DL.getABITypeAlignment(T);
+  if (auto PT = dyn_cast<PointerType>(T)) {
+    auto ET = PT->getElementType();
+    if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
+      Arg.PointeeAlign = DL.getABITypeAlignment(ET);
+  }
+
+  // Set ArgTypeName.
+  Arg.TypeName = TypeName;
+
+  // Set ArgName.
+  Arg.Name = ArgName;
+
+  // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
+  SmallVector<StringRef, 1> SplitQ;
+  TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
+
+  for (StringRef KeyName : SplitQ) {
+    auto *P = StringSwitch<uint8_t *>(KeyName)
+      .Case("volatile", &Arg.IsVolatile)
+      .Case("restrict", &Arg.IsRestrict)
+      .Case("const",    &Arg.IsConst)
+      .Case("pipe",     &Arg.IsPipe)
+      .Default(nullptr);
+    if (P)
+      *P = 1;
+  }
+
+  // Set ArgKind.
+  Arg.Kind = Kind;
+
+  // Set ArgValueType.
+  Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
+
+  // Set ArgAccQual.
+  if (!AccQual.empty()) {
+    Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
+      .Case("read_only",  KernelArg::ReadOnly)
+      .Case("write_only", KernelArg::WriteOnly)
+      .Case("read_write", KernelArg::ReadWrite)
+      .Default(KernelArg::AccNone);
+  }
+
+  // Set ArgAddrQual.
+  if (auto *PT = dyn_cast<PointerType>(T)) {
+    Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
+        PT->getAddressSpace()));
+  }
+
+  return Arg;
+}
+
+// Build the runtime metadata of kernel \p F from its OpenCL IR metadata.
+static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
+  Kernel::Metadata Kernel;
+  Kernel.Name = F.getName();
+  auto &M = *F.getParent();
+
+  // Set Language and LanguageVersion.
+  if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
+    if (MD->getNumOperands() != 0) {
+      auto Node = MD->getOperand(0);
+      if (Node->getNumOperands() > 1) {
+        Kernel.Language = "OpenCL C";
+        uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
+                         ->getZExtValue();
+        uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
+                         ->getZExtValue();
+        Kernel.LanguageVersion.push_back(Major);
+        Kernel.LanguageVersion.push_back(Minor);
+      }
+    }
+  }
+
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  for (auto &Arg : F.args()) {
+    unsigned I = Arg.getArgNo();
+    Type *T = Arg.getType();
+    auto TypeName = cast<MDString>(F.getMetadata(
+        "kernel_arg_type")->getOperand(I))->getString();
+    auto BaseTypeName = cast<MDString>(F.getMetadata(
+        "kernel_arg_base_type")->getOperand(I))->getString();
+    StringRef ArgName;
+    if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
+      ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
+    auto TypeQual = cast<MDString>(F.getMetadata(
+        "kernel_arg_type_qual")->getOperand(I))->getString();
+    auto AccQual = cast<MDString>(F.getMetadata(
+        "kernel_arg_access_qual")->getOperand(I))->getString();
+    KernelArg::Kind Kind;
+    if (TypeQual.find("pipe") != StringRef::npos)
+      Kind = KernelArg::Pipe;
+    else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
+      .Case("sampler_t", KernelArg::Sampler)
+      .Case("queue_t",   KernelArg::Queue)
+      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
+             "image2d_t" , "image2d_array_t",  KernelArg::Image)
+      .Cases("image2d_depth_t", "image2d_array_depth_t",
+             "image2d_msaa_t", "image2d_array_msaa_t",
+             "image2d_msaa_depth_t",  KernelArg::Image)
+      .Cases("image2d_array_msaa_depth_t", "image3d_t", KernelArg::Image)
+      .Default(isa<PointerType>(T) ?
+                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
+                   KernelArg::DynamicSharedPointer :
+                   KernelArg::GlobalBuffer) :
+                   KernelArg::ByValue);
+    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
+        BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
+  }
+
+  // Emit hidden kernel arguments for OpenCL kernels.
+  if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
+    auto Int64T = Type::getInt64Ty(F.getContext());
+    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
+        KernelArg::HiddenGlobalOffsetX));
+    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
+        KernelArg::HiddenGlobalOffsetY));
+    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
+        KernelArg::HiddenGlobalOffsetZ));
+    if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
+      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
+          AMDGPUAS::GLOBAL_ADDRESS); // IR address space, not the MD enum.
+      Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
+          KernelArg::HiddenPrintfBuffer));
+    }
+  }
+
+  // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
+  if (auto RWGS = F.getMetadata("reqd_work_group_size"))
+    Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
+
+  if (auto WGSH = F.getMetadata("work_group_size_hint"))
+    Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
+
+  if (auto VTH = F.getMetadata("vec_type_hint"))
+    Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
+      VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
+      VTH->getOperand(1))->getZExtValue());
+
+  return Kernel;
+}
+
+Program::Metadata::Metadata(const std::string &YAML) {
+  yaml::Input Input(YAML);
+  Input >> *this;
+}
+
+std::string Program::Metadata::toYAML(void) {
+  std::string Text;
+  raw_string_ostream Stream(Text);
+  yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
+  Output << *this;
+  return Stream.str();
+}
+
+Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
+  return Program::Metadata(S);
+}
+
+// Check if the YAML string can be parsed.
+static void checkRuntimeMDYAMLString(const std::string &YAML) {
+  auto P = Program::Metadata::fromYAML(YAML);
+  auto S = P.toYAML();
+  llvm::errs() << "AMDGPU runtime metadata parser test "
+               << (YAML == S ? "passes" : "fails") << ".\n";
+  if (YAML != S) {
+    llvm::errs() << "First output: " << YAML << '\n'
+                 << "Second output: " << S << '\n';
+  }
+}
+
+std::string llvm::getRuntimeMDYAMLString(Module &M) {
+  Program::Metadata Prog;
+  Prog.MDVersionSeq.push_back(MDVersion);
+  Prog.MDVersionSeq.push_back(MDRevision);
+
+  // Set PrintfInfo.
+  if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
+    for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
+      auto Node = MD->getOperand(I);
+      if (Node->getNumOperands() > 0)
+        Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
+            ->getString());
+    }
+  }
+
+  // Set Kernels.
+  for (auto &F: M.functions()) {
+    if (!F.getMetadata("kernel_arg_type"))
+      continue;
+    Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
+  }
+
+  auto YAML = Prog.toYAML();
+
+  if (DumpRuntimeMD)
+    llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
+
+  if (CheckRuntimeMDParser)
+    checkRuntimeMDYAMLString(YAML);
+
+  return YAML;
+}

Added: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h?rev=289674&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h (added)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h Wed Dec 14 11:16:52 2016
@@ -0,0 +1,26 @@
+//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares functions for generating runtime metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+// Get runtime metadata as YAML string.
+std::string getRuntimeMDYAMLString(Module &M);
+
+}
+#endif

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Dec 14 11:16:52 2016
@@ -27,6 +27,7 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
+#include "AMDGPURuntimeMD.h"
 
 namespace llvm {
 #include "AMDGPUPTNote.h"
@@ -197,305 +198,7 @@ void AMDGPUTargetELFStreamer::EmitAMDGPU
   Symbol->setBinding(ELF::STB_GLOBAL);
 }
 
-void AMDGPUTargetStreamer::emitRuntimeMDIntValue(RuntimeMD::Key K, uint64_t V,
-                                                 unsigned Size) {
-  auto &S = getStreamer();
-  S.EmitIntValue(K, 1);
-  S.EmitIntValue(V, Size);
-}
-
-void AMDGPUTargetStreamer::emitRuntimeMDStringValue(RuntimeMD::Key K,
-                                                    StringRef R) {
-  auto &S = getStreamer();
-  S.EmitIntValue(K, 1);
-  S.EmitIntValue(R.size(), 4);
-  S.EmitBytes(R);
-}
-
-void AMDGPUTargetStreamer::emitRuntimeMDThreeIntValues(RuntimeMD::Key K,
-                                                       MDNode *Node,
-                                                       unsigned Size) {
-  assert(Node->getNumOperands() == 3);
-
-  auto &S = getStreamer();
-  S.EmitIntValue(K, 1);
-  for (const MDOperand &Op : Node->operands()) {
-    const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
-    S.EmitIntValue(CI->getZExtValue(), Size);
-  }
-}
-
-void AMDGPUTargetStreamer::emitStartOfRuntimeMetadata(const Module &M) {
-  emitRuntimeMDIntValue(RuntimeMD::KeyMDVersion,
-                        RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
-  if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
-    if (MD->getNumOperands() != 0) {
-      auto Node = MD->getOperand(0);
-      if (Node->getNumOperands() > 1) {
-        emitRuntimeMDIntValue(RuntimeMD::KeyLanguage,
-                              RuntimeMD::OpenCL_C, 1);
-        uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
-                         ->getZExtValue();
-        uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
-                         ->getZExtValue();
-        emitRuntimeMDIntValue(RuntimeMD::KeyLanguageVersion,
-                              Major * 100 + Minor * 10, 2);
-      }
-    }
-  }
-
-  if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
-    for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
-      auto Node = MD->getOperand(I);
-      if (Node->getNumOperands() > 0)
-        emitRuntimeMDStringValue(RuntimeMD::KeyPrintfInfo,
-            cast<MDString>(Node->getOperand(0))->getString());
-    }
-  }
-}
-
-static std::string getOCLTypeName(Type *Ty, bool Signed) {
-  switch (Ty->getTypeID()) {
-  case Type::HalfTyID:
-    return "half";
-  case Type::FloatTyID:
-    return "float";
-  case Type::DoubleTyID:
-    return "double";
-  case Type::IntegerTyID: {
-    if (!Signed)
-      return (Twine('u') + getOCLTypeName(Ty, true)).str();
-    unsigned BW = Ty->getIntegerBitWidth();
-    switch (BW) {
-    case 8:
-      return "char";
-    case 16:
-      return "short";
-    case 32:
-      return "int";
-    case 64:
-      return "long";
-    default:
-      return (Twine('i') + Twine(BW)).str();
-    }
-  }
-  case Type::VectorTyID: {
-    VectorType *VecTy = cast<VectorType>(Ty);
-    Type *EleTy = VecTy->getElementType();
-    unsigned Size = VecTy->getVectorNumElements();
-    return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
-  }
-  default:
-    return "unknown";
-  }
-}
-
-static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
-  Type *Ty, StringRef TypeName) {
-  switch (Ty->getTypeID()) {
-  case Type::HalfTyID:
-    return RuntimeMD::KernelArg::F16;
-  case Type::FloatTyID:
-    return RuntimeMD::KernelArg::F32;
-  case Type::DoubleTyID:
-    return RuntimeMD::KernelArg::F64;
-  case Type::IntegerTyID: {
-    bool Signed = !TypeName.startswith("u");
-    switch (Ty->getIntegerBitWidth()) {
-    case 8:
-      return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
-    case 16:
-      return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
-    case 32:
-      return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
-    case 64:
-      return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
-    default:
-      // Runtime does not recognize other integer types. Report as struct type.
-      return RuntimeMD::KernelArg::Struct;
-    }
-  }
-  case Type::VectorTyID:
-    return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
-  case Type::PointerTyID:
-    return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
-  default:
-    return RuntimeMD::KernelArg::Struct;
-  }
-}
-
-static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
-    AMDGPUAS::AddressSpaces A) {
-  switch (A) {
-  case AMDGPUAS::GLOBAL_ADDRESS:
-    return RuntimeMD::KernelArg::Global;
-  case AMDGPUAS::CONSTANT_ADDRESS:
-    return RuntimeMD::KernelArg::Constant;
-  case AMDGPUAS::LOCAL_ADDRESS:
-    return RuntimeMD::KernelArg::Local;
-  case AMDGPUAS::FLAT_ADDRESS:
-    return RuntimeMD::KernelArg::Generic;
-  case AMDGPUAS::REGION_ADDRESS:
-    return RuntimeMD::KernelArg::Region;
-  default:
-    return RuntimeMD::KernelArg::Private;
-  }
-}
-
-void AMDGPUTargetStreamer::emitRuntimeMetadataForKernelArg(const DataLayout &DL,
-    Type *T, RuntimeMD::KernelArg::Kind Kind,
-    StringRef BaseTypeName, StringRef TypeName,
-    StringRef ArgName, StringRef TypeQual, StringRef AccQual) {
-  auto &S = getStreamer();
-
-  // Emit KeyArgBegin.
-  S.EmitIntValue(RuntimeMD::KeyArgBegin, 1);
-
-  // Emit KeyArgSize and KeyArgAlign.
-  emitRuntimeMDIntValue(RuntimeMD::KeyArgSize,
-                        DL.getTypeAllocSize(T), 4);
-  emitRuntimeMDIntValue(RuntimeMD::KeyArgAlign,
-                        DL.getABITypeAlignment(T), 4);
-  if (auto PT = dyn_cast<PointerType>(T)) {
-    auto ET = PT->getElementType();
-    if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
-      emitRuntimeMDIntValue(RuntimeMD::KeyArgPointeeAlign,
-                            DL.getABITypeAlignment(ET), 4);
-  }
-
-  // Emit KeyArgTypeName.
-  if (!TypeName.empty())
-    emitRuntimeMDStringValue(RuntimeMD::KeyArgTypeName, TypeName);
-
-  // Emit KeyArgName.
-  if (!ArgName.empty())
-    emitRuntimeMDStringValue(RuntimeMD::KeyArgName, ArgName);
-
-  // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
-  SmallVector<StringRef, 1> SplitQ;
-  TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
-
-  for (StringRef KeyName : SplitQ) {
-    auto Key = StringSwitch<RuntimeMD::Key>(KeyName)
-      .Case("volatile", RuntimeMD::KeyArgIsVolatile)
-      .Case("restrict", RuntimeMD::KeyArgIsRestrict)
-      .Case("const",    RuntimeMD::KeyArgIsConst)
-      .Case("pipe",     RuntimeMD::KeyArgIsPipe)
-      .Default(RuntimeMD::KeyNull);
-    S.EmitIntValue(Key, 1);
-  }
-
-  // Emit KeyArgKind.
-  emitRuntimeMDIntValue(RuntimeMD::KeyArgKind, Kind, 1);
-
-  // Emit KeyArgValueType.
-  emitRuntimeMDIntValue(RuntimeMD::KeyArgValueType,
-                        getRuntimeMDValueType(T, BaseTypeName), 2);
-
-  // Emit KeyArgAccQual.
-  if (!AccQual.empty()) {
-    auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
-      .Case("read_only",  RuntimeMD::KernelArg::ReadOnly)
-      .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
-      .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
-      .Default(RuntimeMD::KernelArg::None);
-    emitRuntimeMDIntValue(RuntimeMD::KeyArgAccQual, AQ, 1);
-  }
-
-  // Emit KeyArgAddrQual.
-  if (auto *PT = dyn_cast<PointerType>(T))
-    emitRuntimeMDIntValue(RuntimeMD::KeyArgAddrQual,
-        getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
-            PT->getAddressSpace())), 1);
-
-  // Emit KeyArgEnd
-  S.EmitIntValue(RuntimeMD::KeyArgEnd, 1);
-}
-
-void AMDGPUTargetStreamer::emitRuntimeMetadata(const Function &F) {
-  if (!F.getMetadata("kernel_arg_type"))
-    return;
-  auto &S = getStreamer();
-  S.EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
-  emitRuntimeMDStringValue(RuntimeMD::KeyKernelName, F.getName());
-
-  const DataLayout &DL = F.getParent()->getDataLayout();
-  for (auto &Arg : F.args()) {
-    unsigned I = Arg.getArgNo();
-    Type *T = Arg.getType();
-    auto TypeName = dyn_cast<MDString>(F.getMetadata(
-        "kernel_arg_type")->getOperand(I))->getString();
-    auto BaseTypeName = cast<MDString>(F.getMetadata(
-        "kernel_arg_base_type")->getOperand(I))->getString();
-    StringRef ArgName;
-    if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
-      ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
-    auto TypeQual = cast<MDString>(F.getMetadata(
-        "kernel_arg_type_qual")->getOperand(I))->getString();
-    auto AccQual = cast<MDString>(F.getMetadata(
-        "kernel_arg_access_qual")->getOperand(I))->getString();
-    RuntimeMD::KernelArg::Kind Kind;
-    if (TypeQual.find("pipe") != StringRef::npos)
-      Kind = RuntimeMD::KernelArg::Pipe;
-    else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName)
-      .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
-      .Case("queue_t",   RuntimeMD::KernelArg::Queue)
-      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
-             "image2d_t" , "image2d_array_t",  RuntimeMD::KernelArg::Image)
-      .Cases("image2d_depth_t", "image2d_array_depth_t",
-             "image2d_msaa_t", "image2d_array_msaa_t",
-             "image2d_msaa_depth_t",  RuntimeMD::KernelArg::Image)
-      .Cases("image2d_array_msaa_depth_t", "image3d_t",
-             RuntimeMD::KernelArg::Image)
-      .Default(isa<PointerType>(T) ?
-                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
-                   RuntimeMD::KernelArg::DynamicSharedPointer :
-                   RuntimeMD::KernelArg::GlobalBuffer) :
-                   RuntimeMD::KernelArg::ByValue);
-    emitRuntimeMetadataForKernelArg(DL, T,
-        Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual);
-  }
-
-  // Emit hidden kernel arguments for OpenCL kernels.
-  if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
-    auto Int64T = Type::getInt64Ty(F.getContext());
-    emitRuntimeMetadataForKernelArg(DL, Int64T,
-                                    RuntimeMD::KernelArg::HiddenGlobalOffsetX);
-    emitRuntimeMetadataForKernelArg(DL, Int64T,
-                                    RuntimeMD::KernelArg::HiddenGlobalOffsetY);
-    emitRuntimeMetadataForKernelArg(DL, Int64T,
-                                    RuntimeMD::KernelArg::HiddenGlobalOffsetZ);
-    if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
-      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
-          RuntimeMD::KernelArg::Global);
-      emitRuntimeMetadataForKernelArg(DL, Int8PtrT,
-                                      RuntimeMD::KernelArg::HiddenPrintfBuffer);
-    }
-  }
-
-  // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
-  if (auto RWGS = F.getMetadata("reqd_work_group_size")) {
-    emitRuntimeMDThreeIntValues(RuntimeMD::KeyReqdWorkGroupSize,
-                                RWGS, 4);
-  }
-
-  if (auto WGSH = F.getMetadata("work_group_size_hint")) {
-    emitRuntimeMDThreeIntValues(RuntimeMD::KeyWorkGroupSizeHint,
-                                WGSH, 4);
-  }
-
-  if (auto VTH = F.getMetadata("vec_type_hint")) {
-    auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
-      VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
-      VTH->getOperand(1))->getZExtValue());
-    emitRuntimeMDStringValue(RuntimeMD::KeyVecTypeHint, TypeName);
-  }
-
-  // Emit KeyKernelEnd
-  S.EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
-}
-
-void AMDGPUTargetStreamer::emitRuntimeMetadataAsNoteElement(Module &M) {
+void AMDGPUTargetELFStreamer::emitRuntimeMetadata(Module &M) {
   auto &S = getStreamer();
   auto &Context = S.getContext();
 
@@ -520,17 +223,10 @@ void AMDGPUTargetStreamer::emitRuntimeMe
   S.EmitValue(DescSZ, 4);                                     // descz
   S.EmitIntValue(PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA, 4); // type
   S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ));          // name
-  S.EmitValueToAlignment(4);                                  // padding 0
+  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
   S.EmitLabel(DescBegin);
-  emitRuntimeMetadata(M);                                     // desc
+  S.EmitBytes(getRuntimeMDYAMLString(M));                               // desc
   S.EmitLabel(DescEnd);
-  S.EmitValueToAlignment(4);                                  // padding 0
+  S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
   S.PopSection();
 }
-
-void AMDGPUTargetStreamer::emitRuntimeMetadata(Module &M) {
-  emitStartOfRuntimeMetadata(M);
-  for (auto &F : M.functions())
-    emitRuntimeMetadata(F);
-}
-

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Dec 14 11:16:52 2016
@@ -43,35 +43,7 @@ public:
 
   virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
 
-  /// Emit runtime metadata as a note element.
-  void emitRuntimeMetadataAsNoteElement(Module &M);
-
-private:
-  void emitRuntimeMetadata(Module &M);
-  void emitStartOfRuntimeMetadata(const Module &M);
-
-  /// Emit runtime metadata for a kernel function.
-  void emitRuntimeMetadata(const Function &F);
-
-  // Emit runtime metadata for a kernel argument.
-  void emitRuntimeMetadataForKernelArg(const DataLayout &DL,
-      Type *T, AMDGPU::RuntimeMD::KernelArg::Kind Kind,
-      StringRef BaseTypeName = "", StringRef TypeName = "",
-      StringRef ArgName = "", StringRef TypeQual = "",
-      StringRef AccQual = "");
-
-  /// Emit a key and an integer value for runtime metadata.
-  void emitRuntimeMDIntValue(AMDGPU::RuntimeMD::Key K,
-      uint64_t V, unsigned Size);
-
-  /// Emit a key and a string value for runtime metadata.
-  void emitRuntimeMDStringValue(AMDGPU::RuntimeMD::Key K,
-      StringRef S);
-
-  /// Emit a key and three integer values for runtime metadata.
-  /// The three integer values are obtained from MDNode \p Node;
-  void emitRuntimeMDThreeIntValues(AMDGPU::RuntimeMD::Key K, MDNode *Node,
-                                   unsigned Size);
+  virtual void emitRuntimeMetadata(Module &M) = 0;
 };
 
 class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -92,6 +64,8 @@ public:
   void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
+
+  void emitRuntimeMetadata(Module &M) override {}
 };
 
 class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@@ -116,6 +90,8 @@ public:
   void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
+
+  void emitRuntimeMetadata(Module &M) override;
 };
 
 }

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt Wed Dec 14 11:16:52 2016
@@ -6,6 +6,7 @@ add_llvm_library(LLVMAMDGPUDesc
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
+  AMDGPURuntimeMD.cpp
   AMDGPUTargetStreamer.cpp
   R600MCCodeEmitter.cpp
   SIMCCodeEmitter.cpp

Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll Wed Dec 14 11:16:52 2016
@@ -1,10 +1,6 @@
-; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
 ; check llc does not crash for invalid opencl version metadata
 
-; CHECK: .section        .note,#alloc
-; CHECK-NEXT: .long   4
-; CHECK-NEXT: .long   {{.+}}
-; CHECK-NEXT: .long   7
-; CHECK-NEXT: .asciz  "AMD"
+; CHECK: { amd.MDVersion: [ 2, 0 ] }
 
 !opencl.ocl.version = !{}

Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll Wed Dec 14 11:16:52 2016
@@ -1,11 +1,7 @@
-; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
 ; check llc does not crash for invalid opencl version metadata
 
-; CHECK: .section        .note,#alloc
-; CHECK-NEXT: .long   4
-; CHECK-NEXT: .long   {{.+}}
-; CHECK-NEXT: .long   7
-; CHECK-NEXT: .asciz  "AMD"
+; CHECK: { amd.MDVersion: [ 2, 0 ] }
 
 !opencl.ocl.version = !{!0}
 !0 = !{}

Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll Wed Dec 14 11:16:52 2016
@@ -1,11 +1,7 @@
-; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
 ; check llc does not crash for invalid opencl version metadata
 
-; CHECK: .section        .note,#alloc
-; CHECK-NEXT: .long   4
-; CHECK-NEXT: .long   {{.+}}
-; CHECK-NEXT: .long   7
-; CHECK-NEXT: .asciz  "AMD"
+; CHECK: { amd.MDVersion: [ 2, 0 ] }
 
 !opencl.ocl.version = !{!0}
 !0 = !{i32 1}

Modified: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Wed Dec 14 11:16:52 2016
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s
 
 %struct.A = type { i8, float }
 %opencl.image1d_t = type opaque
@@ -9,2414 +10,336 @@
 %struct.B = type { i32 addrspace(1)*}
 %opencl.clk_event_t = type opaque
 
-; CHECK: .section        .note,#alloc
-; CHECK-NEXT: .long   4
-; CHECK-NEXT: .long   [[Ltmp1:.+]]-[[Ltmp0:.+]]
-; CHECK-NEXT: .long   7
-; CHECK-NEXT: .asciz  "AMD"
-; CHECK-NEXT: .p2align        2
-; CHECK-NEXT: [[Ltmp0]]:
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .short	256
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .short	200
-; CHECK-NEXT: .byte	30
-; CHECK-NEXT: .long	10
-; CHECK-NEXT: .ascii	"1:1:4:%d\\n"
-; CHECK-NEXT: .byte	30
-; CHECK-NEXT: .long	10
-; CHECK-NEXT: .ascii	"2:1:8:%g\\n"
-
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"test_char"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	1
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	1
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .ascii	"char"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
+; CHECK: ---
+; CHECK-NEXT: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: 
 
+; CHECK-NEXT:   - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	12
-; CHECK-NEXT: .ascii	"test_ushort2"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"ushort2"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	4
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"test_int3"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .ascii	"int3"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_int3, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	11
-; CHECK-NEXT: .ascii	"test_ulong4"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	32
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	32
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	6
-; CHECK-NEXT: .ascii	"ulong4"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	10
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_ulong4, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 32, amd.ArgAlign: 32, amd.ArgKind: 0, amd.ArgValueType: 10, amd.ArgTypeName: ulong4, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	10
-; CHECK-NEXT: .ascii	"test_half8"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"half8"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	5
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_half8, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 5, amd.ArgTypeName: half8, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	12
-; CHECK-NEXT: .ascii	"test_float16"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	64
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	64
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"float16"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	8
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_float16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 64, amd.ArgAlign: 64, amd.ArgKind: 0, amd.ArgValueType: 8, amd.ArgTypeName: float16, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	13
-; CHECK-NEXT: .ascii	"test_double16"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	128
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	128
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .ascii	"double16"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	11
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_double16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 128, amd.ArgAlign: 128, amd.ArgKind: 0, amd.ArgValueType: 11, amd.ArgTypeName: double16, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	12
-; CHECK-NEXT: .ascii	"test_pointer"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_pointer, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	10
-; CHECK-NEXT: .ascii	"test_image"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"image2d_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_image, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	12
-; CHECK-NEXT: .ascii	"test_sampler"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"sampler_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_sampler, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 3, amd.ArgValueType: 6, amd.ArgTypeName: sampler_t, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	10
-; CHECK-NEXT: .ascii	"test_queue"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"queue_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_queue, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 6, amd.ArgValueType: 0, amd.ArgTypeName: queue_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	11
-; CHECK-NEXT: .ascii	"test_struct"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .ascii	"struct A"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_struct, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct A, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"test_i128"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .ascii	"i128"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_i128, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 8, amd.ArgKind: 0, amd.ArgValueType: 0, amd.ArgTypeName: i128, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	14
-; CHECK-NEXT: .ascii	"test_multi_arg"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	6
-; CHECK-NEXT: .ascii	"short2"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	3
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"char3"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_multi_arg, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 3, amd.ArgTypeName: short2, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	15
-; CHECK-NEXT: .ascii	"test_addr_space"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_addr_space, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 2, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	14
-; CHECK-NEXT: .ascii	"test_type_qual"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	19
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	17
-; CHECK-NEXT: .byte	18
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
-; CHECK-NEXT: .byte	20
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	5
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_type_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsVolatile: 1 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsConst: 1, amd.ArgIsRestrict: 1 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 5, amd.ArgValueType: 0, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsPipe: 1 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .ascii	"test_access_qual"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"image1d_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"image2d_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	9
-; CHECK-NEXT: .ascii	"image3d_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_access_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image1d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 1 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 2 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image3d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 3 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	23
-; CHECK-NEXT: .ascii	"test_vec_type_hint_half"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .ascii	"half"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_half, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: half, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	24
-; CHECK-NEXT: .ascii	"test_vec_type_hint_float"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"float"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_float, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: float, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	25
-; CHECK-NEXT: .ascii	"test_vec_type_hint_double"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	6
-; CHECK-NEXT: .ascii	"double"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_double, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: double, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	23
-; CHECK-NEXT: .ascii	"test_vec_type_hint_char"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .ascii	"char"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: char, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	24
-; CHECK-NEXT: .ascii	"test_vec_type_hint_short"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"short"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_short, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: short, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	23
-; CHECK-NEXT: .ascii	"test_vec_type_hint_long"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .ascii	"long"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_long, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: long, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	26
-; CHECK-NEXT: .ascii	"test_vec_type_hint_unknown"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"unknown"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_unknown, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: unknown, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	27
-; CHECK-NEXT: .ascii	"test_reqd_wgs_vec_type_hint"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	21
-; CHECK-NEXT: .long	1
-; CHECK-NEXT: .long	2
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_reqd_wgs_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.ReqdWorkGroupSize: [ 1, 2, 4 ], amd.VecTypeHint: int, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	27
-; CHECK-NEXT: .ascii	"test_wgs_hint_vec_type_hint"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	22
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .long	32
-; CHECK-NEXT: .byte	23
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"uint4"
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_wgs_hint_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.WorkGroupSizeHint: [ 8, 16, 32 ], amd.VecTypeHint: uint4, amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	19
-; CHECK-NEXT: .ascii	"test_arg_ptr_to_ptr"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	6
-; CHECK-NEXT: .ascii	"int **"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_arg_ptr_to_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int **', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
   ret void
 }
-
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	28
-; CHECK-NEXT: .ascii	"test_arg_struct_contains_ptr"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .ascii	"struct B"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_arg_struct_contains_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct B, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	22
-; CHECK-NEXT: .ascii	"test_arg_vector_of_ptr"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	47
-; CHECK-NEXT: .ascii	"global int* __attribute__((ext_vector_type(2)))"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_arg_vector_of_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: 'global int* __attribute__((ext_vector_type(2)))', amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	29
-; CHECK-NEXT: .ascii	"test_arg_unknown_builtin_type"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	11
-; CHECK-NEXT: .ascii	"clk_event_t"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	0
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
 
+; CHECK-NEXT:   - { amd.KernelName: test_arg_unknown_builtin_type, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: clk_event_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
 define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK-NEXT: .byte	4
-; CHECK-NEXT: .byte	6
-; CHECK-NEXT: .long	18
-; CHECK-NEXT: .ascii	"test_pointee_align"
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	6
-; CHECK-NEXT: .ascii	"long *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	1
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	6
-; CHECK-NEXT: .ascii	"char *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	2
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"char2 *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"char3 *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"char4 *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	7
-; CHECK-NEXT: .ascii	"char8 *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	16
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .ascii	"char16 *"
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	9
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	7
-; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	1
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
-; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT:   - { amd.KernelName: test_pointee_align, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 9, amd.ArgTypeName: 'long *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 1, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 2, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char2 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char3 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char4 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 8, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char8 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 16, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char16 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
+; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }
 define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
   ret void
 }
-; CHECK-NEXT: [[Ltmp1]]:
-; CHECK-NEXT: .p2align        2
+
+; CHECK-NEXT:...
+
+; PARSER: AMDGPU runtime metadata parser test passes.
 
 !llvm.printf.fmts = !{!100, !101}
 

Modified: llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/ELFDumper.cpp?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/ELFDumper.cpp (original)
+++ llvm/trunk/tools/llvm-readobj/ELFDumper.cpp Wed Dec 14 11:16:52 2016
@@ -129,6 +129,8 @@ public:
   void printMipsReginfo() override;
   void printMipsOptions() override;
 
+  void printAMDGPURuntimeMD() override;
+
   void printStackMap() const override;
 
   void printHashHistogram() override;
@@ -2339,6 +2341,63 @@ template <class ELFT> void ELFDumper<ELF
   }
 }
 
+/// Prints the AMDGPU runtime metadata stored in the ".note" section.
+///
+/// Walks the note records in the section and, for every record whose name is
+/// "AMD" and whose type is RuntimeMDNoteType, prints the descriptor bytes as a
+/// string. A record that would extend past the end of the section stops the
+/// walk with a diagnostic instead of reading out of bounds.
+template <class ELFT> void ELFDumper<ELFT>::printAMDGPURuntimeMD() {
+  const Elf_Shdr *Shdr = findSectionByName(*Obj, ".note");
+  if (!Shdr) {
+    W.startLine() << "There is no .note section in the file.\n";
+    return;
+  }
+  ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
+
+  const uint32_t RuntimeMDNoteType = 7;
+  // Each record starts with three 32-bit words: name size, desc size, type.
+  const uint64_t HeaderSize = 3 * sizeof(uint32_t);
+  const uint8_t *Data = Sec.data();
+  const uint64_t Size = Sec.size();
+
+  // Offset never exceeds Size, so the subtraction cannot wrap. An empty
+  // section simply skips the loop (the old code indexed Sec[0] regardless).
+  uint64_t Offset = 0;
+  while (Size - Offset >= HeaderSize) {
+    // NOTE(review): the words are read in host byte order, as before; confirm
+    // whether a target-endian read is wanted here.
+    auto Header = reinterpret_cast<const uint32_t *>(Data + Offset);
+    uint32_t NameSZ = Header[0];
+    uint32_t DescSZ = Header[1];
+    uint32_t Type = Header[2];
+    Offset += HeaderSize;
+
+    // The name is padded to a 4-byte boundary; the descriptor follows it.
+    const uint64_t DescOffset = Offset + alignTo<4>(NameSZ);
+    if (DescOffset > Size || DescSZ > Size - DescOffset) {
+      W.startLine() << "Malformed note record in .note section.\n";
+      return;
+    }
+
+    // NameSZ counts the terminating NUL, which is dropped from the StringRef.
+    StringRef Name;
+    if (NameSZ)
+      Name = StringRef(reinterpret_cast<const char *>(Data + Offset),
+                       NameSZ - 1);
+
+    if (Name == "AMD" && Type == RuntimeMDNoteType) {
+      StringRef Desc(reinterpret_cast<const char *>(Data + DescOffset),
+                     DescSZ);
+      W.printString(Desc);
+    }
+
+    // Clamp so that an unpadded final descriptor ends the walk cleanly.
+    const uint64_t Next = DescOffset + alignTo<4>(DescSZ);
+    Offset = Next < Size ? Next : Size;
+  }
+}
+
 template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
   const Elf_Shdr *StackMapSection = nullptr;
   for (const auto &Sec : unwrapOrError(Obj->sections())) {

Modified: llvm/trunk/tools/llvm-readobj/ObjDumper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/ObjDumper.h?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/ObjDumper.h (original)
+++ llvm/trunk/tools/llvm-readobj/ObjDumper.h Wed Dec 14 11:16:52 2016
@@ -58,6 +58,9 @@ public:
   virtual void printMipsReginfo() { }
   virtual void printMipsOptions() { }
 
+  // Only implemented for AMDGPU ELF at this time.
+  virtual void printAMDGPURuntimeMD() {}
+
   // Only implemented for PE/COFF.
   virtual void printCOFFImports() { }
   virtual void printCOFFExports() { }

Modified: llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp?rev=289674&r1=289673&r2=289674&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp (original)
+++ llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp Wed Dec 14 11:16:52 2016
@@ -186,6 +186,10 @@ namespace opts {
   cl::opt<bool> MipsOptions("mips-options",
                             cl::desc("Display the MIPS .MIPS.options section"));
 
+  // -amdgpu-runtime-metadata
+  cl::opt<bool> AMDGPURuntimeMD("amdgpu-runtime-metadata",
+                                cl::desc("Display AMDGPU runtime metadata"));
+
   // -coff-imports
   cl::opt<bool>
   COFFImports("coff-imports", cl::desc("Display the PE/COFF import table"));
@@ -415,6 +419,9 @@ static void dumpObject(const ObjectFile
       if (opts::MipsOptions)
         Dumper->printMipsOptions();
     }
+    if (Obj->getArch() == llvm::Triple::amdgcn)
+      if (opts::AMDGPURuntimeMD)
+        Dumper->printAMDGPURuntimeMD();
     if (opts::SectionGroups)
       Dumper->printGroupSections();
     if (opts::HashHistogram)




More information about the llvm-commits mailing list