[llvm] r298552 - [AMDGPU] Restructure code object metadata creation

Konstantin Zhuravlyov via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 22 15:32:23 PDT 2017


Author: kzhuravl
Date: Wed Mar 22 17:32:22 2017
New Revision: 298552

URL: http://llvm.org/viewvc/llvm-project?rev=298552&view=rev
Log:
[AMDGPU] Restructure code object metadata creation
  - Rename runtime metadata -> code object metadata
  - Make metadata not flow
  - Switch enums to use ScalarEnumerationTraits
  - Clean up and move AMDGPUCodeObjectMetadata.h to AMDGPU/MCTargetDesc
  - Introduce in-memory representation for attributes
  - Code object metadata streamer
  - Create metadata for isa and printf during EmitStartOfAsmFile
  - Create metadata for kernel during EmitFunctionBodyStart
  - Finalize and emit metadata to .note during EmitEndOfAsmFile
  - Other minor improvements/bug fixes

Differential Revision: https://reviews.llvm.org/D29948


Added:
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
    llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
    llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll
    llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll
    llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll
    llvm/trunk/test/MC/AMDGPU/code-object-metadata-isa.s
    llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-args.s
    llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
    llvm/trunk/test/MC/AMDGPU/code-object-metadata-unknown-key.s
Removed:
    llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
    llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
    llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s
    llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s
    llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
    llvm/trunk/test/MC/AMDGPU/hsa.s
    llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
    llvm/trunk/tools/llvm-readobj/ObjDumper.h
    llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Mar 22 17:32:22 2017
@@ -99,23 +99,33 @@ StringRef AMDGPUAsmPrinter::getPassName(
   return "AMDGPU Assembly Printer";
 }
 
+const MCSubtargetInfo* AMDGPUAsmPrinter::getSTI() const {
+  return TM.getMCSubtargetInfo();
+}
+
+AMDGPUTargetStreamer& AMDGPUAsmPrinter::getTargetStreamer() const {
+  return static_cast<AMDGPUTargetStreamer&>(*OutStreamer->getTargetStreamer());
+}
+
 void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
   if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
     return;
 
-  AMDGPUTargetStreamer *TS =
-      static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+  AMDGPU::IsaInfo::IsaVersion ISA =
+      AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
 
-  TS->EmitDirectiveHSACodeObjectVersion(2, 1);
+  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(2, 1);
+  getTargetStreamer().EmitDirectiveHSACodeObjectISA(
+      ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+  getTargetStreamer().EmitStartOfCodeObjectMetadata(
+      getSTI()->getFeatureBits(), M);
+}
 
-  const MCSubtargetInfo *STI = TM.getMCSubtargetInfo();
-  AMDGPU::IsaInfo::IsaVersion ISA =
-      AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits());
-  TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
-                                    "AMD", "AMDGPU");
+void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
+  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+    return;
 
-  // Emit runtime metadata.
-  TS->EmitRuntimeMetadata(STI->getFeatureBits(), M);
+  getTargetStreamer().EmitEndOfCodeObjectMetadata(getSTI()->getFeatureBits());
 }
 
 bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -132,7 +142,6 @@ bool AMDGPUAsmPrinter::isBlockOnlyReacha
   return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
 }
 
-
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   SIProgramInfo KernelInfo;
@@ -140,17 +149,20 @@ void AMDGPUAsmPrinter::EmitFunctionBodyS
     getSIProgramInfo(KernelInfo, *MF);
     EmitAmdKernelCodeT(*MF, KernelInfo);
   }
+
+  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+    return;
+  getTargetStreamer().EmitKernelCodeObjectMetadata(*MF->getFunction());
 }
 
 void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
   if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
-    AMDGPUTargetStreamer *TS =
-        static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
     SmallString<128> SymbolName;
     getNameWithPrefix(SymbolName, MF->getFunction()),
-    TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
+    getTargetStreamer().EmitAMDGPUSymbolType(
+        SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
   }
 
   AsmPrinter::EmitFunctionEntryLabel();
@@ -806,11 +818,8 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
       KernelInfo.DebuggerPrivateSegmentBufferSGPR;
   }
 
-  AMDGPUTargetStreamer *TS =
-      static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
-
   OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
-  TS->EmitAMDKernelCodeT(header);
+  getTargetStreamer().EmitAMDKernelCodeT(header);
 }
 
 bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Wed Mar 22 17:32:22 2017
@@ -26,6 +26,7 @@
 
 namespace llvm {
 
+class AMDGPUTargetStreamer;
 class MCOperand;
 
 class AMDGPUAsmPrinter final : public AsmPrinter {
@@ -103,10 +104,14 @@ public:
   explicit AMDGPUAsmPrinter(TargetMachine &TM,
                             std::unique_ptr<MCStreamer> Streamer);
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
   StringRef getPassName() const override;
 
+  const MCSubtargetInfo* getSTI() const;
+
+  AMDGPUTargetStreamer& getTargetStreamer() const;
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
   /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
   /// pseudo lowering.
   bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
@@ -132,6 +137,8 @@ public:
 
   void EmitStartOfAsmFile(Module &M) override;
 
+  void EmitEndOfAsmFile(Module &M) override;
+
   bool isBlockOnlyReachableByFallthrough(
     const MachineBasicBlock *MBB) const override;
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPTNote.h Wed Mar 22 17:32:22 2017
@@ -33,9 +33,7 @@ enum NoteType{
     NT_AMDGPU_HSA_PRODUCER = 4,
     NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
     NT_AMDGPU_HSA_EXTENSION = 6,
-    NT_AMDGPU_HSA_RUNTIME_METADATA_V_1 = 7, // deprecated since 12/14/16.
-    NT_AMDGPU_HSA_RUNTIME_METADATA_V_2 = 8,
-    NT_AMDGPU_HSA_RUNTIME_METADATA = NT_AMDGPU_HSA_RUNTIME_METADATA_V_2,
+    NT_AMDGPU_HSA_CODE_OBJECT_METADATA = 10,
     NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
     NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
 };

Removed: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=298551&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (removed)
@@ -1,290 +0,0 @@
-//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Enums and structure types used by runtime metadata.
-///
-/// Runtime requests certain information (metadata) about kernels to be able
-/// to execute the kernels and answer the queries about the kernels.
-/// The metadata is represented as a note element in the .note ELF section of a
-/// binary (code object). The desc field of the note element is a YAML string
-/// consisting of key-value pairs. Each key is a string. Each value can be
-/// an integer, a string, or an YAML sequence. There are 3 levels of YAML maps.
-/// At the beginning of the YAML string is the module level YAML map. A
-/// kernel-level YAML map is in the amd.Kernels sequence. A
-/// kernel-argument-level map is in the amd.Args sequence.
-///
-/// The format should be kept backward compatible. New enum values and bit
-/// fields should be appended at the end. It is suggested to bump up the
-/// revision number whenever the format changes and document the change
-/// in the revision in this header.
-///
-//
-//===----------------------------------------------------------------------===//
-//
-#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-
-#include <cstdint>
-#include <vector>
-#include <string>
-
-namespace AMDGPU {
-namespace RuntimeMD {
-
-  // Version and revision of runtime metadata
-  const uint32_t MDVersion   = 2;
-  const uint32_t MDRevision  = 1;
-
-  // Name of keys for runtime metadata.
-  namespace KeyName {
-
-    // Runtime metadata version
-    const char MDVersion[] = "amd.MDVersion";
-
-    // Instruction set architecture information
-    const char IsaInfo[] = "amd.IsaInfo";
-    // Wavefront size
-    const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
-    // Local memory size in bytes
-    const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
-    // Number of execution units per compute unit
-    const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
-    // Maximum number of waves per execution unit
-    const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
-    // Maximum flat work group size
-    const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
-    // SGPR allocation granularity
-    const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
-    // Total number of SGPRs
-    const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
-    // Addressable number of SGPRs
-    const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
-    // VGPR allocation granularity
-    const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
-    // Total number of VGPRs
-    const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
-    // Addressable number of VGPRs
-    const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
-
-    // Language
-    const char Language[] = "amd.Language";
-    // Language version
-    const char LanguageVersion[] = "amd.LanguageVersion";
-
-    // Kernels
-    const char Kernels[] = "amd.Kernels";
-    // Kernel name
-    const char KernelName[] = "amd.KernelName";
-    // Kernel arguments
-    const char Args[] = "amd.Args";
-    // Kernel argument size in bytes
-    const char ArgSize[] = "amd.ArgSize";
-    // Kernel argument alignment
-    const char ArgAlign[] = "amd.ArgAlign";
-    // Kernel argument type name
-    const char ArgTypeName[] = "amd.ArgTypeName";
-    // Kernel argument name
-    const char ArgName[] = "amd.ArgName";
-    // Kernel argument kind
-    const char ArgKind[] = "amd.ArgKind";
-    // Kernel argument value type
-    const char ArgValueType[] = "amd.ArgValueType";
-    // Kernel argument address qualifier
-    const char ArgAddrQual[] = "amd.ArgAddrQual";
-    // Kernel argument access qualifier
-    const char ArgAccQual[] = "amd.ArgAccQual";
-    // Kernel argument is const qualified
-    const char ArgIsConst[] = "amd.ArgIsConst";
-    // Kernel argument is restrict qualified
-    const char ArgIsRestrict[] = "amd.ArgIsRestrict";
-    // Kernel argument is volatile qualified
-    const char ArgIsVolatile[] = "amd.ArgIsVolatile";
-    // Kernel argument is pipe qualified
-    const char ArgIsPipe[] = "amd.ArgIsPipe";
-    // Required work group size
-    const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
-    // Work group size hint
-    const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
-    // Vector type hint
-    const char VecTypeHint[] = "amd.VecTypeHint";
-    // Kernel index for device enqueue
-    const char KernelIndex[] = "amd.KernelIndex";
-    // No partial work groups
-    const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
-    // Prinf function call information
-    const char PrintfInfo[] = "amd.PrintfInfo";
-    // The actual kernel argument access qualifier
-    const char ArgActualAcc[] = "amd.ArgActualAcc";
-    // Alignment of pointee type
-    const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
-
-  } // end namespace KeyName
-
-  namespace KernelArg {
-
-    enum Kind : uint8_t {
-      ByValue                 = 0,
-      GlobalBuffer            = 1,
-      DynamicSharedPointer    = 2,
-      Sampler                 = 3,
-      Image                   = 4,
-      Pipe                    = 5,
-      Queue                   = 6,
-      HiddenGlobalOffsetX     = 7,
-      HiddenGlobalOffsetY     = 8,
-      HiddenGlobalOffsetZ     = 9,
-      HiddenNone              = 10,
-      HiddenPrintfBuffer      = 11,
-      HiddenDefaultQueue      = 12,
-      HiddenCompletionAction  = 13,
-    };
-
-    enum ValueType : uint16_t {
-      Struct  = 0,
-      I8      = 1,
-      U8      = 2,
-      I16     = 3,
-      U16     = 4,
-      F16     = 5,
-      I32     = 6,
-      U32     = 7,
-      F32     = 8,
-      I64     = 9,
-      U64     = 10,
-      F64     = 11,
-    };
-
-    // Avoid using 'None' since it conflicts with a macro in X11 header file.
-    enum AccessQualifer : uint8_t {
-      AccNone    = 0,
-      ReadOnly   = 1,
-      WriteOnly  = 2,
-      ReadWrite  = 3,
-    };
-
-    enum AddressSpaceQualifer : uint8_t {
-      Private    = 0,
-      Global     = 1,
-      Constant   = 2,
-      Local      = 3,
-      Generic    = 4,
-      Region     = 5,
-    };
-
-  } // end namespace KernelArg
-
-  // Invalid values are used to indicate an optional key should not be emitted.
-  const uint8_t INVALID_ADDR_QUAL     = 0xff;
-  const uint8_t INVALID_ACC_QUAL      = 0xff;
-  const uint32_t INVALID_KERNEL_INDEX = ~0U;
-
-  namespace KernelArg {
-
-    // In-memory representation of kernel argument information.
-    struct Metadata {
-      uint32_t Size = 0;
-      uint32_t Align = 0;
-      uint32_t PointeeAlign = 0;
-      uint8_t Kind = 0;
-      uint16_t ValueType = 0;
-      std::string TypeName;
-      std::string Name;
-      uint8_t AddrQual = INVALID_ADDR_QUAL;
-      uint8_t AccQual = INVALID_ACC_QUAL;
-      uint8_t IsVolatile = 0;
-      uint8_t IsConst = 0;
-      uint8_t IsRestrict = 0;
-      uint8_t IsPipe = 0;
-
-      Metadata() = default;
-    };
-
-  } // end namespace KernelArg
-
-  namespace Kernel {
-
-    // In-memory representation of kernel information.
-    struct Metadata {
-      std::string Name;
-      std::string Language;
-      std::vector<uint32_t> LanguageVersion;
-      std::vector<uint32_t> ReqdWorkGroupSize;
-      std::vector<uint32_t> WorkGroupSizeHint;
-      std::string VecTypeHint;
-      uint32_t KernelIndex = INVALID_KERNEL_INDEX;
-      uint8_t NoPartialWorkGroups = 0;
-      std::vector<KernelArg::Metadata> Args;
-
-      Metadata() = default;
-    };
-
-  } // end namespace Kernel
-
-  namespace IsaInfo {
-
-    /// \brief In-memory representation of instruction set architecture
-    /// information.
-    struct Metadata {
-      /// \brief Wavefront size.
-      unsigned WavefrontSize = 0;
-      /// \brief Local memory size in bytes.
-      unsigned LocalMemorySize = 0;
-      /// \brief Number of execution units per compute unit.
-      unsigned EUsPerCU = 0;
-      /// \brief Maximum number of waves per execution unit.
-      unsigned MaxWavesPerEU = 0;
-      /// \brief Maximum flat work group size.
-      unsigned MaxFlatWorkGroupSize = 0;
-      /// \brief SGPR allocation granularity.
-      unsigned SGPRAllocGranule = 0;
-      /// \brief Total number of SGPRs.
-      unsigned TotalNumSGPRs = 0;
-      /// \brief Addressable number of SGPRs.
-      unsigned AddressableNumSGPRs = 0;
-      /// \brief VGPR allocation granularity.
-      unsigned VGPRAllocGranule = 0;
-      /// \brief Total number of VGPRs.
-      unsigned TotalNumVGPRs = 0;
-      /// \brief Addressable number of VGPRs.
-      unsigned AddressableNumVGPRs = 0;
-
-      Metadata() = default;
-    };
-
-  } // end namespace IsaInfo
-
-  namespace Program {
-
-    // In-memory representation of program information.
-    struct Metadata {
-      std::vector<uint32_t> MDVersionSeq;
-      IsaInfo::Metadata IsaInfo;
-      std::vector<std::string> PrintfInfo;
-      std::vector<Kernel::Metadata> Kernels;
-
-      explicit Metadata() = default;
-
-      // Construct from an YAML string.
-      explicit Metadata(const std::string &YAML);
-
-      // Convert to YAML string.
-      std::string toYAML();
-
-      // Convert from YAML string.
-      static Metadata fromYAML(const std::string &S);
-    };
-
-  } //end namespace Program
-
-} // end namespace RuntimeMD
-} // end namespace AMDGPU
-
-#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Mar 22 17:32:22 2017
@@ -806,7 +806,7 @@ private:
   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
   bool ParseDirectiveHSACodeObjectVersion();
   bool ParseDirectiveHSACodeObjectISA();
-  bool ParseDirectiveRuntimeMetadata();
+  bool ParseDirectiveCodeObjectMetadata();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
   bool ParseSectionDirectiveHSAText();
@@ -2259,43 +2259,45 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
   return false;
 }
 
-bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
-  std::string Metadata;
-  raw_string_ostream MS(Metadata);
+bool AMDGPUAsmParser::ParseDirectiveCodeObjectMetadata() {
+  std::string YamlString;
+  raw_string_ostream YamlStream(YamlString);
 
   getLexer().setSkipSpace(false);
 
   bool FoundEnd = false;
   while (!getLexer().is(AsmToken::Eof)) {
     while (getLexer().is(AsmToken::Space)) {
-      MS << ' ';
+      YamlStream << getLexer().getTok().getString();
       Lex();
     }
 
     if (getLexer().is(AsmToken::Identifier)) {
       StringRef ID = getLexer().getTok().getIdentifier();
-      if (ID == ".end_amdgpu_runtime_metadata") {
+      if (ID == AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd) {
         Lex();
         FoundEnd = true;
         break;
       }
     }
 
-    MS << Parser.parseStringToEndOfStatement()
-       << getContext().getAsmInfo()->getSeparatorString();
+    YamlStream << Parser.parseStringToEndOfStatement()
+               << getContext().getAsmInfo()->getSeparatorString();
 
     Parser.eatToEndOfStatement();
   }
 
   getLexer().setSkipSpace(true);
 
-  if (getLexer().is(AsmToken::Eof) && !FoundEnd)
-    return TokError("expected directive .end_amdgpu_runtime_metadata not found");
+  if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
+    return TokError(
+        "expected directive .end_amdgpu_code_object_metadata not found");
+  }
 
-  MS.flush();
+  YamlStream.flush();
 
-  if (getTargetStreamer().EmitRuntimeMetadata(getFeatureBits(), Metadata))
-    return Error(getParser().getTok().getLoc(), "invalid runtime metadata");
+  if (!getTargetStreamer().EmitCodeObjectMetadata(getFeatureBits(), YamlString))
+    return Error(getParser().getTok().getLoc(), "invalid code object metadata");
 
   return false;
 }
@@ -2407,8 +2409,8 @@ bool AMDGPUAsmParser::ParseDirective(Asm
   if (IDVal == ".hsa_code_object_isa")
     return ParseDirectiveHSACodeObjectISA();
 
-  if (IDVal == ".amdgpu_runtime_metadata")
-    return ParseDirectiveRuntimeMetadata();
+  if (IDVal == AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin)
+    return ParseDirectiveCodeObjectMetadata();
 
   if (IDVal == ".amd_kernel_code_t")
     return ParseDirectiveAMDKernelCodeT();

Added: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h?rev=298552&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h (added)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadata.h Wed Mar 22 17:32:22 2017
@@ -0,0 +1,347 @@
+//===--- AMDGPUCodeObjectMetadata.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata definitions and in-memory
+/// representations.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H
+
+#include <cstdint>
+#include <string>
+#include <system_error>
+#include <vector>
+
+namespace llvm {
+namespace AMDGPU {
+
+//===----------------------------------------------------------------------===//
+// Code Object Metadata.
+//===----------------------------------------------------------------------===//
+namespace CodeObject {
+
+/// \brief Code object metadata major version.
+constexpr uint32_t MetadataVersionMajor = 1;
+/// \brief Code object metadata minor version.
+constexpr uint32_t MetadataVersionMinor = 0;
+
+/// \brief Code object metadata beginning assembler directive.
+constexpr char MetadataAssemblerDirectiveBegin[] =
+    ".amdgpu_code_object_metadata";
+/// \brief Code object metadata ending assembler directive.
+constexpr char MetadataAssemblerDirectiveEnd[] =
+    ".end_amdgpu_code_object_metadata";
+
+/// \brief Access qualifiers.
+enum class AccessQualifier : uint8_t {
+  Default   = 0,
+  ReadOnly  = 1,
+  WriteOnly = 2,
+  ReadWrite = 3,
+  Unknown   = 0xff
+};
+
+/// \brief Address space qualifiers.
+enum class AddressSpaceQualifier : uint8_t {
+  Private  = 0,
+  Global   = 1,
+  Constant = 2,
+  Local    = 3,
+  Generic  = 4,
+  Region   = 5,
+  Unknown  = 0xff
+};
+
+/// \brief Value kinds.
+enum class ValueKind : uint8_t {
+  ByValue                = 0,
+  GlobalBuffer           = 1,
+  DynamicSharedPointer   = 2,
+  Sampler                = 3,
+  Image                  = 4,
+  Pipe                   = 5,
+  Queue                  = 6,
+  HiddenGlobalOffsetX    = 7,
+  HiddenGlobalOffsetY    = 8,
+  HiddenGlobalOffsetZ    = 9,
+  HiddenNone             = 10,
+  HiddenPrintfBuffer     = 11,
+  HiddenDefaultQueue     = 12,
+  HiddenCompletionAction = 13,
+  Unknown                = 0xff
+};
+
+/// \brief Value types.
+enum class ValueType : uint8_t {
+  Struct  = 0,
+  I8      = 1,
+  U8      = 2,
+  I16     = 3,
+  U16     = 4,
+  F16     = 5,
+  I32     = 6,
+  U32     = 7,
+  F32     = 8,
+  I64     = 9,
+  U64     = 10,
+  F64     = 11,
+  Unknown = 0xff
+};
+
+//===----------------------------------------------------------------------===//
+// Instruction Set Architecture Metadata (ISA).
+//===----------------------------------------------------------------------===//
+namespace Isa {
+
+namespace Key {
+/// \brief Key for Isa::Metadata::mWavefrontSize.
+constexpr char WavefrontSize[] = "WavefrontSize";
+/// \brief Key for Isa::Metadata::mLocalMemorySize.
+constexpr char LocalMemorySize[] = "LocalMemorySize";
+/// \brief Key for Isa::Metadata::mEUsPerCU.
+constexpr char EUsPerCU[] = "EUsPerCU";
+/// \brief Key for Isa::Metadata::mMaxWavesPerEU.
+constexpr char MaxWavesPerEU[] = "MaxWavesPerEU";
+/// \brief Key for Isa::Metadata::mMaxFlatWorkGroupSize.
+constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
+/// \brief Key for Isa::Metadata::mSGPRAllocGranule.
+constexpr char SGPRAllocGranule[] = "SGPRAllocGranule";
+/// \brief Key for Isa::Metadata::mTotalNumSGPRs.
+constexpr char TotalNumSGPRs[] = "TotalNumSGPRs";
+/// \brief Key for Isa::Metadata::mAddressableNumSGPRs.
+constexpr char AddressableNumSGPRs[] = "AddressableNumSGPRs";
+/// \brief Key for Isa::Metadata::mVGPRAllocGranule.
+constexpr char VGPRAllocGranule[] = "VGPRAllocGranule";
+/// \brief Key for Isa::Metadata::mTotalNumVGPRs.
+constexpr char TotalNumVGPRs[] = "TotalNumVGPRs";
+/// \brief Key for Isa::Metadata::mAddressableNumVGPRs.
+constexpr char AddressableNumVGPRs[] = "AddressableNumVGPRs";
+} // end namespace Key
+
+/// \brief In-memory representation of instruction set architecture metadata.
+struct Metadata final {
+  /// \brief Wavefront size. Required.
+  uint32_t mWavefrontSize = 0;
+  /// \brief Local memory size in bytes. Required.
+  uint32_t mLocalMemorySize = 0;
+  /// \brief Number of execution units per compute unit. Required.
+  uint32_t mEUsPerCU = 0;
+  /// \brief Maximum number of waves per execution unit. Required.
+  uint32_t mMaxWavesPerEU = 0;
+  /// \brief Maximum flat work group size. Required.
+  uint32_t mMaxFlatWorkGroupSize = 0;
+  /// \brief SGPR allocation granularity. Required.
+  uint32_t mSGPRAllocGranule = 0;
+  /// \brief Total number of SGPRs. Required.
+  uint32_t mTotalNumSGPRs = 0;
+  /// \brief Addressable number of SGPRs. Required.
+  uint32_t mAddressableNumSGPRs = 0;
+  /// \brief VGPR allocation granularity. Required.
+  uint32_t mVGPRAllocGranule = 0;
+  /// \brief Total number of VGPRs. Required.
+  uint32_t mTotalNumVGPRs = 0;
+  /// \brief Addressable number of VGPRs. Required.
+  uint32_t mAddressableNumVGPRs = 0;
+
+  /// \brief Default constructor.
+  Metadata() = default;
+};
+
+} // end namespace Isa
+
+//===----------------------------------------------------------------------===//
+// Kernel Metadata.
+//===----------------------------------------------------------------------===//
+namespace Kernel {
+
+//===----------------------------------------------------------------------===//
+// Kernel Attributes Metadata.
+//===----------------------------------------------------------------------===//
+namespace Attrs {
+
+namespace Key {
+/// \brief Key for Kernel::Attrs::Metadata::mReqdWorkGroupSize.
+constexpr char ReqdWorkGroupSize[] = "ReqdWorkGroupSize";
+/// \brief Key for Kernel::Attrs::Metadata::mWorkGroupSizeHint.
+constexpr char WorkGroupSizeHint[] = "WorkGroupSizeHint";
+/// \brief Key for Kernel::Attrs::Metadata::mVecTypeHint.
+constexpr char VecTypeHint[] = "VecTypeHint";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel attributes metadata.
+struct Metadata final {
+  /// \brief 'reqd_work_group_size' attribute. Optional.
+  std::vector<uint32_t> mReqdWorkGroupSize = std::vector<uint32_t>();
+  /// \brief 'work_group_size_hint' attribute. Optional.
+  std::vector<uint32_t> mWorkGroupSizeHint = std::vector<uint32_t>();
+  /// \brief 'vec_type_hint' attribute. Optional.
+  std::string mVecTypeHint = std::string();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+
+  /// \returns True if kernel attributes metadata is empty, false otherwise.
+  bool empty() const {
+    return mReqdWorkGroupSize.empty() &&
+           mWorkGroupSizeHint.empty() &&
+           mVecTypeHint.empty();
+  }
+
+  /// \returns True if kernel attributes metadata is not empty, false otherwise.
+  bool notEmpty() const {
+    return !empty();
+  }
+};
+
+} // end namespace Attrs
+
+//===----------------------------------------------------------------------===//
+// Kernel Argument Metadata.
+//===----------------------------------------------------------------------===//
+namespace Arg {
+
+namespace Key {
+/// \brief Key for Kernel::Arg::Metadata::mSize.
+constexpr char Size[] = "Size";
+/// \brief Key for Kernel::Arg::Metadata::mAlign.
+constexpr char Align[] = "Align";
+/// \brief Key for Kernel::Arg::Metadata::mValueKind.
+constexpr char Kind[] = "Kind";
+/// \brief Key for Kernel::Arg::Metadata::mValueType.
+constexpr char ValueType[] = "ValueType";
+/// \brief Key for Kernel::Arg::Metadata::mPointeeAlign.
+constexpr char PointeeAlign[] = "PointeeAlign";
+/// \brief Key for Kernel::Arg::Metadata::mAccQual.
+constexpr char AccQual[] = "AccQual";
+/// \brief Key for Kernel::Arg::Metadata::mAddrSpaceQual.
+constexpr char AddrSpaceQual[] = "AddrSpaceQual";
+/// \brief Key for Kernel::Arg::Metadata::mIsConst.
+constexpr char IsConst[] = "IsConst";
+/// \brief Key for Kernel::Arg::Metadata::mIsPipe.
+constexpr char IsPipe[] = "IsPipe";
+/// \brief Key for Kernel::Arg::Metadata::mIsRestrict.
+constexpr char IsRestrict[] = "IsRestrict";
+/// \brief Key for Kernel::Arg::Metadata::mIsVolatile.
+constexpr char IsVolatile[] = "IsVolatile";
+/// \brief Key for Kernel::Arg::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Arg::Metadata::mTypeName.
+constexpr char TypeName[] = "TypeName";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel argument metadata.
+struct Metadata final {
+  /// \brief Size in bytes. Required.
+  uint32_t mSize = 0;
+  /// \brief Alignment in bytes. Required.
+  uint32_t mAlign = 0;
+  /// \brief Value kind. Required.
+  ValueKind mValueKind = ValueKind::Unknown;
+  /// \brief Value type. Required.
+  ValueType mValueType = ValueType::Unknown;
+  /// \brief Pointee alignment in bytes. Optional.
+  uint32_t mPointeeAlign = 0;
+  /// \brief Access qualifier. Optional.
+  AccessQualifier mAccQual = AccessQualifier::Unknown;
+  /// \brief Address space qualifier. Optional.
+  AddressSpaceQualifier mAddrSpaceQual = AddressSpaceQualifier::Unknown;
+  /// \brief True if 'const' qualifier is specified. Optional.
+  bool mIsConst = false;
+  /// \brief True if 'pipe' qualifier is specified. Optional.
+  bool mIsPipe = false;
+  /// \brief True if 'restrict' qualifier is specified. Optional.
+  bool mIsRestrict = false;
+  /// \brief True if 'volatile' qualifier is specified. Optional.
+  bool mIsVolatile = false;
+  /// \brief Name. Optional.
+  std::string mName = std::string();
+  /// \brief Type name. Optional.
+  std::string mTypeName = std::string();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+};
+
+} // end namespace Arg
+
+/// \brief YAML key strings for the fields of Kernel::Metadata. The value of
+/// each constant is the exact key text passed to the YAML mapping.
+namespace Key {
+/// \brief Key for Kernel::Metadata::mName.
+constexpr char Name[] = "Name";
+/// \brief Key for Kernel::Metadata::mLanguage.
+constexpr char Language[] = "Language";
+/// \brief Key for Kernel::Metadata::mLanguageVersion.
+constexpr char LanguageVersion[] = "LanguageVersion";
+/// \brief Key for Kernel::Metadata::mAttrs.
+constexpr char Attrs[] = "Attrs";
+/// \brief Key for Kernel::Metadata::mArgs.
+constexpr char Args[] = "Args";
+} // end namespace Key
+
+/// \brief In-memory representation of kernel metadata.
+struct Metadata final {
+  /// \brief Name. Required.
+  std::string mName = std::string();
+  /// \brief Language. Optional.
+  std::string mLanguage = std::string();
+  /// \brief Language version. Optional.
+  std::vector<uint32_t> mLanguageVersion = std::vector<uint32_t>();
+  /// \brief Attributes metadata. Optional.
+  Attrs::Metadata mAttrs = Attrs::Metadata();
+  /// \brief Arguments metadata. Optional.
+  std::vector<Arg::Metadata> mArgs = std::vector<Arg::Metadata>();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+};
+
+} // end namespace Kernel
+
+/// \brief YAML key strings for the fields of CodeObject::Metadata. The value
+/// of each constant is the exact key text passed to the YAML mapping.
+namespace Key {
+/// \brief Key for CodeObject::Metadata::mVersion.
+constexpr char Version[] = "Version";
+/// \brief Key for CodeObject::Metadata::mIsa.
+constexpr char Isa[] = "Isa";
+/// \brief Key for CodeObject::Metadata::mPrintf.
+constexpr char Printf[] = "Printf";
+/// \brief Key for CodeObject::Metadata::mKernels.
+constexpr char Kernels[] = "Kernels";
+} // end namespace Key
+
+/// \brief In-memory representation of code object metadata.
+struct Metadata final {
+  /// \brief Code object metadata version. Required.
+  std::vector<uint32_t> mVersion = std::vector<uint32_t>();
+  /// \brief Instruction set architecture metadata. Optional.
+  Isa::Metadata mIsa = Isa::Metadata();
+  /// \brief Printf metadata. Optional.
+  std::vector<std::string> mPrintf = std::vector<std::string>();
+  /// \brief Kernels metadata. Optional.
+  std::vector<Kernel::Metadata> mKernels = std::vector<Kernel::Metadata>();
+
+  /// \brief Default constructor.
+  Metadata() = default;
+
+  /// \brief Converts \p YamlString to \p CodeObjectMetadata.
+  static std::error_code fromYamlString(std::string YamlString,
+                                        Metadata &CodeObjectMetadata);
+
+  /// \brief Converts \p CodeObjectMetadata to \p YamlString.
+  static std::error_code toYamlString(Metadata CodeObjectMetadata,
+                                      std::string &YamlString);
+};
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATA_H

Added: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp?rev=298552&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp Wed Mar 22 17:32:22 2017
@@ -0,0 +1,577 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.cpp -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUCodeObjectMetadataStreamer.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm::AMDGPU;
+using namespace llvm::AMDGPU::CodeObject;
+using namespace llvm::AMDGPU::IsaInfo;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Arg::Metadata)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+
+namespace llvm {
+
+// -amdgpu-dump-comd: when set, MetadataStreamer::toYamlString() prints the
+// generated YAML text to errs().
+static cl::opt<bool> DumpCodeObjectMetadata(
+    "amdgpu-dump-comd",
+    cl::desc("Dump AMDGPU Code Object Metadata"));
+// -amdgpu-verify-comd: when set, MetadataStreamer::toYamlString() parses the
+// generated YAML back, re-serializes it, and reports PASS/FAIL on errs().
+static cl::opt<bool> VerifyCodeObjectMetadata(
+    "amdgpu-verify-comd",
+    cl::desc("Verify AMDGPU Code Object Metadata"));
+
+namespace yaml {
+
+/// \brief YAML spellings of AccessQualifier values.
+///
+/// AccessQualifier::Unknown has no spelling here; the kernel argument mapping
+/// in this file passes it as the mapOptional default for the AccQual key.
+template <>
+struct ScalarEnumerationTraits<AccessQualifier> {
+  static void enumeration(IO &YIO, AccessQualifier &EN) {
+    YIO.enumCase(EN, "Default", AccessQualifier::Default);
+    YIO.enumCase(EN, "ReadOnly", AccessQualifier::ReadOnly);
+    YIO.enumCase(EN, "WriteOnly", AccessQualifier::WriteOnly);
+    YIO.enumCase(EN, "ReadWrite", AccessQualifier::ReadWrite);
+  }
+};
+
+/// \brief YAML spellings of AddressSpaceQualifier values.
+///
+/// AddressSpaceQualifier::Unknown has no spelling here; the kernel argument
+/// mapping in this file passes it as the mapOptional default for the
+/// AddrSpaceQual key.
+template <>
+struct ScalarEnumerationTraits<AddressSpaceQualifier> {
+  static void enumeration(IO &YIO, AddressSpaceQualifier &EN) {
+    YIO.enumCase(EN, "Private", AddressSpaceQualifier::Private);
+    YIO.enumCase(EN, "Global", AddressSpaceQualifier::Global);
+    YIO.enumCase(EN, "Constant", AddressSpaceQualifier::Constant);
+    YIO.enumCase(EN, "Local", AddressSpaceQualifier::Local);
+    YIO.enumCase(EN, "Generic", AddressSpaceQualifier::Generic);
+    YIO.enumCase(EN, "Region", AddressSpaceQualifier::Region);
+  }
+};
+
+/// \brief YAML spellings of ValueKind values.
+///
+/// ValueKind::Unknown (the in-memory default of Kernel::Arg::Metadata) has no
+/// spelling here.
+template <>
+struct ScalarEnumerationTraits<ValueKind> {
+  static void enumeration(IO &YIO, ValueKind &EN) {
+    YIO.enumCase(EN, "ByValue", ValueKind::ByValue);
+    YIO.enumCase(EN, "GlobalBuffer", ValueKind::GlobalBuffer);
+    YIO.enumCase(EN, "DynamicSharedPointer", ValueKind::DynamicSharedPointer);
+    YIO.enumCase(EN, "Sampler", ValueKind::Sampler);
+    YIO.enumCase(EN, "Image", ValueKind::Image);
+    YIO.enumCase(EN, "Pipe", ValueKind::Pipe);
+    YIO.enumCase(EN, "Queue", ValueKind::Queue);
+    YIO.enumCase(EN, "HiddenGlobalOffsetX", ValueKind::HiddenGlobalOffsetX);
+    YIO.enumCase(EN, "HiddenGlobalOffsetY", ValueKind::HiddenGlobalOffsetY);
+    YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
+    YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
+    YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+    YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
+    YIO.enumCase(EN, "HiddenCompletionAction",
+                 ValueKind::HiddenCompletionAction);
+  }
+};
+
+/// \brief YAML spellings of ValueType values.
+///
+/// ValueType::Unknown (the in-memory default of Kernel::Arg::Metadata) has no
+/// spelling here.
+template <>
+struct ScalarEnumerationTraits<ValueType> {
+  static void enumeration(IO &YIO, ValueType &EN) {
+    YIO.enumCase(EN, "Struct", ValueType::Struct);
+    YIO.enumCase(EN, "I8", ValueType::I8);
+    YIO.enumCase(EN, "U8", ValueType::U8);
+    YIO.enumCase(EN, "I16", ValueType::I16);
+    YIO.enumCase(EN, "U16", ValueType::U16);
+    YIO.enumCase(EN, "F16", ValueType::F16);
+    YIO.enumCase(EN, "I32", ValueType::I32);
+    YIO.enumCase(EN, "U32", ValueType::U32);
+    YIO.enumCase(EN, "F32", ValueType::F32);
+    YIO.enumCase(EN, "I64", ValueType::I64);
+    YIO.enumCase(EN, "U64", ValueType::U64);
+    YIO.enumCase(EN, "F64", ValueType::F64);
+  }
+};
+
+/// \brief YAML mapping for Isa::Metadata. Every field is mapped as a required
+/// key, in the order listed below.
+template <>
+struct MappingTraits<Isa::Metadata> {
+  static void mapping(IO &YIO, Isa::Metadata &MD) {
+    YIO.mapRequired(Isa::Key::WavefrontSize, MD.mWavefrontSize);
+    YIO.mapRequired(Isa::Key::LocalMemorySize, MD.mLocalMemorySize);
+    YIO.mapRequired(Isa::Key::EUsPerCU, MD.mEUsPerCU);
+    YIO.mapRequired(Isa::Key::MaxWavesPerEU, MD.mMaxWavesPerEU);
+    YIO.mapRequired(Isa::Key::MaxFlatWorkGroupSize, MD.mMaxFlatWorkGroupSize);
+    YIO.mapRequired(Isa::Key::SGPRAllocGranule, MD.mSGPRAllocGranule);
+    YIO.mapRequired(Isa::Key::TotalNumSGPRs, MD.mTotalNumSGPRs);
+    YIO.mapRequired(Isa::Key::AddressableNumSGPRs, MD.mAddressableNumSGPRs);
+    YIO.mapRequired(Isa::Key::VGPRAllocGranule, MD.mVGPRAllocGranule);
+    YIO.mapRequired(Isa::Key::TotalNumVGPRs, MD.mTotalNumVGPRs);
+    YIO.mapRequired(Isa::Key::AddressableNumVGPRs, MD.mAddressableNumVGPRs);
+  }
+};
+
+/// \brief YAML mapping for Kernel::Attrs::Metadata. Every field is optional;
+/// an empty vector / empty string is supplied as the default for each key.
+template <>
+struct MappingTraits<Kernel::Attrs::Metadata> {
+  static void mapping(IO &YIO, Kernel::Attrs::Metadata &MD) {
+    YIO.mapOptional(Kernel::Attrs::Key::ReqdWorkGroupSize,
+                    MD.mReqdWorkGroupSize, std::vector<uint32_t>());
+    YIO.mapOptional(Kernel::Attrs::Key::WorkGroupSizeHint,
+                    MD.mWorkGroupSizeHint, std::vector<uint32_t>());
+    YIO.mapOptional(Kernel::Attrs::Key::VecTypeHint,
+                    MD.mVecTypeHint, std::string());
+  }
+};
+
+/// \brief YAML mapping for Kernel::Arg::Metadata.
+///
+/// Size, Align, Kind and ValueType are required keys. All remaining keys are
+/// optional, and the default supplied for each one equals the corresponding
+/// member's in-class initializer in Kernel::Arg::Metadata.
+template <>
+struct MappingTraits<Kernel::Arg::Metadata> {
+  static void mapping(IO &YIO, Kernel::Arg::Metadata &MD) {
+    YIO.mapRequired(Kernel::Arg::Key::Size, MD.mSize);
+    YIO.mapRequired(Kernel::Arg::Key::Align, MD.mAlign);
+    YIO.mapRequired(Kernel::Arg::Key::Kind, MD.mValueKind);
+    YIO.mapRequired(Kernel::Arg::Key::ValueType, MD.mValueType);
+    YIO.mapOptional(Kernel::Arg::Key::PointeeAlign, MD.mPointeeAlign,
+                    uint32_t(0));
+    YIO.mapOptional(Kernel::Arg::Key::AccQual, MD.mAccQual,
+                    AccessQualifier::Unknown);
+    YIO.mapOptional(Kernel::Arg::Key::AddrSpaceQual, MD.mAddrSpaceQual,
+                    AddressSpaceQualifier::Unknown);
+    YIO.mapOptional(Kernel::Arg::Key::IsConst, MD.mIsConst, false);
+    YIO.mapOptional(Kernel::Arg::Key::IsPipe, MD.mIsPipe, false);
+    YIO.mapOptional(Kernel::Arg::Key::IsRestrict, MD.mIsRestrict, false);
+    YIO.mapOptional(Kernel::Arg::Key::IsVolatile, MD.mIsVolatile, false);
+    YIO.mapOptional(Kernel::Arg::Key::Name, MD.mName, std::string());
+    YIO.mapOptional(Kernel::Arg::Key::TypeName, MD.mTypeName, std::string());
+  }
+};
+
+/// \brief YAML mapping for Kernel::Metadata. Name is the only required key.
+template <>
+struct MappingTraits<Kernel::Metadata> {
+  static void mapping(IO &YIO, Kernel::Metadata &MD) {
+    YIO.mapRequired(Kernel::Key::Name, MD.mName);
+    YIO.mapOptional(Kernel::Key::Language, MD.mLanguage, std::string());
+    YIO.mapOptional(Kernel::Key::LanguageVersion, MD.mLanguageVersion,
+                    std::vector<uint32_t>());
+    // Attrs and Args are mapped without a default value, so the guards below
+    // decide whether the key is visited at all: when writing, only if there
+    // is something to write; when reading, always.
+    if (!MD.mAttrs.empty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::Attrs, MD.mAttrs);
+    if (!MD.mArgs.empty() || !YIO.outputting())
+      YIO.mapOptional(Kernel::Key::Args, MD.mArgs);
+  }
+};
+
+/// \brief YAML mapping for the top-level CodeObject::Metadata. Version is the
+/// only required key.
+template <>
+struct MappingTraits<CodeObject::Metadata> {
+  static void mapping(IO &YIO, CodeObject::Metadata &MD) {
+    YIO.mapRequired(Key::Version, MD.mVersion);
+    YIO.mapOptional(Key::Isa, MD.mIsa);
+    YIO.mapOptional(Key::Printf, MD.mPrintf, std::vector<std::string>());
+    // When writing, visit the Kernels key only if there is at least one
+    // kernel; when reading, always visit it.
+    if (!MD.mKernels.empty() || !YIO.outputting())
+      YIO.mapOptional(Key::Kernels, MD.mKernels);
+  }
+};
+
+} // end namespace yaml
+
+namespace AMDGPU {
+
+// Parses YamlString with yaml::Input and streams the result into
+// CodeObjectMetadata. Returns the error state of the yaml::Input after
+// parsing.
+// NOTE(review): YamlString is taken by value, so each call copies the text;
+// changing that would also require changing the declaration in the header.
+/* static */
+std::error_code CodeObject::Metadata::fromYamlString(
+    std::string YamlString, CodeObject::Metadata &CodeObjectMetadata) {
+  yaml::Input YamlInput(YamlString);
+  YamlInput >> CodeObjectMetadata;
+  return YamlInput.error();
+}
+
+// Serializes CodeObjectMetadata as YAML through a raw_string_ostream bound to
+// YamlString. Always returns a default-constructed std::error_code.
+// NOTE(review): CodeObjectMetadata is taken by value, so each call copies the
+// whole metadata tree; changing that would also require changing the
+// declaration in the header.
+/* static */
+std::error_code CodeObject::Metadata::toYamlString(
+    CodeObject::Metadata CodeObjectMetadata, std::string &YamlString) {
+  raw_string_ostream YamlStream(YamlString);
+  // The third constructor argument is presumably yaml::Output's wrap column;
+  // passing INT_MAX would then keep long flow sequences on a single line.
+  // TODO confirm against YAMLTraits.h.
+  yaml::Output YamlOutput(YamlStream, nullptr, std::numeric_limits<int>::max());
+  YamlOutput << CodeObjectMetadata;
+  return std::error_code();
+}
+
+namespace CodeObject {
+
+/// Prints \p YamlString to errs(), preceded by a one-line banner.
+void MetadataStreamer::dump(StringRef YamlString) const {
+  auto &OS = errs();
+  OS << "AMDGPU Code Object Metadata:\n";
+  OS << YamlString << '\n';
+}
+
+/// Round-trips \p YamlString through the YAML parser and printer and reports
+/// the outcome on errs(): "PASS" when the re-serialized text equals the
+/// input, "FAIL" otherwise (including when parsing or printing fails). On a
+/// text mismatch both strings are printed as well.
+void MetadataStreamer::verify(StringRef YamlString) const {
+  auto &OS = errs();
+  OS << "AMDGPU Code Object Metadata Parser Test: ";
+
+  CodeObject::Metadata Parsed;
+  std::string Reprinted;
+  // The printer is only attempted when parsing succeeded; a failure in either
+  // stage is reported the same way.
+  if (Metadata::fromYamlString(YamlString, Parsed) ||
+      Metadata::toYamlString(Parsed, Reprinted)) {
+    OS << "FAIL\n";
+    return;
+  }
+
+  const bool RoundTrips = YamlString == Reprinted;
+  OS << (RoundTrips ? "PASS" : "FAIL") << '\n';
+  if (RoundTrips)
+    return;
+
+  OS << "Original input: " << YamlString << '\n'
+     << "Produced output: " << Reprinted << '\n';
+}
+
+/// Translates the access qualifier string \p AccQual into an AccessQualifier.
+/// An empty string yields Unknown; a non-empty string that is not one of
+/// "read_only", "write_only" or "read_write" yields Default.
+AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
+  if (!AccQual.empty()) {
+    return StringSwitch<AccessQualifier>(AccQual)
+        .Case("read_only", AccessQualifier::ReadOnly)
+        .Case("write_only", AccessQualifier::WriteOnly)
+        .Case("read_write", AccessQualifier::ReadWrite)
+        .Default(AccessQualifier::Default);
+  }
+
+  return AccessQualifier::Unknown;
+}
+
+/// Translates the AMDGPU address space number \p AddressSpace into the
+/// matching AddressSpaceQualifier. Any other value is treated as unreachable.
+AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
+    unsigned AddressSpace) const {
+  switch (AddressSpace) {
+  case AMDGPUAS::PRIVATE_ADDRESS:
+    return AddressSpaceQualifier::Private;
+  case AMDGPUAS::GLOBAL_ADDRESS:
+    return AddressSpaceQualifier::Global;
+  case AMDGPUAS::CONSTANT_ADDRESS:
+    return AddressSpaceQualifier::Constant;
+  case AMDGPUAS::LOCAL_ADDRESS:
+    return AddressSpaceQualifier::Local;
+  case AMDGPUAS::FLAT_ADDRESS:
+    return AddressSpaceQualifier::Generic;
+  case AMDGPUAS::REGION_ADDRESS:
+    return AddressSpaceQualifier::Region;
+  default:
+    llvm_unreachable("Unknown address space qualifier");
+  }
+}
+
+/// Classifies a kernel argument.
+///
+/// \param Ty           IR type of the argument.
+/// \param TypeQual     Type qualifier string of the argument; if it contains
+///                     "pipe" the result is ValueKind::Pipe.
+/// \param BaseTypeName Base type name of the argument; "sampler_t",
+///                     "queue_t" and the image type names listed below select
+///                     Sampler, Queue and Image respectively.
+/// \returns For any other base type name: DynamicSharedPointer for a pointer
+///          into the local address space, GlobalBuffer for any other pointer,
+///          and ByValue for non-pointer types.
+ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
+                                         StringRef BaseTypeName) const {
+  if (TypeQual.find("pipe") != StringRef::npos)
+    return ValueKind::Pipe;
+
+  // Every image type name must be its own, comma-separated argument: adjacent
+  // string literals without a comma are concatenated into a single (bogus)
+  // name and the affected types would silently fall through to Default. The
+  // list is split over several Cases calls to keep each call's arity small.
+  return StringSwitch<ValueKind>(BaseTypeName)
+             .Case("sampler_t", ValueKind::Sampler)
+             .Case("queue_t", ValueKind::Queue)
+             .Cases("image1d_t",
+                    "image1d_array_t",
+                    "image1d_buffer_t", ValueKind::Image)
+             .Cases("image2d_t",
+                    "image2d_array_t",
+                    "image2d_array_depth_t",
+                    "image2d_array_msaa_t",
+                    "image2d_array_msaa_depth_t", ValueKind::Image)
+             .Cases("image2d_depth_t",
+                    "image2d_msaa_t",
+                    "image2d_msaa_depth_t",
+                    "image3d_t", ValueKind::Image)
+             .Default(isa<PointerType>(Ty) ?
+                          (Ty->getPointerAddressSpace() ==
+                           AMDGPUAS::LOCAL_ADDRESS ?
+                           ValueKind::DynamicSharedPointer :
+                           ValueKind::GlobalBuffer) :
+                      ValueKind::ByValue);
+}
+
+/// Maps the IR type \p Ty to a ValueType.
+///
+/// Pointers and vectors are classified by their element type. Integers of
+/// width 8/16/32/64 are reported as unsigned when \p TypeName starts with
+/// "u" and as signed otherwise. Everything that has no dedicated ValueType
+/// (including other integer widths) is reported as Struct.
+ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
+  switch (Ty->getTypeID()) {
+  case Type::PointerTyID:
+    return getValueType(Ty->getPointerElementType(), TypeName);
+  case Type::VectorTyID:
+    return getValueType(Ty->getVectorElementType(), TypeName);
+  case Type::HalfTyID:
+    return ValueType::F16;
+  case Type::FloatTyID:
+    return ValueType::F32;
+  case Type::DoubleTyID:
+    return ValueType::F64;
+  case Type::IntegerTyID: {
+    const bool IsUnsigned = TypeName.startswith("u");
+    switch (Ty->getIntegerBitWidth()) {
+    case 8:
+      return IsUnsigned ? ValueType::U8 : ValueType::I8;
+    case 16:
+      return IsUnsigned ? ValueType::U16 : ValueType::I16;
+    case 32:
+      return IsUnsigned ? ValueType::U32 : ValueType::I32;
+    case 64:
+      return IsUnsigned ? ValueType::U64 : ValueType::I64;
+    default:
+      return ValueType::Struct;
+    }
+  }
+  default:
+    return ValueType::Struct;
+  }
+}
+
+/// Builds a type name string for \p Ty.
+///
+/// Integers map to "char"/"short"/"int"/"long" for widths 8/16/32/64 and to
+/// "i<width>" otherwise, with a leading "u" when \p Signed is false. Half,
+/// float and double map to "half", "float" and "double". A vector is named
+/// after its element type followed by its element count. Any other type
+/// yields "unknown".
+std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
+  switch (Ty->getTypeID()) {
+  case Type::HalfTyID:
+    return "half";
+  case Type::FloatTyID:
+    return "float";
+  case Type::DoubleTyID:
+    return "double";
+  case Type::IntegerTyID: {
+    const auto BitWidth = Ty->getIntegerBitWidth();
+    std::string SignedName;
+    switch (BitWidth) {
+    case 8:
+      SignedName = "char";
+      break;
+    case 16:
+      SignedName = "short";
+      break;
+    case 32:
+      SignedName = "int";
+      break;
+    case 64:
+      SignedName = "long";
+      break;
+    default:
+      SignedName = (Twine('i') + Twine(BitWidth)).str();
+      break;
+    }
+    // The unsigned spelling is the signed spelling with a 'u' in front.
+    return Signed ? SignedName : "u" + SignedName;
+  }
+  case Type::VectorTyID: {
+    auto VecTy = cast<VectorType>(Ty);
+    const std::string ElName = getTypeName(VecTy->getElementType(), Signed);
+    auto NumElements = VecTy->getVectorNumElements();
+    return (Twine(ElName) + Twine(NumElements)).str();
+  }
+  default:
+    return "unknown";
+  }
+}
+
+/// Extracts the three constant integer operands of \p Node as work group
+/// dimensions. Returns an empty vector when \p Node does not have exactly
+/// three operands.
+std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
+    MDNode *Node) const {
+  std::vector<uint32_t> Dims;
+  if (Node->getNumOperands() == 3) {
+    for (auto &Op : Node->operands())
+      Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
+  }
+  return Dims;
+}
+
+/// Appends the metadata format version (major, then minor) to the code object
+/// metadata.
+void MetadataStreamer::emitVersion() {
+  CodeObjectMetadata.mVersion.push_back(MetadataVersionMajor);
+  CodeObjectMetadata.mVersion.push_back(MetadataVersionMinor);
+}
+
+/// Fills every field of the code object's ISA metadata from the IsaInfo
+/// queries for \p Features, overwriting whatever was stored before.
+void MetadataStreamer::emitIsa(const FeatureBitset &Features) {
+  auto &MD = CodeObjectMetadata.mIsa;
+
+  MD.mWavefrontSize = getWavefrontSize(Features);
+  MD.mLocalMemorySize = getLocalMemorySize(Features);
+  MD.mEUsPerCU = getEUsPerCU(Features);
+  MD.mMaxWavesPerEU = getMaxWavesPerEU(Features);
+  MD.mMaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features);
+  MD.mSGPRAllocGranule = getSGPRAllocGranule(Features);
+  MD.mTotalNumSGPRs = getTotalNumSGPRs(Features);
+  MD.mAddressableNumSGPRs = getAddressableNumSGPRs(Features);
+  MD.mVGPRAllocGranule = getVGPRAllocGranule(Features);
+  MD.mTotalNumVGPRs = getTotalNumVGPRs(Features);
+  MD.mAddressableNumVGPRs = getAddressableNumVGPRs(Features);
+}
+
+/// Collects printf metadata from the "llvm.printf.fmts" named metadata of
+/// \p Mod: the first operand of every non-empty node is appended, as a
+/// string, to the code object's printf list. Does nothing when the named
+/// metadata is absent.
+void MetadataStreamer::emitPrintf(const Module &Mod) {
+  auto FmtsNode = Mod.getNamedMetadata("llvm.printf.fmts");
+  if (!FmtsNode)
+    return;
+
+  auto &Printf = CodeObjectMetadata.mPrintf;
+  for (auto Fmt : FmtsNode->operands()) {
+    if (Fmt->getNumOperands() == 0)
+      continue;
+    Printf.push_back(cast<MDString>(Fmt->getOperand(0))->getString());
+  }
+}
+
+/// Records the source language of the most recently added kernel.
+///
+/// When the module of \p Func carries "opencl.ocl.version" named metadata
+/// whose first node has at least two operands, the language is set to
+/// "OpenCL C" and the first two operands become the language version.
+/// Otherwise the kernel's language fields are left untouched.
+void MetadataStreamer::emitKernelLanguage(const Function &Func) {
+  // TODO: What about other languages?
+  auto VersionMD = Func.getParent()->getNamedMetadata("opencl.ocl.version");
+  if (!VersionMD || !VersionMD->getNumOperands())
+    return;
+  auto Op0 = VersionMD->getOperand(0);
+  if (Op0->getNumOperands() <= 1)
+    return;
+
+  auto &KernelMD = CodeObjectMetadata.mKernels.back();
+  KernelMD.mLanguage = "OpenCL C";
+  for (unsigned I = 0; I != 2; ++I) {
+    KernelMD.mLanguageVersion.push_back(
+        mdconst::extract<ConstantInt>(Op0->getOperand(I))->getZExtValue());
+  }
+}
+
+/// Records the attributes of the most recently added kernel from the
+/// "reqd_work_group_size", "work_group_size_hint" and "vec_type_hint"
+/// function metadata of \p Func; attributes without metadata are left
+/// untouched.
+void MetadataStreamer::emitKernelAttrs(const Function &Func) {
+  auto &KernelAttrs = CodeObjectMetadata.mKernels.back().mAttrs;
+
+  if (auto ReqdSize = Func.getMetadata("reqd_work_group_size"))
+    KernelAttrs.mReqdWorkGroupSize = getWorkGroupDimensions(ReqdSize);
+
+  if (auto SizeHint = Func.getMetadata("work_group_size_hint"))
+    KernelAttrs.mWorkGroupSizeHint = getWorkGroupDimensions(SizeHint);
+
+  if (auto VecHint = Func.getMetadata("vec_type_hint")) {
+    // Operand 0 carries the hinted type, operand 1 its signedness flag.
+    auto HintTy = cast<ValueAsMetadata>(VecHint->getOperand(0))->getType();
+    auto IsSigned =
+        mdconst::extract<ConstantInt>(VecHint->getOperand(1))->getZExtValue();
+    KernelAttrs.mVecTypeHint = getTypeName(HintTy, IsSigned);
+  }
+}
+
+/// Records argument metadata for \p Func: first one entry per IR argument,
+/// then, only when the module has "opencl.ocl.version" named metadata, the
+/// three hidden 64-bit global offset arguments, and finally, only when the
+/// module also has "llvm.printf.fmts" named metadata, the hidden printf
+/// buffer pointer.
+void MetadataStreamer::emitKernelArgs(const Function &Func) {
+  for (auto &Arg : Func.args())
+    emitKernelArg(Arg);
+
+  auto Mod = Func.getParent();
+
+  // TODO: What about other languages?
+  if (!Mod->getNamedMetadata("opencl.ocl.version"))
+    return;
+
+  auto &DL = Mod->getDataLayout();
+  auto &Ctx = Func.getContext();
+
+  auto Int64Ty = Type::getInt64Ty(Ctx);
+  emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
+  emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
+  emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
+
+  if (Mod->getNamedMetadata("llvm.printf.fmts")) {
+    auto Int8PtrTy = Type::getInt8PtrTy(Ctx, AMDGPUAS::GLOBAL_ADDRESS);
+    emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+  }
+}
+
+/// Records metadata for the IR argument \p Arg.
+///
+/// The type qualifier, base type, access qualifier, name and type name
+/// strings are read from the parent function's "kernel_arg_*" metadata at the
+/// argument's index; a string is left empty when its metadata node is missing
+/// or has too few operands.
+void MetadataStreamer::emitKernelArg(const Argument &Arg) {
+  auto Func = Arg.getParent();
+  auto ArgNo = Arg.getArgNo();
+
+  // Returns the string operand for this argument of the function metadata
+  // named MDName, or an empty string when it is not available.
+  auto GetArgString = [&](StringRef MDName) -> StringRef {
+    const MDNode *Node = Func->getMetadata(MDName);
+    if (Node && ArgNo < Node->getNumOperands())
+      return cast<MDString>(Node->getOperand(ArgNo))->getString();
+    return StringRef();
+  };
+
+  StringRef TypeQual = GetArgString("kernel_arg_type_qual");
+  StringRef BaseTypeName = GetArgString("kernel_arg_base_type");
+  StringRef AccQual = GetArgString("kernel_arg_access_qual");
+  StringRef Name = GetArgString("kernel_arg_name");
+  StringRef TypeName = GetArgString("kernel_arg_type");
+
+  emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
+                getValueKind(Arg.getType(), TypeQual, BaseTypeName), TypeQual,
+                BaseTypeName, AccQual, Name, TypeName);
+}
+
+/// Appends one argument entry to the most recently added kernel.
+///
+/// \param DL           Data layout used for the size and alignment queries.
+/// \param Ty           IR type of the argument.
+/// \param ValueKind    Value kind to record.
+/// \param TypeQual     Space-separated type qualifiers; "const", "pipe",
+///                     "restrict" and "volatile" set the matching flag,
+///                     anything else is ignored.
+/// \param BaseTypeName Base type name, used to derive the value type.
+/// \param AccQual      Access qualifier string.
+/// \param Name         Argument name.
+/// \param TypeName     Argument type name.
+void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
+                                     ValueKind ValueKind, StringRef TypeQual,
+                                     StringRef BaseTypeName, StringRef AccQual,
+                                     StringRef Name, StringRef TypeName) {
+  auto &Args = CodeObjectMetadata.mKernels.back().mArgs;
+  Args.push_back(Kernel::Arg::Metadata());
+  auto &Arg = Args.back();
+
+  Arg.mSize = DL.getTypeAllocSize(Ty);
+  Arg.mAlign = DL.getABITypeAlignment(Ty);
+  Arg.mValueKind = ValueKind;
+  Arg.mValueType = getValueType(Ty, BaseTypeName);
+  Arg.mAccQual = getAccessQualifier(AccQual);
+
+  if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
+    const auto AddrSpace = PtrTy->getAddressSpace();
+    auto ElTy = PtrTy->getElementType();
+    // The pointee alignment is only recorded for sized pointees in the local
+    // address space.
+    if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized())
+      Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy);
+    Arg.mAddrSpaceQual = getAddressSpaceQualifer(AddrSpace);
+  }
+
+  SmallVector<StringRef, 1> Quals;
+  TypeQual.split(Quals, " ", -1, false);
+  for (StringRef Qual : Quals) {
+    if (Qual == "const")
+      Arg.mIsConst = true;
+    else if (Qual == "pipe")
+      Arg.mIsPipe = true;
+    else if (Qual == "restrict")
+      Arg.mIsRestrict = true;
+    else if (Qual == "volatile")
+      Arg.mIsVolatile = true;
+  }
+
+  Arg.mName = Name;
+  Arg.mTypeName = TypeName;
+}
+
+/// Records the module-level metadata: the metadata version, the ISA metadata
+/// derived from \p Features, and the printf metadata found in \p Mod.
+void MetadataStreamer::begin(const FeatureBitset &Features, const Module &Mod) {
+  emitVersion();
+  emitIsa(Features);
+  emitPrintf(Mod);
+}
+
+/// Appends a metadata entry for \p Func (name, language, attributes and
+/// arguments). Functions whose calling convention is not AMDGPU_KERNEL are
+/// ignored.
+void MetadataStreamer::emitKernel(const Function &Func) {
+  if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
+    return;
+
+  // The emitKernel* helpers below all operate on the last kernel entry, so it
+  // has to exist before any of them runs.
+  auto &Kernels = CodeObjectMetadata.mKernels;
+  Kernels.push_back(Kernel::Metadata());
+  Kernels.back().mName = Func.getName();
+
+  emitKernelLanguage(Func);
+  emitKernelAttrs(Func);
+  emitKernelArgs(Func);
+}
+
+/// Serializes the collected metadata to YAML text.
+///
+/// On success the text is additionally dumped and/or verified on errs() when
+/// the -amdgpu-dump-comd / -amdgpu-verify-comd options are set.
+/// \returns The YAML text, or the serializer's error code.
+ErrorOr<std::string> MetadataStreamer::toYamlString() {
+  std::string YamlString;
+  auto EC = Metadata::toYamlString(CodeObjectMetadata, YamlString);
+  if (EC)
+    return EC;
+
+  if (DumpCodeObjectMetadata)
+    dump(YamlString);
+
+  if (VerifyCodeObjectMetadata)
+    verify(YamlString);
+
+  return YamlString;
+}
+
+/// Parses \p YamlString into this streamer's metadata, replaces the ISA
+/// metadata with the values derived from \p Features, and serializes the
+/// result back to YAML text.
+/// \returns The YAML text, or the parser's / serializer's error code.
+ErrorOr<std::string> MetadataStreamer::toYamlString(
+    const FeatureBitset &Features, StringRef YamlString) {
+  auto EC = Metadata::fromYamlString(YamlString, CodeObjectMetadata);
+  if (EC)
+    return EC;
+
+  emitIsa(Features);
+  return toYamlString();
+}
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm

Added: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h?rev=298552&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h (added)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h Wed Mar 22 17:32:22 2017
@@ -0,0 +1,95 @@
+//===--- AMDGPUCodeObjectMetadataStreamer.h ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU Code Object Metadata Streamer.
+///
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H
+
+#include "AMDGPUCodeObjectMetadata.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorOr.h"
+
+namespace llvm {
+
+class Argument;
+class DataLayout;
+class FeatureBitset;
+class Function;
+class MDNode;
+class Module;
+class Type;
+
+namespace AMDGPU {
+namespace CodeObject {
+
+/// \brief Collects code object metadata (version, ISA, printf, kernels) and
+/// converts it to YAML text.
+class MetadataStreamer final {
+private:
+  /// \brief The metadata collected so far.
+  Metadata CodeObjectMetadata;
+
+  /// \brief Prints \p YamlString to errs().
+  void dump(StringRef YamlString) const;
+
+  /// \brief Parses \p YamlString, re-serializes the result, and reports on
+  /// errs() whether the two texts are identical.
+  void verify(StringRef YamlString) const;
+
+  /// \returns The access qualifier named by \p AccQual; Unknown for an empty
+  /// string, Default for an unrecognized one.
+  AccessQualifier getAccessQualifier(StringRef AccQual) const;
+
+  /// \returns The address space qualifier for the AMDGPU address space
+  /// \p AddressSpace.
+  AddressSpaceQualifier getAddressSpaceQualifer(unsigned AddressSpace) const;
+
+  /// \returns The value kind of an argument of type \p Ty, given its type
+  /// qualifier string \p TypeQual and base type name \p BaseTypeName.
+  ValueKind getValueKind(Type *Ty, StringRef TypeQual,
+                         StringRef BaseTypeName) const;
+
+  /// \returns The value type for \p Ty; \p TypeName decides the signedness of
+  /// integer types (a leading "u" means unsigned).
+  ValueType getValueType(Type *Ty, StringRef TypeName) const;
+
+  /// \returns A type name string for \p Ty; integer types are named as
+  /// unsigned when \p Signed is false.
+  std::string getTypeName(Type *Ty, bool Signed) const;
+
+  /// \returns The three operands of \p Node as integers, or an empty vector
+  /// if \p Node does not have exactly three operands.
+  std::vector<uint32_t> getWorkGroupDimensions(MDNode *Node) const;
+
+  /// \brief Records the metadata version.
+  void emitVersion();
+
+  /// \brief Records ISA metadata derived from \p Features.
+  void emitIsa(const FeatureBitset &Features);
+
+  /// \brief Records printf metadata found in \p Mod.
+  void emitPrintf(const Module &Mod);
+
+  /// \brief Records language and language version for the most recently
+  /// added kernel.
+  void emitKernelLanguage(const Function &Func);
+
+  /// \brief Records attributes for the most recently added kernel.
+  void emitKernelAttrs(const Function &Func);
+
+  /// \brief Records all explicit and hidden arguments for the most recently
+  /// added kernel.
+  void emitKernelArgs(const Function &Func);
+
+  /// \brief Records metadata for the IR argument \p Arg.
+  void emitKernelArg(const Argument &Arg);
+
+  /// \brief Appends one argument entry, described by the given type, value
+  /// kind and (possibly empty) qualifier/name strings, to the most recently
+  /// added kernel.
+  void emitKernelArg(const DataLayout &DL, Type *Ty, ValueKind ValueKind,
+                     StringRef TypeQual = "", StringRef BaseTypeName = "",
+                     StringRef AccQual = "", StringRef Name = "",
+                     StringRef TypeName = "");
+public:
+  MetadataStreamer() = default;
+  ~MetadataStreamer() = default;
+
+  /// \brief Records module-level metadata: version, ISA (from \p Features)
+  /// and printf (from \p Mod).
+  void begin(const FeatureBitset &Features, const Module &Mod);
+
+  /// \brief Currently does nothing.
+  void end() {}
+
+  /// \brief Records metadata for \p Func if its calling convention is
+  /// AMDGPU_KERNEL; other functions are ignored.
+  void emitKernel(const Function &Func);
+
+  /// \returns The collected metadata as YAML text, or an error code.
+  ErrorOr<std::string> toYamlString();
+
+  /// \brief Replaces the collected metadata with the result of parsing
+  /// \p YamlString, recomputes the ISA metadata from \p Features, and
+  /// serializes the result.
+  /// \returns The resulting YAML text, or an error code.
+  ErrorOr<std::string> toYamlString(const FeatureBitset &Features,
+                                    StringRef YamlString);
+};
+
+} // end namespace CodeObject
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H

Removed: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp?rev=298551&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp (removed)
@@ -1,469 +0,0 @@
-//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// Generates AMDGPU runtime metadata for YAML mapping.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPURuntimeMetadata.h"
-#include "MCTargetDesc/AMDGPURuntimeMD.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Metadata.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/YAMLTraits.h"
-#include <cassert>
-#include <cstdint>
-#include <limits>
-#include <vector>
-
-using namespace llvm;
-using namespace llvm::AMDGPU::IsaInfo;
-using namespace ::AMDGPU::RuntimeMD;
-
-static cl::opt<bool>
-DumpRuntimeMD("amdgpu-dump-rtmd",
-              cl::desc("Dump AMDGPU runtime metadata"));
-
-static cl::opt<bool>
-CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
-                     cl::desc("Check AMDGPU runtime metadata YAML parser"));
-
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
-LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
-LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
-
-namespace llvm {
-namespace yaml {
-
-template <> struct MappingTraits<KernelArg::Metadata> {
-  static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
-    YamlIO.mapRequired(KeyName::ArgSize, A.Size);
-    YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
-    YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
-    YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
-    YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
-    YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
-    YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
-    YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
-    YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
-    YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
-    YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
-    YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
-    YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
-  }
-  static const bool flow = true;
-};
-
-template <> struct MappingTraits<Kernel::Metadata> {
-  static void mapping(IO &YamlIO, Kernel::Metadata &K) {
-    YamlIO.mapRequired(KeyName::KernelName, K.Name);
-    YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
-    YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
-    YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
-    YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
-    YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
-    YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
-        INVALID_KERNEL_INDEX);
-    YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
-        uint8_t(0));
-    YamlIO.mapOptional(KeyName::Args, K.Args);
-  }
-  static const bool flow = true;
-};
-
-template <> struct MappingTraits<IsaInfo::Metadata> {
-  static void mapping(IO &YamlIO, IsaInfo::Metadata &I) {
-    YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize);
-    YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize);
-    YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU);
-    YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU);
-    YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize,
-        I.MaxFlatWorkGroupSize);
-    YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule);
-    YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs);
-    YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs,
-        I.AddressableNumSGPRs);
-    YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule);
-    YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs);
-    YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs,
-        I.AddressableNumVGPRs);
-  }
-  static const bool flow = true;
-};
-
-template <> struct MappingTraits<Program::Metadata> {
-  static void mapping(IO &YamlIO, Program::Metadata &Prog) {
-    YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
-    YamlIO.mapOptional(KeyName::IsaInfo, Prog.IsaInfo);
-    YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
-    YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
-  }
-  static const bool flow = true;
-};
-
-} // end namespace yaml
-} // end namespace llvm
-
-// Get a vector of three integer values from MDNode \p Node;
-static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
-  assert(Node->getNumOperands() == 3);
-  std::vector<uint32_t> V;
-  for (const MDOperand &Op : Node->operands()) {
-    const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
-    V.push_back(CI->getZExtValue());
-  }
-  return V;
-}
-
-static std::string getOCLTypeName(Type *Ty, bool Signed) {
-  switch (Ty->getTypeID()) {
-  case Type::HalfTyID:
-    return "half";
-  case Type::FloatTyID:
-    return "float";
-  case Type::DoubleTyID:
-    return "double";
-  case Type::IntegerTyID: {
-    if (!Signed)
-      return (Twine('u') + getOCLTypeName(Ty, true)).str();
-    unsigned BW = Ty->getIntegerBitWidth();
-    switch (BW) {
-    case 8:
-      return "char";
-    case 16:
-      return "short";
-    case 32:
-      return "int";
-    case 64:
-      return "long";
-    default:
-      return (Twine('i') + Twine(BW)).str();
-    }
-  }
-  case Type::VectorTyID: {
-    VectorType *VecTy = cast<VectorType>(Ty);
-    Type *EleTy = VecTy->getElementType();
-    unsigned Size = VecTy->getVectorNumElements();
-    return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
-  }
-  default:
-    return "unknown";
-  }
-}
-
-static KernelArg::ValueType getRuntimeMDValueType(
-  Type *Ty, StringRef TypeName) {
-  switch (Ty->getTypeID()) {
-  case Type::HalfTyID:
-    return KernelArg::F16;
-  case Type::FloatTyID:
-    return KernelArg::F32;
-  case Type::DoubleTyID:
-    return KernelArg::F64;
-  case Type::IntegerTyID: {
-    bool Signed = !TypeName.startswith("u");
-    switch (Ty->getIntegerBitWidth()) {
-    case 8:
-      return Signed ? KernelArg::I8 : KernelArg::U8;
-    case 16:
-      return Signed ? KernelArg::I16 : KernelArg::U16;
-    case 32:
-      return Signed ? KernelArg::I32 : KernelArg::U32;
-    case 64:
-      return Signed ? KernelArg::I64 : KernelArg::U64;
-    default:
-      // Runtime does not recognize other integer types. Report as struct type.
-      return KernelArg::Struct;
-    }
-  }
-  case Type::VectorTyID:
-    return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
-  case Type::PointerTyID:
-    return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
-  default:
-    return KernelArg::Struct;
-  }
-}
-
-static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
-    AMDGPUAS::AddressSpaces A) {
-  switch (A) {
-  case AMDGPUAS::GLOBAL_ADDRESS:
-    return KernelArg::Global;
-  case AMDGPUAS::CONSTANT_ADDRESS:
-    return KernelArg::Constant;
-  case AMDGPUAS::LOCAL_ADDRESS:
-    return KernelArg::Local;
-  case AMDGPUAS::FLAT_ADDRESS:
-    return KernelArg::Generic;
-  case AMDGPUAS::REGION_ADDRESS:
-    return KernelArg::Region;
-  default:
-    return KernelArg::Private;
-  }
-}
-
-static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
-    Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
-    StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
-    StringRef AccQual = "") {
-  KernelArg::Metadata Arg;
-
-  // Set ArgSize and ArgAlign.
-  Arg.Size = DL.getTypeAllocSize(T);
-  Arg.Align = DL.getABITypeAlignment(T);
-  if (auto PT = dyn_cast<PointerType>(T)) {
-    auto ET = PT->getElementType();
-    if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
-      Arg.PointeeAlign = DL.getABITypeAlignment(ET);
-  }
-
-  // Set ArgTypeName.
-  Arg.TypeName = TypeName;
-
-  // Set ArgName.
-  Arg.Name = ArgName;
-
-  // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
-  SmallVector<StringRef, 1> SplitQ;
-  TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
-
-  for (StringRef KeyName : SplitQ) {
-    auto *P = StringSwitch<uint8_t *>(KeyName)
-      .Case("volatile", &Arg.IsVolatile)
-      .Case("restrict", &Arg.IsRestrict)
-      .Case("const",    &Arg.IsConst)
-      .Case("pipe",     &Arg.IsPipe)
-      .Default(nullptr);
-    if (P)
-      *P = 1;
-  }
-
-  // Set ArgKind.
-  Arg.Kind = Kind;
-
-  // Set ArgValueType.
-  Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
-
-  // Set ArgAccQual.
-  if (!AccQual.empty()) {
-    Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
-      .Case("read_only",  KernelArg::ReadOnly)
-      .Case("write_only", KernelArg::WriteOnly)
-      .Case("read_write", KernelArg::ReadWrite)
-      .Default(KernelArg::AccNone);
-  }
-
-  // Set ArgAddrQual.
-  if (auto *PT = dyn_cast<PointerType>(T)) {
-    Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
-        PT->getAddressSpace()));
-  }
-
-  return Arg;
-}
-
-static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
-  Kernel::Metadata Kernel;
-  Kernel.Name = F.getName();
-  auto &M = *F.getParent();
-
-  // Set Language and LanguageVersion.
-  if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
-    if (MD->getNumOperands() != 0) {
-      auto Node = MD->getOperand(0);
-      if (Node->getNumOperands() > 1) {
-        Kernel.Language = "OpenCL C";
-        uint32_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
-                         ->getZExtValue();
-        uint32_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
-                         ->getZExtValue();
-        Kernel.LanguageVersion.push_back(Major);
-        Kernel.LanguageVersion.push_back(Minor);
-      }
-    }
-  }
-
-  const DataLayout &DL = F.getParent()->getDataLayout();
-  for (auto &Arg : F.args()) {
-    unsigned I = Arg.getArgNo();
-    Type *T = Arg.getType();
-    auto TypeName = dyn_cast<MDString>(F.getMetadata(
-        "kernel_arg_type")->getOperand(I))->getString();
-    auto BaseTypeName = cast<MDString>(F.getMetadata(
-        "kernel_arg_base_type")->getOperand(I))->getString();
-    StringRef ArgName;
-    if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
-      ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
-    auto TypeQual = cast<MDString>(F.getMetadata(
-        "kernel_arg_type_qual")->getOperand(I))->getString();
-    auto AccQual = cast<MDString>(F.getMetadata(
-        "kernel_arg_access_qual")->getOperand(I))->getString();
-    KernelArg::Kind Kind;
-    if (TypeQual.find("pipe") != StringRef::npos)
-      Kind = KernelArg::Pipe;
-    else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
-      .Case("sampler_t", KernelArg::Sampler)
-      .Case("queue_t",   KernelArg::Queue)
-      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
-             "image2d_t" , "image2d_array_t",  KernelArg::Image)
-      .Cases("image2d_depth_t", "image2d_array_depth_t",
-             "image2d_msaa_t", "image2d_array_msaa_t",
-             "image2d_msaa_depth_t",  KernelArg::Image)
-      .Cases("image2d_array_msaa_depth_t", "image3d_t",
-             KernelArg::Image)
-      .Default(isa<PointerType>(T) ?
-                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
-                   KernelArg::DynamicSharedPointer :
-                   KernelArg::GlobalBuffer) :
-                   KernelArg::ByValue);
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
-        BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
-  }
-
-  // Emit hidden kernel arguments for OpenCL kernels.
-  if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
-    auto Int64T = Type::getInt64Ty(F.getContext());
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
-        KernelArg::HiddenGlobalOffsetX));
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
-        KernelArg::HiddenGlobalOffsetY));
-    Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
-        KernelArg::HiddenGlobalOffsetZ));
-    if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
-      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
-          KernelArg::Global);
-      Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
-          KernelArg::HiddenPrintfBuffer));
-    }
-  }
-
-  // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
-  if (auto RWGS = F.getMetadata("reqd_work_group_size"))
-    Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
-
-  if (auto WGSH = F.getMetadata("work_group_size_hint"))
-    Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
-
-  if (auto VTH = F.getMetadata("vec_type_hint"))
-    Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
-      VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
-      VTH->getOperand(1))->getZExtValue());
-
-  return Kernel;
-}
-
-static void getIsaInfo(const FeatureBitset &Features, IsaInfo::Metadata &IIM) {
-  IIM.WavefrontSize = getWavefrontSize(Features);
-  IIM.LocalMemorySize = getLocalMemorySize(Features);
-  IIM.EUsPerCU = getEUsPerCU(Features);
-  IIM.MaxWavesPerEU = getMaxWavesPerEU(Features);
-  IIM.MaxFlatWorkGroupSize = getMaxFlatWorkGroupSize(Features);
-  IIM.SGPRAllocGranule = getSGPRAllocGranule(Features);
-  IIM.TotalNumSGPRs = getTotalNumSGPRs(Features);
-  IIM.AddressableNumSGPRs = getAddressableNumSGPRs(Features);
-  IIM.VGPRAllocGranule = getVGPRAllocGranule(Features);
-  IIM.TotalNumVGPRs = getTotalNumVGPRs(Features);
-  IIM.AddressableNumVGPRs = getAddressableNumVGPRs(Features);
-}
-
-Program::Metadata::Metadata(const std::string &YAML) {
-  yaml::Input Input(YAML);
-  Input >> *this;
-}
-
-std::string Program::Metadata::toYAML() {
-  std::string Text;
-  raw_string_ostream Stream(Text);
-  yaml::Output Output(Stream, nullptr,
-                      std::numeric_limits<int>::max() /* do not wrap line */);
-  Output << *this;
-  return Stream.str();
-}
-
-Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
-  return Program::Metadata(S);
-}
-
-// Check if the YAML string can be parsed.
-static void checkRuntimeMDYAMLString(const std::string &YAML) {
-  auto P = Program::Metadata::fromYAML(YAML);
-  auto S = P.toYAML();
-  errs() << "AMDGPU runtime metadata parser test "
-         << (YAML == S ? "passes" : "fails") << ".\n";
-  if (YAML != S) {
-    errs() << "First output: " << YAML << '\n'
-           << "Second output: " << S << '\n';
-  }
-}
-
-std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
-                                         const Module &M) {
-  Program::Metadata Prog;
-  Prog.MDVersionSeq.push_back(MDVersion);
-  Prog.MDVersionSeq.push_back(MDRevision);
-
-  getIsaInfo(Features, Prog.IsaInfo);
-
-  // Set PrintfInfo.
-  if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
-    for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
-      auto Node = MD->getOperand(I);
-      if (Node->getNumOperands() > 0)
-        Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
-            ->getString());
-    }
-  }
-
-  // Set Kernels.
-  for (auto &F: M.functions()) {
-    if (!F.getMetadata("kernel_arg_type"))
-      continue;
-    Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
-  }
-
-  auto YAML = Prog.toYAML();
-
-  if (DumpRuntimeMD)
-    errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
-
-  if (CheckRuntimeMDParser)
-    checkRuntimeMDYAMLString(YAML);
-
-  return YAML;
-}
-
-ErrorOr<std::string> llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
-                                                  StringRef YAML) {
-  Program::Metadata Prog;
-  yaml::Input Input(YAML);
-  Input >> Prog;
-
-  getIsaInfo(Features, Prog.IsaInfo);
-
-  if (Input.error())
-    return Input.error();
-  return Prog.toYAML();
-}

Removed: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h?rev=298551&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h (removed)
@@ -1,33 +0,0 @@
-//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares functions for generating runtime metadata.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
-
-#include "llvm/Support/ErrorOr.h"
-#include <string>
-
-namespace llvm {
-class FeatureBitset;
-class Module;
-
-/// \returns Runtime metadata as YAML string.
-std::string getRuntimeMDYAMLString(const FeatureBitset &Features,
-                                   const Module &M);
-
-/// \returns \p YAML if \p YAML is valid runtime metadata, error otherwise.
-ErrorOr<std::string> getRuntimeMDYAMLString(const FeatureBitset &Features,
-                                            StringRef YAML);
-
-}
-#endif

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Mar 22 17:32:22 2017
@@ -27,7 +27,6 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/FormattedStream.h"
-#include "AMDGPURuntimeMD.h"
 
 namespace llvm {
 #include "AMDGPUPTNote.h"
@@ -36,9 +35,29 @@ namespace llvm {
 using namespace llvm;
 using namespace llvm::AMDGPU;
 
+//===----------------------------------------------------------------------===//
+// AMDGPUTargetStreamer
+//===----------------------------------------------------------------------===//
+
 AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
     : MCTargetStreamer(S) {}
 
+void AMDGPUTargetStreamer::EmitStartOfCodeObjectMetadata(
+    const FeatureBitset &Features, const Module &Mod) {
+  CodeObjectMetadataStreamer.begin(Features, Mod);
+}
+
+void AMDGPUTargetStreamer::EmitKernelCodeObjectMetadata(const Function &Func) {
+  CodeObjectMetadataStreamer.emitKernel(Func);
+}
+
+void AMDGPUTargetStreamer::EmitEndOfCodeObjectMetadata(
+    const FeatureBitset &Features) {
+  CodeObjectMetadataStreamer.end();
+  EmitCodeObjectMetadata(Features,
+                         CodeObjectMetadataStreamer.toYamlString().get());
+}
+
 //===----------------------------------------------------------------------===//
 // AMDGPUTargetAsmStreamer
 //===----------------------------------------------------------------------===//
@@ -93,24 +112,18 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPU
   OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
 }
 
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
-                                                  const Module &M) {
-  OS << "\t.amdgpu_runtime_metadata\n";
-  OS << getRuntimeMDYAMLString(Features, M);
-  OS << "\n\t.end_amdgpu_runtime_metadata\n";
-}
-
-bool AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
-                                                  StringRef Metadata) {
-  auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata);
-  if (!VerifiedMetadata)
-    return true;
-
-  OS << "\t.amdgpu_runtime_metadata";
-  OS << VerifiedMetadata.get();
-  OS << "\t.end_amdgpu_runtime_metadata\n";
+bool AMDGPUTargetAsmStreamer::EmitCodeObjectMetadata(
+    const FeatureBitset &Features, StringRef YamlString) {
+  auto VerifiedYamlString =
+      CodeObjectMetadataStreamer.toYamlString(Features, YamlString);
+  if (!VerifiedYamlString)
+    return false;
+
+  OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveBegin << '\n';
+  OS << VerifiedYamlString.get();
+  OS << '\t' << AMDGPU::CodeObject::MetadataAssemblerDirectiveEnd << '\n';
 
-  return false;
+  return true;
 }
 
 //===----------------------------------------------------------------------===//
@@ -223,11 +236,12 @@ void AMDGPUTargetELFStreamer::EmitAMDGPU
   Symbol->setBinding(ELF::STB_GLOBAL);
 }
 
-bool AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
-                                                  StringRef Metadata) {
-  auto VerifiedMetadata = getRuntimeMDYAMLString(Features, Metadata);
-  if (!VerifiedMetadata)
-    return true;
+bool AMDGPUTargetELFStreamer::EmitCodeObjectMetadata(
+    const FeatureBitset &Features, StringRef YamlString) {
+  auto VerifiedYamlString =
+      CodeObjectMetadataStreamer.toYamlString(Features, YamlString);
+  if (!VerifiedYamlString)
+    return false;
 
   // Create two labels to mark the beginning and end of the desc field
   // and a MCExpr to calculate the size of the desc field.
@@ -240,18 +254,13 @@ bool AMDGPUTargetELFStreamer::EmitRuntim
 
   EmitAMDGPUNote(
     DescSZ,
-    ElfNote::NT_AMDGPU_HSA_RUNTIME_METADATA,
+    ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_METADATA,
     [&](MCELFStreamer &OS) {
       OS.EmitLabel(DescBegin);
-      OS.EmitBytes(VerifiedMetadata.get());
+      OS.EmitBytes(VerifiedYamlString.get());
       OS.EmitLabel(DescEnd);
     }
   );
 
-  return false;
-}
-
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
-                                                  const Module &M) {
-  EmitRuntimeMetadata(Features, getRuntimeMDYAMLString(Features, M));
+  return true;
 }

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Mar 22 17:32:22 2017
@@ -10,6 +10,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
 #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
 
+#include "AMDGPUCodeObjectMetadataStreamer.h"
 #include "AMDKernelCodeT.h"
 #include "llvm/MC/MCStreamer.h"
 
@@ -27,6 +28,7 @@ class Type;
 
 class AMDGPUTargetStreamer : public MCTargetStreamer {
 protected:
+  AMDGPU::CodeObject::MetadataStreamer CodeObjectMetadataStreamer;
   MCContext &getContext() const { return Streamer.getContext(); }
 
 public:
@@ -47,15 +49,19 @@ public:
 
   virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
 
-  virtual void EmitRuntimeMetadata(const FeatureBitset &Features,
-                                   const Module &M) = 0;
+  virtual void EmitStartOfCodeObjectMetadata(const FeatureBitset &Features,
+                                             const Module &Mod);
 
-  /// \returns False on success, true on failure.
-  virtual bool EmitRuntimeMetadata(const FeatureBitset &Features,
-                                   StringRef Metadata) = 0;
+  virtual void EmitKernelCodeObjectMetadata(const Function &Func);
+
+  virtual void EmitEndOfCodeObjectMetadata(const FeatureBitset &Features);
+
+  /// \returns True on success, false on failure.
+  virtual bool EmitCodeObjectMetadata(const FeatureBitset &Features,
+                                      StringRef YamlString) = 0;
 };
 
-class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
   formatted_raw_ostream &OS;
 public:
   AMDGPUTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
@@ -74,15 +80,12 @@ public:
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 
-  void EmitRuntimeMetadata(const FeatureBitset &Features,
-                           const Module &M) override;
-
-  /// \returns False on success, true on failure.
-  bool EmitRuntimeMetadata(const FeatureBitset &Features,
-                           StringRef Metadata) override;
+  /// \returns True on success, false on failure.
+  bool EmitCodeObjectMetadata(const FeatureBitset &Features,
+                              StringRef YamlString) override;
 };
 
-class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
   MCStreamer &Streamer;
 
   void EmitAMDGPUNote(const MCExpr *DescSize,
@@ -109,12 +112,9 @@ public:
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 
-  void EmitRuntimeMetadata(const FeatureBitset &Features,
-                           const Module &M) override;
-
-  /// \returns False on success, true on failure.
-  bool EmitRuntimeMetadata(const FeatureBitset &Features,
-                           StringRef Metadata) override;
+  /// \returns True on success, false on failure.
+  bool EmitCodeObjectMetadata(const FeatureBitset &Features,
+                              StringRef YamlString) override;
 };
 
 }

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt Wed Mar 22 17:32:22 2017
@@ -1,13 +1,12 @@
-
 add_llvm_library(LLVMAMDGPUDesc
   AMDGPUAsmBackend.cpp
+  AMDGPUCodeObjectMetadataStreamer.cpp
   AMDGPUELFObjectWriter.cpp
   AMDGPUELFStreamer.cpp
+  AMDGPUMCAsmInfo.cpp
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
-  AMDGPUMCAsmInfo.cpp
-  AMDGPURuntimeMD.cpp
   AMDGPUTargetStreamer.cpp
   R600MCCodeEmitter.cpp
   SIMCCodeEmitter.cpp
-  )
+)

Added: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-from-llvm-ir-full.ll Wed Mar 22 17:32:22 2017
@@ -0,0 +1,1281 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-comd -amdgpu-verify-comd -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+%struct.A = type { i8, float }
+%opencl.image1d_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+%opencl.queue_t = type opaque
+%opencl.pipe_t = type opaque
+%struct.B = type { i32 addrspace(1)*}
+%opencl.clk_event_t = type opaque
+
+; CHECK: ---
+; CHECK:  Version: [ 1, 0 ]
+
+; CHECK:  Isa:
+; CHECK:    WavefrontSize:        64
+; CHECK:    LocalMemorySize:      65536
+; CHECK:    EUsPerCU:             4
+; CHECK:    MaxWavesPerEU:        10
+; CHECK:    MaxFlatWorkGroupSize: 2048
+; GFX700:   SGPRAllocGranule:     8
+; GFX800:   SGPRAllocGranule:     16
+; GFX900:   SGPRAllocGranule:     16
+; GFX700:   TotalNumSGPRs:        512
+; GFX800:   TotalNumSGPRs:        800
+; GFX900:   TotalNumSGPRs:        800
+; GFX700:   AddressableNumSGPRs:  104
+; GFX800:   AddressableNumSGPRs:  96
+; GFX900:   AddressableNumSGPRs:  102
+; CHECK:    VGPRAllocGranule:     4
+; CHECK:    TotalNumVGPRs:        256
+; CHECK:    AddressableNumVGPRs:  256
+
+; CHECK:  Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+
+; CHECK:  Kernels:
+
+; CHECK:      - Name:            test_char
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          1
+; CHECK-NEXT:       Align:         1
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      char
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_char(i8 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
+    !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_ushort2
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     U16
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      ushort2
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10
+    !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_int3
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          16
+; CHECK-NEXT:       Align:         16
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int3
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_int3(<3 x i32> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11
+    !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_ulong4
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          32
+; CHECK-NEXT:       Align:         32
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     U64
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      ulong4
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12
+    !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_half8
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          16
+; CHECK-NEXT:       Align:         16
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     F16
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      half8
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_half8(<8 x half> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13
+    !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_float16
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          64
+; CHECK-NEXT:       Align:         64
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     F32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      float16
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_float16(<16 x float> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14
+    !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_double16
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          128
+; CHECK-NEXT:       Align:         128
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     F64
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      double16
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_double16(<16 x double> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15
+    !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_pointer
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16
+    !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_image
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          Image
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      image2d_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17
+    !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_sampler
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          Sampler
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      sampler_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_sampler(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18
+    !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_queue
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          Queue
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      queue_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19
+    !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_struct
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Private
+; CHECK-NEXT:       TypeName:      struct A
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_struct(%struct.A* byval %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
+    !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_i128
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          16
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      i128
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_i128(i128 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21
+    !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_multi_arg
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I16
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      short2
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      char3
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
+    !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24
+    !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK:      - Name:            test_addr_space
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Constant
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       PointeeAlign:  4
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
+                                           i32 addrspace(2)* %c,
+                                           i32 addrspace(3)* %l)
+    !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
+    !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK:      - Name:            test_type_qual
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       IsVolatile:    true
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       IsConst:       true
+; CHECK-NEXT:       IsRestrict:    true
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          Pipe
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       IsPipe:        true
+; CHECK-NEXT:       TypeName:      'int *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a,
+                                          i32 addrspace(1)* %b,
+                                          %opencl.pipe_t addrspace(1)* %c)
+    !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51
+    !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
+  ret void
+}
+
+; CHECK:      - Name:            test_access_qual
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          Image
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       ReadOnly
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      image1d_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          Image
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       WriteOnly
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      image2d_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          Image
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       ReadWrite
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      image3d_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro,
+                                            %opencl.image2d_t addrspace(1)* %wo,
+                                            %opencl.image3d_t addrspace(1)* %rw)
+    !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62
+    !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_half
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   half
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_half(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_float
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   float
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_float(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_double
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   double
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_double(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_char
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   char
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_char(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_short
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   short
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_short(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_long
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   long
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_long(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
+  ret void
+}
+
+; CHECK:      - Name:            test_vec_type_hint_unknown
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       VecTypeHint:   unknown
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      int
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
+  ret void
+}
+
+; CHECK:      - Name:            test_reqd_wgs_vec_type_hint
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       ReqdWorkGroupSize: [ 1, 2, 4 ]
+; CHECK-NEXT:       VecTypeHint:       int
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:              4
+; CHECK-NEXT:       Align:             4
+; CHECK-NEXT:       Kind:              ByValue
+; CHECK-NEXT:       ValueType:         I32
+; CHECK-NEXT:       AccQual:           Default
+; CHECK-NEXT:       TypeName:          int
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:         I64
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:         I64
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:         I64
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:         I8
+; CHECK-NEXT:       AddrSpaceQual:     Global
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5
+    !reqd_work_group_size !6 {
+  ret void
+}
+
+; CHECK:      - Name:            test_wgs_hint_vec_type_hint
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Attrs:
+; CHECK-NEXT:       WorkGroupSizeHint: [ 8, 16, 32 ]
+; CHECK-NEXT:       VecTypeHint:       uint4
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:              4
+; CHECK-NEXT:       Align:             4
+; CHECK-NEXT:       Kind:              ByValue
+; CHECK-NEXT:       ValueType:         I32
+; CHECK-NEXT:       AccQual:           Default
+; CHECK-NEXT:       TypeName:          int
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:         I64
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:         I64
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:         I64
+; CHECK-NEXT:     - Size:              8
+; CHECK-NEXT:       Align:             8
+; CHECK-NEXT:       Kind:              HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:         I8
+; CHECK-NEXT:       AddrSpaceQual:     Global
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7
+    !work_group_size_hint !8 {
+  ret void
+}
+
+; CHECK:      - Name:            test_arg_ptr_to_ptr
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      'int **'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32* addrspace(1)* %a)
+    !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
+    !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_arg_struct_contains_ptr
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Private
+; CHECK-NEXT:       TypeName:      struct B
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B* byval %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
+    !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK:      - Name:            test_arg_vector_of_ptr
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          16
+; CHECK-NEXT:       Align:         16
+; CHECK-NEXT:       Kind:          ByValue
+; CHECK-NEXT:       ValueType:     I32
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       TypeName:      'global int* __attribute__((ext_vector_type(2)))'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a)
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83
+    !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_arg_unknown_builtin_type
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      clk_event_t
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_arg_unknown_builtin_type(
+    %opencl.clk_event_t addrspace(1)* %a)
+    !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84
+    !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:      - Name:            test_pointee_align
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          GlobalBuffer
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Global
+; CHECK-NEXT:       TypeName:      'long *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       PointeeAlign:  1
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'char *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       PointeeAlign:  2
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'char2 *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       PointeeAlign:  4
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'char3 *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       PointeeAlign:  4
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'char4 *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       PointeeAlign:  8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'char8 *'
+; CHECK-NEXT:     - Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       Kind:          DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       PointeeAlign:  16
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:       AddrSpaceQual: Local
+; CHECK-NEXT:       TypeName:      'char16 *'
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       Kind:          HiddenPrintfBuffer
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AddrSpaceQual: Global
+define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
+                                              i8 addrspace(3)* %b,
+                                              <2 x i8> addrspace(3)* %c,
+                                              <3 x i8> addrspace(3)* %d,
+                                              <4 x i8> addrspace(3)* %e,
+                                              <8 x i8> addrspace(3)* %f,
+                                              <16 x i8> addrspace(3)* %g)
+    !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
+    !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
+  ret void
+}
+
+!llvm.printf.fmts = !{!100, !101}
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"int"}
+!4 = !{!""}
+!5 = !{i32 undef, i32 1}
+!6 = !{i32 1, i32 2, i32 4}
+!7 = !{<4 x i32> undef, i32 0}
+!8 = !{i32 8, i32 16, i32 32}
+!9 = !{!"char"}
+!10 = !{!"ushort2"}
+!11 = !{!"int3"}
+!12 = !{!"ulong4"}
+!13 = !{!"half8"}
+!14 = !{!"float16"}
+!15 = !{!"double16"}
+!16 = !{!"int *"}
+!17 = !{!"image2d_t"}
+!18 = !{!"sampler_t"}
+!19 = !{!"queue_t"}
+!20 = !{!"struct A"}
+!21 = !{!"i128"}
+!22 = !{i32 0, i32 0, i32 0}
+!23 = !{!"none", !"none", !"none"}
+!24 = !{!"int", !"short2", !"char3"}
+!25 = !{!"", !"", !""}
+!26 = !{half undef, i32 1}
+!27 = !{float undef, i32 1}
+!28 = !{double undef, i32 1}
+!29 = !{i8 undef, i32 1}
+!30 = !{i16 undef, i32 1}
+!31 = !{i64 undef, i32 1}
+!32 = !{i32 *undef, i32 1}
+!50 = !{i32 1, i32 2, i32 3}
+!51 = !{!"int *", !"int *", !"int *"}
+!60 = !{i32 1, i32 1, i32 1}
+!61 = !{!"read_only", !"write_only", !"read_write"}
+!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
+!70 = !{!"volatile", !"const restrict", !"pipe"}
+!80 = !{!"int **"}
+!81 = !{i32 1}
+!82 = !{!"struct B"}
+!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
+!84 = !{!"clk_event_t"}
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
+!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
+!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
+!94 = !{!"", !"", !"", !"", !"", !"", !""}
+!100 = !{!"1:1:4:%d\5Cn"}
+!101 = !{!"2:1:8:%g\5Cn"}
+
+; NOTES: Displaying notes found at file offset 0x{{[0-9]+}}
+; NOTES-NEXT: Owner    Data size    Description
+; NOTES-NEXT: AMD      0x00000008   Unknown note type: (0x00000001)
+; NOTES-NEXT: AMD      0x0000001b   Unknown note type: (0x00000003)
+; GFX700:     AMD      0x000078b2   Unknown note type: (0x0000000a)
+; GFX800:     AMD      0x000078b2   Unknown note type: (0x0000000a)
+; GFX900:     AMD      0x000078b3   Unknown note type: (0x0000000a)
+
+; PARSER: AMDGPU Code Object Metadata Parser Test: PASS

Added: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-1.ll Wed Mar 22 17:32:22 2017
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s
+
+; Make sure llc does not crash for invalid opencl version metadata.
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Isa:
+; CHECK:   WavefrontSize:        64
+; CHECK:   LocalMemorySize:      65536
+; CHECK:   EUsPerCU:             4
+; CHECK:   MaxWavesPerEU:        10
+; CHECK:   MaxFlatWorkGroupSize: 2048
+; CHECK:   SGPRAllocGranule:     8
+; CHECK:   TotalNumSGPRs:        512
+; CHECK:   AddressableNumSGPRs:  104
+; CHECK:   VGPRAllocGranule:     4
+; CHECK:   TotalNumVGPRs:        256
+; CHECK:   AddressableNumVGPRs:  256
+; CHECK: ...
+
+!opencl.ocl.version = !{}

Added: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-2.ll Wed Mar 22 17:32:22 2017
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s
+
+; Make sure llc does not crash for invalid opencl version metadata.
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Isa:
+; CHECK:   WavefrontSize:        64
+; CHECK:   LocalMemorySize:      65536
+; CHECK:   EUsPerCU:             4
+; CHECK:   MaxWavesPerEU:        10
+; CHECK:   MaxFlatWorkGroupSize: 2048
+; CHECK:   SGPRAllocGranule:     8
+; CHECK:   TotalNumSGPRs:        512
+; CHECK:   AddressableNumSGPRs:  104
+; CHECK:   VGPRAllocGranule:     4
+; CHECK:   TotalNumVGPRs:        256
+; CHECK:   AddressableNumVGPRs:  256
+; CHECK: ...
+
+!opencl.ocl.version = !{!0}
+!0 = !{}

Added: llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/code-object-metadata-invalid-ocl-version-3.ll Wed Mar 22 17:32:22 2017
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-code-object-metadata | FileCheck %s
+
+; Make sure llc does not crash for invalid opencl version metadata.
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Isa:
+; CHECK:   WavefrontSize:        64
+; CHECK:   LocalMemorySize:      65536
+; CHECK:   EUsPerCU:             4
+; CHECK:   MaxWavesPerEU:        10
+; CHECK:   MaxFlatWorkGroupSize: 2048
+; CHECK:   SGPRAllocGranule:     8
+; CHECK:   TotalNumSGPRs:        512
+; CHECK:   AddressableNumSGPRs:  104
+; CHECK:   VGPRAllocGranule:     4
+; CHECK:   TotalNumVGPRs:        256
+; CHECK:   AddressableNumVGPRs:  256
+; CHECK: ...
+
+!opencl.ocl.version = !{!0}
+!0 = !{i32 1}

Removed: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll (removed)
@@ -1,6 +0,0 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
-; check llc does not crash for invalid opencl version metadata
-
-; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
-
-!opencl.ocl.version = !{}

Removed: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll (removed)
@@ -1,7 +0,0 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
-; check llc does not crash for invalid opencl version metadata
-
-; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
-
-!opencl.ocl.version = !{!0}
-!0 = !{}

Removed: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll (removed)
@@ -1,7 +0,0 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
-; check llc does not crash for invalid opencl version metadata
-
-; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
-
-!opencl.ocl.version = !{!0}
-!0 = !{i32 1}

Removed: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (removed)
@@ -1,406 +0,0 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s
-
-%struct.A = type { i8, float }
-%opencl.image1d_t = type opaque
-%opencl.image2d_t = type opaque
-%opencl.image3d_t = type opaque
-%opencl.queue_t = type opaque
-%opencl.pipe_t = type opaque
-%struct.B = type { i32 addrspace(1)*}
-%opencl.clk_event_t = type opaque
-
-; CHECK: ---
-; SI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: 
-; VI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: 
-
-; CHECK:   - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_int3, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_ulong4, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 32, amd.ArgAlign: 32, amd.ArgKind: 0, amd.ArgValueType: 10, amd.ArgTypeName: ulong4, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_half8, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 5, amd.ArgTypeName: half8, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_float16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 64, amd.ArgAlign: 64, amd.ArgKind: 0, amd.ArgValueType: 8, amd.ArgTypeName: float16, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_double16, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 128, amd.ArgAlign: 128, amd.ArgKind: 0, amd.ArgValueType: 11, amd.ArgTypeName: double16, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_pointer, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_image, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_sampler, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 3, amd.ArgValueType: 6, amd.ArgTypeName: sampler_t, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_queue, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 6, amd.ArgValueType: 0, amd.ArgTypeName: queue_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_struct, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct A, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_i128, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 8, amd.ArgKind: 0, amd.ArgValueType: 0, amd.ArgTypeName: i128, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_multi_arg, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 3, amd.ArgTypeName: short2, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_addr_space, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 2, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_type_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsVolatile: 1 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsConst: 1, amd.ArgIsRestrict: 1 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 5, amd.ArgValueType: 0, amd.ArgTypeName: 'int *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0, amd.ArgIsPipe: 1 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_access_qual, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image1d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 1 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image2d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 2 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 4, amd.ArgValueType: 0, amd.ArgTypeName: image3d_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 3 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_half, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: half, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_float, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: float, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_double, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: double, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: char, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_short, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: short, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_long, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: long, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_vec_type_hint_unknown, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.VecTypeHint: unknown, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_reqd_wgs_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.ReqdWorkGroupSize: [ 1, 2, 4 ], amd.VecTypeHint: int, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_wgs_hint_vec_type_hint, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.WorkGroupSizeHint: [ 8, 16, 32 ], amd.VecTypeHint: uint4, amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: int, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_arg_ptr_to_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int **', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
-  ret void
-}
-; CHECK-NEXT:   - { amd.KernelName: test_arg_struct_contains_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: struct B, amd.ArgAddrQual: 0, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_arg_vector_of_ptr, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 16, amd.ArgAlign: 16, amd.ArgKind: 0, amd.ArgValueType: 6, amd.ArgTypeName: 'global int* __attribute__((ext_vector_type(2)))', amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-
-; CHECK-NEXT:   - { amd.KernelName: test_arg_unknown_builtin_type, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 0, amd.ArgTypeName: clk_event_t, amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK-NEXT:   - { amd.KernelName: test_pointee_align, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 9, amd.ArgTypeName: 'long *', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 1, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 2, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char2 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char3 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 4, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char4 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 8, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char8 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgPointeeAlign: 16, amd.ArgKind: 2, amd.ArgValueType: 1, amd.ArgTypeName: 'char16 *', amd.ArgAddrQual: 3, amd.ArgAccQual: 0 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }
-define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
-  ret void
-}
-
-; CHECK-NEXT:...
-
-; PARSER: AMDGPU runtime metadata parser test passes.
-
-; NOTES: Displaying notes found at file offset 0x{{[0-9]+}}
-; NOTES-NEXT: Owner    Data size    Description
-; NOTES-NEXT: AMD      0x00000008   Unknown note type: (0x00000001)
-; NOTES-NEXT: AMD      0x0000001b   Unknown note type: (0x00000003)
-
-; SI:         AMD      0x0000530d   Unknown note type: (0x00000008)
-; VI:         AMD      0x0000530e   Unknown note type: (0x00000008)
-
-!llvm.printf.fmts = !{!100, !101}
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"int"}
-!4 = !{!""}
-!5 = !{i32 undef, i32 1}
-!6 = !{i32 1, i32 2, i32 4}
-!7 = !{<4 x i32> undef, i32 0}
-!8 = !{i32 8, i32 16, i32 32}
-!9 = !{!"char"}
-!10 = !{!"ushort2"}
-!11 = !{!"int3"}
-!12 = !{!"ulong4"}
-!13 = !{!"half8"}
-!14 = !{!"float16"}
-!15 = !{!"double16"}
-!16 = !{!"int *"}
-!17 = !{!"image2d_t"}
-!18 = !{!"sampler_t"}
-!19 = !{!"queue_t"}
-!20 = !{!"struct A"}
-!21 = !{!"i128"}
-!22 = !{i32 0, i32 0, i32 0}
-!23 = !{!"none", !"none", !"none"}
-!24 = !{!"int", !"short2", !"char3"}
-!25 = !{!"", !"", !""}
-!26 = !{half undef, i32 1}
-!27 = !{float undef, i32 1}
-!28 = !{double undef, i32 1}
-!29 = !{i8 undef, i32 1}
-!30 = !{i16 undef, i32 1}
-!31 = !{i64 undef, i32 1}
-!32 = !{i32 *undef, i32 1}
-!50 = !{i32 1, i32 2, i32 3}
-!51 = !{!"int *", !"int *", !"int *"}
-!60 = !{i32 1, i32 1, i32 1}
-!61 = !{!"read_only", !"write_only", !"read_write"}
-!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
-!70 = !{!"volatile", !"const restrict", !"pipe"}
-!80 = !{!"int **"}
-!81 = !{i32 1}
-!82 = !{!"struct B"}
-!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
-!84 = !{!"clk_event_t"}
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-!91 = !{i32 0, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3}
-!92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
-!93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
-!94 = !{!"", !"", !"", !"", !"", !"", !""}
-!100 = !{!"1:1:4:%d\5Cn"}
-!101 = !{!"2:1:8:%g\5Cn"}

Added: llvm/trunk/test/MC/AMDGPU/code-object-metadata-isa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/code-object-metadata-isa.s?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/code-object-metadata-isa.s (added)
+++ llvm/trunk/test/MC/AMDGPU/code-object-metadata-isa.s Wed Mar 22 17:32:22 2017
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
+
+// CHECK:  .amdgpu_code_object_metadata
+// CHECK:    Version: [ 1, 0 ]
+// CHECK:    Isa:
+// CHECK:      WavefrontSize:        64
+// CHECK:      LocalMemorySize:      65536
+// CHECK:      EUsPerCU:             4
+// CHECK:      MaxWavesPerEU:        10
+// CHECK:      MaxFlatWorkGroupSize: 2048
+// GFX700:     SGPRAllocGranule:     8
+// GFX800:     SGPRAllocGranule:     16
+// GFX900:     SGPRAllocGranule:     16
+// GFX700:     TotalNumSGPRs:        512
+// GFX800:     TotalNumSGPRs:        800
+// GFX900:     TotalNumSGPRs:        800
+// GFX700:     AddressableNumSGPRs:  104
+// GFX800:     AddressableNumSGPRs:  96
+// GFX900:     AddressableNumSGPRs:  102
+// CHECK:      VGPRAllocGranule:     4
+// CHECK:      TotalNumVGPRs:        256
+// CHECK:      AddressableNumVGPRs:  256
+// CHECK:    Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+// CHECK:    Kernels:
+// CHECK:      - Name:            test_kernel
+// CHECK:        Language:        OpenCL C
+// CHECK:        LanguageVersion: [ 2, 0 ]
+// CHECK:        Args:
+// CHECK:          - Size:          1
+// CHECK:            Align:         1
+// CHECK:            Kind:          ByValue
+// CHECK:            ValueType:     I8
+// CHECK:            AccQual:       Default
+// CHECK:            TypeName:      char
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenGlobalOffsetX
+// CHECK:            ValueType:     I64
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenGlobalOffsetY
+// CHECK:            ValueType:     I64
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenGlobalOffsetZ
+// CHECK:            ValueType:     I64
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenPrintfBuffer
+// CHECK:            ValueType:     I8
+// CHECK:            AddrSpaceQual: Global
+// CHECK:  .end_amdgpu_code_object_metadata
+.amdgpu_code_object_metadata
+  Version: [ 1, 0 ]
+  Isa:
+    WavefrontSize:        1
+    LocalMemorySize:      1
+    EUsPerCU:             1
+    MaxWavesPerEU:        1
+    MaxFlatWorkGroupSize: 1
+    SGPRAllocGranule:     1
+    TotalNumSGPRs:        1
+    AddressableNumSGPRs:  1
+    VGPRAllocGranule:     1
+    TotalNumVGPRs:        1
+    AddressableNumVGPRs:  1
+  Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+  Kernels:
+    - Name:            test_kernel
+      Language:        OpenCL C
+      LanguageVersion: [ 2, 0 ]
+      Args:
+        - Size:          1
+          Align:         1
+          Kind:          ByValue
+          ValueType:     I8
+          AccQual:       Default
+          TypeName:      char
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetX
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetY
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetZ
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenPrintfBuffer
+          ValueType:     I8
+          AddrSpaceQual: Global
+.end_amdgpu_code_object_metadata

Added: llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-args.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-args.s?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-args.s (added)
+++ llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-args.s Wed Mar 22 17:32:22 2017
@@ -0,0 +1,86 @@
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
+
+// CHECK:  .amdgpu_code_object_metadata
+// CHECK:    Version: [ 1, 0 ]
+// CHECK:    Isa:
+// CHECK:      WavefrontSize:        64
+// CHECK:      LocalMemorySize:      65536
+// CHECK:      EUsPerCU:             4
+// CHECK:      MaxWavesPerEU:        10
+// CHECK:      MaxFlatWorkGroupSize: 2048
+// GFX700:     SGPRAllocGranule:     8
+// GFX800:     SGPRAllocGranule:     16
+// GFX900:     SGPRAllocGranule:     16
+// GFX700:     TotalNumSGPRs:        512
+// GFX800:     TotalNumSGPRs:        800
+// GFX900:     TotalNumSGPRs:        800
+// GFX700:     AddressableNumSGPRs:  104
+// GFX800:     AddressableNumSGPRs:  96
+// GFX900:     AddressableNumSGPRs:  102
+// CHECK:      VGPRAllocGranule:     4
+// CHECK:      TotalNumVGPRs:        256
+// CHECK:      AddressableNumVGPRs:  256
+// CHECK:    Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+// CHECK:    Kernels:
+// CHECK:      - Name:            test_kernel
+// CHECK:        Language:        OpenCL C
+// CHECK:        LanguageVersion: [ 2, 0 ]
+// CHECK:        Args:
+// CHECK:          - Size:          1
+// CHECK:            Align:         1
+// CHECK:            Kind:          ByValue
+// CHECK:            ValueType:     I8
+// CHECK:            AccQual:       Default
+// CHECK:            TypeName:      char
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenGlobalOffsetX
+// CHECK:            ValueType:     I64
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenGlobalOffsetY
+// CHECK:            ValueType:     I64
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenGlobalOffsetZ
+// CHECK:            ValueType:     I64
+// CHECK:          - Size:          8
+// CHECK:            Align:         8
+// CHECK:            Kind:          HiddenPrintfBuffer
+// CHECK:            ValueType:     I8
+// CHECK:            AddrSpaceQual: Global
+// CHECK:  .end_amdgpu_code_object_metadata
+.amdgpu_code_object_metadata
+  Version: [ 1, 0 ]
+  Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+  Kernels:
+    - Name:            test_kernel
+      Language:        OpenCL C
+      LanguageVersion: [ 2, 0 ]
+      Args:
+        - Size:          1
+          Align:         1
+          Kind:          ByValue
+          ValueType:     I8
+          AccQual:       Default
+          TypeName:      char
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetX
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetY
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetZ
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenPrintfBuffer
+          ValueType:     I8
+          AddrSpaceQual: Global
+.end_amdgpu_code_object_metadata

Added: llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s (added)
+++ llvm/trunk/test/MC/AMDGPU/code-object-metadata-kernel-attrs.s Wed Mar 22 17:32:22 2017
@@ -0,0 +1,46 @@
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX700 %s
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX800 %s
+// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=CHECK --check-prefix=GFX900 %s
+
+// CHECK:  .amdgpu_code_object_metadata
+// CHECK:    Version: [ 1, 0 ]
+// CHECK:    Isa:
+// CHECK:      WavefrontSize:        64
+// CHECK:      LocalMemorySize:      65536
+// CHECK:      EUsPerCU:             4
+// CHECK:      MaxWavesPerEU:        10
+// CHECK:      MaxFlatWorkGroupSize: 2048
+// GFX700:     SGPRAllocGranule:     8
+// GFX800:     SGPRAllocGranule:     16
+// GFX900:     SGPRAllocGranule:     16
+// GFX700:     TotalNumSGPRs:        512
+// GFX800:     TotalNumSGPRs:        800
+// GFX900:     TotalNumSGPRs:        800
+// GFX700:     AddressableNumSGPRs:  104
+// GFX800:     AddressableNumSGPRs:  96
+// GFX900:     AddressableNumSGPRs:  102
+// CHECK:      VGPRAllocGranule:     4
+// CHECK:      TotalNumVGPRs:        256
+// CHECK:      AddressableNumVGPRs:  256
+// CHECK:    Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+// CHECK:    Kernels:
+// CHECK:      - Name:            test_kernel
+// CHECK:        Language:        OpenCL C
+// CHECK:        LanguageVersion: [ 2, 0 ]
+// CHECK:    Attrs:
+// CHECK:        ReqdWorkGroupSize: [ 1, 2, 4 ]
+// CHECK:        WorkGroupSizeHint: [ 8, 16, 32 ]
+// CHECK:        VecTypeHint:       int
+// CHECK: .end_amdgpu_code_object_metadata
+.amdgpu_code_object_metadata
+  Version: [ 1, 0 ]
+  Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+  Kernels:
+    - Name:            test_kernel
+      Language:        OpenCL C
+      LanguageVersion: [ 2, 0 ]
+      Attrs:
+        ReqdWorkGroupSize: [ 1, 2, 4 ]
+        WorkGroupSizeHint: [ 8, 16, 32 ]
+        VecTypeHint:       int
+.end_amdgpu_code_object_metadata

Added: llvm/trunk/test/MC/AMDGPU/code-object-metadata-unknown-key.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/code-object-metadata-unknown-key.s?rev=298552&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/code-object-metadata-unknown-key.s (added)
+++ llvm/trunk/test/MC/AMDGPU/code-object-metadata-unknown-key.s Wed Mar 22 17:32:22 2017
@@ -0,0 +1,41 @@
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s
+
+// CHECK: error: unknown key 'UnknownKey'
+.amdgpu_code_object_metadata
+  UnknownKey: [ 2, 0 ]
+  Version: [ 1, 0 ]
+  Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
+  Kernels:
+    - Name:            test_kernel
+      Language:        OpenCL C
+      LanguageVersion: [ 2, 0 ]
+      Args:
+        - Size:          1
+          Align:         1
+          Kind:          ByValue
+          ValueType:     I8
+          AccQual:       Default
+          TypeName:      char
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetX
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetY
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenGlobalOffsetZ
+          ValueType:     I64
+        - Size:          8
+          Align:         8
+          Kind:          HiddenPrintfBuffer
+          ValueType:     I8
+          AddrSpaceQual: Global
+.end_amdgpu_code_object_metadata

Modified: llvm/trunk/test/MC/AMDGPU/hsa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa.s?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa.s Wed Mar 22 17:32:22 2017
@@ -37,25 +37,31 @@
 .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 // ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
 
-.amdgpu_runtime_metadata
-    {
-        amd.MDVersion: [ 2, 0 ]
-        amd.Kernels: [
-            { amd.KernelName: amd_kernel_code_t_test_all },
-            { amd.KernelName: amd_kernel_code_t_minimal }
-        ]
-    }
-.end_amdgpu_runtime_metadata
+.amdgpu_code_object_metadata
+  Version: [ 3, 0 ]
+  Kernels:
+    - Name: amd_kernel_code_t_test_all
+    - Name: amd_kernel_code_t_minimal
+.end_amdgpu_code_object_metadata
 
-// ASM: .amdgpu_runtime_metadata
-// ASM:  {
-// ASM:    amd.MDVersion: [ 2, 0 ]
-// ASM:    amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 },
-// ASM:    amd.Kernels:
-// ASM:      - { amd.KernelName: amd_kernel_code_t_test_all }
-// ASM:      - { amd.KernelName: amd_kernel_code_t_minimal }
-// ASM:  }
-// ASM: .end_amdgpu_runtime_metadata
+// ASM: .amdgpu_code_object_metadata
+// ASM:    Version: [ 3, 0 ]
+// ASM:    Isa:
+// ASM:      WavefrontSize:        64
+// ASM:      LocalMemorySize:      65536
+// ASM:      EUsPerCU:             4
+// ASM:      MaxWavesPerEU:        10
+// ASM:      MaxFlatWorkGroupSize: 2048
+// ASM:      SGPRAllocGranule:     8
+// ASM:      TotalNumSGPRs:        512
+// ASM:      AddressableNumSGPRs:  104
+// ASM:      VGPRAllocGranule:     4
+// ASM:      TotalNumVGPRs:        256
+// ASM:      AddressableNumVGPRs:  256
+// ASM:    Kernels:
+// ASM:      - Name: amd_kernel_code_t_test_all
+// ASM:      - Name: amd_kernel_code_t_minimal
+// ASM: .end_amdgpu_code_object_metadata
 
 .amdgpu_hsa_kernel amd_kernel_code_t_test_all
 .amdgpu_hsa_kernel amd_kernel_code_t_minimal

Removed: llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s (original)
+++ llvm/trunk/test/MC/AMDGPU/runtime-metadata-1.s (removed)
@@ -1,39 +0,0 @@
-// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700
-// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800
-// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900
-
-.amdgpu_runtime_metadata
-  { amd.MDVersion: [ 2, 1 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-    - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-        - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-    - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-        - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-  }
-.end_amdgpu_runtime_metadata
-
-// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-
-// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-
-// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-
-// GCN:      - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-// GCN-NEXT:     - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-// GCN-NEXT:     - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }

Removed: llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s (original)
+++ llvm/trunk/test/MC/AMDGPU/runtime-metadata-2.s (removed)
@@ -1,39 +0,0 @@
-// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX700
-// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX800
-// RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -show-encoding %s | FileCheck %s --check-prefix=GCN --check-prefix=GFX900
-
-.amdgpu_runtime_metadata
-  { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-    - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-        - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-    - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-        - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-        - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-  }
-.end_amdgpu_runtime_metadata
-
-// GFX700: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-
-// GFX800: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 96, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-
-// GFX900: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-
-// GCN:      - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-// GCN-NEXT:     - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } }
-// GCN-NEXT: - { amd.KernelName: test_ushort2, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
-// GCN-NEXT:     - { amd.ArgSize: 4, amd.ArgAlign: 4, amd.ArgKind: 0, amd.ArgValueType: 4, amd.ArgTypeName: ushort2, amd.ArgAccQual: 0 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 }
-// GCN-NEXT:     - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 11, amd.ArgValueType: 1, amd.ArgAddrQual: 1 } } }

Removed: llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s?rev=298551&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s (original)
+++ llvm/trunk/test/MC/AMDGPU/runtime-metadata-invalid-1.s (removed)
@@ -1,106 +0,0 @@
-; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 %s 2>&1 | FileCheck %s
-; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj %s 2>&1 | FileCheck %s
-; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 %s 2>&1 | FileCheck %s
-; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj %s 2>&1 | FileCheck %s
-; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 %s 2>&1 | FileCheck %s
-; RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj %s 2>&1 | FileCheck %s
-
-; CHECK: error: unknown key 'amd.RandomUnknownKey'
-
-	.text
-	.hsa_code_object_version 2,1
-	.hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
-	.amdgpu_runtime_metadata
----
-{ amd.MDVersion: [ 2, 1 ], amd.RandomUnknownKey, amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.Kernels:
-  - { amd.KernelName: test, amd.Language: OpenCL C, amd.LanguageVersion: [ 1, 0 ], amd.Args:
-      - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 1, amd.ArgValueType: 6, amd.ArgTypeName: 'int*', amd.ArgAddrQual: 1, amd.ArgAccQual: 0 }
-      - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
-      - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
-      - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 9, amd.ArgValueType: 9 } } }
-...
-
-	.end_amdgpu_runtime_metadata
-	.globl	test
-	.p2align	8
-	.type	test,@function
-	.amdgpu_hsa_kernel test
-test:                                   ; @test
-	.amd_kernel_code_t
-		amd_code_version_major = 1
-		amd_code_version_minor = 0
-		amd_machine_kind = 1
-		amd_machine_version_major = 8
-		amd_machine_version_minor = 0
-		amd_machine_version_stepping = 3
-		kernel_code_entry_byte_offset = 256
-		kernel_code_prefetch_byte_size = 0
-		max_scratch_backing_memory_byte_size = 0
-		granulated_workitem_vgpr_count = 0
-		granulated_wavefront_sgpr_count = 0
-		priority = 0
-		float_mode = 192
-		priv = 0
-		enable_dx10_clamp = 1
-		debug_mode = 0
-		enable_ieee_mode = 1
-		enable_sgpr_private_segment_wave_byte_offset = 0
-		user_sgpr_count = 6
-		enable_trap_handler = 1
-		enable_sgpr_workgroup_id_x = 1
-		enable_sgpr_workgroup_id_y = 0
-		enable_sgpr_workgroup_id_z = 0
-		enable_sgpr_workgroup_info = 0
-		enable_vgpr_workitem_id = 0
-		enable_exception_msb = 0
-		granulated_lds_size = 0
-		enable_exception = 0
-		enable_sgpr_private_segment_buffer = 1
-		enable_sgpr_dispatch_ptr = 0
-		enable_sgpr_queue_ptr = 0
-		enable_sgpr_kernarg_segment_ptr = 1
-		enable_sgpr_dispatch_id = 0
-		enable_sgpr_flat_scratch_init = 0
-		enable_sgpr_private_segment_size = 0
-		enable_sgpr_grid_workgroup_count_x = 0
-		enable_sgpr_grid_workgroup_count_y = 0
-		enable_sgpr_grid_workgroup_count_z = 0
-		enable_ordered_append_gds = 0
-		private_element_size = 1
-		is_ptr64 = 1
-		is_dynamic_callstack = 0
-		is_debug_enabled = 0
-		is_xnack_enabled = 0
-		workitem_private_segment_byte_size = 0
-		workgroup_group_segment_byte_size = 0
-		gds_segment_byte_size = 0
-		kernarg_segment_byte_size = 8
-		workgroup_fbarrier_count = 0
-		wavefront_sgpr_count = 6
-		workitem_vgpr_count = 3
-		reserved_vgpr_first = 0
-		reserved_vgpr_count = 0
-		reserved_sgpr_first = 0
-		reserved_sgpr_count = 0
-		debug_wavefront_private_segment_offset_sgpr = 0
-		debug_private_segment_buffer_sgpr = 0
-		kernarg_segment_alignment = 4
-		group_segment_alignment = 4
-		private_segment_alignment = 4
-		wavefront_size = 6
-		call_convention = -1
-		runtime_loader_kernel_symbol = 0
-	.end_amd_kernel_code_t
-; BB#0:                                 ; %entry
-	s_load_dwordx2 s[0:1], s[4:5], 0x0
-	v_mov_b32_e32 v2, 0x309
-	s_waitcnt lgkmcnt(0)
-	v_mov_b32_e32 v0, s0
-	v_mov_b32_e32 v1, s1
-	flat_store_dword v[0:1], v2
-	s_endpgm
-.Lfunc_end0:
-	.size	test, .Lfunc_end0-test
-
-	.ident	""
-	.section	".note.GNU-stack"

Modified: llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/ELFDumper.cpp?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/ELFDumper.cpp (original)
+++ llvm/trunk/tools/llvm-readobj/ELFDumper.cpp Wed Mar 22 17:32:22 2017
@@ -129,7 +129,7 @@ public:
   void printMipsReginfo() override;
   void printMipsOptions() override;
 
-  void printAMDGPURuntimeMD() override;
+  void printAMDGPUCodeObjectMetadata() override;
 
   void printStackMap() const override;
 
@@ -2357,7 +2357,7 @@ template <class ELFT> void ELFDumper<ELF
   }
 }
 
-template <class ELFT> void ELFDumper<ELFT>::printAMDGPURuntimeMD() {
+template <class ELFT> void ELFDumper<ELFT>::printAMDGPUCodeObjectMetadata() {
   const Elf_Shdr *Shdr = findSectionByName(*Obj, ".note");
   if (!Shdr) {
     W.startLine() << "There is no .note section in the file.\n";
@@ -2365,7 +2365,7 @@ template <class ELFT> void ELFDumper<ELF
   }
   ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
 
-  const uint32_t RuntimeMDNoteType = 8;
+  const uint32_t RuntimeMDNoteType = 10;
   for (auto I = reinterpret_cast<const Elf_Word *>(&Sec[0]),
        E = I + Sec.size()/4; I != E;) {
     uint32_t NameSZ = I[0];

Modified: llvm/trunk/tools/llvm-readobj/ObjDumper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/ObjDumper.h?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/ObjDumper.h (original)
+++ llvm/trunk/tools/llvm-readobj/ObjDumper.h Wed Mar 22 17:32:22 2017
@@ -59,7 +59,7 @@ public:
   virtual void printMipsOptions() { }
 
   // Only implemented for AMDGPU ELF at this time.
-  virtual void printAMDGPURuntimeMD() {}
+  virtual void printAMDGPUCodeObjectMetadata() {}
 
   // Only implemented for PE/COFF.
   virtual void printCOFFImports() { }

Modified: llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp?rev=298552&r1=298551&r2=298552&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp (original)
+++ llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp Wed Mar 22 17:32:22 2017
@@ -186,9 +186,10 @@ namespace opts {
   cl::opt<bool> MipsOptions("mips-options",
                             cl::desc("Display the MIPS .MIPS.options section"));
 
-  // -amdgpu-runtime-metadata
-  cl::opt<bool> AMDGPURuntimeMD("amdgpu-runtime-metadata",
-                                cl::desc("Display AMDGPU runtime metadata"));
+  // -amdgpu-code-object-metadata
+  cl::opt<bool> AMDGPUCodeObjectMetadata(
+      "amdgpu-code-object-metadata",
+      cl::desc("Display AMDGPU code object metadata"));
 
   // -coff-imports
   cl::opt<bool>
@@ -422,8 +423,8 @@ static void dumpObject(const ObjectFile
         Dumper->printMipsOptions();
     }
     if (Obj->getArch() == llvm::Triple::amdgcn)
-      if (opts::AMDGPURuntimeMD)
-        Dumper->printAMDGPURuntimeMD();
+      if (opts::AMDGPUCodeObjectMetadata)
+        Dumper->printAMDGPUCodeObjectMetadata();
     if (opts::SectionGroups)
       Dumper->printGroupSections();
     if (opts::HashHistogram)




More information about the llvm-commits mailing list