[llvm] r275566 - [AMDGPU] Add metadata for runtime

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 15 07:58:21 PDT 2016


Author: yaxunl
Date: Fri Jul 15 09:58:21 2016
New Revision: 275566

URL: http://llvm.org/viewvc/llvm-project?rev=275566&view=rev
Log:
[AMDGPU] Add metadata for runtime

Added emitting metadata to elf for runtime.

Runtime requires certain information (metadata) about kernels to be able to execute and query them. Such information is emitted to an elf section as a key-value pair stream.

Differential Revision: https://reviews.llvm.org/D21849

Added:
    llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
    llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=275566&r1=275565&r2=275566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Fri Jul 15 09:58:21 2016
@@ -39,7 +39,9 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "AMDGPURuntimeMetadata.h"
 
+using namespace ::AMDGPU;
 using namespace llvm;
 
 // TODO: This should get the default rounding mode from the kernel. We just set
@@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
   TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
                                     "AMD", "AMDGPU");
+  emitStartOfRuntimeMetadata(M);
 }
 
 void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
@@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
     }
   }
 
+  emitRuntimeMetadata(*MF.getFunction());
+
   return false;
 }
 
@@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(c
                    *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
   return false;
 }
+
+// Emit a key and an integer value for runtime metadata.
+static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
+                                  RuntimeMD::Key K, uint64_t V,
+                                  unsigned Size) {
+  Streamer->EmitIntValue(K, 1);
+  Streamer->EmitIntValue(V, Size);
+}
+
+// Emit a key and a string value for runtime metadata.
+static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
+                                     RuntimeMD::Key K, StringRef S) {
+  Streamer->EmitIntValue(K, 1);
+  Streamer->EmitIntValue(S.size(), 4);
+  Streamer->EmitBytes(S);
+}
+
+// Emit a key and three integer values for runtime metadata.
+// The three integer values are obtained from MDNode \p Node;
+static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
+                                        RuntimeMD::Key K, MDNode *Node,
+                                        unsigned Size) {
+  Streamer->EmitIntValue(K, 1);
+  Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
+    Node->getOperand(0))->getZExtValue(), Size);
+  Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
+    Node->getOperand(1))->getZExtValue(), Size);
+  Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
+    Node->getOperand(2))->getZExtValue(), Size);
+}
+
+void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
+  OutStreamer->SwitchSection(getObjFileLowering().getContext()
+    .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
+
+  emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
+                        RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
+  if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
+                          RuntimeMD::OpenCL_C, 1);
+    auto Node = MD->getOperand(0);
+    unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
+                             ->getZExtValue();
+    unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
+                             ->getZExtValue();
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
+                          Major * 100 + Minor * 10, 2);
+  }
+}
+
+static Twine getOCLTypeName(Type *Ty, bool isSigned) {
+  if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) {
+    Type* EleTy = VecTy->getElementType();
+    unsigned Size = VecTy->getVectorNumElements();
+    return getOCLTypeName(EleTy, isSigned) + Twine(Size);
+  }
+  switch (Ty->getTypeID()) {
+  case Type::HalfTyID:   return "half";
+  case Type::FloatTyID:  return "float";
+  case Type::DoubleTyID: return "double";
+  case Type::IntegerTyID: {
+    if (!isSigned)
+      return Twine('u') + getOCLTypeName(Ty, true);
+    auto IntTy = cast<IntegerType>(Ty);
+    auto BW = IntTy->getIntegerBitWidth();
+    switch (BW) {
+    case 8:
+      return "char";
+    case 16:
+      return "short";
+    case 32:
+      return "int";
+    case 64:
+      return "long";
+    default:
+      return Twine("i") + Twine(BW);
+    }
+  }
+  default:
+    llvm_unreachable("invalid type");
+  }
+}
+
+static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
+         Type *Ty, StringRef TypeName) {
+  if (auto VT = dyn_cast<VectorType>(Ty))
+    return getRuntimeMDValueType(VT->getElementType(), TypeName);
+  else if (auto PT = dyn_cast<PointerType>(Ty))
+    return getRuntimeMDValueType(PT->getElementType(), TypeName);
+  else if (Ty->isHalfTy())
+    return RuntimeMD::KernelArg::F16;
+  else if (Ty->isFloatTy())
+    return RuntimeMD::KernelArg::F32;
+  else if (Ty->isDoubleTy())
+    return RuntimeMD::KernelArg::F64;
+  else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) {
+    bool Signed = !TypeName.startswith("u");
+    switch (intTy->getIntegerBitWidth()) {
+    case 8:
+      return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
+    case 16:
+      return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
+    case 32:
+      return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
+    case 64:
+      return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
+    default:
+      // Runtime does not recognize other integer types. Report as
+      // struct type.
+      return RuntimeMD::KernelArg::Struct;
+    }
+  } else
+    return RuntimeMD::KernelArg::Struct;
+}
+
+void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
+  if (!F.getMetadata("kernel_arg_type"))
+    return;
+
+  MCContext &Context = getObjFileLowering().getContext();
+  OutStreamer->SwitchSection(
+      Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
+  OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
+  emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
+
+  for (auto &Arg:F.args()) {
+    // Emit KeyArgBegin.
+    unsigned I = Arg.getArgNo();
+    OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
+
+    // Emit KeyArgSize and KeyArgAlign.
+    auto T = Arg.getType();
+    auto DL = F.getParent()->getDataLayout();
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
+                          DL.getTypeAllocSize(T), 4);
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
+                          DL.getABITypeAlignment(T), 4);
+
+    // Emit KeyArgTypeName.
+    auto TypeName = dyn_cast<MDString>(F.getMetadata(
+      "kernel_arg_type")->getOperand(I))->getString();
+    emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
+
+    // Emit KeyArgName.
+    if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
+      auto ArgName = cast<MDString>(ArgNameMD->getOperand(
+        I))->getString();
+      emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
+    }
+
+    // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
+    auto TypeQual = cast<MDString>(F.getMetadata(
+      "kernel_arg_type_qual")->getOperand(I))->getString();
+    SmallVector<StringRef, 1> SplitQ;
+    TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
+    for (auto &I:SplitQ) {
+      auto Key = StringSwitch<RuntimeMD::Key>(I)
+        .Case("volatile", RuntimeMD::KeyArgIsVolatile)
+        .Case("restrict", RuntimeMD::KeyArgIsRestrict)
+        .Case("const",    RuntimeMD::KeyArgIsConst)
+        .Case("pipe",     RuntimeMD::KeyArgIsPipe)
+        .Default(RuntimeMD::KeyNull);
+      OutStreamer->EmitIntValue(Key, 1);
+    }
+
+    // Emit KeyArgTypeKind.
+    auto BaseTypeName = cast<MDString>(
+      F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
+    auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
+      .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
+      .Case("queue_t",   RuntimeMD::KernelArg::Queue)
+      .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
+             "image2d_t" , "image2d_array_t",  RuntimeMD::KernelArg::Image)
+      .Cases("image2d_depth_t", "image2d_array_depth_t",
+             "image2d_msaa_t", "image2d_array_msaa_t",
+             "image2d_msaa_depth_t",  RuntimeMD::KernelArg::Image)
+      .Cases("image2d_array_msaa_depth_t", "image3d_t",
+             RuntimeMD::KernelArg::Image)
+      .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
+               RuntimeMD::KernelArg::Value);
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
+
+    // Emit KeyArgValueType.
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
+                          getRuntimeMDValueType(T, BaseTypeName), 2);
+
+    // Emit KeyArgAccQual.
+    auto AccQual = cast<MDString>(F.getMetadata(
+      "kernel_arg_access_qual")->getOperand(I))->getString();
+    auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
+      .Case("read_only",  RuntimeMD::KernelArg::ReadOnly)
+      .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
+      .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
+      .Default(RuntimeMD::KernelArg::None);
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
+                          AQ, 1);
+
+    // Emit KeyArgAddrQual.
+    if (isa<PointerType>(T))
+      emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
+                            T->getPointerAddressSpace(), 1);
+
+    // Emit KeyArgEnd
+    OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
+  }
+
+  // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
+  if (auto RWGS = F.getMetadata("reqd_work_group_size"))
+    emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
+                                RWGS, 4);
+  if (auto WGSH = F.getMetadata("work_group_size_hint"))
+    emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
+                                WGSH, 4);
+  if (auto VTH = F.getMetadata("vec_type_hint")) {
+    auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
+      VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
+      VTH->getOperand(1))->getZExtValue()).str();
+    emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
+                             TypeName);
+  }
+
+  // Emit KeyKernelEnd
+  OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
+}

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=275566&r1=275565&r2=275566&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Fri Jul 15 09:58:21 2016
@@ -127,6 +127,10 @@ public:
                        unsigned AsmVariant, const char *ExtraCode,
                        raw_ostream &O) override;
 
+  void emitStartOfRuntimeMetadata(const Module &M);
+
+  void emitRuntimeMetadata(const Function &F);
+
 protected:
   std::vector<std::string> DisasmLines, HexLines;
   size_t DisasmLineMaxLen;

Added: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=275566&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (added)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Fri Jul 15 09:58:21 2016
@@ -0,0 +1,138 @@
+//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Enums and structure types used by runtime metadata.
+///
+/// Runtime requests certain information (metadata) about kernels to be able
+/// to execute the kernels and answer the queries about the kernels.
+/// The metadata is represented as a byte stream in an ELF section of a
+/// binary (code object). The byte stream consists of key-value pairs.
+/// Each key is an 8 bit unsigned integer. Each value can be an integer,
+/// a string, or a stream of key-value pairs. There are 3 levels of key-value
+/// pair streams. At the beginning of the ELF section is the top level
+/// key-value pair stream. A kernel-level key-value pair stream starts after
+/// encountering KeyKernelBegin and ends immediately before encountering
+/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
+/// after encountering KeyArgBegin and ends immediately before encountering
+/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
+/// level key-value pair stream. A kernel-argument-level key-value pair stream
+/// can only appear in a kernel-level key-value pair stream.
+///
+/// The format should be kept backward compatible. New enum values and bit
+/// fields should be appended at the end. It is suggested to bump up the
+/// revision number whenever the format changes and document the change
+/// in the revision in this header.
+///
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
+
+#include <stdint.h>
+
+namespace AMDGPU {
+
+namespace RuntimeMD {
+
+  // Version and revision of runtime metadata
+  const unsigned char MDVersion   = 1;
+  const unsigned char MDRevision  = 0;
+
+  // ELF section name containing runtime metadata
+  const char SectionName[] = ".AMDGPU.runtime_metadata";
+
+  // Enumeration values of keys in runtime metadata.
+  enum Key {
+    KeyNull                     = 0, // Place holder. Ignored when encountered
+    KeyMDVersion                = 1, // Runtime metadata version
+    KeyLanguage                 = 2, // Language
+    KeyLanguageVersion          = 3, // Language version
+    KeyKernelBegin              = 4, // Beginning of kernel-level stream
+    KeyKernelEnd                = 5, // End of kernel-level stream
+    KeyKernelName               = 6, // Kernel name
+    KeyArgBegin                 = 7, // Beginning of kernel-arg-level stream
+    KeyArgEnd                   = 8, // End of kernel-arg-level stream
+    KeyArgSize                  = 9, // Kernel arg size
+    KeyArgAlign                 = 10, // Kernel arg alignment
+    KeyArgTypeName              = 11, // Kernel type name
+    KeyArgName                  = 12, // Kernel name
+    KeyArgTypeKind              = 13, // Kernel argument type kind
+    KeyArgValueType             = 14, // Kernel argument value type
+    KeyArgAddrQual              = 15, // Kernel argument address qualifier
+    KeyArgAccQual               = 16, // Kernel argument access qualifier
+    KeyArgIsConst               = 17, // Kernel argument is const qualified
+    KeyArgIsRestrict            = 18, // Kernel argument is restrict qualified
+    KeyArgIsVolatile            = 19, // Kernel argument is volatile qualified
+    KeyArgIsPipe                = 20, // Kernel argument is pipe qualified
+    KeyReqdWorkGroupSize        = 21, // Required work group size
+    KeyWorkGroupSizeHint        = 22, // Work group size hint
+    KeyVecTypeHint              = 23, // Vector type hint
+    KeyKernelIndex              = 24, // Kernel index for device enqueue
+    KeySGPRs                    = 25, // Number of SGPRs
+    KeyVGPRs                    = 26, // Number of VGPRs
+    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
+    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
+    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
+    KeyMaxWorkGroupSize         = 30, // Maximum work group size
+    KeyNoPartialWorkGroups      = 31, // No partial work groups
+  };
+
+  enum Language : uint8_t {
+    OpenCL_C      = 0,
+    HCC           = 1,
+    OpenMP        = 2,
+    OpenCL_CPP    = 3,
+};
+
+  enum LanguageVersion : uint16_t {
+    V100          = 100,
+    V110          = 110,
+    V120          = 120,
+    V200          = 200,
+    V210          = 210,
+  };
+
+  namespace KernelArg {
+    enum TypeKind : uint8_t {
+      Value     = 0,
+      Pointer   = 1,
+      Image     = 2,
+      Sampler   = 3,
+      Queue     = 4,
+    };
+
+    enum ValueType : uint16_t {
+      Struct  = 0,
+      I8      = 1,
+      U8      = 2,
+      I16     = 3,
+      U16     = 4,
+      F16     = 5,
+      I32     = 6,
+      U32     = 7,
+      F32     = 8,
+      I64     = 9,
+      U64     = 10,
+      F64     = 11,
+    };
+
+    enum AccessQualifer : uint8_t {
+      None       = 0,
+      ReadOnly   = 1,
+      WriteOnly  = 2,
+      ReadWrite  = 3,
+    };
+  } // namespace KernelArg
+} // namespace RuntimeMD
+} // namespace AMDGPU
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H

Added: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=275566&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Fri Jul 15 09:58:21 2016
@@ -0,0 +1,848 @@
+; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+
+%struct.A = type { i8, float }
+%opencl.image1d_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+%opencl.queue_t = type opaque
+%opencl.pipe_t = type opaque
+%struct.B = type { i32 addrspace(1)*}
+%opencl.clk_event_t = type opaque
+
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .short	256
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .short	200
+
+; CHECK-LABEL:{{^}}test_char:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"test_char"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	1
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	1
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .ascii	"char"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_ushort2:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	12
+; CHECK-NEXT: .ascii	"test_ushort2"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"ushort2"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	4
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_int3:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"test_int3"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .ascii	"int3"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_ulong4:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	11
+; CHECK-NEXT: .ascii	"test_ulong4"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	32
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	32
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	6
+; CHECK-NEXT: .ascii	"ulong4"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	10
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_half8:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	10
+; CHECK-NEXT: .ascii	"test_half8"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"half8"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	5
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_float16:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	12
+; CHECK-NEXT: .ascii	"test_float16"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	64
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	64
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"float16"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	8
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_double16:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	13
+; CHECK-NEXT: .ascii	"test_double16"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	128
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	128
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .ascii	"double16"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	11
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_pointer:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	12
+; CHECK-NEXT: .ascii	"test_pointer"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_image:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	10
+; CHECK-NEXT: .ascii	"test_image"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"image2d_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_sampler:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	12
+; CHECK-NEXT: .ascii	"test_sampler"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"sampler_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_queue:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	10
+; CHECK-NEXT: .ascii	"test_queue"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	7
+; CHECK-NEXT: .ascii	"queue_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_struct:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	11
+; CHECK-NEXT: .ascii	"test_struct"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .ascii	"struct A"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_i128:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"test_i128"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .ascii	"i128"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_multi_arg:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	14
+; CHECK-NEXT: .ascii	"test_multi_arg"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	3
+; CHECK-NEXT: .ascii	"int"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	6
+; CHECK-NEXT: .ascii	"short2"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	3
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"char3"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_addr_space:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	15
+; CHECK-NEXT: .ascii	"test_addr_space"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_type_qual:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	14
+; CHECK-NEXT: .ascii	"test_type_qual"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	19
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	17
+; CHECK-NEXT: .byte	18
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	20
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_access_qual:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .ascii	"test_access_qual"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"image1d_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"image2d_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	9
+; CHECK-NEXT: .ascii	"image3d_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	27
+; CHECK-NEXT: .ascii	"test_reqd_wgs_vec_type_hint"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	3
+; CHECK-NEXT: .ascii	"int"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	21
+; CHECK-NEXT: .long	1
+; CHECK-NEXT: .long	2
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	23
+; CHECK-NEXT: .long	3
+; CHECK-NEXT: .ascii	"int"
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	27
+; CHECK-NEXT: .ascii	"test_wgs_hint_vec_type_hint"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	3
+; CHECK-NEXT: .ascii	"int"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	22
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .long	32
+; CHECK-NEXT: .byte	23
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"uint4"
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	19
+; CHECK-NEXT: .ascii	"test_arg_ptr_to_ptr"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	6
+; CHECK-NEXT: .ascii	"int **"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	28
+; CHECK-NEXT: .ascii	"test_arg_struct_contains_ptr"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .ascii	"struct B"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	22
+; CHECK-NEXT: .ascii	"test_arg_vector_of_ptr"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	16
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	47
+; CHECK-NEXT: .ascii	"global int* __attribute__((ext_vector_type(2)))"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
+; CHECK: .section        .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
+; CHECK-NEXT: .long	29
+; CHECK-NEXT: .ascii	"test_arg_unknown_builtin_type"
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	11
+; CHECK-NEXT: .ascii	"clk_event_t"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	0
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
+define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"int"}
+!4 = !{!""}
+!5 = !{i32 undef, i32 1}
+!6 = !{i32 1, i32 2, i32 4}
+!7 = !{<4 x i32> undef, i32 0}
+!8 = !{i32 8, i32 16, i32 32}
+!9 = !{!"char"}
+!10 = !{!"ushort2"}
+!11 = !{!"int3"}
+!12 = !{!"ulong4"}
+!13 = !{!"half8"}
+!14 = !{!"float16"}
+!15 = !{!"double16"}
+!16 = !{!"int *"}
+!17 = !{!"image2d_t"}
+!18 = !{!"sampler_t"}
+!19 = !{!"queue_t"}
+!20 = !{!"struct A"}
+!21 = !{!"i128"}
+!22 = !{i32 0, i32 0, i32 0}
+!23 = !{!"none", !"none", !"none"}
+!24 = !{!"int", !"short2", !"char3"}
+!25 = !{!"", !"", !""}
+!50 = !{i32 1, i32 2, i32 3}
+!51 = !{!"int *", !"int *", !"int *"}
+!60 = !{i32 1, i32 1, i32 1}
+!61 = !{!"read_only", !"write_only", !"read_write"}
+!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
+!70 = !{!"volatile", !"const restrict", !"pipe"}
+!80 = !{!"int **"}
+!81 = !{i32 1}
+!82 = !{!"struct B"}
+!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
+!84 = !{!"clk_event_t"}
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}




More information about the llvm-commits mailing list