[llvm] r275676 - Re-commit [AMDGPU] Add metadata for runtime
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 15 22:09:21 PDT 2016
Author: yaxunl
Date: Sat Jul 16 00:09:21 2016
New Revision: 275676
URL: http://llvm.org/viewvc/llvm-project?rev=275676&view=rev
Log:
Re-commit [AMDGPU] Add metadata for runtime
Attempting to fix lit test failure on ppc.
Added:
llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=275676&r1=275675&r2=275676&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Sat Jul 16 00:09:21 2016
@@ -39,7 +39,9 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "AMDGPURuntimeMetadata.h"
+using namespace ::AMDGPU;
using namespace llvm;
// TODO: This should get the default rounding mode from the kernel. We just set
@@ -111,6 +113,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
"AMD", "AMDGPU");
+ emitStartOfRuntimeMetadata(M);
}
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
@@ -244,6 +247,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunct
}
}
+ emitRuntimeMetadata(*MF.getFunction());
+
return false;
}
@@ -740,3 +745,227 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(c
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
return false;
}
+
+// Emit a key and an integer value for runtime metadata.
+static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
+ RuntimeMD::Key K, uint64_t V,
+ unsigned Size) {
+ Streamer->EmitIntValue(K, 1);
+ Streamer->EmitIntValue(V, Size);
+}
+
+// Emit a key and a string value for runtime metadata.
+static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
+ RuntimeMD::Key K, StringRef S) {
+ Streamer->EmitIntValue(K, 1);
+ Streamer->EmitIntValue(S.size(), 4);
+ Streamer->EmitBytes(S);
+}
+
+// Emit a key and three integer values for runtime metadata.
+// The three integer values are obtained from MDNode \p Node;
+static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
+ RuntimeMD::Key K, MDNode *Node,
+ unsigned Size) {
+ Streamer->EmitIntValue(K, 1);
+ Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
+ Node->getOperand(0))->getZExtValue(), Size);
+ Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
+ Node->getOperand(1))->getZExtValue(), Size);
+ Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
+ Node->getOperand(2))->getZExtValue(), Size);
+}
+
+void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
+ OutStreamer->SwitchSection(getObjFileLowering().getContext()
+ .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
+
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
+ RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
+ if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
+ RuntimeMD::OpenCL_C, 1);
+ auto Node = MD->getOperand(0);
+ unsigned short Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
+ ->getZExtValue();
+ unsigned short Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
+ ->getZExtValue();
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
+ Major * 100 + Minor * 10, 2);
+ }
+}
+
+static std::string getOCLTypeName(Type *Ty, bool isSigned) {
+ if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) {
+ Type* EleTy = VecTy->getElementType();
+ unsigned Size = VecTy->getVectorNumElements();
+ return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str();
+ }
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID: return "half";
+ case Type::FloatTyID: return "float";
+ case Type::DoubleTyID: return "double";
+ case Type::IntegerTyID: {
+ if (!isSigned)
+ return (Twine('u') + Twine(getOCLTypeName(Ty, true))).str();
+ auto IntTy = cast<IntegerType>(Ty);
+ auto BW = IntTy->getIntegerBitWidth();
+ switch (BW) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BW)).str();
+ }
+ }
+ default:
+ llvm_unreachable("invalid type");
+ }
+}
+
+static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
+ Type *Ty, StringRef TypeName) {
+ if (auto VT = dyn_cast<VectorType>(Ty))
+ return getRuntimeMDValueType(VT->getElementType(), TypeName);
+ else if (auto PT = dyn_cast<PointerType>(Ty))
+ return getRuntimeMDValueType(PT->getElementType(), TypeName);
+ else if (Ty->isHalfTy())
+ return RuntimeMD::KernelArg::F16;
+ else if (Ty->isFloatTy())
+ return RuntimeMD::KernelArg::F32;
+ else if (Ty->isDoubleTy())
+ return RuntimeMD::KernelArg::F64;
+ else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) {
+ bool Signed = !TypeName.startswith("u");
+ switch (intTy->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
+ case 16:
+ return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
+ case 32:
+ return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
+ case 64:
+ return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
+ default:
+ // Runtime does not recognize other integer types. Report as
+ // struct type.
+ return RuntimeMD::KernelArg::Struct;
+ }
+ } else
+ return RuntimeMD::KernelArg::Struct;
+}
+
+void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
+ if (!F.getMetadata("kernel_arg_type"))
+ return;
+
+ MCContext &Context = getObjFileLowering().getContext();
+ OutStreamer->SwitchSection(
+ Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
+ OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
+ emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
+
+ for (auto &Arg:F.args()) {
+ // Emit KeyArgBegin.
+ unsigned I = Arg.getArgNo();
+ OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
+
+ // Emit KeyArgSize and KeyArgAlign.
+ auto T = Arg.getType();
+ auto DL = F.getParent()->getDataLayout();
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
+ DL.getTypeAllocSize(T), 4);
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
+ DL.getABITypeAlignment(T), 4);
+
+ // Emit KeyArgTypeName.
+ auto TypeName = dyn_cast<MDString>(F.getMetadata(
+ "kernel_arg_type")->getOperand(I))->getString();
+ emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
+
+ // Emit KeyArgName.
+ if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
+ auto ArgName = cast<MDString>(ArgNameMD->getOperand(
+ I))->getString();
+ emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
+ }
+
+ // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
+ auto TypeQual = cast<MDString>(F.getMetadata(
+ "kernel_arg_type_qual")->getOperand(I))->getString();
+ SmallVector<StringRef, 1> SplitQ;
+ TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
+ for (auto &I:SplitQ) {
+ auto Key = StringSwitch<RuntimeMD::Key>(I)
+ .Case("volatile", RuntimeMD::KeyArgIsVolatile)
+ .Case("restrict", RuntimeMD::KeyArgIsRestrict)
+ .Case("const", RuntimeMD::KeyArgIsConst)
+ .Case("pipe", RuntimeMD::KeyArgIsPipe)
+ .Default(RuntimeMD::KeyNull);
+ OutStreamer->EmitIntValue(Key, 1);
+ }
+
+ // Emit KeyArgTypeKind.
+ auto BaseTypeName = cast<MDString>(
+ F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
+ auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
+ .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
+ .Case("queue_t", RuntimeMD::KernelArg::Queue)
+ .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
+ "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
+ .Cases("image2d_depth_t", "image2d_array_depth_t",
+ "image2d_msaa_t", "image2d_array_msaa_t",
+ "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
+ .Cases("image2d_array_msaa_depth_t", "image3d_t",
+ RuntimeMD::KernelArg::Image)
+ .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
+ RuntimeMD::KernelArg::Value);
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
+
+ // Emit KeyArgValueType.
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
+ getRuntimeMDValueType(T, BaseTypeName), 2);
+
+ // Emit KeyArgAccQual.
+ auto AccQual = cast<MDString>(F.getMetadata(
+ "kernel_arg_access_qual")->getOperand(I))->getString();
+ auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
+ .Case("read_only", RuntimeMD::KernelArg::ReadOnly)
+ .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
+ .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
+ .Default(RuntimeMD::KernelArg::None);
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
+ AQ, 1);
+
+ // Emit KeyArgAddrQual.
+ if (isa<PointerType>(T))
+ emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
+ T->getPointerAddressSpace(), 1);
+
+ // Emit KeyArgEnd
+ OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
+ }
+
+ // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
+ if (auto RWGS = F.getMetadata("reqd_work_group_size"))
+ emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
+ RWGS, 4);
+ if (auto WGSH = F.getMetadata("work_group_size_hint"))
+ emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
+ WGSH, 4);
+ if (auto VTH = F.getMetadata("vec_type_hint")) {
+ auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
+ VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
+ VTH->getOperand(1))->getZExtValue());
+ emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
+ TypeName);
+ }
+
+ // Emit KeyKernelEnd
+ OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h?rev=275676&r1=275675&r2=275676&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.h Sat Jul 16 00:09:21 2016
@@ -127,6 +127,10 @@ public:
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
+ void emitStartOfRuntimeMetadata(const Module &M);
+
+ void emitRuntimeMetadata(const Function &F);
+
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
Added: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=275676&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (added)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Sat Jul 16 00:09:21 2016
@@ -0,0 +1,138 @@
+//===-- AMDGPURuntimeMetadata.h - AMDGPU Runtime Metadata -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Enums and structure types used by runtime metadata.
+///
+/// Runtime requests certain information (metadata) about kernels to be able
+/// to execute the kernels and answer the queries about the kernels.
+/// The metadata is represented as a byte stream in an ELF section of a
+/// binary (code object). The byte stream consists of key-value pairs.
+/// Each key is an 8 bit unsigned integer. Each value can be an integer,
+/// a string, or a stream of key-value pairs. There are 3 levels of key-value
+/// pair streams. At the beginning of the ELF section is the top level
+/// key-value pair stream. A kernel-level key-value pair stream starts after
+/// encountering KeyKernelBegin and ends immediately before encountering
+/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
+/// after encountering KeyArgBegin and ends immediately before encountering
+/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
+/// level key-value pair stream. A kernel-argument-level key-value pair stream
+/// can only appear in a kernel-level key-value pair stream.
+///
+/// The format should be kept backward compatible. New enum values and bit
+/// fields should be appended at the end. It is suggested to bump up the
+/// revision number whenever the format changes and document the change
+/// in the revision in this header.
+///
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
+
+#include <stdint.h>
+
+namespace AMDGPU {
+
+namespace RuntimeMD {
+
+ // Version and revision of runtime metadata
+ const unsigned char MDVersion = 1;
+ const unsigned char MDRevision = 0;
+
+ // ELF section name containing runtime metadata
+ const char SectionName[] = ".AMDGPU.runtime_metadata";
+
+ // Enumeration values of keys in runtime metadata.
+ enum Key {
+ KeyNull = 0, // Place holder. Ignored when encountered
+ KeyMDVersion = 1, // Runtime metadata version
+ KeyLanguage = 2, // Language
+ KeyLanguageVersion = 3, // Language version
+ KeyKernelBegin = 4, // Beginning of kernel-level stream
+ KeyKernelEnd = 5, // End of kernel-level stream
+ KeyKernelName = 6, // Kernel name
+ KeyArgBegin = 7, // Beginning of kernel-arg-level stream
+ KeyArgEnd = 8, // End of kernel-arg-level stream
+ KeyArgSize = 9, // Kernel arg size
+ KeyArgAlign = 10, // Kernel arg alignment
+ KeyArgTypeName = 11, // Kernel type name
+ KeyArgName = 12, // Kernel name
+ KeyArgTypeKind = 13, // Kernel argument type kind
+ KeyArgValueType = 14, // Kernel argument value type
+ KeyArgAddrQual = 15, // Kernel argument address qualifier
+ KeyArgAccQual = 16, // Kernel argument access qualifier
+ KeyArgIsConst = 17, // Kernel argument is const qualified
+ KeyArgIsRestrict = 18, // Kernel argument is restrict qualified
+ KeyArgIsVolatile = 19, // Kernel argument is volatile qualified
+ KeyArgIsPipe = 20, // Kernel argument is pipe qualified
+ KeyReqdWorkGroupSize = 21, // Required work group size
+ KeyWorkGroupSizeHint = 22, // Work group size hint
+ KeyVecTypeHint = 23, // Vector type hint
+ KeyKernelIndex = 24, // Kernel index for device enqueue
+ KeySGPRs = 25, // Number of SGPRs
+ KeyVGPRs = 26, // Number of VGPRs
+ KeyMinWavesPerSIMD = 27, // Minimum number of waves per SIMD
+ KeyMaxWavesPerSIMD = 28, // Maximum number of waves per SIMD
+ KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits
+ KeyMaxWorkGroupSize = 30, // Maximum work group size
+ KeyNoPartialWorkGroups = 31, // No partial work groups
+ };
+
+ enum Language : uint8_t {
+ OpenCL_C = 0,
+ HCC = 1,
+ OpenMP = 2,
+ OpenCL_CPP = 3,
+};
+
+ enum LanguageVersion : uint16_t {
+ V100 = 100,
+ V110 = 110,
+ V120 = 120,
+ V200 = 200,
+ V210 = 210,
+ };
+
+ namespace KernelArg {
+ enum TypeKind : uint8_t {
+ Value = 0,
+ Pointer = 1,
+ Image = 2,
+ Sampler = 3,
+ Queue = 4,
+ };
+
+ enum ValueType : uint16_t {
+ Struct = 0,
+ I8 = 1,
+ U8 = 2,
+ I16 = 3,
+ U16 = 4,
+ F16 = 5,
+ I32 = 6,
+ U32 = 7,
+ F32 = 8,
+ I64 = 9,
+ U64 = 10,
+ F64 = 11,
+ };
+
+ enum AccessQualifer : uint8_t {
+ None = 0,
+ ReadOnly = 1,
+ WriteOnly = 2,
+ ReadWrite = 3,
+ };
+ } // namespace KernelArg
+} // namespace RuntimeMD
+} // namespace AMDGPU
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
Added: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=275676&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Sat Jul 16 00:09:21 2016
@@ -0,0 +1,848 @@
+; RUN: llc -mtriple=amdgcn--amdhsa < %s | FileCheck %s
+
+%struct.A = type { i8, float }
+%opencl.image1d_t = type opaque
+%opencl.image2d_t = type opaque
+%opencl.image3d_t = type opaque
+%opencl.queue_t = type opaque
+%opencl.pipe_t = type opaque
+%struct.B = type { i32 addrspace(1)*}
+%opencl.clk_event_t = type opaque
+
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .short 256
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 3
+; CHECK-NEXT: .short 200
+
+; CHECK-LABEL:{{^}}test_char:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "test_char"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .ascii "char"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 1
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_ushort2:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 12
+; CHECK-NEXT: .ascii "test_ushort2"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 7
+; CHECK-NEXT: .ascii "ushort2"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 4
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_int3:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "test_int3"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .ascii "int3"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_ulong4:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 11
+; CHECK-NEXT: .ascii "test_ulong4"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 6
+; CHECK-NEXT: .ascii "ulong4"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 10
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_half8:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 10
+; CHECK-NEXT: .ascii "test_half8"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "half8"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 5
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_float16:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 12
+; CHECK-NEXT: .ascii "test_float16"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 64
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 64
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 7
+; CHECK-NEXT: .ascii "float16"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 8
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_double16:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 13
+; CHECK-NEXT: .ascii "test_double16"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 128
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 128
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .ascii "double16"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 11
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_pointer:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 12
+; CHECK-NEXT: .ascii "test_pointer"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_image:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 10
+; CHECK-NEXT: .ascii "test_image"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "image2d_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_sampler:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 12
+; CHECK-NEXT: .ascii "test_sampler"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "sampler_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 3
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_queue:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 10
+; CHECK-NEXT: .ascii "test_queue"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 7
+; CHECK-NEXT: .ascii "queue_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_struct:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 11
+; CHECK-NEXT: .ascii "test_struct"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .ascii "struct A"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_i128:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "test_i128"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .ascii "i128"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_multi_arg:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 14
+; CHECK-NEXT: .ascii "test_multi_arg"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .ascii "int"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 6
+; CHECK-NEXT: .ascii "short2"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 3
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "char3"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 1
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_addr_space:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 15
+; CHECK-NEXT: .ascii "test_addr_space"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 3
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(2)* %c, i32 addrspace(3)* %l) !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_type_qual:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 14
+; CHECK-NEXT: .ascii "test_type_qual"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 19
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 17
+; CHECK-NEXT: .byte 18
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "int *"
+; CHECK-NEXT: .byte 20
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_access_qual:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .ascii "test_access_qual"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "image1d_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "image2d_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 9
+; CHECK-NEXT: .ascii "image3d_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 2
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 3
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_reqd_wgs_vec_type_hint:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 27
+; CHECK-NEXT: .ascii "test_reqd_wgs_vec_type_hint"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .ascii "int"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 21
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 23
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .ascii "int"
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_wgs_hint_vec_type_hint:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 27
+; CHECK-NEXT: .ascii "test_wgs_hint_vec_type_hint"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 3
+; CHECK-NEXT: .ascii "int"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 22
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .long 32
+; CHECK-NEXT: .byte 23
+; CHECK-NEXT: .long 5
+; CHECK-NEXT: .ascii "uint4"
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_ptr_to_ptr:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 19
+; CHECK-NEXT: .ascii "test_arg_ptr_to_ptr"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 6
+; CHECK-NEXT: .ascii "int **"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_struct_contains_ptr:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 28
+; CHECK-NEXT: .ascii "test_arg_struct_contains_ptr"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 4
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .ascii "struct B"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_vector_of_ptr:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 22
+; CHECK-NEXT: .ascii "test_arg_vector_of_ptr"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 16
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 47
+; CHECK-NEXT: .ascii "global int* __attribute__((ext_vector_type(2)))"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 6
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+; CHECK-LABEL:{{^}}test_arg_unknown_builtin_type:
+; CHECK: .section .AMDGPU.runtime_metadata
+; CHECK-NEXT: .byte 4
+; CHECK-NEXT: .byte 6
+; CHECK-NEXT: .long 29
+; CHECK-NEXT: .ascii "test_arg_unknown_builtin_type"
+; CHECK-NEXT: .byte 7
+; CHECK-NEXT: .byte 9
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 10
+; CHECK-NEXT: .long 8
+; CHECK-NEXT: .byte 11
+; CHECK-NEXT: .long 11
+; CHECK-NEXT: .ascii "clk_event_t"
+; CHECK-NEXT: .byte 13
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 14
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .byte 16
+; CHECK-NEXT: .byte 0
+; CHECK-NEXT: .byte 15
+; CHECK-NEXT: .byte 1
+; CHECK-NEXT: .byte 8
+; CHECK-NEXT: .byte 5
+
+define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
+ ret void
+}
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"int"}
+!4 = !{!""}
+!5 = !{i32 undef, i32 1}
+!6 = !{i32 1, i32 2, i32 4}
+!7 = !{<4 x i32> undef, i32 0}
+!8 = !{i32 8, i32 16, i32 32}
+!9 = !{!"char"}
+!10 = !{!"ushort2"}
+!11 = !{!"int3"}
+!12 = !{!"ulong4"}
+!13 = !{!"half8"}
+!14 = !{!"float16"}
+!15 = !{!"double16"}
+!16 = !{!"int *"}
+!17 = !{!"image2d_t"}
+!18 = !{!"sampler_t"}
+!19 = !{!"queue_t"}
+!20 = !{!"struct A"}
+!21 = !{!"i128"}
+!22 = !{i32 0, i32 0, i32 0}
+!23 = !{!"none", !"none", !"none"}
+!24 = !{!"int", !"short2", !"char3"}
+!25 = !{!"", !"", !""}
+!50 = !{i32 1, i32 2, i32 3}
+!51 = !{!"int *", !"int *", !"int *"}
+!60 = !{i32 1, i32 1, i32 1}
+!61 = !{!"read_only", !"write_only", !"read_write"}
+!62 = !{!"image1d_t", !"image2d_t", !"image3d_t"}
+!70 = !{!"volatile", !"const restrict", !"pipe"}
+!80 = !{!"int **"}
+!81 = !{i32 1}
+!82 = !{!"struct B"}
+!83 = !{!"global int* __attribute__((ext_vector_type(2)))"}
+!84 = !{!"clk_event_t"}
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
More information about the llvm-commits
mailing list