[llvm] r280829 - AMDGPU: Add hidden kernel arguments to runtime metadata

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 7 10:44:01 PDT 2016


Author: yaxunl
Date: Wed Sep  7 12:44:00 2016
New Revision: 280829

URL: http://llvm.org/viewvc/llvm-project?rev=280829&view=rev
Log:
AMDGPU: Add hidden kernel arguments to runtime metadata

OpenCL kernels have hidden kernel arguments for global offset and printf buffer. For consistency, these hidden arguments should be included in the runtime metadata. Also updated kernel argument kind metadata.

Differential Revision: https://reviews.llvm.org/D23424

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
    llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=280829&r1=280828&r2=280829&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Sep  7 12:44:00 2016
@@ -824,6 +824,15 @@ void AMDGPUAsmPrinter::emitStartOfRuntim
       }
     }
   }
+
+  if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
+    for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
+      auto Node = MD->getOperand(I);
+      if (Node->getNumOperands() > 0)
+        emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyPrintfInfo,
+            cast<MDString>(Node->getOperand(0))->getString());
+    }
+  }
 }
 
 static std::string getOCLTypeName(Type *Ty, bool Signed) {
@@ -896,6 +905,93 @@ static RuntimeMD::KernelArg::ValueType g
   }
 }
 
+static RuntimeMD::KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
+    AMDGPUAS::AddressSpaces A) {
+  switch (A) {
+  case AMDGPUAS::GLOBAL_ADDRESS:
+    return RuntimeMD::KernelArg::Global;
+  case AMDGPUAS::CONSTANT_ADDRESS:
+    return RuntimeMD::KernelArg::Constant;
+  case AMDGPUAS::LOCAL_ADDRESS:
+    return RuntimeMD::KernelArg::Local;
+  case AMDGPUAS::FLAT_ADDRESS:
+    return RuntimeMD::KernelArg::Generic;
+  case AMDGPUAS::REGION_ADDRESS:
+    return RuntimeMD::KernelArg::Region;
+  default:
+    return RuntimeMD::KernelArg::Private;
+  }
+}
+
+static void emitRuntimeMetadataForKernelArg(const DataLayout &DL,
+    MCStreamer &OutStreamer, Type *T,
+    RuntimeMD::KernelArg::Kind Kind,
+    StringRef BaseTypeName = "", StringRef TypeName = "",
+    StringRef ArgName = "", StringRef TypeQual = "", StringRef AccQual = "") {
+  // Emit KeyArgBegin.
+  OutStreamer.EmitIntValue(RuntimeMD::KeyArgBegin, 1);
+
+  // Emit KeyArgSize and KeyArgAlign.
+  emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
+                        DL.getTypeAllocSize(T), 4);
+  emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
+                        DL.getABITypeAlignment(T), 4);
+  if (auto PT = dyn_cast<PointerType>(T)) {
+    auto ET = PT->getElementType();
+    if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
+      emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgPointeeAlign,
+                        DL.getABITypeAlignment(ET), 4);
+  }
+
+  // Emit KeyArgTypeName.
+  if (!TypeName.empty())
+    emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
+
+  // Emit KeyArgName.
+  if (!ArgName.empty())
+    emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
+
+  // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
+  SmallVector<StringRef, 1> SplitQ;
+  TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
+
+  for (StringRef KeyName : SplitQ) {
+    auto Key = StringSwitch<RuntimeMD::Key>(KeyName)
+      .Case("volatile", RuntimeMD::KeyArgIsVolatile)
+      .Case("restrict", RuntimeMD::KeyArgIsRestrict)
+      .Case("const",    RuntimeMD::KeyArgIsConst)
+      .Case("pipe",     RuntimeMD::KeyArgIsPipe)
+      .Default(RuntimeMD::KeyNull);
+    OutStreamer.EmitIntValue(Key, 1);
+  }
+
+  // Emit KeyArgKind.
+  emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgKind, Kind, 1);
+
+  // Emit KeyArgValueType.
+  emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
+                        getRuntimeMDValueType(T, BaseTypeName), 2);
+
+  // Emit KeyArgAccQual.
+  if (!AccQual.empty()) {
+    auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
+      .Case("read_only",  RuntimeMD::KernelArg::ReadOnly)
+      .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
+      .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
+      .Default(RuntimeMD::KernelArg::None);
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1);
+  }
+
+  // Emit KeyArgAddrQual.
+  if (auto *PT = dyn_cast<PointerType>(T))
+    emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
+        getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
+            PT->getAddressSpace())), 1);
+
+  // Emit KeyArgEnd
+  OutStreamer.EmitIntValue(RuntimeMD::KeyArgEnd, 1);
+}
+
 void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
   if (!F.getMetadata("kernel_arg_type"))
     return;
@@ -906,56 +1002,25 @@ void AMDGPUAsmPrinter::emitRuntimeMetada
   OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
   emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyKernelName, F.getName());
 
+  const DataLayout &DL = F.getParent()->getDataLayout();
   for (auto &Arg : F.args()) {
-    // Emit KeyArgBegin.
     unsigned I = Arg.getArgNo();
-    OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
-
-    // Emit KeyArgSize, KeyArgAlign and KeyArgPointeeAlign.
     Type *T = Arg.getType();
-    const DataLayout &DL = F.getParent()->getDataLayout();
-    emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgSize,
-                          DL.getTypeAllocSize(T), 4);
-    emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAlign,
-                          DL.getABITypeAlignment(T), 4);
-    if (auto PT = dyn_cast<PointerType>(T)) {
-      auto ET = PT->getElementType();
-      if (ET->isSized())
-        emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgPointeeAlign,
-                          DL.getABITypeAlignment(ET), 4);
-    }
-
-    // Emit KeyArgTypeName.
     auto TypeName = dyn_cast<MDString>(F.getMetadata(
-      "kernel_arg_type")->getOperand(I))->getString();
-    emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
-
-    // Emit KeyArgName.
-    if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
-      auto ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
-      emitRuntimeMDStringValue(*OutStreamer, RuntimeMD::KeyArgName, ArgName);
-    }
-
-    // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
+        "kernel_arg_type")->getOperand(I))->getString();
+    auto BaseTypeName = cast<MDString>(F.getMetadata(
+        "kernel_arg_base_type")->getOperand(I))->getString();
+    StringRef ArgName;
+    if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
+      ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
     auto TypeQual = cast<MDString>(F.getMetadata(
-      "kernel_arg_type_qual")->getOperand(I))->getString();
-    SmallVector<StringRef, 1> SplitQ;
-    TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
-
-    for (StringRef KeyName : SplitQ) {
-      auto Key = StringSwitch<RuntimeMD::Key>(KeyName)
-        .Case("volatile", RuntimeMD::KeyArgIsVolatile)
-        .Case("restrict", RuntimeMD::KeyArgIsRestrict)
-        .Case("const",    RuntimeMD::KeyArgIsConst)
-        .Case("pipe",     RuntimeMD::KeyArgIsPipe)
-        .Default(RuntimeMD::KeyNull);
-      OutStreamer->EmitIntValue(Key, 1);
-    }
-
-    // Emit KeyArgTypeKind.
-    auto BaseTypeName = cast<MDString>(
-      F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
-    auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
+        "kernel_arg_type_qual")->getOperand(I))->getString();
+    auto AccQual = cast<MDString>(F.getMetadata(
+        "kernel_arg_access_qual")->getOperand(I))->getString();
+    RuntimeMD::KernelArg::Kind Kind;
+    if (TypeQual.find("pipe") != StringRef::npos)
+      Kind = RuntimeMD::KernelArg::Pipe;
+    else Kind = StringSwitch<RuntimeMD::KernelArg::Kind>(BaseTypeName)
       .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
       .Case("queue_t",   RuntimeMD::KernelArg::Queue)
       .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
@@ -965,32 +1030,30 @@ void AMDGPUAsmPrinter::emitRuntimeMetada
              "image2d_msaa_depth_t",  RuntimeMD::KernelArg::Image)
       .Cases("image2d_array_msaa_depth_t", "image3d_t",
              RuntimeMD::KernelArg::Image)
-      .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
-               RuntimeMD::KernelArg::Value);
-    emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
-
-    // Emit KeyArgValueType.
-    emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgValueType,
-                          getRuntimeMDValueType(T, BaseTypeName), 2);
-
-    // Emit KeyArgAccQual.
-    auto AccQual = cast<MDString>(F.getMetadata(
-      "kernel_arg_access_qual")->getOperand(I))->getString();
-    auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
-      .Case("read_only",  RuntimeMD::KernelArg::ReadOnly)
-      .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
-      .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
-      .Default(RuntimeMD::KernelArg::None);
-    emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAccQual, AQ, 1);
-
-    // Emit KeyArgAddrQual.
-    if (auto *PT = dyn_cast<PointerType>(T)) {
-      emitRuntimeMDIntValue(*OutStreamer, RuntimeMD::KeyArgAddrQual,
-                            PT->getAddressSpace(), 1);
+      .Default(isa<PointerType>(T) ?
+                   (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
+                   RuntimeMD::KernelArg::DynamicSharedPointer :
+                   RuntimeMD::KernelArg::GlobalBuffer) :
+                   RuntimeMD::KernelArg::ByValue);
+    emitRuntimeMetadataForKernelArg(DL, *OutStreamer, T,
+        Kind, BaseTypeName, TypeName, ArgName, TypeQual, AccQual);
+  }
+
+  // Emit hidden kernel arguments for OpenCL kernels.
+  if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
+    auto Int64T = Type::getInt64Ty(F.getContext());
+    emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T,
+                                    RuntimeMD::KernelArg::HiddenGlobalOffsetX);
+    emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T,
+                                    RuntimeMD::KernelArg::HiddenGlobalOffsetY);
+    emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int64T,
+                                    RuntimeMD::KernelArg::HiddenGlobalOffsetZ);
+    if (auto MD = F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
+      auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
+          RuntimeMD::KernelArg::Global);
+      emitRuntimeMetadataForKernelArg(DL, *OutStreamer, Int8PtrT,
+                                      RuntimeMD::KernelArg::HiddenPrintfBuffer);
     }
-
-    // Emit KeyArgEnd
-    OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
   }
 
   // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=280829&r1=280828&r2=280829&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Wed Sep  7 12:44:00 2016
@@ -65,7 +65,7 @@ namespace RuntimeMD {
     KeyArgAlign                 = 10, // Kernel arg alignment
     KeyArgTypeName              = 11, // Kernel type name
     KeyArgName                  = 12, // Kernel name
-    KeyArgTypeKind              = 13, // Kernel argument type kind
+    KeyArgKind                  = 13, // Kernel argument kind
     KeyArgValueType             = 14, // Kernel argument value type
     KeyArgAddrQual              = 15, // Kernel argument address qualifier
     KeyArgAccQual               = 16, // Kernel argument access qualifier
@@ -77,13 +77,13 @@ namespace RuntimeMD {
     KeyWorkGroupSizeHint        = 22, // Work group size hint
     KeyVecTypeHint              = 23, // Vector type hint
     KeyKernelIndex              = 24, // Kernel index for device enqueue
-    KeySGPRs                    = 25, // Number of SGPRs
-    KeyVGPRs                    = 26, // Number of VGPRs
-    KeyMinWavesPerSIMD          = 27, // Minimum number of waves per SIMD
-    KeyMaxWavesPerSIMD          = 28, // Maximum number of waves per SIMD
-    KeyFlatWorkGroupSizeLimits  = 29, // Flat work group size limits
-    KeyMaxWorkGroupSize         = 30, // Maximum work group size
-    KeyNoPartialWorkGroups      = 31, // No partial work groups
+    KeyMinWavesPerSIMD          = 25, // Minimum number of waves per SIMD
+    KeyMaxWavesPerSIMD          = 26, // Maximum number of waves per SIMD
+    KeyFlatWorkGroupSizeLimits  = 27, // Flat work group size limits
+    KeyMaxWorkGroupSize         = 28, // Maximum work group size
+    KeyNoPartialWorkGroups      = 29, // No partial work groups
+    KeyPrintfInfo               = 30, // Printf function call information
+    KeyArgActualAcc             = 31, // The actual kernel argument access qualifier
     KeyArgPointeeAlign          = 32, // Alignment of pointee type
   };
 
@@ -103,12 +103,21 @@ namespace RuntimeMD {
   };
 
   namespace KernelArg {
-    enum TypeKind : uint8_t {
-      Value     = 0,
-      Pointer   = 1,
-      Image     = 2,
-      Sampler   = 3,
-      Queue     = 4,
+    enum Kind : uint8_t {
+      ByValue                 = 0,
+      GlobalBuffer            = 1,
+      DynamicSharedPointer    = 2,
+      Sampler                 = 3,
+      Image                   = 4,
+      Pipe                    = 5,
+      Queue                   = 6,
+      HiddenGlobalOffsetX     = 7,
+      HiddenGlobalOffsetY     = 8,
+      HiddenGlobalOffsetZ     = 9,
+      HiddenNone              = 10,
+      HiddenPrintfBuffer      = 11,
+      HiddenDefaultQueue      = 12,
+      HiddenCompletionAction  = 13,
     };
 
     enum ValueType : uint16_t {
@@ -138,6 +147,8 @@ namespace RuntimeMD {
       Global     = 1,
       Constant   = 2,
       Local      = 3,
+      Generic    = 4,
+      Region     = 5,
     };
   } // namespace KernelArg
 } // namespace RuntimeMD

Modified: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=280829&r1=280828&r2=280829&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Wed Sep  7 12:44:00 2016
@@ -16,6 +16,12 @@
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	3
 ; CHECK-NEXT: .short	200
+; CHECK-NEXT: .byte	30
+; CHECK-NEXT: .long	10
+; CHECK-NEXT: .ascii	"1:1:4:%d\\n"
+; CHECK-NEXT: .byte	30
+; CHECK-NEXT: .long	10
+; CHECK-NEXT: .ascii	"2:1:8:%g\\n"
 
 ; CHECK-LABEL:{{^}}test_char:
 ; CHECK: .section        .AMDGPU.runtime_metadata
@@ -38,6 +44,48 @@
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_char(i8 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
@@ -65,6 +113,48 @@ define amdgpu_kernel void @test_char(i8
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_ushort2(<2 x i16> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
@@ -92,6 +182,48 @@ define amdgpu_kernel void @test_ushort2(
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_int3(<3 x i32> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
@@ -119,6 +251,48 @@ define amdgpu_kernel void @test_int3(<3
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_ulong4(<4 x i64> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
@@ -146,6 +320,48 @@ define amdgpu_kernel void @test_ulong4(<
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_half8(<8 x half> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
@@ -173,6 +389,48 @@ define amdgpu_kernel void @test_half8(<8
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_float16(<16 x float> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
@@ -200,8 +458,50 @@ define amdgpu_kernel void @test_float16(
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
-; CHECK-NEXT: .byte	5
-
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	5
+
 define amdgpu_kernel void @test_double16(<16 x double> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
   ret void
 }
@@ -217,8 +517,6 @@ define amdgpu_kernel void @test_double16
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -231,6 +529,48 @@ define amdgpu_kernel void @test_double16
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
@@ -252,7 +592,7 @@ define amdgpu_kernel void @test_pointer(
 ; CHECK-NEXT: .long	9
 ; CHECK-NEXT: .ascii	"image2d_t"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	4
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	0
 ; CHECK-NEXT: .byte	16
@@ -260,6 +600,48 @@ define amdgpu_kernel void @test_pointer(
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
@@ -287,6 +669,48 @@ define amdgpu_kernel void @test_image(%o
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_sampler(i32 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
@@ -308,7 +732,7 @@ define amdgpu_kernel void @test_sampler(
 ; CHECK-NEXT: .long	7
 ; CHECK-NEXT: .ascii	"queue_t"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	4
+; CHECK-NEXT: .byte	6
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	0
 ; CHECK-NEXT: .byte	16
@@ -316,6 +740,48 @@ define amdgpu_kernel void @test_sampler(
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
@@ -333,8 +799,6 @@ define amdgpu_kernel void @test_queue(%o
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .ascii	"struct A"
@@ -347,6 +811,48 @@ define amdgpu_kernel void @test_queue(%o
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_struct(%struct.A* byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
@@ -374,6 +880,48 @@ define amdgpu_kernel void @test_struct(%
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_i128(i128 %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
@@ -431,6 +979,48 @@ define amdgpu_kernel void @test_i128(i12
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
@@ -448,57 +1038,95 @@ define amdgpu_kernel void @test_multi_ar
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	5
+; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	32
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .short	9
 ; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	7
 ; CHECK-NEXT: .byte	9
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	9
 ; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
-; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .short	9
 ; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	7
 ; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
+; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	5
-; CHECK-NEXT: .ascii	"int *"
+; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	15
-; CHECK-NEXT: .byte	3
+; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
@@ -517,8 +1145,6 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -537,8 +1163,6 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"int *"
@@ -563,7 +1187,7 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT: .ascii	"int *"
 ; CHECK-NEXT: .byte	20
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	5
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	0
 ; CHECK-NEXT: .byte	16
@@ -571,6 +1195,48 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
@@ -592,7 +1258,7 @@ define amdgpu_kernel void @test_type_qua
 ; CHECK-NEXT: .long	9
 ; CHECK-NEXT: .ascii	"image1d_t"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	4
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	0
 ; CHECK-NEXT: .byte	16
@@ -609,7 +1275,7 @@ define amdgpu_kernel void @test_type_qua
 ; CHECK-NEXT: .long	9
 ; CHECK-NEXT: .ascii	"image2d_t"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	4
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	0
 ; CHECK-NEXT: .byte	16
@@ -626,7 +1292,7 @@ define amdgpu_kernel void @test_type_qua
 ; CHECK-NEXT: .long	9
 ; CHECK-NEXT: .ascii	"image3d_t"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	2
+; CHECK-NEXT: .byte	4
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	0
 ; CHECK-NEXT: .byte	16
@@ -634,6 +1300,48 @@ define amdgpu_kernel void @test_type_qua
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
@@ -661,6 +1369,48 @@ define amdgpu_kernel void @test_access_q
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .ascii	"half"
@@ -691,6 +1441,48 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"float"
@@ -715,11 +1507,53 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .long	3
 ; CHECK-NEXT: .ascii	"int"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	6
@@ -751,6 +1585,48 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .ascii	"char"
@@ -781,6 +1657,48 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	5
 ; CHECK-NEXT: .ascii	"short"
@@ -811,6 +1729,48 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .ascii	"long"
@@ -841,6 +1801,48 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	23
 ; CHECK-NEXT: .long	7
 ; CHECK-NEXT: .ascii	"unknown"
@@ -871,6 +1873,48 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	21
 ; CHECK-NEXT: .long	1
 ; CHECK-NEXT: .long	2
@@ -892,18 +1936,60 @@ define amdgpu_kernel void @test_reqd_wgs
 ; CHECK-NEXT: .ascii	"test_wgs_hint_vec_type_hint"
 ; CHECK-NEXT: .byte	7
 ; CHECK-NEXT: .byte	9
-; CHECK-NEXT: .long	4
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	4
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .long	3
+; CHECK-NEXT: .ascii	"int"
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	6
+; CHECK-NEXT: .byte	16
+; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
-; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	11
-; CHECK-NEXT: .long	3
-; CHECK-NEXT: .ascii	"int"
+; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .byte	9
 ; CHECK-NEXT: .byte	14
-; CHECK-NEXT: .short	6
-; CHECK-NEXT: .byte	16
-; CHECK-NEXT: .byte	0
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	22
 ; CHECK-NEXT: .long	8
@@ -929,8 +2015,6 @@ define amdgpu_kernel void @test_wgs_hint
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	6
 ; CHECK-NEXT: .ascii	"int **"
@@ -943,6 +2027,48 @@ define amdgpu_kernel void @test_wgs_hint
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 * addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
@@ -960,8 +2086,6 @@ define amdgpu_kernel void @test_arg_ptr_
 ; CHECK-NEXT: .long	4
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	4
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .ascii	"struct B"
@@ -974,6 +2098,48 @@ define amdgpu_kernel void @test_arg_ptr_
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B * byval %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
@@ -1001,6 +2167,48 @@ define amdgpu_kernel void @test_arg_stru
 ; CHECK-NEXT: .byte	16
 ; CHECK-NEXT: .byte	0
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
@@ -1030,8 +2238,51 @@ define amdgpu_kernel void @test_arg_vect
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	1
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
+
 define amdgpu_kernel void @test_arg_unknown_builtin_type(%opencl.clk_event_t addrspace(1)* %a) !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
   ret void
 }
@@ -1047,8 +2298,6 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	10
 ; CHECK-NEXT: .long	8
-; CHECK-NEXT: .byte	32
-; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .byte	11
 ; CHECK-NEXT: .long	6
 ; CHECK-NEXT: .ascii	"long *"
@@ -1072,7 +2321,7 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	6
 ; CHECK-NEXT: .ascii	"char *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	16
@@ -1091,7 +2340,7 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	7
 ; CHECK-NEXT: .ascii	"char2 *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	16
@@ -1110,7 +2359,7 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	7
 ; CHECK-NEXT: .ascii	"char3 *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	16
@@ -1129,7 +2378,7 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	7
 ; CHECK-NEXT: .ascii	"char4 *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	16
@@ -1148,7 +2397,7 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	7
 ; CHECK-NEXT: .ascii	"char8 *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	16
@@ -1167,7 +2416,7 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .long	8
 ; CHECK-NEXT: .ascii	"char16 *"
 ; CHECK-NEXT: .byte	13
-; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	2
 ; CHECK-NEXT: .byte	14
 ; CHECK-NEXT: .short	1
 ; CHECK-NEXT: .byte	16
@@ -1175,12 +2424,56 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT: .byte	15
 ; CHECK-NEXT: .byte	3
 ; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	9
+; CHECK-NEXT: .byte	8
+; CHECK-NEXT: .byte	7
+; CHECK-NEXT: .byte	9
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	10
+; CHECK-NEXT: .long	8
+; CHECK-NEXT: .byte	13
+; CHECK-NEXT: .byte	11
+; CHECK-NEXT: .byte	14
+; CHECK-NEXT: .short	1
+; CHECK-NEXT: .byte	15
+; CHECK-NEXT: .byte	1
+; CHECK-NEXT: .byte	8
 ; CHECK-NEXT: .byte	5
 
 define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, <16 x i8> addrspace(3)* %g) !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
   ret void
 }
 
+!llvm.printf.fmts = !{!100, !101}
+
 !1 = !{i32 0}
 !2 = !{!"none"}
 !3 = !{!"int"}
@@ -1230,3 +2523,5 @@ define amdgpu_kernel void @test_pointee_
 !92 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none"}
 !93 = !{!"long *", !"char *", !"char2 *", !"char3 *", !"char4 *", !"char8 *", !"char16 *"}
 !94 = !{!"", !"", !"", !"", !"", !"", !""}
+!100 = !{!"1:1:4:%d\5Cn"}
+!101 = !{!"2:1:8:%g\5Cn"}




More information about the llvm-commits mailing list