[llvm] r358992 - [AMDGPU] Fix hidden argument metadata duplication for V3

Scott Linder via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 07:31:17 PDT 2019


Author: scott.linder
Date: Tue Apr 23 07:31:17 2019
New Revision: 358992

URL: http://llvm.org/viewvc/llvm-project?rev=358992&view=rev
Log:
[AMDGPU] Fix hidden argument metadata duplication for V3

Essentially complete a proper rebase of the V3 metadata change over
https://reviews.llvm.org/D49096.

Minimize the diff between the V2 and V3 variants of the relevant lit
tests, and clean up some trailing whitespace.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
Removed:
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp?rev=358992&r1=358991&r2=358992&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp Tue Apr 23 07:31:17 2019
@@ -713,36 +713,6 @@ void MetadataStreamerV3::emitKernelArgs(
 
   emitHiddenKernelArgs(Func, Offset, Args);
 
-  // TODO: What about other languages?
-  if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
-    auto &DL = Func.getParent()->getDataLayout();
-    auto Int64Ty = Type::getInt64Ty(Func.getContext());
-
-    emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
-    emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
-    emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
-
-    auto Int8PtrTy =
-        Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
-
-    // Emit "printf buffer" argument if printf is used, otherwise emit dummy
-    // "none" argument.
-    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
-      emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
-    else
-      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
-
-    // Emit "default queue" and "completion action" arguments if enqueue kernel
-    // is used, otherwise emit dummy "none" arguments.
-    if (Func.hasFnAttribute("calls-enqueue-kernel")) {
-      emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
-      emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
-    } else {
-      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
-      emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
-    }
-  }
-
   Kern[".args"] = Args;
 }
 

Removed: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll?rev=358991&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueu-kernel-v3.ll (removed)
@@ -1,101 +0,0 @@
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK:              ---
-; CHECK:      amdhsa.kernels:  
-; CHECK:        - .args:           
-; CHECK-NEXT:       - .name:           a
-; CHECK-NEXT:         .offset:         0
-; CHECK-NEXT:         .size:           1
-; CHECK-NEXT:         .type_name:      char
-; CHECK-NEXT:         .value_kind:     by_value
-; CHECK-NEXT:         .value_type:     i8
-; CHECK-NEXT:       - .offset:         8
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
-; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .offset:         16
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
-; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .offset:         24
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
-; CHECK-NEXT:         .value_type:     i64
-; CHECK-NOT:        .value_kind:    hidden_default_queue
-; CHECK-NOT:        .value_kind:    hidden_completion_action
-; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
-; CHECK-NEXT:       - 2
-; CHECK-NEXT:       - 0
-; CHECK:          .name:           test_non_enqueue_kernel_caller
-; CHECK:          .symbol:         test_non_enqueue_kernel_caller.kd
-define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a)
-    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
-    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK:        - .args:           
-; CHECK-NEXT:       - .name:           a
-; CHECK-NEXT:         .offset:         0
-; CHECK-NEXT:         .size:           1
-; CHECK-NEXT:         .type_name:      char
-; CHECK-NEXT:         .value_kind:     by_value
-; CHECK-NEXT:         .value_type:     i8
-; CHECK-NEXT:       - .offset:         8
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
-; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .offset:         16
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
-; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .offset:         24
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
-; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .address_space:  global
-; CHECK-NEXT:         .offset:         32
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_none
-; CHECK-NEXT:         .value_type:     i8
-; CHECK-NEXT:       - .address_space:  global
-; CHECK-NEXT:         .offset:         40
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_default_queue
-; CHECK-NEXT:         .value_type:     i8
-; CHECK-NEXT:       - .address_space:  global
-; CHECK-NEXT:         .offset:         48
-; CHECK-NEXT:         .size:           8
-; CHECK-NEXT:         .value_kind:     hidden_completion_action
-; CHECK-NEXT:         .value_type:     i8
-; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
-; CHECK-NEXT:       - 2
-; CHECK-NEXT:       - 0
-; CHECK:          .name:           test_enqueue_kernel_caller
-; CHECK:          .symbol:         test_enqueue_kernel_caller.kd
-define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0
-    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
-    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
-  ret void
-}
-
-; CHECK:  amdhsa.version:
-; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
-; CHECK-NOT:  amdhsa.printf:
-
-attributes #0 = { "calls-enqueue-kernel" }
-
-!1 = !{i32 0}
-!2 = !{!"none"}
-!3 = !{!"char"}
-!4 = !{!""}
-
-!opencl.ocl.version = !{!90}
-!90 = !{i32 2, i32 0}
-
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS

Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll?rev=358992&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll Tue Apr 23 07:31:17 2019
@@ -0,0 +1,101 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK:              ---
+; CHECK:      amdhsa.kernels:
+; CHECK:        - .args:
+; CHECK-NEXT:       - .name:           a
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           1
+; CHECK-NEXT:         .type_name:      char
+; CHECK-NEXT:         .value_kind:     by_value
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NOT:        .value_kind:    hidden_default_queue
+; CHECK-NOT:        .value_kind:    hidden_completion_action
+; CHECK:          .language:       OpenCL C
+; CHECK-NEXT:     .language_version:
+; CHECK-NEXT:       - 2
+; CHECK-NEXT:       - 0
+; CHECK:          .name:           test_non_enqueue_kernel_caller
+; CHECK:          .symbol:         test_non_enqueue_kernel_caller.kd
+define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:        - .args:
+; CHECK-NEXT:       - .name:           a
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           1
+; CHECK-NEXT:         .type_name:      char
+; CHECK-NEXT:         .value_kind:     by_value
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .offset:         32
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_none
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .offset:         40
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_default_queue
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .offset:         48
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_completion_action
+; CHECK-NEXT:         .value_type:     i8
+; CHECK:          .language:       OpenCL C
+; CHECK-NEXT:     .language_version:
+; CHECK-NEXT:       - 2
+; CHECK-NEXT:       - 0
+; CHECK:          .name:           test_enqueue_kernel_caller
+; CHECK:          .symbol:         test_enqueue_kernel_caller.kd
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:  amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+; CHECK-NOT:  amdhsa.printf:
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+attributes #1 = { "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll?rev=358992&r1=358991&r2=358992&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll Tue Apr 23 07:31:17 2019
@@ -1,9 +1,9 @@
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=+code-object-v3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
 
 %struct.A = type { i8, float }
 %opencl.image1d_t = type opaque
@@ -17,8 +17,8 @@
 @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
 
 ; CHECK:              ---
-; CHECK-NEXT: amdhsa.kernels:  
-; CHECK-NEXT:   - .args:           
+; CHECK-NEXT: amdhsa.kernels:
+; CHECK-NEXT:   - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           1
@@ -45,18 +45,18 @@
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_char
 ; CHECK:          .symbol:         test_char.kd
-define amdgpu_kernel void @test_char(i8 %a)
+define amdgpu_kernel void @test_char(i8 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
     !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -86,18 +86,18 @@ define amdgpu_kernel void @test_char(i8
 ; CHECK-NEXT:         .value_kind:     hidden_none
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_ushort2
 ; CHECK:          .symbol:         test_ushort2.kd
-define amdgpu_kernel void @test_ushort2(<2 x i16> %a)
+define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10
     !kernel_arg_base_type !10 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           16
@@ -122,18 +122,18 @@ define amdgpu_kernel void @test_ushort2(
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_int3
 ; CHECK:          .symbol:         test_int3.kd
-define amdgpu_kernel void @test_int3(<3 x i32> %a)
+define amdgpu_kernel void @test_int3(<3 x i32> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11
     !kernel_arg_base_type !11 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           32
@@ -158,18 +158,18 @@ define amdgpu_kernel void @test_int3(<3
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_ulong4
 ; CHECK:          .symbol:         test_ulong4.kd
-define amdgpu_kernel void @test_ulong4(<4 x i64> %a)
+define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12
     !kernel_arg_base_type !12 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           16
@@ -194,18 +194,18 @@ define amdgpu_kernel void @test_ulong4(<
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_half8
 ; CHECK:          .symbol:         test_half8.kd
-define amdgpu_kernel void @test_half8(<8 x half> %a)
+define amdgpu_kernel void @test_half8(<8 x half> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13
     !kernel_arg_base_type !13 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           64
@@ -230,18 +230,18 @@ define amdgpu_kernel void @test_half8(<8
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_float16
 ; CHECK:          .symbol:         test_float16.kd
-define amdgpu_kernel void @test_float16(<16 x float> %a)
+define amdgpu_kernel void @test_float16(<16 x float> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14
     !kernel_arg_base_type !14 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           128
@@ -266,18 +266,18 @@ define amdgpu_kernel void @test_float16(
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_double16
 ; CHECK:          .symbol:         test_double16.kd
-define amdgpu_kernel void @test_double16(<16 x double> %a)
+define amdgpu_kernel void @test_double16(<16 x double> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15
     !kernel_arg_base_type !15 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -303,18 +303,18 @@ define amdgpu_kernel void @test_double16
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_pointer
 ; CHECK:          .symbol:         test_pointer.kd
-define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a)
+define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16
     !kernel_arg_base_type !16 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -340,18 +340,18 @@ define amdgpu_kernel void @test_pointer(
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_image
 ; CHECK:          .symbol:         test_image.kd
-define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a)
+define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17
     !kernel_arg_base_type !17 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -376,18 +376,18 @@ define amdgpu_kernel void @test_image(%o
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_sampler
 ; CHECK:          .symbol:         test_sampler.kd
-define amdgpu_kernel void @test_sampler(i32 %a)
+define amdgpu_kernel void @test_sampler(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18
     !kernel_arg_base_type !18 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -413,18 +413,18 @@ define amdgpu_kernel void @test_sampler(
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_queue
 ; CHECK:          .symbol:         test_queue.kd
-define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a)
+define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19
     !kernel_arg_base_type !19 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  private
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -450,18 +450,18 @@ define amdgpu_kernel void @test_queue(%o
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_struct
 ; CHECK:          .symbol:         test_struct.kd
-define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a)
+define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20
     !kernel_arg_base_type !20 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           16
@@ -486,18 +486,18 @@ define amdgpu_kernel void @test_struct(%
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_i128
 ; CHECK:          .symbol:         test_i128.kd
-define amdgpu_kernel void @test_i128(i128 %a)
+define amdgpu_kernel void @test_i128(i128 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21
     !kernel_arg_base_type !21 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -534,18 +534,18 @@ define amdgpu_kernel void @test_i128(i12
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_multi_arg
 ; CHECK:          .symbol:         test_multi_arg.kd
-define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c)
+define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
     !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24
     !kernel_arg_base_type !24 !kernel_arg_type_qual !25 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           g
 ; CHECK-NEXT:         .offset:         0
@@ -586,20 +586,20 @@ define amdgpu_kernel void @test_multi_ar
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_addr_space
 ; CHECK:          .symbol:         test_addr_space.kd
 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
                                            i32 addrspace(4)* %c,
-                                           i32 addrspace(3)* %l)
+                                           i32 addrspace(3)* %l) #0
     !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
     !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .is_volatile:    true
 ; CHECK-NEXT:         .name:           a
@@ -643,20 +643,20 @@ define amdgpu_kernel void @test_addr_spa
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_type_qual
 ; CHECK:          .symbol:         test_type_qual.kd
 define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a,
                                           i32 addrspace(1)* %b,
-                                          %opencl.pipe_t addrspace(1)* %c)
+                                          %opencl.pipe_t addrspace(1)* %c) #0
     !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51
     !kernel_arg_base_type !51 !kernel_arg_type_qual !70 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .access:         read_only
 ; CHECK-NEXT:         .address_space:  global
 ; CHECK-NEXT:         .name:           ro
@@ -699,20 +699,20 @@ define amdgpu_kernel void @test_type_qua
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_access_qual
 ; CHECK:          .symbol:         test_access_qual.kd
 define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro,
                                             %opencl.image2d_t addrspace(1)* %wo,
-                                            %opencl.image3d_t addrspace(1)* %rw)
+                                            %opencl.image3d_t addrspace(1)* %rw) #0
     !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62
     !kernel_arg_base_type !62 !kernel_arg_type_qual !25 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -737,19 +737,19 @@ define amdgpu_kernel void @test_access_q
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_half
 ; CHECK:          .symbol:         test_vec_type_hint_half.kd
 ; CHECK:          .vec_type_hint:  half
-define amdgpu_kernel void @test_vec_type_hint_half(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -774,19 +774,19 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_float
 ; CHECK:          .symbol:         test_vec_type_hint_float.kd
 ; CHECK:          .vec_type_hint:  float
-define amdgpu_kernel void @test_vec_type_hint_float(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -811,19 +811,19 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_double
 ; CHECK:          .symbol:         test_vec_type_hint_double.kd
 ; CHECK:          .vec_type_hint:  double
-define amdgpu_kernel void @test_vec_type_hint_double(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -848,19 +848,19 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_char
 ; CHECK:          .symbol:         test_vec_type_hint_char.kd
 ; CHECK:          .vec_type_hint:  char
-define amdgpu_kernel void @test_vec_type_hint_char(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -885,19 +885,19 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_short
 ; CHECK:          .symbol:         test_vec_type_hint_short.kd
 ; CHECK:          .vec_type_hint:  short
-define amdgpu_kernel void @test_vec_type_hint_short(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -922,19 +922,19 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_long
 ; CHECK:          .symbol:         test_vec_type_hint_long.kd
 ; CHECK:          .vec_type_hint:  long
-define amdgpu_kernel void @test_vec_type_hint_long(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -959,19 +959,19 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_vec_type_hint_unknown
 ; CHECK:          .symbol:         test_vec_type_hint_unknown.kd
 ; CHECK:          .vec_type_hint:  unknown
-define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a)
+define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -996,24 +996,24 @@ define amdgpu_kernel void @test_vec_type
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_reqd_wgs_vec_type_hint
-; CHECK:          .reqd_workgroup_size: 
+; CHECK:          .reqd_workgroup_size:
 ; CHECK-NEXT:       - 1
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 4
 ; CHECK:          .symbol:         test_reqd_wgs_vec_type_hint.kd
 ; CHECK:          .vec_type_hint:  int
-define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a)
+define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5
     !reqd_work_group_size !6 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           4
@@ -1038,24 +1038,24 @@ define amdgpu_kernel void @test_reqd_wgs
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_wgs_hint_vec_type_hint
 ; CHECK:          .symbol:         test_wgs_hint_vec_type_hint.kd
 ; CHECK:          .vec_type_hint:  uint4
-; CHECK:          .workgroup_size_hint: 
+; CHECK:          .workgroup_size_hint:
 ; CHECK-NEXT:       - 8
 ; CHECK-NEXT:       - 16
 ; CHECK-NEXT:       - 32
-define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a)
+define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
     !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7
     !work_group_size_hint !8 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -1081,18 +1081,18 @@ define amdgpu_kernel void @test_wgs_hint
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_arg_ptr_to_ptr
 ; CHECK:          .symbol:         test_arg_ptr_to_ptr.kd
-define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a)
+define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) #0
     !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80
     !kernel_arg_base_type !80 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  private
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -1118,18 +1118,18 @@ define amdgpu_kernel void @test_arg_ptr_
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_arg_struct_contains_ptr
 ; CHECK:          .symbol:         test_arg_struct_contains_ptr.kd
-define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a)
+define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82
     !kernel_arg_base_type !82 !kernel_arg_type_qual !4 {
  ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           16
@@ -1154,18 +1154,18 @@ define amdgpu_kernel void @test_arg_stru
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_arg_vector_of_ptr
 ; CHECK:          .symbol:         test_arg_vector_of_ptr.kd
-define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a)
+define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) #0
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83
     !kernel_arg_base_type !83 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -1191,19 +1191,19 @@ define amdgpu_kernel void @test_arg_vect
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_arg_unknown_builtin_type
 ; CHECK:          .symbol:         test_arg_unknown_builtin_type.kd
 define amdgpu_kernel void @test_arg_unknown_builtin_type(
-    %opencl.clk_event_t addrspace(1)* %a)
+    %opencl.clk_event_t addrspace(1)* %a) #0
     !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84
     !kernel_arg_base_type !84 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           a
 ; CHECK-NEXT:         .offset:         0
@@ -1259,25 +1259,32 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT:         .type_name:      'char16  addrspace(5)*'
 ; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
 ; CHECK-NEXT:         .value_type:     i8
-; CHECK-NEXT:       - .offset:         32
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           h
+; CHECK-NEXT:         .offset:         32
+; CHECK-NEXT:         .pointee_align:  1
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     struct
+; CHECK-NEXT:       - .offset:         40
 ; CHECK-NEXT:         .size:           8
 ; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
 ; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .offset:         40
+; CHECK-NEXT:       - .offset:         48
 ; CHECK-NEXT:         .size:           8
 ; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
 ; CHECK-NEXT:         .value_type:     i64
-; CHECK-NEXT:       - .offset:         48
+; CHECK-NEXT:       - .offset:         56
 ; CHECK-NEXT:         .size:           8
 ; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
 ; CHECK-NEXT:         .value_type:     i64
 ; CHECK-NEXT:       - .address_space:  global
-; CHECK-NEXT:         .offset:         56
+; CHECK-NEXT:         .offset:         64
 ; CHECK-NEXT:         .size:           8
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_pointee_align
@@ -1288,13 +1295,112 @@ define amdgpu_kernel void @test_pointee_
                                               <3 x i8> addrspace(3)* %d,
                                               <4 x i8> addrspace(3)* %e,
                                               <8 x i8> addrspace(3)* %f,
-                                              <16 x i8> addrspace(3)* %g)
+                                              <16 x i8> addrspace(3)* %g,
+                                              {} addrspace(3)* %h) #0
     !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
     !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           a
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .type_name:      'long  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           b
+; CHECK-NEXT:         .offset:         8
+; CHECK-NEXT:         .pointee_align:  8
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .type_name:      'char  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           c
+; CHECK-NEXT:         .offset:         12
+; CHECK-NEXT:         .pointee_align:  32
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .type_name:      'char2  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           d
+; CHECK-NEXT:         .offset:         16
+; CHECK-NEXT:         .pointee_align:  64
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .type_name:      'char3  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           e
+; CHECK-NEXT:         .offset:         20
+; CHECK-NEXT:         .pointee_align:  256
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .type_name:      'char4  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           f
+; CHECK-NEXT:         .offset:         24
+; CHECK-NEXT:         .pointee_align:  128
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .type_name:      'char8  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           g
+; CHECK-NEXT:         .offset:         28
+; CHECK-NEXT:         .pointee_align:  1024
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .type_name:      'char16  addrspace(5)*'
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .address_space:  local
+; CHECK-NEXT:         .name:           h
+; CHECK-NEXT:         .offset:         32
+; CHECK-NEXT:         .pointee_align:  16
+; CHECK-NEXT:         .size:           4
+; CHECK-NEXT:         .value_kind:     dynamic_shared_pointer
+; CHECK-NEXT:         .value_type:     struct
+; CHECK-NEXT:       - .offset:         40
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         48
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         56
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .offset:         64
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK:          .language:       OpenCL C
+; CHECK-NEXT:     .language_version:
+; CHECK-NEXT:       - 2
+; CHECK-NEXT:       - 0
+; CHECK:          .name:           test_pointee_align_attribute
+; CHECK:          .symbol:         test_pointee_align_attribute.kd
+define amdgpu_kernel void @test_pointee_align_attribute(i64 addrspace(1)* align 16 %a,
+                                                        i8 addrspace(3)* align 8 %b,
+                                                        <2 x i8> addrspace(3)* align 32 %c,
+                                                        <3 x i8> addrspace(3)* align 64 %d,
+                                                        <4 x i8> addrspace(3)* align 256 %e,
+                                                        <8 x i8> addrspace(3)* align 128 %f,
+                                                        <16 x i8> addrspace(3)* align 1024 %g,
+                                                        {} addrspace(3)* align 16 %h) #0
+    !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
+    !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
+  ret void
+}
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           arg
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           25
@@ -1320,19 +1426,19 @@ define amdgpu_kernel void @test_pointee_
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .device_enqueue_symbol: __test_block_invoke_kernel_runtime_handle
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           __test_block_invoke_kernel
 ; CHECK:          .symbol:         __test_block_invoke_kernel.kd
 define amdgpu_kernel void @__test_block_invoke_kernel(
-    <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0
+    <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #1
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110
     !kernel_arg_base_type !110 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           a
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           1
@@ -1367,18 +1473,18 @@ define amdgpu_kernel void @__test_block_
 ; CHECK-NEXT:         .value_kind:     hidden_completion_action
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
+; CHECK-NEXT:     .language_version:
 ; CHECK-NEXT:       - 2
 ; CHECK-NEXT:       - 0
 ; CHECK:          .name:           test_enqueue_kernel_caller
 ; CHECK:          .symbol:         test_enqueue_kernel_caller.kd
-define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1
+define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2
     !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9
     !kernel_arg_base_type !9 !kernel_arg_type_qual !4 {
   ret void
 }
 
-; CHECK:        - .args:           
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .name:           ptr
 ; CHECK-NEXT:         .offset:         0
 ; CHECK-NEXT:         .size:           8
@@ -1397,8 +1503,9 @@ define amdgpu_kernel void @unknown_addrs
 ; CHECK-NEXT: - 1
 ; CHECK-NEXT: - 0
 
-attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
-attributes #1 = { "calls-enqueue-kernel" }
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+attributes #1 = { "amdgpu-implicitarg-num-bytes"="48" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
+attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" }
 
 !llvm.printf.fmts = !{!100, !101}
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll?rev=358992&r1=358991&r2=358992&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll Tue Apr 23 07:31:17 2019
@@ -1248,13 +1248,13 @@ define amdgpu_kernel void @test_arg_unkn
 ; CHECK-NEXT:       PointeeAlign:  16
 ; CHECK-NEXT:       AddrSpaceQual: Local
 ; CHECK-NEXT:       AccQual:       Default
-; CHECK-NEXT:      - Name:            h
-; CHECK-NEXT:        Size:            4
-; CHECK-NEXT:        Align:           4
-; CHECK-NEXT:        ValueKind:       DynamicSharedPointer
-; CHECK-NEXT:        ValueType:       Struct
-; CHECK-NEXT:        PointeeAlign:    1
-; CHECK-NEXT:        AddrSpaceQual:   Local
+; CHECK-NEXT:     - Name:          h
+; CHECK-NEXT:       Size:          4
+; CHECK-NEXT:       Align:         4
+; CHECK-NEXT:       ValueKind:     DynamicSharedPointer
+; CHECK-NEXT:       ValueType:     Struct
+; CHECK-NEXT:       PointeeAlign:  1
+; CHECK-NEXT:       AddrSpaceQual: Local
 ; CHECK-NEXT:     - Size:          8
 ; CHECK-NEXT:       Align:         8
 ; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll?rev=358992&r1=358991&r2=358992&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll Tue Apr 23 07:31:17 2019
@@ -1,10 +1,217 @@
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
 
 ; CHECK:              ---
-; CHECK:      amdhsa.kernels:  
-; CHECK:        - .args:           
+; CHECK:      amdhsa.kernels:
+
+; CHECK:        - .args:
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           r
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           a
+; CHECK-NEXT:         .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           b
+; CHECK-NEXT:         .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK:          .name:           test0
+; CHECK:          .symbol:         test0.kd
+define amdgpu_kernel void @test0(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; CHECK:        - .args:
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           r
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           a
+; CHECK-NEXT:         .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           b
+; CHECK-NEXT:         .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK:          .name:           test8
+; CHECK:          .symbol:         test8.kd
+define amdgpu_kernel void @test8(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) #0 {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; CHECK:        - .args:
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           r
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           a
+; CHECK-NEXT:         .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           b
+; CHECK-NEXT:         .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         32
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK:          .name:           test16
+; CHECK:          .symbol:         test16.kd
+define amdgpu_kernel void @test16(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) #1 {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; CHECK:        - .args:
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           r
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           a
+; CHECK-NEXT:         .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           b
+; CHECK-NEXT:         .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         32
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         40
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+; CHECK:          .name:           test24
+; CHECK:          .symbol:         test24.kd
+define amdgpu_kernel void @test24(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) #2 {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; CHECK:        - .args:
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           r
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           a
+; CHECK-NEXT:         .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .name:           b
+; CHECK-NEXT:         .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     global_buffer
+; CHECK-NEXT:         .value_type:     f16
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         32
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         40
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .offset:         48
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_none
+; CHECK-NEXT:         .value_type:     i8
+; CHECK:          .name:           test32
+; CHECK:          .symbol:         test32.kd
+define amdgpu_kernel void @test32(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) #3 {
+entry:
+  %a.val = load half, half addrspace(1)* %a
+  %b.val = load half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
+
+; CHECK:        - .args:
 ; CHECK-NEXT:       - .address_space:  global
 ; CHECK-NEXT:         .name:           r
 ; CHECK-NEXT:         .offset:         0
@@ -50,16 +257,12 @@
 ; CHECK-NEXT:         .size:           8
 ; CHECK-NEXT:         .value_kind:     hidden_none
 ; CHECK-NEXT:         .value_type:     i8
-; CHECK:          .language:       OpenCL C
-; CHECK-NEXT:     .language_version: 
-; CHECK-NEXT:       - 2
-; CHECK-NEXT:       - 0
-; CHECK:          .name:           test
-; CHECK:          .symbol:         test.kd
-define amdgpu_kernel void @test(
+; CHECK:          .name:           test48
+; CHECK:          .symbol:         test48.kd
+define amdgpu_kernel void @test48(
     half addrspace(1)* %r,
     half addrspace(1)* %a,
-    half addrspace(1)* %b) {
+    half addrspace(1)* %b) #4 {
 entry:
   %a.val = load half, half addrspace(1)* %a
   %b.val = load half, half addrspace(1)* %b
@@ -72,5 +275,8 @@ entry:
 ; CHECK-NEXT: - 1
 ; CHECK-NEXT: - 0
 
-!opencl.ocl.version = !{!0}
-!0 = !{i32 2, i32 0}
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" }
+attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" }
+attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" }
+attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" }
+attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" }




More information about the llvm-commits mailing list