[llvm] 52c5014 - [AMDGPU] add support for hostcall buffer pointer as hidden kernel argument

Sameer Sahasrabuddhe via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 02:24:48 PST 2019


Author: Sameer Sahasrabuddhe
Date: 2019-11-20T15:53:55+05:30
New Revision: 52c5014da099797e9f1f6c90acddf79a68aa85cb

URL: https://github.com/llvm/llvm-project/commit/52c5014da099797e9f1f6c90acddf79a68aa85cb
DIFF: https://github.com/llvm/llvm-project/commit/52c5014da099797e9f1f6c90acddf79a68aa85cb.diff

LOG: [AMDGPU] add support for hostcall buffer pointer as hidden kernel argument

Hostcall is a service that allows a kernel to submit requests to the
host using shared buffers, and block until a response is
received. This will eventually replace the shared buffer currently
used for printf, and repurposes the same hidden kernel argument. This
change introduces a new ValueKind in the HSA metadata to represent the
hostcall buffer.

Differential Revision: https://reviews.llvm.org/D70038

Added: 
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
    llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/include/llvm/Support/AMDGPUMetadata.h
    llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
    llvm/lib/Support/AMDGPUMetadata.cpp
    llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
    llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
    llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index fc624a724a86..758e65c63e8e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1382,6 +1382,11 @@ non-AMD key names should be prefixed by "*vendor-name*.".
                                                   to the runtime printf buffer
                                                   is passed in kernarg.
 
+                                                "HiddenHostcallBuffer"
+                                                  A global address space pointer
+                                                  to the runtime hostcall buffer
+                                                  is passed in kernarg.
+
                                                 "HiddenDefaultQueue"
                                                   A global address space pointer
                                                   to the OpenCL device enqueue
@@ -1876,6 +1881,11 @@ same *vendor-name*.
                                                        to the runtime printf buffer
                                                        is passed in kernarg.
 
+                                                     "hidden_hostcall_buffer"
+                                                       A global address space pointer
+                                                       to the runtime hostcall buffer
+                                                       is passed in kernarg.
+
                                                      "hidden_default_queue"
                                                        A global address space pointer
                                                        to the OpenCL device enqueue

diff  --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
index f7f1ec40dde9..eeef4e699c3e 100644
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -75,6 +75,7 @@ enum class ValueKind : uint8_t {
   HiddenDefaultQueue     = 12,
   HiddenCompletionAction = 13,
   HiddenMultiGridSyncArg = 14,
+  HiddenHostcallBuffer   = 15,
   Unknown                = 0xff
 };
 

diff  --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 3f36dff9f55c..d927171d556c 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -119,6 +119,7 @@ bool MetadataVerifier::verifyKernelArgs(msgpack::DocNode &Node) {
                                .Case("hidden_global_offset_z", true)
                                .Case("hidden_none", true)
                                .Case("hidden_printf_buffer", true)
+                               .Case("hidden_hostcall_buffer", true)
                                .Case("hidden_default_queue", true)
                                .Case("hidden_completion_action", true)
                                .Case("hidden_multigrid_sync_arg", true)

diff  --git a/llvm/lib/Support/AMDGPUMetadata.cpp b/llvm/lib/Support/AMDGPUMetadata.cpp
index 5f8102299f47..4ea197a97389 100644
--- a/llvm/lib/Support/AMDGPUMetadata.cpp
+++ b/llvm/lib/Support/AMDGPUMetadata.cpp
@@ -62,6 +62,7 @@ struct ScalarEnumerationTraits<ValueKind> {
     YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
     YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
     YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+    YIO.enumCase(EN, "HiddenHostcallBuffer", ValueKind::HiddenHostcallBuffer);
     YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
     YIO.enumCase(EN, "HiddenCompletionAction",
                  ValueKind::HiddenCompletionAction);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 9f5bcd8ff5f0..511d62943189 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -421,7 +421,12 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
   if (HiddenArgNumBytes >= 32) {
     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
-    else
+    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+      // The printf runtime binding pass should have ensured that hostcall and
+      // printf are not used in the same module.
+      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
+    } else
       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
   }
 
@@ -854,7 +859,12 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
   if (HiddenArgNumBytes >= 32) {
     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
-    else
+    else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+      // The printf runtime binding pass should have ensured that hostcall and
+      // printf are not used in the same module.
+      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+      emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
+    } else
       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 1f44012a7c06..511de96b5f7c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -582,6 +582,15 @@ bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
   if (Printfs.empty())
     return false;
 
+  if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
+    for (auto &U : HostcallFunction->uses()) {
+      if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
+        M.getContext().emitError(
+            CI, "Cannot use both printf and hostcall in the same module");
+      }
+    }
+  }
+
   TD = &M.getDataLayout();
   auto DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DT = DTWP ? &DTWP->getDomTree() : nullptr;

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
index 72e96a19606a..9d34d417e77e 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
@@ -42,6 +42,7 @@
 ; CHECK-NEXT:         .size:           8
 ; CHECK-NOT:          .value_kind:     hidden_default_queue
 ; CHECK-NOT:          .value_kind:     hidden_completion_action
+; CHECK-NOT:          .value_kind:     hidden_hostcall_buffer
 ; CHECK-NEXT:         .value_kind:     hidden_printf_buffer
 ; CHECK-NEXT:         .value_type:     i8
 ; CHECK:              .value_kind:     hidden_multigrid_sync_arg

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index db1d5c5ec85d..f5a47a862d6b 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -49,6 +49,7 @@
 ; CHECK-NEXT:       ValueType:     I64
 ; CHECK-NEXT:     - Size:          8
 ; CHECK-NEXT:       Align:         8
+; CHECK-NOT:        ValueKind:     HiddenHostcallBuffer
 ; CHECK-NEXT:       ValueKind:     HiddenPrintfBuffer
 ; CHECK-NEXT:       ValueType:     I8
 ; CHECK-NEXT:       AddrSpaceQual: Global

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
new file mode 100644
index 000000000000..8741bfbc1bb6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK:              ---
+; CHECK:      amdhsa.kernels:
+; CHECK:        - .args:
+; CHECK-NEXT:       - .name:           a
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           1
+; CHECK-NEXT:         .type_name:      char
+; CHECK-NEXT:         .value_kind:     by_value
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+
+; CHECK-NOT:          .value_kind:     hidden_hostcall_buffer
+
+; CHECK:          .language:       OpenCL C
+; CHECK-NEXT:     .language_version:
+; CHECK-NEXT:       - 2
+; CHECK-NEXT:       - 0
+; CHECK:          .name:           test_kernel
+; CHECK:          .symbol:         test_kernel.kd
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:  amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
new file mode 100644
index 000000000000..5f1cda0fd216
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK:  Version: [ 1, 0 ]
+; CHECK:  Kernels:
+
+; CHECK:      - Name:            test_kernel
+; CHECK-NEXT:   SymbolName:      'test_kernel@kd'
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Name:          a
+; CHECK-NEXT:       TypeName:      char
+; CHECK-NEXT:       Size:          1
+; CHECK-NEXT:       Align:         1
+; CHECK-NEXT:       ValueKind:     ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NOT:        ValueKind:     HiddenHostcallBuffer
+; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
+; CHECK-NOT:        ValueKind:     HiddenCompletionAction
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
new file mode 100644
index 000000000000..1a75f3661bd4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK:              ---
+; CHECK:      amdhsa.kernels:
+; CHECK:        - .args:
+; CHECK-NEXT:       - .name:           a
+; CHECK-NEXT:         .offset:         0
+; CHECK-NEXT:         .size:           1
+; CHECK-NEXT:         .type_name:      char
+; CHECK-NEXT:         .value_kind:     by_value
+; CHECK-NEXT:         .value_type:     i8
+; CHECK-NEXT:       - .offset:         8
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_x
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         16
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_y
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .offset:         24
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_global_offset_z
+; CHECK-NEXT:         .value_type:     i64
+; CHECK-NEXT:       - .address_space:  global
+; CHECK-NEXT:         .offset:         32
+; CHECK-NEXT:         .size:           8
+; CHECK-NEXT:         .value_kind:     hidden_hostcall_buffer
+; CHECK-NEXT:         .value_type:     i8
+; CHECK:          .language:       OpenCL C
+; CHECK-NEXT:     .language_version:
+; CHECK-NEXT:       - 2
+; CHECK-NEXT:       - 0
+; CHECK:          .name:           test_kernel
+; CHECK:          .symbol:         test_kernel.kd
+
+declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK:  amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
new file mode 100644
index 000000000000..b0428638e254
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK:  Version: [ 1, 0 ]
+; CHECK:  Kernels:
+
+; CHECK:      - Name:            test_kernel
+; CHECK-NEXT:   SymbolName:      'test_kernel@kd'
+; CHECK-NEXT:   Language:        OpenCL C
+; CHECK-NEXT:   LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT:   Args:
+; CHECK-NEXT:     - Name:          a
+; CHECK-NEXT:       TypeName:      char
+; CHECK-NEXT:       Size:          1
+; CHECK-NEXT:       Align:         1
+; CHECK-NEXT:       ValueKind:     ByValue
+; CHECK-NEXT:       ValueType:     I8
+; CHECK-NEXT:       AccQual:       Default
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetX
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetY
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:          8
+; CHECK-NEXT:       Align:         8
+; CHECK-NEXT:       ValueKind:     HiddenGlobalOffsetZ
+; CHECK-NEXT:       ValueType:     I64
+; CHECK-NEXT:     - Size:            8
+; CHECK-NEXT:       Align:           8
+; CHECK-NEXT:       ValueKind:       HiddenHostcallBuffer
+; CHECK-NEXT:       ValueType:       I8
+; CHECK-NEXT:       AddrSpaceQual:   Global
+; CHECK-NOT:        ValueKind:     HiddenDefaultQueue
+; CHECK-NOT:        ValueKind:     HiddenCompletionAction
+
+declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS

diff  --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll
new file mode 100644
index 000000000000..14c29760e3f3
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll
@@ -0,0 +1,18 @@
+; RUN: not opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s
+
+@.str = private unnamed_addr addrspace(2) constant [6 x i8] c"%s:%d\00", align 1
+
+define amdgpu_kernel void @test_kernel(i32 %n) {
+entry:
+  %str = alloca [9 x i8], align 1
+  %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0
+  %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n)
+  %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
+  ret void
+}
+
+declare i32 @printf(i8 addrspace(2)*, ...)
+
+declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
+
+; CHECK: error: Cannot use both printf and hostcall in the same module


        


More information about the llvm-commits mailing list