[clang] 09f33a4 - [AMDGPU][OpenCL] Remove "printf and hostcall" diagnostic

Scott Linder via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 5 12:11:06 PDT 2022


Author: Scott Linder
Date: 2022-04-05T19:10:23Z
New Revision: 09f33a430b72fffe06fde9e07c0acd5c4cd2e59e

URL: https://github.com/llvm/llvm-project/commit/09f33a430b72fffe06fde9e07c0acd5c4cd2e59e
DIFF: https://github.com/llvm/llvm-project/commit/09f33a430b72fffe06fde9e07c0acd5c4cd2e59e.diff

LOG: [AMDGPU][OpenCL] Remove "printf and hostcall" diagnostic

The diagnostic is unreliable, and triggers even for dead uses of
hostcall that may exist when linking the device-libs at lower
optimization levels.

Eliminate the diagnostic, and directly document the limitation for
OpenCL before code object V5.

Make some NFC changes to clarify the related code in the
MetadataStreamer.

Add a clang test to tie OCL sources containing printf to the backend IR
tests for this situation.

Reviewed By: sameerds, arsenm, yaxunl

Differential Revision: https://reviews.llvm.org/D121951

Added: 
    clang/test/CodeGenOpenCL/amdgpu-printf.cl
    llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll

Modified: 
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
    llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp

Removed: 
    llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll


################################################################################
diff  --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
new file mode 100644
index 0000000000000..0659fdfc54fe1
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl
@@ -0,0 +1,46 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+
+int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2)));
+
+// CHECK-LABEL: @test_printf_noargs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([1 x i8], [1 x i8] addrspace(4)* @.str, i64 0, i64 0)) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_printf_noargs() {
+    printf("");
+}
+
+// CHECK-LABEL: @test_printf_int(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @.str.1, i64 0, i64 0), i32 noundef [[TMP0]]) #[[ATTR4]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_printf_int(int i) {
+    printf("%d", i);
+}
+
+// CHECK-LABEL: @test_printf_str_int(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5)
+// CHECK-NEXT:    store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)*
+// CHECK-NEXT:    call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* [[TMP0]]) #[[ATTR5:[0-9]+]]
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)*
+// CHECK-NEXT:    call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 1 [[TMP1]], i8 addrspace(4)* align 1 getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @__const.test_printf_str_int.s, i32 0, i32 0), i64 4, i1 false)
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* [[S]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]]
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str.2, i64 0, i64 0), i8 addrspace(5)* noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)*
+// CHECK-NEXT:    call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* [[TMP3]]) #[[ATTR5]]
+// CHECK-NEXT:    ret void
+//
+__kernel void test_printf_str_int(int i) {
+    char s[] = "foo";
+    printf("%s:%d", s, i);
+}

diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index a590d468afd97..63ca37811d89f 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -2823,12 +2823,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".
                                                 "HiddenPrintfBuffer"
                                                   A global address space pointer
                                                   to the runtime printf buffer
-                                                  is passed in kernarg.
+                                                  is passed in kernarg. Mutually
+                                                  exclusive with
+                                                  "HiddenHostcallBuffer".
 
                                                 "HiddenHostcallBuffer"
                                                   A global address space pointer
                                                   to the runtime hostcall buffer
-                                                  is passed in kernarg.
+                                                  is passed in kernarg. Mutually
+                                                  exclusive with
+                                                  "HiddenPrintfBuffer".
 
                                                 "HiddenDefaultQueue"
                                                   A global address space pointer
@@ -3348,12 +3352,18 @@ same *vendor-name*.
                                                      "hidden_printf_buffer"
                                                        A global address space pointer
                                                        to the runtime printf buffer
-                                                       is passed in kernarg.
+                                                       is passed in kernarg. Mutually
+                                                       exclusive with
+                                                       "hidden_hostcall_buffer"
+                                                       before Code Object V5.
 
                                                      "hidden_hostcall_buffer"
                                                        A global address space pointer
                                                        to the runtime hostcall buffer
-                                                       is passed in kernarg.
+                                                       is passed in kernarg. Mutually
+                                                       exclusive with
+                                                       "hidden_printf_buffer"
+                                                       before Code Object V5.
 
                                                      "hidden_default_queue"
                                                        A global address space pointer

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index cebdc36adb03b..7b4db853639ae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -400,17 +400,15 @@ void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func,
   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
                                       AMDGPUAS::GLOBAL_ADDRESS);
 
-  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
-  // "none" argument.
   if (HiddenArgNumBytes >= 32) {
+    // We forbid the use of features requiring hostcall when compiling OpenCL
+    // before code object V5, which makes the mutual exclusion between the
+    // "printf buffer" and "hostcall buffer" here sound.
     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
-    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
-      // The printf runtime binding pass should have ensured that hostcall and
-      // printf are not used in the same module.
-      assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
-    } else
+    else
       emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
   }
 
@@ -816,19 +814,17 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const MachineFunction &MF,
   auto Int8PtrTy =
       Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
 
-  // Emit "printf buffer" argument if printf is used, emit "hostcall buffer"
-  // if "hostcall" module flag is set, otherwise emit dummy "none" argument.
   if (HiddenArgNumBytes >= 32) {
+    // We forbid the use of features requiring hostcall when compiling OpenCL
+    // before code object V5, which makes the mutual exclusion between the
+    // "printf buffer" and "hostcall buffer" here sound.
     if (M->getNamedMetadata("llvm.printf.fmts"))
       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
                     Args);
-    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
-      // The printf runtime binding pass should have ensured that hostcall and
-      // printf are not used in the same module.
-      assert(!M->getNamedMetadata("llvm.printf.fmts"));
+    else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
                     Args);
-    } else
+    else
       emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 2ab0f5b04f4c1..b8c29e8d98a3d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -563,15 +563,6 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
   if (Printfs.empty())
     return false;
 
-  if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
-    for (auto &U : HostcallFunction->uses()) {
-      if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
-        M.getContext().emitError(
-            CI, "Cannot use both printf and hostcall in the same module");
-      }
-    }
-  }
-
   TD = &M.getDataLayout();
 
   return lowerPrintfForGpu(M);

diff  --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll
new file mode 100644
index 0000000000000..d4c656941d238
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s
+
+ at .str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1
+
+define amdgpu_kernel void @test_kernel(i32 %n) {
+entry:
+  %str = alloca [9 x i8], align 1, addrspace(5)
+  %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0
+  %call1 = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n)
+  %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
+  ret void
+}
+
+declare i32 @printf(i8 addrspace(4)*, ...)
+
+declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
+
+; CHECK-NOT: error:
+; CHECK-NOT: warning:

diff  --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll
deleted file mode 100644
index 14c29760e3f37..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: not opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s
-
- at .str = private unnamed_addr addrspace(2) constant [6 x i8] c"%s:%d\00", align 1
-
-define amdgpu_kernel void @test_kernel(i32 %n) {
-entry:
-  %str = alloca [9 x i8], align 1
-  %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0
-  %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n)
-  %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9)
-  ret void
-}
-
-declare i32 @printf(i8 addrspace(2)*, ...)
-
-declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64)
-
-; CHECK: error: Cannot use both printf and hostcall in the same module


        


More information about the cfe-commits mailing list