[clang] [clang][AMDGPU][CUDA] Handle __builtin_printf for device printf (PR #68515)

Sun Oct 8 02:39:35 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-clang

<details>
<summary>Changes</summary>

Previously `__builtin_printf` would result to emitting call to `printf`, even though directly calling `printf` was translated.

Ref: #68478

---
Full diff: https://github.com/llvm/llvm-project/pull/68515.diff


4 Files Affected:

- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+1) 
- (modified) clang/lib/CodeGen/CGGPUBuiltin.cpp (+2-1) 
- (added) clang/test/CodeGenCUDA/printf-builtin.cu (+20) 
- (added) clang/test/CodeGenHIP/printf-builtin.hip (+21) 


``````````diff

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index bf984861bccb5cc..c16c005787ca778 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5464,6 +5464,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Value *HalfVal = Builder.CreateLoad(Address);
     return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
   }
+  case Builtin::BI__builtin_printf:
   case Builtin::BIprintf:
     if (getTarget().getTriple().isNVPTX() ||
         getTarget().getTriple().isAMDGCN()) {
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index 75fb06de938425d..794be0520163157 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -135,7 +135,8 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
                                 llvm::Function *Decl, bool WithSizeArg) {
   CodeGenModule &CGM = CGF->CGM;
   CGBuilderTy &Builder = CGF->Builder;
-  assert(E->getBuiltinCallee() == Builtin::BIprintf);
+  assert(E->getBuiltinCallee() == Builtin::BIprintf ||
+         E->getBuiltinCallee() == Builtin::BI__builtin_printf);
   assert(E->getNumArgs() >= 1); // printf always has at least one arg.
 
   // Uses the same format as nvptx for the argument packing, but also passes
diff --git a/clang/test/CodeGenCUDA/printf-builtin.cu b/clang/test/CodeGenCUDA/printf-builtin.cu
new file mode 100644
index 000000000000000..586d00a878ddf89
--- /dev/null
+++ b/clang/test/CodeGenCUDA/printf-builtin.cu
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -disable-llvm-optzns -fno-builtin-printf -fcuda-is-device \
+// RUN:   -o - %s | FileCheck  %s
+
+#define __device__ __attribute__((device))
+
+extern "C" __device__ int printf(const char *format, ...);
+
+// CHECK-LABEL: @_Z4foo1v()
+__device__ int foo1() {
+  // CHECK-NOT: call i32 (ptr, ...) @printf
+  return __builtin_printf("Hello World\n");
+}
+
+// CHECK-LABEL: @_Z4foo2v()
+__device__ int foo2() {
+  // CHECK: call i32 (ptr, ...) @printf
+  return printf("Hello World\n");
+}
diff --git a/clang/test/CodeGenHIP/printf-builtin.hip b/clang/test/CodeGenHIP/printf-builtin.hip
new file mode 100644
index 000000000000000..76c7d41376c972d
--- /dev/null
+++ b/clang/test/CodeGenHIP/printf-builtin.hip
@@ -0,0 +1,21 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns -mprintf-kind=hostcall -fno-builtin-printf -fcuda-is-device \
+// RUN:   -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns -mprintf-kind=buffered -fno-builtin-printf -fcuda-is-device \
+// RUN:   -o - %s | FileCheck %s
+
+#define __device__ __attribute__((device))
+
+extern "C" __device__ int printf(const char *format, ...);
+
+// CHECK-LABEL: @_Z4foo1v()
+__device__ int foo1() {
+  // CHECK-NOT: call i32 (ptr, ...) @printf
+  return __builtin_printf("Hello World\n");
+}
+
+// CHECK-LABEL: @_Z4foo2v()
+__device__ int foo2() {
+  // CHECK: call i32 (ptr, ...) @printf
+  return printf("Hello World\n");
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/68515