[llvm] 8a20612 - [AMDGPU] Respect `nobuiltin` when converting `printf`

Tue Aug 22 10:48:24 PDT 2023

Author: Joseph Huber
Date: 2023-08-22T12:48:16-05:00
New Revision: 8a206124675aae7e5afc9577f651f8e88468ce43

URL: https://github.com/llvm/llvm-project/commit/8a206124675aae7e5afc9577f651f8e88468ce43
DIFF: https://github.com/llvm/llvm-project/commit/8a206124675aae7e5afc9577f651f8e88468ce43.diff

LOG: [AMDGPU] Respect `nobuiltin` when converting `printf`

The AMDGPU backend uses a pass to transform calls to the `printf`
function to a built-in version for either HIP or OpenCL. Currently this
transformation does not respect `-fno-builtin` and is always performed.
This patch allows the user to turn off this functionality, as is standard
for these types of built-in transformations. The motivation behind this
change is to allow the `libc` project to provide a linkable version of
the `printf` function in the future.

Reviewed By: sameerds

Differential Revision: https://reviews.llvm.org/D158477

Added: 
    llvm/test/CodeGen/AMDGPU/printf_builtin.ll
    llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 13f83e298cf4e2..d03820f5f9e6c9 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -439,7 +439,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
 
   for (auto &U : PrintfFunction->uses()) {
     if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
-      if (CI->isCallee(&U))
+      if (CI->isCallee(&U) && !CI->isNoBuiltin())
         Printfs.push_back(CI);
     }
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/printf_builtin.ll b/llvm/test/CodeGen/AMDGPU/printf_builtin.ll
new file mode 100644
index 00000000000000..36c1f2acbb7edc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/printf_builtin.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=amdgcn-- -passes=amdgpu-printf-runtime-binding -mcpu=gfx90a -S < %s | FileCheck --check-prefix=GCN %s
+
+@str = private unnamed_addr constant [10 x i8] c"string %d\00", align 1
+
+define void @caller(i32 %n) {
+; GCN-LABEL: define void @caller
+; GCN-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; GCN-NEXT:  entry:
+; GCN-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr @str, i32 [[N]])
+; GCN-NEXT:    ret void
+;
+entry:
+  %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n)
+  ret void
+}
+
+define void @caller_builtin(i32 %n) {
+; GCN-LABEL: define void @caller_builtin
+; GCN-SAME: (i32 [[N:%.*]]) #[[ATTR0]] {
+; GCN-NEXT:  entry:
+; GCN-NEXT:    [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 8)
+; GCN-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; GCN:       entry.split:
+; GCN-NEXT:    [[TMP0:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
+; GCN-NEXT:    br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]]
+; GCN:       1:
+; GCN-NEXT:    [[PRINTBUFFID:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 0
+; GCN-NEXT:    [[PRINTBUFFIDCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFID]] to ptr addrspace(1)
+; GCN-NEXT:    store i32 1, ptr addrspace(1) [[PRINTBUFFIDCAST]], align 4
+; GCN-NEXT:    [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
+; GCN-NEXT:    store i32 [[N]], ptr addrspace(1) [[PRINTBUFFGEP]], align 4
+; GCN-NEXT:    br label [[TMP2]]
+; GCN:       2:
+; GCN-NEXT:    ret void
+;
+entry:
+  %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n) #0
+  ret void
+}
+
+declare i32 @printf(ptr, ...) #1
+
+attributes #0 = { builtin }
+attributes #1 = { nobuiltin }

diff  --git a/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll b/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll
new file mode 100644
index 00000000000000..8dbb24480bef6f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/printf_nobuiltin.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=amdgcn-- -passes=amdgpu-printf-runtime-binding -mcpu=gfx90a -S < %s | FileCheck --check-prefix=GCN %s
+
+@str = private unnamed_addr constant [10 x i8] c"string %d\00", align 1
+
+define void @caller(i32 %n) {
+; GCN-LABEL: define void @caller
+; GCN-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; GCN-NEXT:  entry:
+; GCN-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr @str, i32 [[N]]) #[[ATTR2:[0-9]+]]
+; GCN-NEXT:    ret void
+;
+entry:
+  %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n) #0
+  ret void
+}
+
+define void @caller_builtin(i32 %n) {
+; GCN-LABEL: define void @caller_builtin
+; GCN-SAME: (i32 [[N:%.*]]) #[[ATTR0]] {
+; GCN-NEXT:  entry:
+; GCN-NEXT:    [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 8)
+; GCN-NEXT:    br label [[ENTRY_SPLIT:%.*]]
+; GCN:       entry.split:
+; GCN-NEXT:    [[TMP0:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
+; GCN-NEXT:    br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]]
+; GCN:       1:
+; GCN-NEXT:    [[PRINTBUFFID:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 0
+; GCN-NEXT:    [[PRINTBUFFIDCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFID]] to ptr addrspace(1)
+; GCN-NEXT:    store i32 1, ptr addrspace(1) [[PRINTBUFFIDCAST]], align 4
+; GCN-NEXT:    [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
+; GCN-NEXT:    store i32 [[N]], ptr addrspace(1) [[PRINTBUFFGEP]], align 4
+; GCN-NEXT:    br label [[TMP2]]
+; GCN:       2:
+; GCN-NEXT:    ret void
+;
+entry:
+  %call = call i32 (ptr, ...) @printf(ptr @str, i32 %n)
+  ret void
+}
+
+declare i32 @printf(ptr, ...)
+
+attributes #0 = { nobuiltin }