[clang] fea5914 - Revert "[OpenMP][libc] Remove special handling for OpenMP printf (#98940)"
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 26 14:39:28 PDT 2024
Author: Joseph Huber
Date: 2024-07-26T16:39:12-05:00
New Revision: fea5914c926e2f013a8b5e27eaa74c7047fb2c71
URL: https://github.com/llvm/llvm-project/commit/fea5914c926e2f013a8b5e27eaa74c7047fb2c71
DIFF: https://github.com/llvm/llvm-project/commit/fea5914c926e2f013a8b5e27eaa74c7047fb2c71.diff
LOG: Revert "[OpenMP][libc] Remove special handling for OpenMP printf (#98940)"
This reverts commit 069e8bcd82c4420239f95c7e6a09e1f756317cfc.
Summary:
Some tests failing, revert this for now.
Added:
libc/src/gpu/rpc_fprintf.cpp
libc/src/gpu/rpc_fprintf.h
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/CodeGen/CGGPUBuiltin.cpp
clang/lib/CodeGen/CodeGenFunction.h
libc/config/gpu/entrypoints.txt
libc/spec/gpu_ext.td
libc/src/gpu/CMakeLists.txt
llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
offload/DeviceRTL/include/LibC.h
offload/DeviceRTL/src/LibC.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f0651c280ff95..0c4d0efb70ea5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5986,6 +5986,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
getTarget().getTriple().isAMDGCN() ||
(getTarget().getTriple().isSPIRV() &&
getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) {
+ if (getLangOpts().OpenMPIsTargetDevice)
+ return EmitOpenMPDevicePrintfCallExpr(E);
if (getTarget().getTriple().isNVPTX())
return EmitNVPTXDevicePrintfCallExpr(E);
if ((getTarget().getTriple().isAMDGCN() ||
diff --git a/clang/lib/CodeGen/CGGPUBuiltin.cpp b/clang/lib/CodeGen/CGGPUBuiltin.cpp
index 84adf29e8db87..b2340732afeb5 100644
--- a/clang/lib/CodeGen/CGGPUBuiltin.cpp
+++ b/clang/lib/CodeGen/CGGPUBuiltin.cpp
@@ -42,6 +42,28 @@ llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, "vprintf", &M);
}
+llvm::Function *GetOpenMPVprintfDeclaration(CodeGenModule &CGM) {
+ const char *Name = "__llvm_omp_vprintf";
+ llvm::Module &M = CGM.getModule();
+ llvm::Type *ArgTypes[] = {llvm::PointerType::getUnqual(M.getContext()),
+ llvm::PointerType::getUnqual(M.getContext()),
+ llvm::Type::getInt32Ty(M.getContext())};
+ llvm::FunctionType *VprintfFuncType = llvm::FunctionType::get(
+ llvm::Type::getInt32Ty(M.getContext()), ArgTypes, false);
+
+ if (auto *F = M.getFunction(Name)) {
+ if (F->getFunctionType() != VprintfFuncType) {
+ CGM.Error(SourceLocation(),
+ "Invalid type declaration for __llvm_omp_vprintf");
+ return nullptr;
+ }
+ return F;
+ }
+
+ return llvm::Function::Create(
+ VprintfFuncType, llvm::GlobalVariable::ExternalLinkage, Name, &M);
+}
+
// Transforms a call to printf into a call to the NVPTX vprintf syscall (which
// isn't particularly special; it's invoked just like a regular function).
// vprintf takes two args: A format string, and a pointer to a buffer containing
@@ -191,3 +213,10 @@ RValue CodeGenFunction::EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E) {
Builder.SetInsertPoint(IRB.GetInsertBlock(), IRB.GetInsertPoint());
return RValue::get(Printf);
}
+
+RValue CodeGenFunction::EmitOpenMPDevicePrintfCallExpr(const CallExpr *E) {
+ assert(getTarget().getTriple().isNVPTX() ||
+ getTarget().getTriple().isAMDGCN());
+ return EmitDevicePrintfCallExpr(E, this, GetOpenMPVprintfDeclaration(CGM),
+ true);
+}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 89cc819c43bb5..bd62c65d8cce6 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4536,6 +4536,7 @@ class CodeGenFunction : public CodeGenTypeCache {
RValue EmitNVPTXDevicePrintfCallExpr(const CallExpr *E);
RValue EmitAMDGPUDevicePrintfCallExpr(const CallExpr *E);
+ RValue EmitOpenMPDevicePrintfCallExpr(const CallExpr *E);
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E, ReturnValueSlot ReturnValue);
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index 157f6f8af00a9..df7aa9e319624 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -226,6 +226,7 @@ set(TARGET_LIBC_ENTRYPOINTS
# gpu/rpc.h entrypoints
libc.src.gpu.rpc_host_call
+ libc.src.gpu.rpc_fprintf
)
set(TARGET_LIBM_ENTRYPOINTS
diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td
index dce81ff778620..5400e0afa7564 100644
--- a/libc/spec/gpu_ext.td
+++ b/libc/spec/gpu_ext.td
@@ -10,6 +10,14 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
RetValSpec<VoidType>,
[ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
>,
+ FunctionSpec<
+ "rpc_fprintf",
+ RetValSpec<IntType>,
+ [ArgSpec<FILERestrictedPtr>,
+ ArgSpec<ConstCharRestrictedPtr>,
+ ArgSpec<VoidPtr>,
+ ArgSpec<SizeTType>]
+ >,
]
>;
let Headers = [
diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt
index e20228516b511..4508abea7a888 100644
--- a/libc/src/gpu/CMakeLists.txt
+++ b/libc/src/gpu/CMakeLists.txt
@@ -8,3 +8,15 @@ add_entrypoint_object(
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)
+
+add_entrypoint_object(
+ rpc_fprintf
+ SRCS
+ rpc_fprintf.cpp
+ HDRS
+ rpc_fprintf.h
+ DEPENDS
+ libc.src.stdio.gpu.gpu_file
+ libc.src.__support.RPC.rpc_client
+ libc.src.__support.GPU.utils
+)
diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp
new file mode 100644
index 0000000000000..70056daa25e2e
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.cpp
@@ -0,0 +1,75 @@
+//===-- GPU implementation of fprintf -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "rpc_fprintf.h"
+
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/gpu/file.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+template <uint16_t opcode>
+int fprintf_impl(::FILE *__restrict file, const char *__restrict format,
+ size_t format_size, void *args, size_t args_size) {
+ uint64_t mask = gpu::get_lane_mask();
+ rpc::Client::Port port = rpc::client.open<opcode>();
+
+ if constexpr (opcode == RPC_PRINTF_TO_STREAM) {
+ port.send([&](rpc::Buffer *buffer) {
+ buffer->data[0] = reinterpret_cast<uintptr_t>(file);
+ });
+ }
+
+ port.send_n(format, format_size);
+ port.recv([&](rpc::Buffer *buffer) {
+ args_size = static_cast<size_t>(buffer->data[0]);
+ });
+ port.send_n(args, args_size);
+
+ uint32_t ret = 0;
+ for (;;) {
+ const char *str = nullptr;
+ port.recv([&](rpc::Buffer *buffer) {
+ ret = static_cast<uint32_t>(buffer->data[0]);
+ str = reinterpret_cast<const char *>(buffer->data[1]);
+ });
+ // If any lanes have a string argument it needs to be copied back.
+ if (!gpu::ballot(mask, str))
+ break;
+
+ uint64_t size = str ? internal::string_length(str) + 1 : 0;
+ port.send_n(str, size);
+ }
+
+ port.close();
+ return ret;
+}
+
+// TODO: Delete this and port OpenMP to use `printf`.
+// place of varargs. Once varargs support is added we will use that to
+// implement the real version.
+LLVM_LIBC_FUNCTION(int, rpc_fprintf,
+ (::FILE *__restrict stream, const char *__restrict format,
+ void *args, size_t size)) {
+ cpp::string_view str(format);
+ if (stream == stdout)
+ return fprintf_impl<RPC_PRINTF_TO_STDOUT>(stream, format, str.size() + 1,
+ args, size);
+ else if (stream == stderr)
+ return fprintf_impl<RPC_PRINTF_TO_STDERR>(stream, format, str.size() + 1,
+ args, size);
+ else
+ return fprintf_impl<RPC_PRINTF_TO_STREAM>(stream, format, str.size() + 1,
+ args, size);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/gpu/rpc_fprintf.h b/libc/src/gpu/rpc_fprintf.h
new file mode 100644
index 0000000000000..7658b214c07c2
--- /dev/null
+++ b/libc/src/gpu/rpc_fprintf.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+
+#include "hdr/types/FILE.h"
+#include "src/__support/macros/config.h"
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format,
+ void *argc, size_t size);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 02b0d436451a3..42a6bac4fa6f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -437,8 +437,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
return false;
auto PrintfFunction = M.getFunction("printf");
- if (!PrintfFunction || !PrintfFunction->isDeclaration() ||
- M.getModuleFlag("openmp"))
+ if (!PrintfFunction || !PrintfFunction->isDeclaration())
return false;
for (auto &U : PrintfFunction->uses()) {
diff --git a/offload/DeviceRTL/include/LibC.h b/offload/DeviceRTL/include/LibC.h
index 59a795cc62e0e..dde86af783af9 100644
--- a/offload/DeviceRTL/include/LibC.h
+++ b/offload/DeviceRTL/include/LibC.h
@@ -18,6 +18,7 @@ extern "C" {
int memcmp(const void *lhs, const void *rhs, size_t count);
void memset(void *dst, int C, size_t count);
+
int printf(const char *format, ...);
}
diff --git a/offload/DeviceRTL/src/LibC.cpp b/offload/DeviceRTL/src/LibC.cpp
index 291ceb023a69c..4bca5d29643fe 100644
--- a/offload/DeviceRTL/src/LibC.cpp
+++ b/offload/DeviceRTL/src/LibC.cpp
@@ -11,33 +11,44 @@
#pragma omp begin declare target device_type(nohost)
namespace impl {
-int32_t omp_vprintf(const char *Format, __builtin_va_list vlist);
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t);
}
-#ifndef OMPTARGET_HAS_LIBC
-namespace impl {
#pragma omp begin declare variant match( \
device = {arch(nvptx, nvptx64)}, \
implementation = {extension(match_any)})
-extern "C" int vprintf(const char *format, ...);
-int omp_vprintf(const char *Format, __builtin_va_list vlist) {
- return vprintf(Format, vlist);
+extern "C" int32_t vprintf(const char *, void *);
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+ return vprintf(Format, Arguments);
}
+} // namespace impl
#pragma omp end declare variant
#pragma omp begin declare variant match(device = {arch(amdgcn)})
-int omp_vprintf(const char *Format, __builtin_va_list) { return -1; }
-#pragma omp end declare variant
-} // namespace impl
-extern "C" int printf(const char *Format, ...) {
- __builtin_va_list vlist;
- __builtin_va_start(vlist, Format);
- return impl::omp_vprintf(Format, vlist);
+#ifdef OMPTARGET_HAS_LIBC
+// TODO: Remove this handling once we have varargs support.
+extern "C" struct FILE *stdout;
+extern "C" int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t);
+
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
+ return rpc_fprintf(stdout, Format, Arguments, Size);
}
-#endif // OMPTARGET_HAS_LIBC
+} // namespace impl
+#else
+// We do not have a vprintf implementation for AMD GPU so we use a stub.
+namespace impl {
+int32_t omp_vprintf(const char *Format, void *Arguments, uint32_t) {
+ return -1;
+}
+} // namespace impl
+#endif
+#pragma omp end declare variant
extern "C" {
+
[[gnu::weak]] int memcmp(const void *lhs, const void *rhs, size_t count) {
auto *L = reinterpret_cast<const unsigned char *>(lhs);
auto *R = reinterpret_cast<const unsigned char *>(rhs);
@@ -54,6 +65,11 @@ extern "C" {
for (size_t I = 0; I < count; ++I)
dstc[I] = C;
}
+
+/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
+int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
+ return impl::omp_vprintf(Format, Arguments, Size);
+}
}
#pragma omp end declare target
More information about the cfe-commits
mailing list