[flang] [llvm] [flang-rt] Implement basic support for I/O from OpenMP GPU Offloading (PR #181039)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 12 13:49:06 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-offload

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
This PR provides the minimal support for Fortran I/O coming from a GPU
in OpenMP offloading. We use the same support the `libc` uses for its
printing through the RPC server. The helper functions `rpc::dispatch`
and `rpc::invoke` help make this mostly automatic.

Because Fortran I/O is not reentrant, the vast majority of complexity
comes from needing to stitch together calls from the GPU until they can
be executed all at once. This is needed not only because of the
limitations of recursive I/O, but also because without it the output
would all be interleaved due to the GPU's lock-step execution.

As such, the return values from the intermediate functions are
meaningless, all returning true. The final value is correct, however. For
cookies we create a context pointer on the server to chain these
together.

**NOTE** This does not fully implement support. We export a new runtime
function call that does the handling, but it is not automatically
registered. To get this to work *now* you will need something like the
following linked into your program. I will add this later if people are
in support of this design, so as not to clutter the flang-rt changes.
```cpp
#include <cstdint>

// Registration hook for an extra RPC opcode handler (presumably provided by
// the offload runtime).
extern "C" void __tgt_register_rpc_callback(unsigned (*Callback)(void *,
                                                                 unsigned));
// Flang runtime handler for the Fortran I/O RPC opcodes added by this PR.
extern "C" uint32_t _FortranAioHandleRPCOpcodes(void *raw, uint32_t numLanes);

// Runs at program startup (before main) and registers the Fortran I/O handler,
// since the PR does not yet register it automatically.
[[gnu::constructor]] void __my_startup() {
  __tgt_register_rpc_callback(&_FortranAioHandleRPCOpcodes);
}
```


---

Patch is 22.92 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/181039.diff


13 Files Affected:

- (modified) flang-rt/cmake/modules/AddFlangRT.cmake (+5-2) 
- (modified) flang-rt/include/flang-rt/runtime/memory.h (+1-1) 
- (modified) flang-rt/lib/runtime/CMakeLists.txt (+2) 
- (added) flang-rt/lib/runtime/io-api-gpu.cpp (+99) 
- (added) flang-rt/lib/runtime/io-api-gpu.h (+82) 
- (added) flang-rt/lib/runtime/io-api-server.cpp (+232) 
- (modified) flang-rt/lib/runtime/memory.cpp (+9) 
- (modified) flang/include/flang/Runtime/io-api.h (+2) 
- (modified) offload/plugins-nextgen/common/CMakeLists.txt (+4) 
- (modified) offload/plugins-nextgen/common/src/RPC.cpp (+10) 
- (modified) offload/test/lit.cfg (-2) 
- (added) offload/test/offloading/fortran/io.f90 (+58) 
- (modified) runtimes/CMakeLists.txt (+1-1) 


``````````diff
diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake
index 923507764d691..fd042f5ed9f63 100644
--- a/flang-rt/cmake/modules/AddFlangRT.cmake
+++ b/flang-rt/cmake/modules/AddFlangRT.cmake
@@ -122,6 +122,9 @@ function (add_flangrt_library name)
     list(APPEND extra_args EXCLUDE_FROM_ALL)
   endif ()
 
+  # Include the RPC utilities from the `libc` project.
+  include(FindLibcCommonUtils)
+
   # Also add header files to IDEs to list as part of the library.
   set_source_files_properties(${ARG_ADDITIONAL_HEADERS} PROPERTIES HEADER_FILE_ONLY ON)
 
@@ -139,11 +142,11 @@ function (add_flangrt_library name)
   endif ()
   if (build_static)
     add_library("${name_static}" STATIC ${extra_args} ${ARG_ADDITIONAL_HEADERS} ${ARG_UNPARSED_ARGUMENTS})
-    target_link_libraries("${name_static}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-static)
+    target_link_libraries("${name_static}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-static llvm-libc-common-utilities)
   endif ()
   if (build_shared)
     add_library("${name_shared}" SHARED ${extra_args} ${ARG_ADDITIONAL_HEADERS} ${ARG_UNPARSED_ARGUMENTS})
-    target_link_libraries("${name_shared}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-shared)
+    target_link_libraries("${name_shared}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-shared llvm-libc-common-utilities)
     if (Threads_FOUND) 
       target_link_libraries(${name_shared} PUBLIC Threads::Threads)
     endif ()
diff --git a/flang-rt/include/flang-rt/runtime/memory.h b/flang-rt/include/flang-rt/runtime/memory.h
index 93b477afa9814..07c443271903b 100644
--- a/flang-rt/include/flang-rt/runtime/memory.h
+++ b/flang-rt/include/flang-rt/runtime/memory.h
@@ -44,7 +44,7 @@ template <typename A> RT_API_ATTRS void FreeMemoryAndNullify(A *&p) {
 // and does not support array objects with runtime length.
 template <typename A> class OwningPtr {
 public:
-  using pointer_type = A *;
+  using pointer_type = std::remove_extent_t<A> *;
 
   OwningPtr() = default;
   RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 787d0dbbfb5ca..da990419fe2a8 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -70,6 +70,7 @@ set(supported_sources
   unit.cpp
   utf.cpp
   work-queue.cpp
+  io-api-server.cpp
 )
 
 # List of source not used for GPU offloading.
@@ -137,6 +138,7 @@ set(gpu_sources
   reduce.cpp
   reduction.cpp
   temporary-stack.cpp
+  io-api-gpu.cpp
 )
 
 file(GLOB_RECURSE public_headers
diff --git a/flang-rt/lib/runtime/io-api-gpu.cpp b/flang-rt/lib/runtime/io-api-gpu.cpp
new file mode 100644
index 0000000000000..77a62e07b3877
--- /dev/null
+++ b/flang-rt/lib/runtime/io-api-gpu.cpp
@@ -0,0 +1,99 @@
+//===-- lib/runtime/io-api-gpu.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Implements the subset of the I/O statement API needed for basic
+// list-directed output (PRINT *) of intrinsic types for the GPU.
+//
+// The RPC interface forwards each runtime call from the client to the server
+// using a shared buffer. These calls are buffered on the server, so only the
+// return values from 'BeginExternalListOutput' and 'EndIoStatement' are
+// meaningful.
+
+#include "io-api-gpu.h"
+#include "flang/Runtime/io-api.h"
+
+#include <shared/rpc.h>
+#include <shared/rpc_dispatch.h>
+
+namespace Fortran::runtime::io {
+// A weak reference to the RPC client used to submit calls to the server.
+[[gnu::weak, gnu::visibility("protected")]] rpc::Client client asm(
+    "__llvm_rpc_client");
+
+RT_EXT_API_GROUP_BEGIN
+
+Cookie IODEF(BeginExternalListOutput)(
+    ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
+  return rpc::dispatch<BeginExternalListOutput_Opcode>(client,
+      IODEF(BeginExternalListOutput), unitNumber, sourceFile, sourceLine);
+}
+
+enum Iostat IODEF(EndIoStatement)(Cookie cookie) {
+  return rpc::dispatch<EndIoStatement_Opcode>(
+      client, IODEF(EndIoStatement), cookie);
+}
+
+bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) {
+  return rpc::dispatch<OutputInteger8_Opcode>(
+      client, IODEF(OutputInteger8), cookie, n);
+}
+
+bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) {
+  return rpc::dispatch<OutputInteger16_Opcode>(
+      client, IODEF(OutputInteger16), cookie, n);
+}
+
+bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) {
+  return rpc::dispatch<OutputInteger32_Opcode>(
+      client, IODEF(OutputInteger32), cookie, n);
+}
+
+bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) {
+  return rpc::dispatch<OutputInteger64_Opcode>(
+      client, IODEF(OutputInteger64), cookie, n);
+}
+
+#ifdef __SIZEOF_INT128__
+bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) {
+  return rpc::dispatch<OutputInteger128_Opcode>(
+      client, IODEF(OutputInteger128), cookie, n);
+}
+#endif
+
+bool IODEF(OutputReal32)(Cookie cookie, float x) {
+  return rpc::dispatch<OutputReal32_Opcode>(
+      client, IODEF(OutputReal32), cookie, x);
+}
+
+bool IODEF(OutputReal64)(Cookie cookie, double x) {
+  return rpc::dispatch<OutputReal64_Opcode>(
+      client, IODEF(OutputReal64), cookie, x);
+}
+
+bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) {
+  return rpc::dispatch<OutputComplex32_Opcode>(
+      client, IODEF(OutputComplex32), cookie, re, im);
+}
+
+bool IODEF(OutputComplex64)(Cookie cookie, double re, double im) {
+  return rpc::dispatch<OutputComplex64_Opcode>(
+      client, IODEF(OutputComplex64), cookie, re, im);
+}
+
+bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) {
+  return rpc::dispatch<OutputAscii_Opcode>(
+      client, IODEF(OutputAscii), cookie, x, length);
+}
+
+bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
+  return rpc::dispatch<OutputLogical_Opcode>(
+      client, IODEF(OutputLogical), cookie, truth);
+}
+
+RT_EXT_API_GROUP_END
+} // namespace Fortran::runtime::io
diff --git a/flang-rt/lib/runtime/io-api-gpu.h b/flang-rt/lib/runtime/io-api-gpu.h
new file mode 100644
index 0000000000000..c2b55f40daf36
--- /dev/null
+++ b/flang-rt/lib/runtime/io-api-gpu.h
@@ -0,0 +1,82 @@
+//===-- lib/runtime/io-api-gpu.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FLANG_RT_RUNTIME_IO_API_GPU_H_
+#define FLANG_RT_RUNTIME_IO_API_GPU_H_
+
+#include "flang-rt/runtime/memory.h"
+#include "flang-rt/runtime/terminator.h"
+#include <cstdint>
+#include <utility>
+
+namespace Fortran::runtime::io {
+// We reserve the RPC opcodes with 'f' in the MSB for Fortran usage.
+constexpr std::uint32_t MakeOpcode(std::uint32_t base) {
+  return ('f' << 24) | base;
+}
+
+// Opcodes shared between the client and server for each function we support.
+constexpr std::uint32_t BeginExternalListOutput_Opcode = MakeOpcode(0);
+constexpr std::uint32_t EndIoStatement_Opcode = MakeOpcode(1);
+constexpr std::uint32_t OutputInteger8_Opcode = MakeOpcode(2);
+constexpr std::uint32_t OutputInteger16_Opcode = MakeOpcode(3);
+constexpr std::uint32_t OutputInteger32_Opcode = MakeOpcode(4);
+constexpr std::uint32_t OutputInteger64_Opcode = MakeOpcode(5);
+constexpr std::uint32_t OutputInteger128_Opcode = MakeOpcode(6);
+constexpr std::uint32_t OutputReal32_Opcode = MakeOpcode(7);
+constexpr std::uint32_t OutputReal64_Opcode = MakeOpcode(8);
+constexpr std::uint32_t OutputComplex32_Opcode = MakeOpcode(9);
+constexpr std::uint32_t OutputComplex64_Opcode = MakeOpcode(10);
+constexpr std::uint32_t OutputAscii_Opcode = MakeOpcode(11);
+constexpr std::uint32_t OutputLogical_Opcode = MakeOpcode(12);
+
+// A simple dynamic array that only supports appending to avoid std::vector.
+template <typename T> struct DynamicArray {
+  ~DynamicArray() {
+    for (std::size_t i = 0; i < size_; ++i) {
+      data_[i].~T();
+    }
+    FreeMemory(data_);
+  }
+
+  void emplace_back(T &&value) {
+    if (size_ == capacity_) {
+      reserve(capacity_ ? capacity_ * 2 : 4);
+    }
+    new (data_ + size_) T(std::move(value));
+    ++size_;
+  }
+
+  void reserve(std::size_t newCap) {
+    if (newCap <= capacity_) {
+      return;
+    }
+    T *new_data = static_cast<T *>(
+        AllocateMemoryOrCrash(terminator_, newCap * sizeof(T)));
+    for (std::size_t i = 0; i < size_; ++i) {
+      new (new_data + i) T(std::move(data_[i]));
+      data_[i].~T();
+    }
+    FreeMemory(data_);
+    data_ = new_data;
+    capacity_ = newCap;
+  }
+
+  T *begin() const { return data_; }
+  T *end() const { return data_ + size_; }
+
+private:
+  T *data_ = nullptr;
+  std::size_t size_ = 0;
+  std::size_t capacity_ = 0;
+  Terminator terminator_{__FILE__, __LINE__};
+};
+
+} // namespace Fortran::runtime::io
+
+#endif // FLANG_RT_RUNTIME_IO_API_GPU_H_
diff --git a/flang-rt/lib/runtime/io-api-server.cpp b/flang-rt/lib/runtime/io-api-server.cpp
new file mode 100644
index 0000000000000..dd0a612572812
--- /dev/null
+++ b/flang-rt/lib/runtime/io-api-server.cpp
@@ -0,0 +1,232 @@
+//===-- lib/runtime/io-api-server.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Implements the RPC server-side handling of the I/O statement API needed for
+// basic list-directed output (PRINT *) of intrinsic types for the GPU.
+
+#include "io-api-gpu.h"
+#include "flang-rt/runtime/memory.h"
+#include "flang-rt/runtime/terminator.h"
+#include "flang/Runtime/io-api.h"
+#include <cstdlib>
+#include <cstring>
+#include <tuple>
+
+#include <shared/rpc.h>
+#include <shared/rpc_dispatch.h>
+
+namespace Fortran::runtime::io {
+namespace {
+
+// Context used to chain the IO operations once run.
+struct IOContext {
+  Cookie cookie = nullptr;
+  enum Iostat result = IostatOk;
+};
+
+// The virtual base class to store deferred execution of a function.
+struct DeferredFunctionBase {
+  virtual ~DeferredFunctionBase() = default;
+  virtual void execute(IOContext &ctx) = 0;
+
+  static OwningPtr<char[]> TempString(const char *str) {
+    if (!str) {
+      return {};
+    }
+
+    const auto size = std::strlen(str) + 1;
+
+    Terminator terminator{__FILE__, __LINE__};
+    OwningPtr<char> temp = SizedNew<char>{terminator}(size);
+    std::memcpy(temp.get(), str, size);
+    return OwningPtr<char[]>(temp.release());
+  }
+};
+
+// Fortran does not support nested or recursive I/O, which is problematic for
+// parallel execution on a GPU. To support this, we defer execution of runtime
+// functions coming from the GPU's client until the end of that sequence is
+// reached. This allows us to finish them in a single pass.
+template <typename FnTy, typename... Args>
+struct DeferredFunction final : DeferredFunctionBase {
+  FnTy fn_;
+  std::tuple<std::decay_t<Args>...> args_;
+
+  DeferredFunction(FnTy &&fn, Args &&...args)
+      : fn_(std::forward<FnTy>(fn)), args_(std::forward<Args>(args)...) {}
+
+  // When executing the final command queue we need to replace the temporary
+  // values obtained from the GPU with the returned values from the actual
+  // runtime functions.
+  void execute(IOContext &ctx) override {
+    auto caller = [&](auto &&...args) { return fn_(Rewrite(args, ctx)...); };
+
+    using RetTy = std::invoke_result_t<FnTy,
+        decltype(Rewrite(std::declval<Args &>(), ctx))...>;
+    if constexpr (std::is_same_v<RetTy, Cookie>) {
+      ctx.cookie = std::apply(caller, args_);
+    } else if constexpr (std::is_same_v<RetTy, Iostat>) {
+      ctx.result = std::apply(caller, args_);
+    } else {
+      std::apply(caller, args_);
+    }
+  }
+
+private:
+  template <typename T> T &Rewrite(T &v, IOContext &) { return v; }
+
+  Cookie Rewrite(Cookie, IOContext &ctx) {
+    return reinterpret_cast<Cookie>(ctx.cookie);
+  }
+
+  const char *Rewrite(OwningPtr<char[]> &p, IOContext &) { return p.get(); }
+};
+
+template <typename Fn, typename... Args>
+OwningPtr<DeferredFunctionBase> MakeDeferred(Fn &&fn, Args &&...args) {
+  Terminator terminator{__FILE__, __LINE__};
+  using Ty = DeferredFunction<Fn, Args...>;
+  auto derived = SizedNew<Ty>{terminator}(
+      sizeof(Ty), std::forward<Fn>(fn), std::forward<Args>(args)...);
+
+  return OwningPtr<DeferredFunctionBase>{derived.release()};
+}
+
+// The context associated with the queue of deferred functions. This serves as
+// our cookie object while executing this on the GPU.
+struct DeferredContext {
+  IOContext ioCtx;
+  DynamicArray<OwningPtr<DeferredFunctionBase>> commands;
+};
+
+template <typename FnTy, typename... Args>
+bool EnqueueDeferred(FnTy &&fn, Cookie cookie, Args &&...args) {
+  DeferredContext *ctx = reinterpret_cast<DeferredContext *>(cookie);
+  ctx->commands.emplace_back(
+      MakeDeferred(fn, cookie, std::forward<Args>(args)...));
+  return true;
+}
+
+template <std::uint32_t NumLanes>
+rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
+  switch (port.get_opcode()) {
+  case BeginExternalListOutput_Opcode:
+    rpc::invoke<NumLanes>(port,
+        [](ExternalUnit unitNumber, const char *sourceFile,
+            int sourceLine) -> Cookie {
+          DeferredContext *ctx = new (AllocateMemoryOrCrash(
+              Terminator{__FILE__, __LINE__}, sizeof(DeferredContext)))
+              DeferredContext;
+
+          ctx->commands.emplace_back(
+              MakeDeferred(IODECL(BeginExternalListOutput), unitNumber,
+                  DeferredFunctionBase::TempString(sourceFile), sourceLine));
+
+          return reinterpret_cast<Cookie>(ctx);
+        });
+    break;
+  case EndIoStatement_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie) -> Iostat {
+      DeferredContext *ctx = reinterpret_cast<DeferredContext *>(cookie);
+
+      ctx->commands.emplace_back(
+          MakeDeferred(_FortranAioEndIoStatement, cookie));
+      for (auto &fn : ctx->commands)
+        fn->execute(ctx->ioCtx);
+      Iostat result = ctx->ioCtx.result;
+
+      ctx->~DeferredContext();
+      FreeMemory(ctx);
+
+      return result;
+    });
+    break;
+  case OutputAscii_Opcode:
+    rpc::invoke<NumLanes>(
+        port, [](Cookie cookie, const char *x, std::size_t length) -> bool {
+          return EnqueueDeferred(IODECL(OutputAscii), cookie,
+              DeferredFunctionBase::TempString(x), length);
+        });
+    break;
+  case OutputInteger8_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int8_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger8), cookie, n);
+    });
+    break;
+  case OutputInteger16_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int16_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger16), cookie, n);
+    });
+    break;
+  case OutputInteger32_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int32_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger32), cookie, n);
+    });
+    break;
+  case OutputInteger64_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int64_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger64), cookie, n);
+    });
+    break;
+#ifdef __SIZEOF_INT128__
+  case OutputInteger128_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, common::int128_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger128), cookie, n);
+    });
+    break;
+#endif
+  case OutputReal32_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, float x) -> bool {
+      return EnqueueDeferred(IODECL(OutputReal32), cookie, x);
+    });
+    break;
+  case OutputReal64_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, double x) -> bool {
+      return EnqueueDeferred(IODECL(OutputReal64), cookie, x);
+    });
+    break;
+  case OutputComplex32_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, float re, float im) -> bool {
+      return EnqueueDeferred(IODECL(OutputComplex32), cookie, re, im);
+    });
+    break;
+  case OutputComplex64_Opcode:
+    rpc::invoke<NumLanes>(
+        port, [](Cookie cookie, double re, double im) -> bool {
+          return EnqueueDeferred(IODECL(OutputComplex64), cookie, re, im);
+        });
+    break;
+  case OutputLogical_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, bool truth) -> bool {
+      return EnqueueDeferred(IODECL(OutputLogical), cookie, truth);
+    });
+    break;
+  default:
+    return rpc::RPC_UNHANDLED_OPCODE;
+  }
+
+  return rpc::RPC_SUCCESS;
+}
+} // namespace
+
+RT_EXT_API_GROUP_BEGIN
+std::uint32_t IODECL(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) {
+  rpc::Server::Port &Port = *reinterpret_cast<rpc::Server::Port *>(raw);
+  if (numLanes == 1) {
+    return HandleOpcodesImpl<1>(Port);
+  }
+  if (numLanes == 32) {
+    return HandleOpcodesImpl<32>(Port);
+  }
+  if (numLanes == 64) {
+    return HandleOpcodesImpl<64>(Port);
+  }
+  return rpc::RPC_ERROR;
+}
+RT_EXT_API_GROUP_END
+} // namespace Fortran::runtime::io
diff --git a/flang-rt/lib/runtime/memory.cpp b/flang-rt/lib/runtime/memory.cpp
index 79c7e33777569..feb72ea3edbef 100644
--- a/flang-rt/lib/runtime/memory.cpp
+++ b/flang-rt/lib/runtime/memory.cpp
@@ -44,3 +44,12 @@ void FreeMemory(void *p) { std::free(p); }
 
 RT_OFFLOAD_API_GROUP_END
 } // namespace Fortran::runtime
+
+// Freestanding support of C++ sized new / delete usage.
+void *operator new(std::size_t size) {
+  return Fortran::runtime::AllocateMemoryOrCrash(
+      Fortran::runtime::Terminator{__FILE__, __LINE__}, size);
+}
+void operator delete(void *p, std::size_t) noexcept {
+  Fortran::runtime::FreeMemory(p);
+}
diff --git a/flang/include/flang/Runtime/io-api.h b/flang/include/flang/Runtime/io-api.h
index 988fe536705e6..fe49af2f61683 100644
--- a/flang/include/flang/Runtime/io-api.h
+++ b/flang/include/flang/Runtime/io-api.h
@@ -364,6 +364,8 @@ bool IODECL(InquireInteger64)(
 // rather than by terminating the image.
 enum Iostat IODECL(EndIoStatement)(Cookie);
 
+// Used for I/O from the offloading device.
+std::uint32_t IODECL(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes);
 } // extern "C"
 } // namespace Fortran::runtime::io
 
diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt
index 23000783270f8..d02366ee72c26 100644
--- a/offload/plugins-nextgen/common/CMakeLists.txt
+++ b/offload/plugins-nextgen/common/CMakeLists.txt
@@ -35,6 +35,10 @@ endif()
 include(FindLibcCommonUtils)
 target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities)
 
+if (TARGET flang_rt.runtime.static)
+  target_link_libraries(PluginCommon PRIVATE flang_rt.runtime.static)
+endif()
+
 # Define the TARGET_NAME and DEBUG_PREFIX.
 target_compile_definitions(PluginCommon PRIVATE
   TARGET_NAME=PluginInterface
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 234bd10614654..c7c435a03274e 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -17,6 +17,10 @@
 #include "shared/rpc_opcodes.h"
 #include "shared/rpc_server.h"
 
+#if __has_include("flang/Runtime/io-api.h")
+#include "flang/Runtime/io-api.h"
+#endif
+
 using namespace llvm;
 using namespace omp;
 using namespace target;
@@ -111,6 +115,12 @@ runServer(plugin::GenericDeviceTy &Device, void *Buffer,
   if (Status == rpc::RPC_UNHANDLED_OPCODE)
     Status = LIBC_NAMESPACE::shared::handle_libc_opcodes(*Port, NumLanes);
 
+#if __has_include("flang/Runtime/io-api.h")
+  if (Status == rpc::RPC_UNHANDLED_OPCODE)
+    Status = static_cast<rpc::Status>(
+        Fortran::runtime::io::IODECL(HandleRPCOpcodes)(&*Port, NumLanes));
+#endif
+
   Port->close();
   return St...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/181039


More information about the llvm-commits mailing list