[flang] [llvm] [flang-rt] Implement basic support for I/O from OpenMP GPU Offloading (PR #181039)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 14 07:14:04 PST 2026


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/181039

>From 0a066e3882d5413909eff6ec4cda7022e94b6547 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 11 Feb 2026 08:13:41 -0600
Subject: [PATCH 1/8] [flang-rt] Implement basic support for I/O from OpenMP
 GPU Offloading

Summary:
This PR provides the minimal support for Fortran I/O coming from a GPU
in OpenMP offloading. We use the same support the `libc` uses for its
printing through the RPC server. The helper functions `rpc::dispatch`
and `rpc::invoke` help make this mostly automatic.

Because Fortran I/O is not reentrant, the vast majority of complexity
comes from needing to stitch together calls from the GPU until they can
be executed all at once. This is needed not only because of the
limitations of recursive I/O, but without this the output would all be
interleaved because of the GPU's lock-step execution.

As such, the return values from the intermediate functions are
meaningless, all returning true. The final value is correct however. For
cookies we create a context pointer on the server to chain these
together.

**NOTE** This does not fully implement support. We export a new runtime
function call that does the handling, but it is not automatically
registered. To get this to work *now* you will need something like the
following linked into your program. I will add this later if people are
in support of this design, so as not to clutter the flang-rt changes.
```cpp
extern "C" void __tgt_register_rpc_callback(unsigned (*Callback)(void *,
                                                                 unsigned));
extern "C" uint32_t _FortranAioHandleRPCOpcodes(void *raw, uint32_t numLanes);

[[gnu::constructor]] void __my_startup() {
  __tgt_register_rpc_callback(&_FortranAioHandleRPCOpcodes);
};
```
---
 flang-rt/cmake/modules/AddFlangRT.cmake    |   7 +-
 flang-rt/include/flang-rt/runtime/memory.h |   2 +-
 flang-rt/lib/runtime/CMakeLists.txt        |   2 +
 flang-rt/lib/runtime/io-api-gpu.cpp        |  99 +++++++++
 flang-rt/lib/runtime/io-api-gpu.h          |  82 ++++++++
 flang-rt/lib/runtime/io-api-server.cpp     | 232 +++++++++++++++++++++
 flang-rt/lib/runtime/memory.cpp            |   9 +
 flang/include/flang/Runtime/io-api.h       |   2 +
 8 files changed, 432 insertions(+), 3 deletions(-)
 create mode 100644 flang-rt/lib/runtime/io-api-gpu.cpp
 create mode 100644 flang-rt/lib/runtime/io-api-gpu.h
 create mode 100644 flang-rt/lib/runtime/io-api-server.cpp

diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake
index 923507764d691..fd042f5ed9f63 100644
--- a/flang-rt/cmake/modules/AddFlangRT.cmake
+++ b/flang-rt/cmake/modules/AddFlangRT.cmake
@@ -122,6 +122,9 @@ function (add_flangrt_library name)
     list(APPEND extra_args EXCLUDE_FROM_ALL)
   endif ()
 
+  # Include the RPC utilities from the `libc` project.
+  include(FindLibcCommonUtils)
+
   # Also add header files to IDEs to list as part of the library.
   set_source_files_properties(${ARG_ADDITIONAL_HEADERS} PROPERTIES HEADER_FILE_ONLY ON)
 
@@ -139,11 +142,11 @@ function (add_flangrt_library name)
   endif ()
   if (build_static)
     add_library("${name_static}" STATIC ${extra_args} ${ARG_ADDITIONAL_HEADERS} ${ARG_UNPARSED_ARGUMENTS})
-    target_link_libraries("${name_static}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-static)
+    target_link_libraries("${name_static}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-static llvm-libc-common-utilities)
   endif ()
   if (build_shared)
     add_library("${name_shared}" SHARED ${extra_args} ${ARG_ADDITIONAL_HEADERS} ${ARG_UNPARSED_ARGUMENTS})
-    target_link_libraries("${name_shared}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-shared)
+    target_link_libraries("${name_shared}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-shared llvm-libc-common-utilities)
     if (Threads_FOUND) 
       target_link_libraries(${name_shared} PUBLIC Threads::Threads)
     endif ()
diff --git a/flang-rt/include/flang-rt/runtime/memory.h b/flang-rt/include/flang-rt/runtime/memory.h
index 93b477afa9814..07c443271903b 100644
--- a/flang-rt/include/flang-rt/runtime/memory.h
+++ b/flang-rt/include/flang-rt/runtime/memory.h
@@ -44,7 +44,7 @@ template <typename A> RT_API_ATTRS void FreeMemoryAndNullify(A *&p) {
 // and does not support array objects with runtime length.
 template <typename A> class OwningPtr {
 public:
-  using pointer_type = A *;
+  using pointer_type = std::remove_extent_t<A> *;
 
   OwningPtr() = default;
   RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index 787d0dbbfb5ca..da990419fe2a8 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -70,6 +70,7 @@ set(supported_sources
   unit.cpp
   utf.cpp
   work-queue.cpp
+  io-api-server.cpp
 )
 
 # List of source not used for GPU offloading.
@@ -137,6 +138,7 @@ set(gpu_sources
   reduce.cpp
   reduction.cpp
   temporary-stack.cpp
+  io-api-gpu.cpp
 )
 
 file(GLOB_RECURSE public_headers
diff --git a/flang-rt/lib/runtime/io-api-gpu.cpp b/flang-rt/lib/runtime/io-api-gpu.cpp
new file mode 100644
index 0000000000000..77a62e07b3877
--- /dev/null
+++ b/flang-rt/lib/runtime/io-api-gpu.cpp
@@ -0,0 +1,99 @@
+//===-- lib/runtime/io-api-gpu.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Implements the subset of the I/O statement API needed for basic
+// list-directed output (PRINT *) of intrinsic types for the GPU.
+//
+// The RPC interface forwards each runtime call from the client to the server
+// using a shared buffer. These calls are buffered on the server, so only the
+// return value from 'BeginExternalListOutput' and 'EndIoStatement' are
+// meaningful.
+
+#include "io-api-gpu.h"
+#include "flang/Runtime/io-api.h"
+
+#include <shared/rpc.h>
+#include <shared/rpc_dispatch.h>
+
+namespace Fortran::runtime::io {
+// A weak reference to the RPC client used to submit calls to the server.
+[[gnu::weak, gnu::visibility("protected")]] rpc::Client client asm(
+    "__llvm_rpc_client");
+
+RT_EXT_API_GROUP_BEGIN
+
+Cookie IODEF(BeginExternalListOutput)(
+    ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
+  return rpc::dispatch<BeginExternalListOutput_Opcode>(client,
+      IODEF(BeginExternalListOutput), unitNumber, sourceFile, sourceLine);
+}
+
+enum Iostat IODEF(EndIoStatement)(Cookie cookie) {
+  return rpc::dispatch<EndIoStatement_Opcode>(
+      client, IODEF(EndIoStatement), cookie);
+}
+
+bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) {
+  return rpc::dispatch<OutputInteger8_Opcode>(
+      client, IODEF(OutputInteger8), cookie, n);
+}
+
+bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) {
+  return rpc::dispatch<OutputInteger16_Opcode>(
+      client, IODEF(OutputInteger16), cookie, n);
+}
+
+bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) {
+  return rpc::dispatch<OutputInteger32_Opcode>(
+      client, IODEF(OutputInteger32), cookie, n);
+}
+
+bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) {
+  return rpc::dispatch<OutputInteger64_Opcode>(
+      client, IODEF(OutputInteger64), cookie, n);
+}
+
+#ifdef __SIZEOF_INT128__
+bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) {
+  return rpc::dispatch<OutputInteger128_Opcode>(
+      client, IODEF(OutputInteger128), cookie, n);
+}
+#endif
+
+bool IODEF(OutputReal32)(Cookie cookie, float x) {
+  return rpc::dispatch<OutputReal32_Opcode>(
+      client, IODEF(OutputReal32), cookie, x);
+}
+
+bool IODEF(OutputReal64)(Cookie cookie, double x) {
+  return rpc::dispatch<OutputReal64_Opcode>(
+      client, IODEF(OutputReal64), cookie, x);
+}
+
+bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) {
+  return rpc::dispatch<OutputComplex32_Opcode>(
+      client, IODEF(OutputComplex32), cookie, re, im);
+}
+
+bool IODEF(OutputComplex64)(Cookie cookie, double re, double im) {
+  return rpc::dispatch<OutputComplex64_Opcode>(
+      client, IODEF(OutputComplex64), cookie, re, im);
+}
+
+bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) {
+  return rpc::dispatch<OutputAscii_Opcode>(
+      client, IODEF(OutputAscii), cookie, x, length);
+}
+
+bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
+  return rpc::dispatch<OutputLogical_Opcode>(
+      client, IODEF(OutputLogical), cookie, truth);
+}
+
+RT_EXT_API_GROUP_END
+} // namespace Fortran::runtime::io
diff --git a/flang-rt/lib/runtime/io-api-gpu.h b/flang-rt/lib/runtime/io-api-gpu.h
new file mode 100644
index 0000000000000..c2b55f40daf36
--- /dev/null
+++ b/flang-rt/lib/runtime/io-api-gpu.h
@@ -0,0 +1,82 @@
+//===-- lib/runtime/io-api-gpu.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FLANG_RT_RUNTIME_IO_API_GPU_H_
+#define FLANG_RT_RUNTIME_IO_API_GPU_H_
+
+#include "flang-rt/runtime/memory.h"
+#include "flang-rt/runtime/terminator.h"
+#include <cstdint>
+#include <utility>
+
+namespace Fortran::runtime::io {
+// We reserve the RPC opcodes with 'f' in the MSB for Fortran usage.
+constexpr std::uint32_t MakeOpcode(std::uint32_t base) {
+  return ('f' << 24) | base;
+}
+
+// Opcodes shared between the client and server for each function we support.
+constexpr std::uint32_t BeginExternalListOutput_Opcode = MakeOpcode(0);
+constexpr std::uint32_t EndIoStatement_Opcode = MakeOpcode(1);
+constexpr std::uint32_t OutputInteger8_Opcode = MakeOpcode(2);
+constexpr std::uint32_t OutputInteger16_Opcode = MakeOpcode(3);
+constexpr std::uint32_t OutputInteger32_Opcode = MakeOpcode(4);
+constexpr std::uint32_t OutputInteger64_Opcode = MakeOpcode(5);
+constexpr std::uint32_t OutputInteger128_Opcode = MakeOpcode(6);
+constexpr std::uint32_t OutputReal32_Opcode = MakeOpcode(7);
+constexpr std::uint32_t OutputReal64_Opcode = MakeOpcode(8);
+constexpr std::uint32_t OutputComplex32_Opcode = MakeOpcode(9);
+constexpr std::uint32_t OutputComplex64_Opcode = MakeOpcode(10);
+constexpr std::uint32_t OutputAscii_Opcode = MakeOpcode(11);
+constexpr std::uint32_t OutputLogical_Opcode = MakeOpcode(12);
+
+// A simple dynamic array that only supports appending to avoid std::vector.
+template <typename T> struct DynamicArray {
+  ~DynamicArray() {
+    for (std::size_t i = 0; i < size_; ++i) {
+      data_[i].~T();
+    }
+    FreeMemory(data_);
+  }
+
+  void emplace_back(T &&value) {
+    if (size_ == capacity_) {
+      reserve(capacity_ ? capacity_ * 2 : 4);
+    }
+    new (data_ + size_) T(std::move(value));
+    ++size_;
+  }
+
+  void reserve(std::size_t newCap) {
+    if (newCap <= capacity_) {
+      return;
+    }
+    T *new_data = static_cast<T *>(
+        AllocateMemoryOrCrash(terminator_, newCap * sizeof(T)));
+    for (std::size_t i = 0; i < size_; ++i) {
+      new (new_data + i) T(std::move(data_[i]));
+      data_[i].~T();
+    }
+    FreeMemory(data_);
+    data_ = new_data;
+    capacity_ = newCap;
+  }
+
+  T *begin() const { return data_; }
+  T *end() const { return data_ + size_; }
+
+private:
+  T *data_ = nullptr;
+  std::size_t size_ = 0;
+  std::size_t capacity_ = 0;
+  Terminator terminator_{__FILE__, __LINE__};
+};
+
+} // namespace Fortran::runtime::io
+
+#endif // FLANG_RT_RUNTIME_IO_API_GPU_H_
diff --git a/flang-rt/lib/runtime/io-api-server.cpp b/flang-rt/lib/runtime/io-api-server.cpp
new file mode 100644
index 0000000000000..dd0a612572812
--- /dev/null
+++ b/flang-rt/lib/runtime/io-api-server.cpp
@@ -0,0 +1,232 @@
+//===-- lib/runtime/io-api-server.cpp ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Implements the RPC server-side handlling of the I/O statement API needed for
+// basic list-directed output (PRINT *) of intrinsic types for the GPU.
+
+#include "io-api-gpu.h"
+#include "flang-rt/runtime/memory.h"
+#include "flang-rt/runtime/terminator.h"
+#include "flang/Runtime/io-api.h"
+#include <cstdlib>
+#include <cstring>
+#include <tuple>
+
+#include <shared/rpc.h>
+#include <shared/rpc_dispatch.h>
+
+namespace Fortran::runtime::io {
+namespace {
+
+// Context used to chain the IO operations once run.
+struct IOContext {
+  Cookie cookie = nullptr;
+  enum Iostat result = IostatOk;
+};
+
+// The virtual base class to store deferred execution of a function.
+struct DeferredFunctionBase {
+  virtual ~DeferredFunctionBase() = default;
+  virtual void execute(IOContext &ctx) = 0;
+
+  static OwningPtr<char[]> TempString(const char *str) {
+    if (!str) {
+      return {};
+    }
+
+    const auto size = std::strlen(str) + 1;
+
+    Terminator terminator{__FILE__, __LINE__};
+    OwningPtr<char> temp = SizedNew<char>{terminator}(size);
+    std::memcpy(temp.get(), str, size);
+    return OwningPtr<char[]>(temp.release());
+  }
+};
+
+// Fortran does not support nested or recursive I/O, which is problematic for
+// parallel execution on a GPU. To support this, we defer execution of runtime
+// functions coming from the GPU's client until the end of that sequence is
+// reached. This allows us to finish them in a single pass.
+template <typename FnTy, typename... Args>
+struct DeferredFunction final : DeferredFunctionBase {
+  FnTy fn_;
+  std::tuple<std::decay_t<Args>...> args_;
+
+  DeferredFunction(FnTy &&fn, Args &&...args)
+      : fn_(std::forward<FnTy>(fn)), args_(std::forward<Args>(args)...) {}
+
+  // When executing the final command queue we need to replace the temporary
+  // values obtained from the GPU with the returned values from the actual
+  // runtime functions.
+  void execute(IOContext &ctx) override {
+    auto caller = [&](auto &&...args) { return fn_(Rewrite(args, ctx)...); };
+
+    using RetTy = std::invoke_result_t<FnTy,
+        decltype(Rewrite(std::declval<Args &>(), ctx))...>;
+    if constexpr (std::is_same_v<RetTy, Cookie>) {
+      ctx.cookie = std::apply(caller, args_);
+    } else if constexpr (std::is_same_v<RetTy, Iostat>) {
+      ctx.result = std::apply(caller, args_);
+    } else {
+      std::apply(caller, args_);
+    }
+  }
+
+private:
+  template <typename T> T &Rewrite(T &v, IOContext &) { return v; }
+
+  Cookie Rewrite(Cookie, IOContext &ctx) {
+    return reinterpret_cast<Cookie>(ctx.cookie);
+  }
+
+  const char *Rewrite(OwningPtr<char[]> &p, IOContext &) { return p.get(); }
+};
+
+template <typename Fn, typename... Args>
+OwningPtr<DeferredFunctionBase> MakeDeferred(Fn &&fn, Args &&...args) {
+  Terminator terminator{__FILE__, __LINE__};
+  using Ty = DeferredFunction<Fn, Args...>;
+  auto derived = SizedNew<Ty>{terminator}(
+      sizeof(Ty), std::forward<Fn>(fn), std::forward<Args>(args)...);
+
+  return OwningPtr<DeferredFunctionBase>{derived.release()};
+}
+
+// The context associated with the queue of deferred functions. This serves as
+// our cookie object while executing this on the GPU.
+struct DeferredContext {
+  IOContext ioCtx;
+  DynamicArray<OwningPtr<DeferredFunctionBase>> commands;
+};
+
+template <typename FnTy, typename... Args>
+bool EnqueueDeferred(FnTy &&fn, Cookie cookie, Args &&...args) {
+  DeferredContext *ctx = reinterpret_cast<DeferredContext *>(cookie);
+  ctx->commands.emplace_back(
+      MakeDeferred(fn, cookie, std::forward<Args>(args)...));
+  return true;
+}
+
+template <std::uint32_t NumLanes>
+rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
+  switch (port.get_opcode()) {
+  case BeginExternalListOutput_Opcode:
+    rpc::invoke<NumLanes>(port,
+        [](ExternalUnit unitNumber, const char *sourceFile,
+            int sourceLine) -> Cookie {
+          DeferredContext *ctx = new (AllocateMemoryOrCrash(
+              Terminator{__FILE__, __LINE__}, sizeof(DeferredContext)))
+              DeferredContext;
+
+          ctx->commands.emplace_back(
+              MakeDeferred(IODECL(BeginExternalListOutput), unitNumber,
+                  DeferredFunctionBase::TempString(sourceFile), sourceLine));
+
+          return reinterpret_cast<Cookie>(ctx);
+        });
+    break;
+  case EndIoStatement_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie) -> Iostat {
+      DeferredContext *ctx = reinterpret_cast<DeferredContext *>(cookie);
+
+      ctx->commands.emplace_back(
+          MakeDeferred(_FortranAioEndIoStatement, cookie));
+      for (auto &fn : ctx->commands)
+        fn->execute(ctx->ioCtx);
+      Iostat result = ctx->ioCtx.result;
+
+      ctx->~DeferredContext();
+      FreeMemory(ctx);
+
+      return result;
+    });
+    break;
+  case OutputAscii_Opcode:
+    rpc::invoke<NumLanes>(
+        port, [](Cookie cookie, const char *x, std::size_t length) -> bool {
+          return EnqueueDeferred(IODECL(OutputAscii), cookie,
+              DeferredFunctionBase::TempString(x), length);
+        });
+    break;
+  case OutputInteger8_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int8_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger8), cookie, n);
+    });
+    break;
+  case OutputInteger16_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int16_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger16), cookie, n);
+    });
+    break;
+  case OutputInteger32_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int32_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger32), cookie, n);
+    });
+    break;
+  case OutputInteger64_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int64_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger64), cookie, n);
+    });
+    break;
+#ifdef __SIZEOF_INT128__
+  case OutputInteger128_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, common::int128_t n) -> bool {
+      return EnqueueDeferred(IODECL(OutputInteger128), cookie, n);
+    });
+    break;
+#endif
+  case OutputReal32_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, float x) -> bool {
+      return EnqueueDeferred(IODECL(OutputReal32), cookie, x);
+    });
+    break;
+  case OutputReal64_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, double x) -> bool {
+      return EnqueueDeferred(IODECL(OutputReal64), cookie, x);
+    });
+    break;
+  case OutputComplex32_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, float re, float im) -> bool {
+      return EnqueueDeferred(IODECL(OutputComplex32), cookie, re, im);
+    });
+    break;
+  case OutputComplex64_Opcode:
+    rpc::invoke<NumLanes>(
+        port, [](Cookie cookie, double re, double im) -> bool {
+          return EnqueueDeferred(IODECL(OutputComplex64), cookie, re, im);
+        });
+    break;
+  case OutputLogical_Opcode:
+    rpc::invoke<NumLanes>(port, [](Cookie cookie, bool truth) -> bool {
+      return EnqueueDeferred(IODECL(OutputLogical), cookie, truth);
+    });
+    break;
+  default:
+    return rpc::RPC_UNHANDLED_OPCODE;
+  }
+
+  return rpc::RPC_SUCCESS;
+}
+} // namespace
+
+RT_EXT_API_GROUP_BEGIN
+std::uint32_t IODECL(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) {
+  rpc::Server::Port &Port = *reinterpret_cast<rpc::Server::Port *>(raw);
+  if (numLanes == 1) {
+    return HandleOpcodesImpl<1>(Port);
+  }
+  if (numLanes == 32) {
+    return HandleOpcodesImpl<32>(Port);
+  }
+  if (numLanes == 64) {
+    return HandleOpcodesImpl<64>(Port);
+  }
+  return rpc::RPC_ERROR;
+}
+RT_EXT_API_GROUP_END
+} // namespace Fortran::runtime::io
diff --git a/flang-rt/lib/runtime/memory.cpp b/flang-rt/lib/runtime/memory.cpp
index 79c7e33777569..feb72ea3edbef 100644
--- a/flang-rt/lib/runtime/memory.cpp
+++ b/flang-rt/lib/runtime/memory.cpp
@@ -44,3 +44,12 @@ void FreeMemory(void *p) { std::free(p); }
 
 RT_OFFLOAD_API_GROUP_END
 } // namespace Fortran::runtime
+
+// Freestanding support of C++ sized new / delete usage.
+void *operator new(std::size_t size) {
+  return Fortran::runtime::AllocateMemoryOrCrash(
+      Fortran::runtime::Terminator{__FILE__, __LINE__}, size);
+}
+void operator delete(void *p, std::size_t) noexcept {
+  Fortran::runtime::FreeMemory(p);
+}
diff --git a/flang/include/flang/Runtime/io-api.h b/flang/include/flang/Runtime/io-api.h
index 988fe536705e6..fe49af2f61683 100644
--- a/flang/include/flang/Runtime/io-api.h
+++ b/flang/include/flang/Runtime/io-api.h
@@ -364,6 +364,8 @@ bool IODECL(InquireInteger64)(
 // rather than by terminating the image.
 enum Iostat IODECL(EndIoStatement)(Cookie);
 
+// Used for I/O from the offloading device.
+std::uint32_t IODECL(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes);
 } // extern "C"
 } // namespace Fortran::runtime::io
 

>From 8f430145100306cecf5ec033ee62f71d04fb753f Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 12 Feb 2026 15:44:16 -0600
Subject: [PATCH 2/8] Link in and use the runtime call from the RPC handler

---
 offload/plugins-nextgen/common/CMakeLists.txt |  4 ++
 offload/plugins-nextgen/common/src/RPC.cpp    | 10 ++++
 offload/test/lit.cfg                          |  2 -
 offload/test/offloading/fortran/io.f90        | 58 +++++++++++++++++++
 runtimes/CMakeLists.txt                       |  2 +-
 5 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 offload/test/offloading/fortran/io.f90

diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt
index 23000783270f8..d02366ee72c26 100644
--- a/offload/plugins-nextgen/common/CMakeLists.txt
+++ b/offload/plugins-nextgen/common/CMakeLists.txt
@@ -35,6 +35,10 @@ endif()
 include(FindLibcCommonUtils)
 target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities)
 
+if (TARGET flang_rt.runtime.static)
+  target_link_libraries(PluginCommon PRIVATE flang_rt.runtime.static)
+endif()
+
 # Define the TARGET_NAME and DEBUG_PREFIX.
 target_compile_definitions(PluginCommon PRIVATE
   TARGET_NAME=PluginInterface
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 234bd10614654..c7c435a03274e 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -17,6 +17,10 @@
 #include "shared/rpc_opcodes.h"
 #include "shared/rpc_server.h"
 
+#if __has_include("flang/Runtime/io-api.h")
+#include "flang/Runtime/io-api.h"
+#endif
+
 using namespace llvm;
 using namespace omp;
 using namespace target;
@@ -111,6 +115,12 @@ runServer(plugin::GenericDeviceTy &Device, void *Buffer,
   if (Status == rpc::RPC_UNHANDLED_OPCODE)
     Status = LIBC_NAMESPACE::shared::handle_libc_opcodes(*Port, NumLanes);
 
+#if __has_include("flang/Runtime/io-api.h")
+  if (Status == rpc::RPC_UNHANDLED_OPCODE)
+    Status = static_cast<rpc::Status>(
+        Fortran::runtime::io::IODECL(HandleRPCOpcodes)(&*Port, NumLanes));
+#endif
+
   Port->close();
   return Status;
 }
diff --git a/offload/test/lit.cfg b/offload/test/lit.cfg
index 0d5a9c95c1d95..0e0d9abd3d8ff 100644
--- a/offload/test/lit.cfg
+++ b/offload/test/lit.cfg
@@ -166,8 +166,6 @@ elif config.operating_system == 'Darwin':
     config.test_flags += " -Wl,-rpath," + config.library_dir
     config.test_flags += " -Wl,-rpath," + config.omp_host_rtl_directory
 else: # Unices
-    if config.libomptarget_current_target != "nvptx64-nvidia-cuda":
-        config.test_flags += " -nogpulib"
     config.test_flags += " -Wl,-rpath," + config.library_dir
     config.test_flags += " -Wl,-rpath," + config.omp_host_rtl_directory
     config.test_flags += " -Wl,-rpath," + config.llvm_library_intdir
diff --git a/offload/test/offloading/fortran/io.f90 b/offload/test/offloading/fortran/io.f90
new file mode 100644
index 0000000000000..4b7612b37bf70
--- /dev/null
+++ b/offload/test/offloading/fortran/io.f90
@@ -0,0 +1,58 @@
+! REQUIRES: flang, libc
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+! REQUIRES: flang, libc
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+
+program hello_gpu
+  implicit none
+
+  integer :: i
+  real :: r
+  complex :: c
+  logical :: l
+
+  i = 42
+  r = 3.14
+  c = (1.0, -1.0)
+  l = .true.
+
+  ! CHECK: Hello from GPU
+  ! CHECK: Hello from GPU
+  ! CHECK: Hello from GPU
+  ! CHECK: Hello from GPU
+  !$omp target teams num_teams(4)
+  !$omp parallel num_threads(1)
+    print *, "Hello from GPU"
+  !$omp end parallel
+  !$omp end target teams
+
+  ! CHECK: 42
+  !$omp target teams num_teams(1)
+  !$omp parallel num_threads(1)
+    print *, i
+  !$omp end parallel
+  !$omp end target teams
+
+  ! CHECK: 3.14
+  !$omp target teams num_teams(1)
+  !$omp parallel num_threads(1)
+    print *, r
+  !$omp end parallel
+  !$omp end target teams
+
+  ! CHECK: (1.,-1.)
+  !$omp target teams num_teams(1)
+  !$omp parallel num_threads(1)
+    print *, c
+  !$omp end parallel
+  !$omp end target teams
+
+  ! CHECK: T
+  !$omp target teams num_teams(1)
+  !$omp parallel num_threads(1)
+    print *, l
+  !$omp end parallel
+  !$omp end target teams
+
+end program hello_gpu
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index 33f6da32fe6ff..aae5bc78a12f7 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -36,7 +36,7 @@ list(INSERT CMAKE_MODULE_PATH 0
 # We order libraries to mirror roughly how they are layered, except that compiler-rt can depend
 # on libc++, so we put it after.
 set(LLVM_DEFAULT_RUNTIMES "libc;libunwind;libcxxabi;libcxx;compiler-rt;libclc;openmp;offload")
-set(LLVM_SUPPORTED_RUNTIMES "${LLVM_DEFAULT_RUNTIMES};llvm-libgcc;flang-rt;libsycl;orc-rt")
+set(LLVM_SUPPORTED_RUNTIMES "libc;libunwind;libcxxabi;libcxx;compiler-rt;libclc;openmp;flang-rt;offload;llvm-libgcc;libsycl;orc-rt")
 set(LLVM_ENABLE_RUNTIMES "" CACHE STRING
   "Semicolon-separated list of runtimes to build, or \"all\" (${LLVM_DEFAULT_RUNTIMES}). Supported runtimes are ${LLVM_SUPPORTED_RUNTIMES}.")
 if(LLVM_ENABLE_RUNTIMES STREQUAL "all" )

>From 16184541ea3b7d878548e1a1fb8de07f609e7429 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 12 Feb 2026 22:06:10 -0600
Subject: [PATCH 3/8] update test

---
 offload/test/offloading/fortran/io.f90 | 45 ++++----------------------
 1 file changed, 7 insertions(+), 38 deletions(-)

diff --git a/offload/test/offloading/fortran/io.f90 b/offload/test/offloading/fortran/io.f90
index 4b7612b37bf70..67bf5e915a20a 100644
--- a/offload/test/offloading/fortran/io.f90
+++ b/offload/test/offloading/fortran/io.f90
@@ -1,10 +1,7 @@
 ! REQUIRES: flang, libc
 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
 
-! REQUIRES: flang, libc
-! RUN: %libomptarget-compile-fortran-run-and-check-generic
-
-program hello_gpu
+program io_test
   implicit none
 
   integer :: i
@@ -17,42 +14,14 @@ program hello_gpu
   c = (1.0, -1.0)
   l = .true.
 
-  ! CHECK: Hello from GPU
-  ! CHECK: Hello from GPU
-  ! CHECK: Hello from GPU
-  ! CHECK: Hello from GPU
+  ! CHECK: Text 42 3.14 (1.,-1.) T
+  ! CHECK: Text 42 3.14 (1.,-1.) T
+  ! CHECK: Text 42 3.14 (1.,-1.) T
+  ! CHECK: Text 42 3.14 (1.,-1.) T
   !$omp target teams num_teams(4)
   !$omp parallel num_threads(1)
-    print *, "Hello from GPU"
-  !$omp end parallel
-  !$omp end target teams
-
-  ! CHECK: 42
-  !$omp target teams num_teams(1)
-  !$omp parallel num_threads(1)
-    print *, i
-  !$omp end parallel
-  !$omp end target teams
-
-  ! CHECK: 3.14
-  !$omp target teams num_teams(1)
-  !$omp parallel num_threads(1)
-    print *, r
-  !$omp end parallel
-  !$omp end target teams
-
-  ! CHECK: (1.,-1.)
-  !$omp target teams num_teams(1)
-  !$omp parallel num_threads(1)
-    print *, c
-  !$omp end parallel
-  !$omp end target teams
-
-  ! CHECK: T
-  !$omp target teams num_teams(1)
-  !$omp parallel num_threads(1)
-    print *, l
+    print *, "Text", " ", i, " ", r, " ", c, " ", l
   !$omp end parallel
   !$omp end target teams
 
-end program hello_gpu
+end program io_test

>From c22c240ca089a2b253392ae2c515b5cbd514074f Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 13 Feb 2026 16:05:40 -0600
Subject: [PATCH 4/8] comments

---
 flang-rt/lib/runtime/CMakeLists.txt        |  2 +-
 flang-rt/lib/runtime/io-api-server.cpp     | 35 ++++++++++------------
 offload/plugins-nextgen/common/src/RPC.cpp |  2 +-
 3 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index da990419fe2a8..f9b0ba2feef4b 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -70,7 +70,6 @@ set(supported_sources
   unit.cpp
   utf.cpp
   work-queue.cpp
-  io-api-server.cpp
 )
 
 # List of source not used for GPU offloading.
@@ -90,6 +89,7 @@ set(host_sources
   temporary-stack.cpp
   time-intrinsic.cpp
   unit-map.cpp
+  io-api-server.cpp
 )
 
 # Sources that can be compiled directly for the GPU.
diff --git a/flang-rt/lib/runtime/io-api-server.cpp b/flang-rt/lib/runtime/io-api-server.cpp
index dd0a612572812..d10d1fc3b3105 100644
--- a/flang-rt/lib/runtime/io-api-server.cpp
+++ b/flang-rt/lib/runtime/io-api-server.cpp
@@ -80,11 +80,9 @@ struct DeferredFunction final : DeferredFunctionBase {
 private:
   template <typename T> T &Rewrite(T &v, IOContext &) { return v; }
 
-  Cookie Rewrite(Cookie, IOContext &ctx) {
-    return reinterpret_cast<Cookie>(ctx.cookie);
-  }
-
   const char *Rewrite(OwningPtr<char[]> &p, IOContext &) { return p.get(); }
+
+  Cookie Rewrite(Cookie, IOContext &ctx) { return ctx.cookie; }
 };
 
 template <typename Fn, typename... Args>
@@ -124,7 +122,7 @@ rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
               DeferredContext;
 
           ctx->commands.emplace_back(
-              MakeDeferred(IODECL(BeginExternalListOutput), unitNumber,
+              MakeDeferred(IONAME(BeginExternalListOutput), unitNumber,
                   DeferredFunctionBase::TempString(sourceFile), sourceLine));
 
           return reinterpret_cast<Cookie>(ctx);
@@ -134,8 +132,7 @@ rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
     rpc::invoke<NumLanes>(port, [](Cookie cookie) -> Iostat {
       DeferredContext *ctx = reinterpret_cast<DeferredContext *>(cookie);
 
-      ctx->commands.emplace_back(
-          MakeDeferred(_FortranAioEndIoStatement, cookie));
+      ctx->commands.emplace_back(MakeDeferred(IONAME(EndIoStatement), cookie));
       for (auto &fn : ctx->commands)
         fn->execute(ctx->ioCtx);
       Iostat result = ctx->ioCtx.result;
@@ -149,61 +146,61 @@ rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
   case OutputAscii_Opcode:
     rpc::invoke<NumLanes>(
         port, [](Cookie cookie, const char *x, std::size_t length) -> bool {
-          return EnqueueDeferred(IODECL(OutputAscii), cookie,
+          return EnqueueDeferred(IONAME(OutputAscii), cookie,
               DeferredFunctionBase::TempString(x), length);
         });
     break;
   case OutputInteger8_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int8_t n) -> bool {
-      return EnqueueDeferred(IODECL(OutputInteger8), cookie, n);
+      return EnqueueDeferred(IONAME(OutputInteger8), cookie, n);
     });
     break;
   case OutputInteger16_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int16_t n) -> bool {
-      return EnqueueDeferred(IODECL(OutputInteger16), cookie, n);
+      return EnqueueDeferred(IONAME(OutputInteger16), cookie, n);
     });
     break;
   case OutputInteger32_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int32_t n) -> bool {
-      return EnqueueDeferred(IODECL(OutputInteger32), cookie, n);
+      return EnqueueDeferred(IONAME(OutputInteger32), cookie, n);
     });
     break;
   case OutputInteger64_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, std::int64_t n) -> bool {
-      return EnqueueDeferred(IODECL(OutputInteger64), cookie, n);
+      return EnqueueDeferred(IONAME(OutputInteger64), cookie, n);
     });
     break;
 #ifdef __SIZEOF_INT128__
   case OutputInteger128_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, common::int128_t n) -> bool {
-      return EnqueueDeferred(IODECL(OutputInteger128), cookie, n);
+      return EnqueueDeferred(IONAME(OutputInteger128), cookie, n);
     });
     break;
 #endif
   case OutputReal32_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, float x) -> bool {
-      return EnqueueDeferred(IODECL(OutputReal32), cookie, x);
+      return EnqueueDeferred(IONAME(OutputReal32), cookie, x);
     });
     break;
   case OutputReal64_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, double x) -> bool {
-      return EnqueueDeferred(IODECL(OutputReal64), cookie, x);
+      return EnqueueDeferred(IONAME(OutputReal64), cookie, x);
     });
     break;
   case OutputComplex32_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, float re, float im) -> bool {
-      return EnqueueDeferred(IODECL(OutputComplex32), cookie, re, im);
+      return EnqueueDeferred(IONAME(OutputComplex32), cookie, re, im);
     });
     break;
   case OutputComplex64_Opcode:
     rpc::invoke<NumLanes>(
         port, [](Cookie cookie, double re, double im) -> bool {
-          return EnqueueDeferred(IODECL(OutputComplex64), cookie, re, im);
+          return EnqueueDeferred(IONAME(OutputComplex64), cookie, re, im);
         });
     break;
   case OutputLogical_Opcode:
     rpc::invoke<NumLanes>(port, [](Cookie cookie, bool truth) -> bool {
-      return EnqueueDeferred(IODECL(OutputLogical), cookie, truth);
+      return EnqueueDeferred(IONAME(OutputLogical), cookie, truth);
     });
     break;
   default:
@@ -215,7 +212,7 @@ rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
 } // namespace
 
 RT_EXT_API_GROUP_BEGIN
-std::uint32_t IODECL(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) {
+std::uint32_t IONAME(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) {
   rpc::Server::Port &Port = *reinterpret_cast<rpc::Server::Port *>(raw);
   if (numLanes == 1) {
     return HandleOpcodesImpl<1>(Port);
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index c7c435a03274e..adef5978b42c3 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -118,7 +118,7 @@ runServer(plugin::GenericDeviceTy &Device, void *Buffer,
 #if __has_include("flang/Runtime/io-api.h")
   if (Status == rpc::RPC_UNHANDLED_OPCODE)
     Status = static_cast<rpc::Status>(
-        Fortran::runtime::io::IODECL(HandleRPCOpcodes)(&*Port, NumLanes));
+        Fortran::runtime::io::IONAME(HandleRPCOpcodes)(&*Port, NumLanes));
 #endif
 
   Port->close();

>From 992b6628e9c5cd3ba822a6b03b7b80bae241a5db Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 13 Feb 2026 16:06:39 -0600
Subject: [PATCH 5/8] forgot this one

---
 flang-rt/lib/runtime/io-api-server.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang-rt/lib/runtime/io-api-server.cpp b/flang-rt/lib/runtime/io-api-server.cpp
index d10d1fc3b3105..5de323adcf0f4 100644
--- a/flang-rt/lib/runtime/io-api-server.cpp
+++ b/flang-rt/lib/runtime/io-api-server.cpp
@@ -212,7 +212,7 @@ rpc::Status HandleOpcodesImpl(rpc::Server::Port &port) {
 } // namespace
 
 RT_EXT_API_GROUP_BEGIN
-std::uint32_t IONAME(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) {
+std::uint32_t IODEF(HandleRPCOpcodes)(void *raw, std::uint32_t numLanes) {
   rpc::Server::Port &Port = *reinterpret_cast<rpc::Server::Port *>(raw);
   if (numLanes == 1) {
     return HandleOpcodesImpl<1>(Port);

>From 608b50a2fa59ebc7f9b4095724926a467dcc8bdd Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 13 Feb 2026 16:15:32 -0600
Subject: [PATCH 6/8] And for the GPU code

---
 flang-rt/lib/runtime/io-api-gpu.cpp | 52 ++++++++++++++---------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/flang-rt/lib/runtime/io-api-gpu.cpp b/flang-rt/lib/runtime/io-api-gpu.cpp
index 77a62e07b3877..5b14d829afb92 100644
--- a/flang-rt/lib/runtime/io-api-gpu.cpp
+++ b/flang-rt/lib/runtime/io-api-gpu.cpp
@@ -27,72 +27,72 @@ namespace Fortran::runtime::io {
 
 RT_EXT_API_GROUP_BEGIN
 
-Cookie IODEF(BeginExternalListOutput)(
+Cookie IONAME(BeginExternalListOutput)(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
   return rpc::dispatch<BeginExternalListOutput_Opcode>(client,
-      IODEF(BeginExternalListOutput), unitNumber, sourceFile, sourceLine);
+      IONAME(BeginExternalListOutput), unitNumber, sourceFile, sourceLine);
 }
 
-enum Iostat IODEF(EndIoStatement)(Cookie cookie) {
+enum Iostat IONAME(EndIoStatement)(Cookie cookie) {
   return rpc::dispatch<EndIoStatement_Opcode>(
-      client, IODEF(EndIoStatement), cookie);
+      client, IONAME(EndIoStatement), cookie);
 }
 
-bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) {
+bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) {
   return rpc::dispatch<OutputInteger8_Opcode>(
-      client, IODEF(OutputInteger8), cookie, n);
+      client, IONAME(OutputInteger8), cookie, n);
 }
 
-bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) {
+bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) {
   return rpc::dispatch<OutputInteger16_Opcode>(
-      client, IODEF(OutputInteger16), cookie, n);
+      client, IONAME(OutputInteger16), cookie, n);
 }
 
-bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) {
+bool IONAME(OutputInteger32)(Cookie cookie, std::int32_t n) {
   return rpc::dispatch<OutputInteger32_Opcode>(
-      client, IODEF(OutputInteger32), cookie, n);
+      client, IONAME(OutputInteger32), cookie, n);
 }
 
-bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) {
+bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) {
   return rpc::dispatch<OutputInteger64_Opcode>(
-      client, IODEF(OutputInteger64), cookie, n);
+      client, IONAME(OutputInteger64), cookie, n);
 }
 
 #ifdef __SIZEOF_INT128__
-bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) {
+bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) {
   return rpc::dispatch<OutputInteger128_Opcode>(
-      client, IODEF(OutputInteger128), cookie, n);
+      client, IONAME(OutputInteger128), cookie, n);
 }
 #endif
 
-bool IODEF(OutputReal32)(Cookie cookie, float x) {
+bool IONAME(OutputReal32)(Cookie cookie, float x) {
   return rpc::dispatch<OutputReal32_Opcode>(
-      client, IODEF(OutputReal32), cookie, x);
+      client, IONAME(OutputReal32), cookie, x);
 }
 
-bool IODEF(OutputReal64)(Cookie cookie, double x) {
+bool IONAME(OutputReal64)(Cookie cookie, double x) {
   return rpc::dispatch<OutputReal64_Opcode>(
-      client, IODEF(OutputReal64), cookie, x);
+      client, IONAME(OutputReal64), cookie, x);
 }
 
-bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) {
+bool IONAME(OutputComplex32)(Cookie cookie, float re, float im) {
   return rpc::dispatch<OutputComplex32_Opcode>(
-      client, IODEF(OutputComplex32), cookie, re, im);
+      client, IONAME(OutputComplex32), cookie, re, im);
 }
 
-bool IODEF(OutputComplex64)(Cookie cookie, double re, double im) {
+bool IONAME(OutputComplex64)(Cookie cookie, double re, double im) {
   return rpc::dispatch<OutputComplex64_Opcode>(
-      client, IODEF(OutputComplex64), cookie, re, im);
+      client, IONAME(OutputComplex64), cookie, re, im);
 }
 
-bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) {
+bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) {
   return rpc::dispatch<OutputAscii_Opcode>(
-      client, IODEF(OutputAscii), cookie, x, length);
+      client, IONAME(OutputAscii), cookie, x, length);
 }
 
-bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
+bool IONAME(OutputLogical)(Cookie cookie, bool truth) {
   return rpc::dispatch<OutputLogical_Opcode>(
-      client, IODEF(OutputLogical), cookie, truth);
+      client, IONAME(OutputLogical), cookie, truth);
 }
 
 RT_EXT_API_GROUP_END

>From 896782493162304ecb70441aab77badb1815ed65 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 13 Feb 2026 18:31:02 -0600
Subject: [PATCH 7/8] Make libc dependency optional

---
 flang-rt/cmake/modules/AddFlangRT.cmake | 8 +++++---
 flang-rt/lib/runtime/CMakeLists.txt     | 7 ++++++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/flang-rt/cmake/modules/AddFlangRT.cmake b/flang-rt/cmake/modules/AddFlangRT.cmake
index fd042f5ed9f63..3775fe8494634 100644
--- a/flang-rt/cmake/modules/AddFlangRT.cmake
+++ b/flang-rt/cmake/modules/AddFlangRT.cmake
@@ -123,7 +123,9 @@ function (add_flangrt_library name)
   endif ()
 
   # Include the RPC utilities from the `libc` project.
-  include(FindLibcCommonUtils)
+  if (TARGET llvm-libc-common-utilities)
+    set(extra_deps llvm-libc-common-utilities)
+  endif()
 
   # Also add header files to IDEs to list as part of the library.
   set_source_files_properties(${ARG_ADDITIONAL_HEADERS} PROPERTIES HEADER_FILE_ONLY ON)
@@ -142,11 +144,11 @@ function (add_flangrt_library name)
   endif ()
   if (build_static)
     add_library("${name_static}" STATIC ${extra_args} ${ARG_ADDITIONAL_HEADERS} ${ARG_UNPARSED_ARGUMENTS})
-    target_link_libraries("${name_static}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-static llvm-libc-common-utilities)
+    target_link_libraries("${name_static}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-static ${extra_deps})
   endif ()
   if (build_shared)
     add_library("${name_shared}" SHARED ${extra_args} ${ARG_ADDITIONAL_HEADERS} ${ARG_UNPARSED_ARGUMENTS})
-    target_link_libraries("${name_shared}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-shared llvm-libc-common-utilities)
+    target_link_libraries("${name_shared}" PRIVATE flang-rt-libcxx-headers flang-rt-libc-headers flang-rt-libc-shared  ${extra_deps})
     if (Threads_FOUND) 
       target_link_libraries(${name_shared} PUBLIC Threads::Threads)
     endif ()
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index f9b0ba2feef4b..45dca9e4076d3 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -12,6 +12,9 @@ find_package(Backtrace)
 set(HAVE_BACKTRACE ${Backtrace_FOUND})
 set(BACKTRACE_HEADER ${Backtrace_HEADER})
 
+# Include the RPC utilities from the `libc` project.
+include(FindLibcCommonUtils)
+
 # List of files that are buildable for all devices.
 set(supported_sources
   ${FLANG_SOURCE_DIR}/lib/Decimal/binary-to-decimal.cpp
@@ -89,8 +92,10 @@ set(host_sources
   temporary-stack.cpp
   time-intrinsic.cpp
   unit-map.cpp
-  io-api-server.cpp
 )
+if (TARGET llvm-libc-common-utilities)
+  list(APPEND host_sources io-api-server.cpp)
+endif()
 
 # Sources that can be compiled directly for the GPU.
 set(gpu_sources

>From 129927ee31867f0e12971ab7cb955a45ee8aaa57 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sat, 14 Feb 2026 09:13:50 -0600
Subject: [PATCH 8/8] Use IODEF right

---
 flang-rt/lib/runtime/io-api-gpu.cpp | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/flang-rt/lib/runtime/io-api-gpu.cpp b/flang-rt/lib/runtime/io-api-gpu.cpp
index 5b14d829afb92..ce79b145251c1 100644
--- a/flang-rt/lib/runtime/io-api-gpu.cpp
+++ b/flang-rt/lib/runtime/io-api-gpu.cpp
@@ -27,70 +27,70 @@ namespace Fortran::runtime::io {
 
 RT_EXT_API_GROUP_BEGIN
 
-Cookie IONAME(BeginExternalListOutput)(
+Cookie IODEF(BeginExternalListOutput)(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
   return rpc::dispatch<BeginExternalListOutput_Opcode>(client,
       IONAME(BeginExternalListOutput), unitNumber, sourceFile, sourceLine);
 }
 
-enum Iostat IONAME(EndIoStatement)(Cookie cookie) {
+enum Iostat IODEF(EndIoStatement)(Cookie cookie) {
   return rpc::dispatch<EndIoStatement_Opcode>(
       client, IONAME(EndIoStatement), cookie);
 }
 
-bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) {
+bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) {
   return rpc::dispatch<OutputInteger8_Opcode>(
       client, IONAME(OutputInteger8), cookie, n);
 }
 
-bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) {
+bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) {
   return rpc::dispatch<OutputInteger16_Opcode>(
       client, IONAME(OutputInteger16), cookie, n);
 }
 
-bool IONAME(OutputInteger32)(Cookie cookie, std::int32_t n) {
+bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) {
   return rpc::dispatch<OutputInteger32_Opcode>(
       client, IONAME(OutputInteger32), cookie, n);
 }
 
-bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) {
+bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) {
   return rpc::dispatch<OutputInteger64_Opcode>(
       client, IONAME(OutputInteger64), cookie, n);
 }
 
 #ifdef __SIZEOF_INT128__
-bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) {
+bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) {
   return rpc::dispatch<OutputInteger128_Opcode>(
       client, IONAME(OutputInteger128), cookie, n);
 }
 #endif
 
-bool IONAME(OutputReal32)(Cookie cookie, float x) {
+bool IODEF(OutputReal32)(Cookie cookie, float x) {
   return rpc::dispatch<OutputReal32_Opcode>(
       client, IONAME(OutputReal32), cookie, x);
 }
 
-bool IONAME(OutputReal64)(Cookie cookie, double x) {
+bool IODEF(OutputReal64)(Cookie cookie, double x) {
   return rpc::dispatch<OutputReal64_Opcode>(
       client, IONAME(OutputReal64), cookie, x);
 }
 
-bool IONAME(OutputComplex32)(Cookie cookie, float re, float im) {
+bool IODEF(OutputComplex32)(Cookie cookie, float re, float im) {
   return rpc::dispatch<OutputComplex32_Opcode>(
       client, IONAME(OutputComplex32), cookie, re, im);
 }
 
-bool IONAME(OutputComplex64)(Cookie cookie, double re, double im) {
+bool IODEF(OutputComplex64)(Cookie cookie, double re, double im) {
   return rpc::dispatch<OutputComplex64_Opcode>(
       client, IONAME(OutputComplex64), cookie, re, im);
 }
 
-bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) {
+bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) {
   return rpc::dispatch<OutputAscii_Opcode>(
       client, IONAME(OutputAscii), cookie, x, length);
 }
 
-bool IONAME(OutputLogical)(Cookie cookie, bool truth) {
+bool IODEF(OutputLogical)(Cookie cookie, bool truth) {
   return rpc::dispatch<OutputLogical_Opcode>(
       client, IONAME(OutputLogical), cookie, truth);
 }



More information about the llvm-commits mailing list