[llvm] [OpenMP] Unconditionally provide an RPC client interface for OpenMP (PR #117933)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 2 12:06:03 PST 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/117933

>From 54a6c9163d494586c692df3c77628869c25ce015 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 27 Nov 2024 16:18:35 -0600
Subject: [PATCH 1/7] [OpenMP] Unconditionally provide an RPC client interface
 for OpenMP

Summary:
This patch adds an RPC interface that lives directly in the OpenMP
device runtime. This allows OpenMP to implement custom opcodes.
Currently this is only providing the host call interface, which is the
raw version of reverse offloading. Previously this lived in `libc/` as
an extension which is not the correct place.

The interface here uses a weak symbol for the RPC client by the same
name that the `libc` interface uses. This means that it will defer to
the libc one if both are present so we don't need to set up multiple
instances.

The presense of this symbol is what controls whether or not we set up
the RPC server. Because this is an external symbol it normally won't be
optimized out, so there's a special pass in OpenMPOpt that deletes this
symbol if it is unused during linking. That means at `O0` the RPC server
will always be present now, but will be removed trivially if it's not
used at O1 and higher.
---
 offload/DeviceRTL/CMakeLists.txt              |  2 ++
 offload/DeviceRTL/src/Misc.cpp                | 22 ++++++++++++++++
 offload/DeviceRTL/src/exports                 |  1 +
 offload/include/Shared/RPCOpcodes.h           | 25 +++++++++++++++++++
 offload/plugins-nextgen/common/src/RPC.cpp    | 17 +++++++++++++
 .../{libc/host_call.c => api/omp_host_call.c} | 11 ++++----
 6 files changed, 72 insertions(+), 6 deletions(-)
 create mode 100644 offload/include/Shared/RPCOpcodes.h
 rename offload/test/{libc/host_call.c => api/omp_host_call.c} (82%)

diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index c76ad018ab4fe7..3da83e5c307132 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -131,6 +131,7 @@ set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
              -DOMPTARGET_DEVICE_RUNTIME
              -I${include_directory}
              -I${devicertl_base_directory}/../include
+             -I${LLVM_MAIN_SRC_DIR}/../libc
              ${LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL}
 )
 
@@ -275,6 +276,7 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
     target_compile_definitions(${ide_target_name} PRIVATE SHARED_SCRATCHPAD_SIZE=512)
     target_include_directories(${ide_target_name} PRIVATE
       ${include_directory}
+      ${LLVM_MAIN_SRC_DIR}/../libc
       ${devicertl_base_directory}/../include
       ${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
     )
diff --git a/offload/DeviceRTL/src/Misc.cpp b/offload/DeviceRTL/src/Misc.cpp
index 8e690f6fd8e7ce..c1df477365bcb6 100644
--- a/offload/DeviceRTL/src/Misc.cpp
+++ b/offload/DeviceRTL/src/Misc.cpp
@@ -12,6 +12,8 @@
 #include "Allocator.h"
 #include "Configuration.h"
 #include "DeviceTypes.h"
+#include "Shared/RPCOpcodes.h"
+#include "shared/rpc.h"
 
 #include "Debug.h"
 
@@ -110,6 +112,12 @@ void *indirectCallLookup(void *HstPtr) {
   return HstPtr;
 }
 
+/// The openmp client instance used to communicate with the server.
+/// FIXME: This is marked as 'retain' so that it is not removed via
+/// `-mlink-builtin-bitcode`
+[[gnu::visibility("protected"), gnu::weak,
+  gnu::retain]] rpc::Client Client asm("__llvm_rpc_client");
+
 } // namespace impl
 } // namespace ompx
 
@@ -156,6 +164,20 @@ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
     return;
   }
 }
+
+unsigned long long __llvm_omp_host_call(void *fn, void *data, size_t size) {
+  rpc::Client::Port Port = ompx::impl::Client.open<OFFLOAD_HOST_CALL>();
+  Port.send_n(data, size);
+  Port.send([=](rpc::Buffer *buffer, uint32_t) {
+    buffer->data[0] = reinterpret_cast<uintptr_t>(fn);
+  });
+  unsigned long long Ret;
+  Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
+    Ret = static_cast<unsigned long long>(Buffer->data[0]);
+  });
+  Port.close();
+  return Ret;
+}
 }
 
 ///}
diff --git a/offload/DeviceRTL/src/exports b/offload/DeviceRTL/src/exports
index 288ddf90b4a9f2..01667e7aba827a 100644
--- a/offload/DeviceRTL/src/exports
+++ b/offload/DeviceRTL/src/exports
@@ -15,4 +15,5 @@ malloc
 free
 memcmp
 printf
+__llvm_rpc_client
 __assert_fail
diff --git a/offload/include/Shared/RPCOpcodes.h b/offload/include/Shared/RPCOpcodes.h
new file mode 100644
index 00000000000000..beee29df1f7076
--- /dev/null
+++ b/offload/include/Shared/RPCOpcodes.h
@@ -0,0 +1,25 @@
+//===-- Shared/RPCOpcodes.h - Offload specific RPC opcodes ----- C++ ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines RPC opcodes that are specifically used by the OpenMP device runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPTARGET_SHARED_RPC_OPCODES_H
+#define OMPTARGET_SHARED_RPC_OPCODES_H
+
+#define LLVM_OFFLOAD_RPC_BASE 'o'
+#define LLVM_OFFLOAD_OPCODE(n) (LLVM_OFFLOAD_RPC_BASE << 24 | n)
+
+typedef enum {
+  OFFLOAD_HOST_CALL = LLVM_OFFLOAD_OPCODE(0),
+} offload_opcode_t;
+
+#undef LLVM_OFFLOAD_OPCODE
+
+#endif // OMPTARGET_SHARED_RPC_OPCODES_H
diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index c35431da69eb65..38509b5d78e615 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -9,6 +9,7 @@
 #include "RPC.h"
 
 #include "Shared/Debug.h"
+#include "Shared/RPCOpcodes.h"
 
 #include "PluginInterface.h"
 
@@ -93,6 +94,22 @@ Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
     });
     break;
   }
+  case OFFLOAD_HOST_CALL: {
+    uint64_t Sizes[64] = {0};
+    unsigned long long Results[64] = {0};
+    void *Args[64] = {nullptr};
+    Port->recv_n(Args, Sizes, [&](uint64_t Size) { return new char[Size]; });
+    Port->recv([&](rpc::Buffer *buffer, uint32_t ID) {
+      using FuncPtrTy = unsigned long long (*)(void *);
+      auto Func = reinterpret_cast<FuncPtrTy>(buffer->data[0]);
+      Results[ID] = Func(Args[ID]);
+    });
+    Port->send([&](rpc::Buffer *Buffer, uint32_t ID) {
+      Buffer->data[0] = static_cast<uint64_t>(Results[ID]);
+      delete[] reinterpret_cast<char *>(Args[ID]);
+    });
+    break;
+  }
   default:
     // Let the `libc` library handle any other unhandled opcodes.
     Status = handle_libc_opcodes(*Port, Device.getWarpSize());
diff --git a/offload/test/libc/host_call.c b/offload/test/api/omp_host_call.c
similarity index 82%
rename from offload/test/libc/host_call.c
rename to offload/test/api/omp_host_call.c
index 61c4e14d5b3881..a3d1a97822b31b 100644
--- a/offload/test/libc/host_call.c
+++ b/offload/test/api/omp_host_call.c
@@ -1,20 +1,18 @@
 // RUN: %libomptarget-compile-run-and-check-generic
 
-// REQUIRES: libc
-
 #include <assert.h>
 #include <omp.h>
 #include <stdio.h>
 
 #pragma omp begin declare variant match(device = {kind(gpu)})
 // Extension provided by the 'libc' project.
-unsigned long long rpc_host_call(void *fn, void *args, size_t size);
-#pragma omp declare target to(rpc_host_call) device_type(nohost)
+unsigned long long __llvm_omp_host_call(void *fn, void *args, size_t size);
+#pragma omp declare target to(__llvm_omp_host_call) device_type(nohost)
 #pragma omp end declare variant
 
 #pragma omp begin declare variant match(device = {kind(cpu)})
 // Dummy host implementation to make this work for all targets.
-unsigned long long rpc_host_call(void *fn, void *args, size_t size) {
+unsigned long long __llvm_omp_host_call(void *fn, void *args, size_t size) {
   return ((unsigned long long (*)(void *))fn)(args);
 }
 #pragma omp end declare variant
@@ -58,7 +56,8 @@ int main() {
 #pragma omp parallel num_threads(2)
   {
     args_t args = {omp_get_thread_num(), omp_get_team_num()};
-    unsigned long long res = rpc_host_call(fn_ptr, &args, sizeof(args_t));
+    unsigned long long res =
+        __llvm_omp_host_call(fn_ptr, &args, sizeof(args_t));
     printf("Result: %d\n", (int)res);
   }
 }

>From 144b86576a0e49c392762cea499d29dc7b38c797 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 28 Nov 2024 09:12:39 -0600
Subject: [PATCH 2/7] Move to dispatch function thing

---
 offload/plugins-nextgen/common/src/RPC.cpp | 95 ++++++++++++++--------
 1 file changed, 59 insertions(+), 36 deletions(-)

diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 38509b5d78e615..639800b2086d1a 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -23,6 +23,62 @@ using namespace llvm;
 using namespace omp;
 using namespace target;
 
+template <uint32_t NumLanes>
+rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
+                                   rpc::Server::Port &Port) {
+
+  int Status = rpc::SUCCESS;
+  switch (Port.get_opcode()) {
+  case RPC_MALLOC: {
+    Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
+      Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
+          Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
+    });
+    break;
+  }
+  case RPC_FREE: {
+    Port.recv([&](rpc::Buffer *Buffer, uint32_t) {
+      Device.free(reinterpret_cast<void *>(Buffer->data[0]),
+                  TARGET_ALLOC_DEVICE_NON_BLOCKING);
+    });
+    break;
+  }
+  case OFFLOAD_HOST_CALL: {
+    uint64_t Sizes[NumLanes] = {0};
+    unsigned long long Results[NumLanes] = {0};
+    void *Args[NumLanes] = {nullptr};
+    Port.recv_n(Args, Sizes, [&](uint64_t Size) { return new char[Size]; });
+    Port.recv([&](rpc::Buffer *buffer, uint32_t ID) {
+      using FuncPtrTy = unsigned long long (*)(void *);
+      auto Func = reinterpret_cast<FuncPtrTy>(buffer->data[0]);
+      Results[ID] = Func(Args[ID]);
+    });
+    Port.send([&](rpc::Buffer *Buffer, uint32_t ID) {
+      Buffer->data[0] = static_cast<uint64_t>(Results[ID]);
+      delete[] reinterpret_cast<char *>(Args[ID]);
+    });
+    break;
+  }
+  default:
+    return rpc::UNHANDLED_OPCODE;
+    break;
+  }
+  return rpc::UNHANDLED_OPCODE;
+}
+
+static rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
+                                          rpc::Server::Port &Port,
+                                          uint32_t NumLanes) {
+  if (NumLanes == 1)
+    return handle_offload_opcodes<1>(Device, Port);
+  else if (NumLanes == 32)
+    return handle_offload_opcodes<32>(Device, Port);
+  else if (NumLanes == 64)
+    return handle_offload_opcodes<64>(Device, Port);
+  else
+    return rpc::ERROR;
+}
+
 RPCServerTy::RPCServerTy(plugin::GenericPluginTy &Plugin)
     : Buffers(Plugin.getNumDevices()) {}
 
@@ -78,43 +134,10 @@ Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
   if (!Port)
     return Error::success();
 
-  int Status = rpc::SUCCESS;
-  switch (Port->get_opcode()) {
-  case RPC_MALLOC: {
-    Port->recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
-      Buffer->data[0] = reinterpret_cast<uintptr_t>(Device.allocate(
-          Buffer->data[0], nullptr, TARGET_ALLOC_DEVICE_NON_BLOCKING));
-    });
-    break;
-  }
-  case RPC_FREE: {
-    Port->recv([&](rpc::Buffer *Buffer, uint32_t) {
-      Device.free(reinterpret_cast<void *>(Buffer->data[0]),
-                  TARGET_ALLOC_DEVICE_NON_BLOCKING);
-    });
-    break;
-  }
-  case OFFLOAD_HOST_CALL: {
-    uint64_t Sizes[64] = {0};
-    unsigned long long Results[64] = {0};
-    void *Args[64] = {nullptr};
-    Port->recv_n(Args, Sizes, [&](uint64_t Size) { return new char[Size]; });
-    Port->recv([&](rpc::Buffer *buffer, uint32_t ID) {
-      using FuncPtrTy = unsigned long long (*)(void *);
-      auto Func = reinterpret_cast<FuncPtrTy>(buffer->data[0]);
-      Results[ID] = Func(Args[ID]);
-    });
-    Port->send([&](rpc::Buffer *Buffer, uint32_t ID) {
-      Buffer->data[0] = static_cast<uint64_t>(Results[ID]);
-      delete[] reinterpret_cast<char *>(Args[ID]);
-    });
-    break;
-  }
-  default:
-    // Let the `libc` library handle any other unhandled opcodes.
+  int Status = handle_offload_opcodes(Device, *Port, Device.getWarpSize());
+  // Let the `libc` library handle any other unhandled opcodes.
+  if (Status == rpc::UNHANDLED_OPCODE)
     Status = handle_libc_opcodes(*Port, Device.getWarpSize());
-    break;
-  }
   Port->close();
 
   if (Status != rpc::SUCCESS)

>From 3d5d76e0108f2bd6e337e94acf1358eddc858fa2 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 2 Dec 2024 07:25:49 -0600
Subject: [PATCH 3/7] Actually let RPC run if you dont hav elibc

---
 offload/plugins-nextgen/common/src/RPC.cpp | 24 +++++-----------------
 1 file changed, 5 insertions(+), 19 deletions(-)

diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 639800b2086d1a..1e1cca7e28697d 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -13,11 +13,8 @@
 
 #include "PluginInterface.h"
 
-// TODO: This should be included unconditionally and cleaned up.
-#if defined(LIBOMPTARGET_RPC_SUPPORT)
 #include "shared/rpc.h"
 #include "shared/rpc_opcodes.h"
-#endif
 
 using namespace llvm;
 using namespace omp;
@@ -63,7 +60,7 @@ rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
     return rpc::UNHANDLED_OPCODE;
     break;
   }
-  return rpc::UNHANDLED_OPCODE;
+  return rpc::SUCCESS;
 }
 
 static rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
@@ -86,17 +83,12 @@ llvm::Expected<bool>
 RPCServerTy::isDeviceUsingRPC(plugin::GenericDeviceTy &Device,
                               plugin::GenericGlobalHandlerTy &Handler,
                               plugin::DeviceImageTy &Image) {
-#ifdef LIBOMPTARGET_RPC_SUPPORT
   return Handler.isSymbolInImage(Device, Image, "__llvm_rpc_client");
-#else
-  return false;
-#endif
 }
 
 Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
                               plugin::GenericGlobalHandlerTy &Handler,
                               plugin::DeviceImageTy &Image) {
-#ifdef LIBOMPTARGET_RPC_SUPPORT
   uint64_t NumPorts =
       std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
   void *RPCBuffer = Device.allocate(
@@ -119,13 +111,9 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device,
   Buffers[Device.getDeviceId()] = RPCBuffer;
 
   return Error::success();
-
-#endif
-  return Error::success();
 }
 
 Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
-#ifdef LIBOMPTARGET_RPC_SUPPORT
   uint64_t NumPorts =
       std::min(Device.requestedRPCPortCount(), rpc::MAX_PORT_COUNT);
   rpc::Server Server(NumPorts, Buffers[Device.getDeviceId()]);
@@ -135,23 +123,21 @@ Error RPCServerTy::runServer(plugin::GenericDeviceTy &Device) {
     return Error::success();
 
   int Status = handle_offload_opcodes(Device, *Port, Device.getWarpSize());
+
   // Let the `libc` library handle any other unhandled opcodes.
+#ifdef LIBOMPTARGET_RPC_SUPPORT
   if (Status == rpc::UNHANDLED_OPCODE)
     Status = handle_libc_opcodes(*Port, Device.getWarpSize());
-  Port->close();
+#endif
 
+  Port->close();
   if (Status != rpc::SUCCESS)
     return createStringError("RPC server given invalid opcode!");
 
-  return Error::success();
-#endif
   return Error::success();
 }
 
 Error RPCServerTy::deinitDevice(plugin::GenericDeviceTy &Device) {
-#ifdef LIBOMPTARGET_RPC_SUPPORT
   Device.free(Buffers[Device.getDeviceId()], TARGET_ALLOC_HOST);
   return Error::success();
-#endif
-  return Error::success();
 }

>From a2b1fd79c9b93669506647fb4bbd0ec90e21241e Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 2 Dec 2024 11:08:22 -0600
Subject: [PATCH 4/7] fix not including the header

---
 offload/plugins-nextgen/common/CMakeLists.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt
index 3a861a47eedabc..f9598a1718b3e5 100644
--- a/offload/plugins-nextgen/common/CMakeLists.txt
+++ b/offload/plugins-nextgen/common/CMakeLists.txt
@@ -23,14 +23,15 @@ endif()
 
 # Include the RPC server from the `libc` project if availible.
 include(FindLibcCommonUtils)
+target_link_libraries(PluginCommon PRIVATE llvm-libc-common-utilities)
 if(TARGET llvmlibc_rpc_server AND ${LIBOMPTARGET_GPU_LIBC_SUPPORT})
-	target_link_libraries(PluginCommon PRIVATE llvmlibc_rpc_server llvm-libc-common-utilities)
+	target_link_libraries(PluginCommon PRIVATE llvmlibc_rpc_server)
 	target_compile_definitions(PluginCommon PRIVATE LIBOMPTARGET_RPC_SUPPORT)
 elseif(${LIBOMPTARGET_GPU_LIBC_SUPPORT})
   find_library(llvmlibc_rpc_server NAMES llvmlibc_rpc_server
                PATHS ${LIBOMPTARGET_LLVM_LIBRARY_DIR} NO_DEFAULT_PATH)
   if(llvmlibc_rpc_server)
-    target_link_libraries(PluginCommon PRIVATE ${llvmlibc_rpc_server} llvm-libc-common-utilities)
+    target_link_libraries(PluginCommon PRIVATE ${llvmlibc_rpc_server})
 		target_compile_definitions(PluginCommon PRIVATE LIBOMPTARGET_RPC_SUPPORT)
   endif()
 endif()

>From f603e23d0151686a5e460bf9c2ef103392869619 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 2 Dec 2024 13:18:23 -0600
Subject: [PATCH 5/7] Fix not setting it up w/o libc

---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp               | 4 +---
 offload/plugins-nextgen/common/include/PluginInterface.h | 3 ---
 offload/plugins-nextgen/common/src/PluginInterface.cpp   | 8 --------
 offload/plugins-nextgen/cuda/src/rtl.cpp                 | 4 +---
 4 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 6356fa0554a9c1..22c8079ab5812f 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2148,9 +2148,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
   /// We want to set up the RPC server for host services to the GPU if it is
   /// availible.
-  bool shouldSetupRPCServer() const override {
-    return libomptargetSupportsRPC();
-  }
+  bool shouldSetupRPCServer() const override { return true; }
 
   /// The RPC interface should have enough space for all availible parallelism.
   uint64_t requestedRPCPortCount() const override {
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 41cc0f286a581f..97540d5a3e2b3d 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -1580,9 +1580,6 @@ template <typename ResourceRef> class GenericDeviceResourceManagerTy {
   std::deque<ResourceRef> ResourcePool;
 };
 
-/// A static check on whether or not we support RPC in libomptarget.
-bool libomptargetSupportsRPC();
-
 } // namespace plugin
 } // namespace target
 } // namespace omp
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 25b815b7f96694..5cdf12176a0d66 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2179,11 +2179,3 @@ int32_t GenericPluginTy::get_function(__tgt_device_binary Binary,
   *KernelPtr = &Kernel;
   return OFFLOAD_SUCCESS;
 }
-
-bool llvm::omp::target::plugin::libomptargetSupportsRPC() {
-#ifdef LIBOMPTARGET_RPC_SUPPORT
-  return true;
-#else
-  return false;
-#endif
-}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 015c7775ba3513..9af71b06ce97d3 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -496,9 +496,7 @@ struct CUDADeviceTy : public GenericDeviceTy {
 
   /// We want to set up the RPC server for host services to the GPU if it is
   /// availible.
-  bool shouldSetupRPCServer() const override {
-    return libomptargetSupportsRPC();
-  }
+  bool shouldSetupRPCServer() const override { return true; }
 
   /// The RPC interface should have enough space for all availible parallelism.
   uint64_t requestedRPCPortCount() const override {

>From 3ab14aa08214fb619d72b3af9d769f877b9062a9 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 2 Dec 2024 13:44:03 -0600
Subject: [PATCH 6/7] Fix test for non-libc runners

---
 offload/plugins-nextgen/common/src/RPC.cpp |  1 -
 offload/test/api/omp_host_call.c           | 21 ++++++++++-----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/offload/plugins-nextgen/common/src/RPC.cpp b/offload/plugins-nextgen/common/src/RPC.cpp
index 1e1cca7e28697d..71a3a7690396ef 100644
--- a/offload/plugins-nextgen/common/src/RPC.cpp
+++ b/offload/plugins-nextgen/common/src/RPC.cpp
@@ -24,7 +24,6 @@ template <uint32_t NumLanes>
 rpc::Status handle_offload_opcodes(plugin::GenericDeviceTy &Device,
                                    rpc::Server::Port &Port) {
 
-  int Status = rpc::SUCCESS;
   switch (Port.get_opcode()) {
   case RPC_MALLOC: {
     Port.recv_and_send([&](rpc::Buffer *Buffer, uint32_t) {
diff --git a/offload/test/api/omp_host_call.c b/offload/test/api/omp_host_call.c
index a3d1a97822b31b..fcfb2a9f2a6ec6 100644
--- a/offload/test/api/omp_host_call.c
+++ b/offload/test/api/omp_host_call.c
@@ -23,22 +23,14 @@ typedef struct args_s {
 } args_t;
 
 // CHECK-DAG: Thread: 0, Block: 0
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 1, Block: 0
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 0, Block: 1
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 1, Block: 1
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 0, Block: 2
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 1, Block: 2
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 0, Block: 3
-// CHECK-DAG: Result: 42
 // CHECK-DAG: Thread: 1, Block: 3
-// CHECK-DAG: Result: 42
-long long foo(void *data) {
+unsigned long long foo(void *data) {
   assert(omp_is_initial_device() && "Not executing on host?");
   args_t *args = (args_t *)data;
   printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id);
@@ -52,12 +44,19 @@ int main() {
   fn_ptr = (void *)&foo;
 #pragma omp target update to(fn_ptr)
 
-#pragma omp target teams num_teams(4)
+  int failed = 0;
+#pragma omp target teams num_teams(4) map(from : failed)
 #pragma omp parallel num_threads(2)
   {
     args_t args = {omp_get_thread_num(), omp_get_team_num()};
     unsigned long long res =
         __llvm_omp_host_call(fn_ptr, &args, sizeof(args_t));
-    printf("Result: %d\n", (int)res);
+    if (res != 42)
+#pragma omp atomic write
+      failed = 1;
   }
+
+  // CHECK: PASS
+  if (!failed)
+    printf("PASS\n");
 }

>From 5f0dbec87ffc42ebf21badfd8d0d329a7917fcd4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 2 Dec 2024 14:05:49 -0600
Subject: [PATCH 7/7] No copy in, UB

---
 offload/test/api/omp_host_call.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/test/api/omp_host_call.c b/offload/test/api/omp_host_call.c
index fcfb2a9f2a6ec6..40d3dc72582916 100644
--- a/offload/test/api/omp_host_call.c
+++ b/offload/test/api/omp_host_call.c
@@ -45,7 +45,7 @@ int main() {
 #pragma omp target update to(fn_ptr)
 
   int failed = 0;
-#pragma omp target teams num_teams(4) map(from : failed)
+#pragma omp target teams num_teams(4) map(tofrom : failed)
 #pragma omp parallel num_threads(2)
   {
     args_t args = {omp_get_thread_num(), omp_get_team_num()};



More information about the llvm-commits mailing list