[libc-commits] [libc] e537c83 - [libc] Add basic support for calling host functions from the GPU

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Wed Jul 19 08:11:58 PDT 2023


Author: Joseph Huber
Date: 2023-07-19T10:11:46-05:00
New Revision: e537c839757c6bae91bd5adbf65eb4e06a040840

URL: https://github.com/llvm/llvm-project/commit/e537c839757c6bae91bd5adbf65eb4e06a040840
DIFF: https://github.com/llvm/llvm-project/commit/e537c839757c6bae91bd5adbf65eb4e06a040840.diff

LOG: [libc] Add basic support for calling host functions from the GPU

This patch adds the `rpc_host_call` function as a GPU extension. This is
exported from the `libc` project to use the RPC interface to call a
function pointer via RPC any copying the arguments by-value. The
interface can only support a single void pointer argument much like
pthreads. The function call here is the bare-bones version of what's
required for OpenMP reverse offloading. Full support will require
interfacing with the mapping table, nowait support, etc.

I decided to test this interface in `libomptarget` as that will be the
primary consumer and it would be more difficult to make a test in `libc`
due to the testing infrastructure not really having a concept of the
"host" as it runs directly on the GPU as if it were a CPU target.

Reviewed By: jplehr

Differential Revision: https://reviews.llvm.org/D155003

Added: 
    libc/src/gpu/rpc_host_call.cpp
    libc/src/gpu/rpc_host_call.h
    openmp/libomptarget/test/libc/host_call.c

Modified: 
    libc/config/gpu/entrypoints.txt
    libc/include/llvm-libc-types/rpc_opcodes_t.h
    libc/spec/gpu_ext.td
    libc/src/gpu/CMakeLists.txt
    libc/utils/gpu/server/rpc_server.cpp

Removed: 
    


################################################################################
diff  --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index e475c80d26b205..6202e70f657053 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -97,6 +97,7 @@ set(TARGET_LIBC_ENTRYPOINTS
 
     # gpu/rpc.h entrypoints
     libc.src.gpu.rpc_reset
+    libc.src.gpu.rpc_host_call
 )
 
 set(TARGET_LIBM_ENTRYPOINTS

diff  --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index f53bda7d3a8568..33a657deeff7f3 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -19,6 +19,7 @@ typedef enum : unsigned short {
   RPC_CLOSE_FILE = 6,
   RPC_MALLOC = 7,
   RPC_FREE = 8,
+  RPC_HOST_CALL = 9,
   // TODO: Move these out of here and handle then with custom handlers in the
   // loader.
   RPC_TEST_INCREMENT = 1000,

diff  --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td
index 69117bbde49933..dca1e9f80f71e6 100644
--- a/libc/spec/gpu_ext.td
+++ b/libc/spec/gpu_ext.td
@@ -10,6 +10,11 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
             RetValSpec<VoidType>,
             [ArgSpec<UnsignedIntType>, ArgSpec<VoidPtr>]
         >,
+        FunctionSpec<
+            "rpc_host_call",
+            RetValSpec<VoidType>,
+            [ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
+        >,
     ]
   >;
   let Headers = [

diff  --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt
index 8994fe05f7c7ec..a0701c835bf46f 100644
--- a/libc/src/gpu/CMakeLists.txt
+++ b/libc/src/gpu/CMakeLists.txt
@@ -8,3 +8,14 @@ add_entrypoint_object(
     libc.src.__support.RPC.rpc_client
     libc.src.__support.GPU.utils
 )
+
+add_entrypoint_object(
+  rpc_host_call
+  SRCS
+    rpc_host_call.cpp
+  HDRS
+    rpc_host_call.h
+  DEPENDS
+    libc.src.__support.RPC.rpc_client
+    libc.src.__support.GPU.utils
+)

diff  --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp
new file mode 100644
index 00000000000000..67b839dd4c4eae
--- /dev/null
+++ b/libc/src/gpu/rpc_host_call.cpp
@@ -0,0 +1,30 @@
+//===---------- GPU implementation of the external RPC call function ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/gpu/rpc_host_call.h"
+
+#include "llvm-libc-types/rpc_opcodes_t.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// This calls the associated function pointer on the RPC server with the given
+// arguments. We expect that the pointer here is a valid pointer on the server.
+LLVM_LIBC_FUNCTION(void, rpc_host_call, (void *fn, void *data, size_t size)) {
+  rpc::Client::Port port = rpc::client.open<RPC_HOST_CALL>();
+  port.send_n(data, size);
+  port.send([=](rpc::Buffer *buffer) {
+    buffer->data[0] = reinterpret_cast<uintptr_t>(fn);
+  });
+  port.recv([](rpc::Buffer *) {});
+  port.close();
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h
new file mode 100644
index 00000000000000..f8e2cdd31902a2
--- /dev/null
+++ b/libc/src/gpu/rpc_host_call.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+void rpc_host_call(void *fn, void *buffer, size_t size);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_GPU_RPC_H_HOST_CALL

diff  --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index da9f50603f1181..721b29361619e9 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -129,6 +129,18 @@ struct Server {
       });
       break;
     }
+    case RPC_HOST_CALL: {
+      uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
+      void *args[rpc::MAX_LANE_SIZE] = {nullptr};
+      port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
+      port->recv([&](rpc::Buffer *buffer, uint32_t id) {
+        reinterpret_cast<void (*)(void *)>(buffer->data[0])(args[id]);
+      });
+      port->send([&](rpc::Buffer *, uint32_t id) {
+        delete[] reinterpret_cast<uint8_t *>(args[id]);
+      });
+      break;
+    }
     // TODO: Move handling of these  test cases to the loader implementation.
     case RPC_TEST_INCREMENT: {
       port->recv_and_send([](rpc::Buffer *buffer) {
@@ -341,7 +353,7 @@ uint64_t rpc_get_client_size() { return sizeof(rpc::Client); }
 using ServerPort = std::variant<rpc::Server<1>::Port *, rpc::Server<32>::Port *,
                                 rpc::Server<64>::Port *>;
 
-ServerPort getPort(rpc_port_t ref) {
+ServerPort get_port(rpc_port_t ref) {
   if (ref.lane_size == 1)
     return reinterpret_cast<rpc::Server<1>::Port *>(ref.handle);
   else if (ref.lane_size == 32)
@@ -353,7 +365,7 @@ ServerPort getPort(rpc_port_t ref) {
 }
 
 void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
-  auto port = getPort(ref);
+  auto port = get_port(ref);
   std::visit(
       [=](auto &port) {
         port->send([=](rpc::Buffer *buffer) {
@@ -364,7 +376,7 @@ void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
 }
 
 void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
-  auto port = getPort(ref);
+  auto port = get_port(ref);
   std::visit(
       [=](auto &port) {
         port->recv([=](rpc::Buffer *buffer) {
@@ -376,7 +388,7 @@ void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
 
 void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback,
                        void *data) {
-  auto port = getPort(ref);
+  auto port = get_port(ref);
   std::visit(
       [=](auto &port) {
         port->recv_and_send([=](rpc::Buffer *buffer) {

diff  --git a/openmp/libomptarget/test/libc/host_call.c b/openmp/libomptarget/test/libc/host_call.c
new file mode 100644
index 00000000000000..11260cc285765d
--- /dev/null
+++ b/openmp/libomptarget/test/libc/host_call.c
@@ -0,0 +1,54 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+// REQUIRES: libc
+
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp begin declare variant match(device = {kind(gpu)})
+// Extension provided by the 'libc' project.
+void rpc_host_call(void *fn, void *args, size_t size);
+#pragma omp declare target to(rpc_host_call) device_type(nohost)
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(cpu)})
+// Dummy host implementation to make this work for all targets.
+void rpc_host_call(void *fn, void *args, size_t size) {
+  ((void (*)(void *))fn)(args);
+}
+#pragma omp end declare variant
+
+typedef struct args_s {
+  int thread_id;
+  int block_id;
+} args_t;
+
+// CHECK-DAG: Thread: 0, Block: 0
+// CHECK-DAG: Thread: 1, Block: 0
+// CHECK-DAG: Thread: 0, Block: 1
+// CHECK-DAG: Thread: 1, Block: 1
+// CHECK-DAG: Thread: 0, Block: 2
+// CHECK-DAG: Thread: 1, Block: 2
+// CHECK-DAG: Thread: 0, Block: 3
+// CHECK-DAG: Thread: 1, Block: 3
+void foo(void *data) {
+  assert(omp_is_initial_device() && "Not executing on host?");
+  args_t *args = (args_t *)data;
+  printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id);
+}
+
+void *fn_ptr = NULL;
+#pragma omp declare target to(fn_ptr)
+
+int main() {
+  fn_ptr = (void *)&foo;
+#pragma omp target update to(fn_ptr)
+
+#pragma omp target teams num_teams(4)
+#pragma omp parallel num_threads(2)
+  {
+    args_t args = {omp_get_thread_num(), omp_get_team_num()};
+    rpc_host_call(fn_ptr, &args, sizeof(args_t));
+  }
+}


        


More information about the libc-commits mailing list