[Openmp-commits] [openmp] e537c83 - [libc] Add basic support for calling host functions from the GPU
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Wed Jul 19 08:11:57 PDT 2023
Author: Joseph Huber
Date: 2023-07-19T10:11:46-05:00
New Revision: e537c839757c6bae91bd5adbf65eb4e06a040840
URL: https://github.com/llvm/llvm-project/commit/e537c839757c6bae91bd5adbf65eb4e06a040840
DIFF: https://github.com/llvm/llvm-project/commit/e537c839757c6bae91bd5adbf65eb4e06a040840.diff
LOG: [libc] Add basic support for calling host functions from the GPU
This patch adds the `rpc_host_call` function as a GPU extension. This is
exported from the `libc` project to use the RPC interface to call a
function pointer via RPC any copying the arguments by-value. The
interface can only support a single void pointer argument much like
pthreads. The function call here is the bare-bones version of what's
required for OpenMP reverse offloading. Full support will require
interfacing with the mapping table, nowait support, etc.
I decided to test this interface in `libomptarget` as that will be the
primary consumer and it would be more difficult to make a test in `libc`
due to the testing infrastructure not really having a concept of the
"host" as it runs directly on the GPU as if it were a CPU target.
Reviewed By: jplehr
Differential Revision: https://reviews.llvm.org/D155003
Added:
libc/src/gpu/rpc_host_call.cpp
libc/src/gpu/rpc_host_call.h
openmp/libomptarget/test/libc/host_call.c
Modified:
libc/config/gpu/entrypoints.txt
libc/include/llvm-libc-types/rpc_opcodes_t.h
libc/spec/gpu_ext.td
libc/src/gpu/CMakeLists.txt
libc/utils/gpu/server/rpc_server.cpp
Removed:
################################################################################
diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index e475c80d26b205..6202e70f657053 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -97,6 +97,7 @@ set(TARGET_LIBC_ENTRYPOINTS
# gpu/rpc.h entrypoints
libc.src.gpu.rpc_reset
+ libc.src.gpu.rpc_host_call
)
set(TARGET_LIBM_ENTRYPOINTS
diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h
index f53bda7d3a8568..33a657deeff7f3 100644
--- a/libc/include/llvm-libc-types/rpc_opcodes_t.h
+++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h
@@ -19,6 +19,7 @@ typedef enum : unsigned short {
RPC_CLOSE_FILE = 6,
RPC_MALLOC = 7,
RPC_FREE = 8,
+ RPC_HOST_CALL = 9,
// TODO: Move these out of here and handle then with custom handlers in the
// loader.
RPC_TEST_INCREMENT = 1000,
diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td
index 69117bbde49933..dca1e9f80f71e6 100644
--- a/libc/spec/gpu_ext.td
+++ b/libc/spec/gpu_ext.td
@@ -10,6 +10,11 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> {
RetValSpec<VoidType>,
[ArgSpec<UnsignedIntType>, ArgSpec<VoidPtr>]
>,
+ FunctionSpec<
+ "rpc_host_call",
+ RetValSpec<VoidType>,
+ [ArgSpec<VoidPtr>, ArgSpec<VoidPtr>, ArgSpec<SizeTType>]
+ >,
]
>;
let Headers = [
diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt
index 8994fe05f7c7ec..a0701c835bf46f 100644
--- a/libc/src/gpu/CMakeLists.txt
+++ b/libc/src/gpu/CMakeLists.txt
@@ -8,3 +8,14 @@ add_entrypoint_object(
libc.src.__support.RPC.rpc_client
libc.src.__support.GPU.utils
)
+
+add_entrypoint_object(
+ rpc_host_call
+ SRCS
+ rpc_host_call.cpp
+ HDRS
+ rpc_host_call.h
+ DEPENDS
+ libc.src.__support.RPC.rpc_client
+ libc.src.__support.GPU.utils
+)
diff --git a/libc/src/gpu/rpc_host_call.cpp b/libc/src/gpu/rpc_host_call.cpp
new file mode 100644
index 00000000000000..67b839dd4c4eae
--- /dev/null
+++ b/libc/src/gpu/rpc_host_call.cpp
@@ -0,0 +1,30 @@
+//===---------- GPU implementation of the external RPC call function ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/gpu/rpc_host_call.h"
+
+#include "llvm-libc-types/rpc_opcodes_t.h"
+#include "src/__support/GPU/utils.h"
+#include "src/__support/RPC/rpc_client.h"
+#include "src/__support/common.h"
+
+namespace __llvm_libc {
+
+// This calls the associated function pointer on the RPC server with the given
+// arguments. We expect that the pointer here is a valid pointer on the server.
+LLVM_LIBC_FUNCTION(void, rpc_host_call, (void *fn, void *data, size_t size)) {
+ rpc::Client::Port port = rpc::client.open<RPC_HOST_CALL>();
+ port.send_n(data, size);
+ port.send([=](rpc::Buffer *buffer) {
+ buffer->data[0] = reinterpret_cast<uintptr_t>(fn);
+ });
+ port.recv([](rpc::Buffer *) {});
+ port.close();
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/gpu/rpc_host_call.h b/libc/src/gpu/rpc_host_call.h
new file mode 100644
index 00000000000000..f8e2cdd31902a2
--- /dev/null
+++ b/libc/src/gpu/rpc_host_call.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for RPC functions -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H
+
+#include <stddef.h> // size_t
+
+namespace __llvm_libc {
+
+void rpc_host_call(void *fn, void *buffer, size_t size);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_GPU_RPC_H_HOST_CALL
diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index da9f50603f1181..721b29361619e9 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -129,6 +129,18 @@ struct Server {
});
break;
}
+ case RPC_HOST_CALL: {
+ uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
+ void *args[rpc::MAX_LANE_SIZE] = {nullptr};
+ port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
+ port->recv([&](rpc::Buffer *buffer, uint32_t id) {
+ reinterpret_cast<void (*)(void *)>(buffer->data[0])(args[id]);
+ });
+ port->send([&](rpc::Buffer *, uint32_t id) {
+ delete[] reinterpret_cast<uint8_t *>(args[id]);
+ });
+ break;
+ }
// TODO: Move handling of these test cases to the loader implementation.
case RPC_TEST_INCREMENT: {
port->recv_and_send([](rpc::Buffer *buffer) {
@@ -341,7 +353,7 @@ uint64_t rpc_get_client_size() { return sizeof(rpc::Client); }
using ServerPort = std::variant<rpc::Server<1>::Port *, rpc::Server<32>::Port *,
rpc::Server<64>::Port *>;
-ServerPort getPort(rpc_port_t ref) {
+ServerPort get_port(rpc_port_t ref) {
if (ref.lane_size == 1)
return reinterpret_cast<rpc::Server<1>::Port *>(ref.handle);
else if (ref.lane_size == 32)
@@ -353,7 +365,7 @@ ServerPort getPort(rpc_port_t ref) {
}
void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
- auto port = getPort(ref);
+ auto port = get_port(ref);
std::visit(
[=](auto &port) {
port->send([=](rpc::Buffer *buffer) {
@@ -364,7 +376,7 @@ void rpc_send(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
}
void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
- auto port = getPort(ref);
+ auto port = get_port(ref);
std::visit(
[=](auto &port) {
port->recv([=](rpc::Buffer *buffer) {
@@ -376,7 +388,7 @@ void rpc_recv(rpc_port_t ref, rpc_port_callback_ty callback, void *data) {
void rpc_recv_and_send(rpc_port_t ref, rpc_port_callback_ty callback,
void *data) {
- auto port = getPort(ref);
+ auto port = get_port(ref);
std::visit(
[=](auto &port) {
port->recv_and_send([=](rpc::Buffer *buffer) {
diff --git a/openmp/libomptarget/test/libc/host_call.c b/openmp/libomptarget/test/libc/host_call.c
new file mode 100644
index 00000000000000..11260cc285765d
--- /dev/null
+++ b/openmp/libomptarget/test/libc/host_call.c
@@ -0,0 +1,54 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+// REQUIRES: libc
+
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp begin declare variant match(device = {kind(gpu)})
+// Extension provided by the 'libc' project.
+void rpc_host_call(void *fn, void *args, size_t size);
+#pragma omp declare target to(rpc_host_call) device_type(nohost)
+#pragma omp end declare variant
+
+#pragma omp begin declare variant match(device = {kind(cpu)})
+// Dummy host implementation to make this work for all targets.
+void rpc_host_call(void *fn, void *args, size_t size) {
+ ((void (*)(void *))fn)(args);
+}
+#pragma omp end declare variant
+
+typedef struct args_s {
+ int thread_id;
+ int block_id;
+} args_t;
+
+// CHECK-DAG: Thread: 0, Block: 0
+// CHECK-DAG: Thread: 1, Block: 0
+// CHECK-DAG: Thread: 0, Block: 1
+// CHECK-DAG: Thread: 1, Block: 1
+// CHECK-DAG: Thread: 0, Block: 2
+// CHECK-DAG: Thread: 1, Block: 2
+// CHECK-DAG: Thread: 0, Block: 3
+// CHECK-DAG: Thread: 1, Block: 3
+void foo(void *data) {
+ assert(omp_is_initial_device() && "Not executing on host?");
+ args_t *args = (args_t *)data;
+ printf("Thread: %d, Block: %d\n", args->thread_id, args->block_id);
+}
+
+void *fn_ptr = NULL;
+#pragma omp declare target to(fn_ptr)
+
+int main() {
+ fn_ptr = (void *)&foo;
+#pragma omp target update to(fn_ptr)
+
+#pragma omp target teams num_teams(4)
+#pragma omp parallel num_threads(2)
+ {
+ args_t args = {omp_get_thread_num(), omp_get_team_num()};
+ rpc_host_call(fn_ptr, &args, sizeof(args_t));
+ }
+}
More information about the Openmp-commits
mailing list