[libc-commits] [libc] 7fd9f0f - [libc] Remove `MAX_LANE_SIZE` definition from the RPC server

Wed Aug 23 10:10:15 PDT 2023

Author: Joseph Huber
Date: 2023-08-23T12:09:30-05:00
New Revision: 7fd9f0f4e0cb0ccdc39d5ed9485569566b61b720

URL: https://github.com/llvm/llvm-project/commit/7fd9f0f4e0cb0ccdc39d5ed9485569566b61b720
DIFF: https://github.com/llvm/llvm-project/commit/7fd9f0f4e0cb0ccdc39d5ed9485569566b61b720.diff

LOG: [libc] Remove `MAX_LANE_SIZE` definition from the RPC server

This `MAX_LANE_SIZE` was a hack from the days when we used a single
instance of the server and had some GPU state handle it. Now that we
have everything templated this really shouldn't be used. This patch
removes its use and replaces it with template arguments.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D158633

Added: 
    

Modified: 
    libc/src/__support/RPC/rpc_util.h
    libc/utils/gpu/loader/Loader.h
    libc/utils/gpu/loader/amdgpu/Loader.cpp
    libc/utils/gpu/loader/nvptx/Loader.cpp
    libc/utils/gpu/server/rpc_server.cpp
    libc/utils/gpu/server/rpc_server.h

Removed: 
    


################################################################################
diff --git a/libc/src/__support/RPC/rpc_util.h b/libc/src/__support/RPC/rpc_util.h
index 381bfaf261594d..1e2c53880cb747 100644

--- a/libc/src/__support/RPC/rpc_util.h
+++ b/libc/src/__support/RPC/rpc_util.h
@@ -17,9 +17,6 @@
 namespace __llvm_libc {
 namespace rpc {
 
-/// Maximum amount of data a single lane can use.
-constexpr uint64_t MAX_LANE_SIZE = 64;
-
 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
 LIBC_INLINE void sleep_briefly() {
 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700

diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h
index 349045448b394a..4eef88bf0463c1 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/libc/utils/gpu/loader/Loader.h
@@ -107,7 +107,9 @@ inline void handle_error(rpc_status_t) {
   handle_error("Failure in the RPC server\n");
 }
 
+template <uint32_t lane_size>
 inline void register_rpc_callbacks(uint32_t device_id) {
+  static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size");
   // Register the ping test for the `libc` tests.
   rpc_register_callback(
       device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
@@ -207,14 +209,14 @@ inline void register_rpc_callbacks(uint32_t device_id) {
   rpc_register_callback(
       device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
       [](rpc_port_t port, void *data) {
-        uint64_t sizes[RPC_MAXIMUM_LANE_SIZE] = {0};
-        void *dst[RPC_MAXIMUM_LANE_SIZE] = {nullptr};
+        uint64_t sizes[lane_size] = {0};
+        void *dst[lane_size] = {nullptr};
         rpc_recv_n(
             port, dst, sizes,
             [](uint64_t size, void *) -> void * { return new char[size]; },
             nullptr);
         rpc_send_n(port, dst, sizes);
-        for (uint64_t i = 0; i < RPC_MAXIMUM_LANE_SIZE; ++i) {
+        for (uint64_t i = 0; i < lane_size; ++i) {
           if (dst[i])
             delete[] reinterpret_cast<uint8_t *>(dst[i]);
         }

diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index baf7f0b5ec13bf..2243d1974f77b1 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -150,8 +150,6 @@ hsa_status_t launch_kernel(hsa_agent_t dev_agent, hsa_executable_t executable,
 
   // Register RPC callbacks for the malloc and free functions on HSA.
   uint32_t device_id = 0;
-  register_rpc_callbacks(device_id);
-
   auto tuple = std::make_tuple(dev_agent, coarsegrained_pool);
   rpc_register_callback(
       device_id, RPC_MALLOC,
@@ -424,6 +422,14 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
                                          wavefront_size, rpc_alloc, &tuple))
     handle_error(err);
 
+  // Register callbacks for the RPC unit tests.
+  if (wavefront_size == 32)
+    register_rpc_callbacks<32>(device_id);
+  else if (wavefront_size == 64)
+    register_rpc_callbacks<64>(device_id);
+  else
+    handle_error("Invalid wavefront size");
+
   // Obtain the GPU's fixed-frequency clock rate and copy it to the GPU.
   // If the clock_freq symbol is missing, no work to do.
   hsa_executable_symbol_t freq_sym;

diff --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
index ff70cd10d75a90..8b2132bc3c6e6a 100644
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -177,7 +177,7 @@ CUresult launch_kernel(CUmodule binary, CUstream stream,
 
   // Register RPC callbacks for the malloc and free functions on HSA.
   uint32_t device_id = 0;
-  register_rpc_callbacks(device_id);
+  register_rpc_callbacks<32>(device_id);
 
   rpc_register_callback(
       device_id, RPC_MALLOC,

diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp
index 7ae14bac1c5b53..2a5494f7d3e978 100644
--- a/libc/utils/gpu/server/rpc_server.cpp
+++ b/libc/utils/gpu/server/rpc_server.cpp
@@ -26,9 +26,6 @@ static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer),
 static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT,
               "Incorrect maximum port count");
 
-static_assert(RPC_MAXIMUM_LANE_SIZE == rpc::MAX_LANE_SIZE,
-              "Incorrect maximum port count");
-
 // The client needs to support different lane sizes for the SIMT model. Because
 // of this we need to select between the possible sizes that the client can use.
 struct Server {
@@ -80,9 +77,9 @@ struct Server {
     case RPC_WRITE_TO_STREAM:
     case RPC_WRITE_TO_STDERR:
     case RPC_WRITE_TO_STDOUT: {
-      uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
-      void *strs[rpc::MAX_LANE_SIZE] = {nullptr};
-      FILE *files[rpc::MAX_LANE_SIZE] = {nullptr};
+      uint64_t sizes[lane_size] = {0};
+      void *strs[lane_size] = {nullptr};
+      FILE *files[lane_size] = {nullptr};
       if (port->get_opcode() == RPC_WRITE_TO_STREAM)
         port->recv([&](rpc::Buffer *buffer, uint32_t id) {
           files[id] = reinterpret_cast<FILE *>(buffer->data[0]);
@@ -96,18 +93,15 @@ struct Server {
                                                              : files[id]);
         uint64_t ret = fwrite(strs[id], 1, sizes[id], file);
         std::memcpy(buffer->data, &ret, sizeof(uint64_t));
+        delete[] reinterpret_cast<uint8_t *>(strs[id]);
       });
-      for (uint64_t i = 0; i < rpc::MAX_LANE_SIZE; ++i) {
-        if (strs[i])
-          delete[] reinterpret_cast<uint8_t *>(strs[i]);
-      }
       break;
     }
     case RPC_READ_FROM_STREAM:
     case RPC_READ_FROM_STDIN: {
-      uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
-      void *data[rpc::MAX_LANE_SIZE] = {nullptr};
-      uint64_t rets[rpc::MAX_LANE_SIZE] = {0};
+      uint64_t sizes[lane_size] = {0};
+      void *data[lane_size] = {nullptr};
+      uint64_t rets[lane_size] = {0};
       port->recv([&](rpc::Buffer *buffer, uint32_t id) {
         sizes[id] = buffer->data[0];
         data[id] = new char[sizes[id]];
@@ -124,8 +118,8 @@ struct Server {
       break;
     }
     case RPC_OPEN_FILE: {
-      uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
-      void *paths[rpc::MAX_LANE_SIZE] = {nullptr};
+      uint64_t sizes[lane_size] = {0};
+      void *paths[lane_size] = {nullptr};
       port->recv_n(paths, sizes, [&](uint64_t size) { return new char[size]; });
       port->recv_and_send([&](rpc::Buffer *buffer, uint32_t id) {
         FILE *file = fopen(reinterpret_cast<char *>(paths[id]),
@@ -152,8 +146,8 @@ struct Server {
       break;
     }
     case RPC_HOST_CALL: {
-      uint64_t sizes[rpc::MAX_LANE_SIZE] = {0};
-      void *args[rpc::MAX_LANE_SIZE] = {nullptr};
+      uint64_t sizes[lane_size] = {0};
+      void *args[lane_size] = {nullptr};
       port->recv_n(args, sizes, [&](uint64_t size) { return new char[size]; });
       port->recv([&](rpc::Buffer *buffer, uint32_t id) {
         reinterpret_cast<void (*)(void *)>(buffer->data[0])(args[id]);

diff --git a/libc/utils/gpu/server/rpc_server.h b/libc/utils/gpu/server/rpc_server.h
index c0c70d67bb13eb..f4f4f31265843b 100644
--- a/libc/utils/gpu/server/rpc_server.h
+++ b/libc/utils/gpu/server/rpc_server.h
@@ -20,9 +20,6 @@ extern "C" {
 /// The maxium number of ports that can be opened for any server.
 const uint64_t RPC_MAXIMUM_PORT_COUNT = 512;
 
-/// The maximum number of parallel lanes that we can support.
-const uint64_t RPC_MAXIMUM_LANE_SIZE = 64;
-
 /// The symbol name associated with the client for use with the LLVM C library
 /// implementation.
 const char *const rpc_client_symbol_name = "__llvm_libc_rpc_client";