[libc-commits] [libc] f497611 - [libc][rpc] Allocate locks array within process

Wed May 10 16:42:01 PDT 2023

Author: Jon Chesterfield
Date: 2023-05-11T00:41:51+01:00
New Revision: f497611f436cbf5ae0157edcf498f62a136799cb

URL: https://github.com/llvm/llvm-project/commit/f497611f436cbf5ae0157edcf498f62a136799cb
DIFF: https://github.com/llvm/llvm-project/commit/f497611f436cbf5ae0157edcf498f62a136799cb.diff

LOG: [libc][rpc] Allocate locks array within process

Replaces the globals currently used. Worth changing to a bitmap
before allowing runtime number of ports >> 64. One bit per port is likely
to be cheap enough that sizing for the worst case is always fine, otherwise
in the future we can change to dynamically allocating it.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D150309

Added: 
    

Modified: 
    libc/src/__support/RPC/rpc.h
    libc/startup/gpu/amdgpu/start.cpp
    libc/startup/gpu/nvptx/start.cpp
    libc/utils/gpu/loader/Server.h
    libc/utils/gpu/loader/amdgpu/Loader.cpp
    libc/utils/gpu/loader/nvptx/Loader.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h
index ae905bccbc657..d448e190214ff 100644

--- a/libc/src/__support/RPC/rpc.h
+++ b/libc/src/__support/RPC/rpc.h
@@ -106,20 +106,20 @@ template <bool InvertInbox> struct Process {
 
   uint64_t port_count;
   uint32_t lane_size;
-  cpp::Atomic<uint32_t> *lock;
   cpp::Atomic<uint32_t> *inbox;
   cpp::Atomic<uint32_t> *outbox;
   Packet *packet;
 
+  cpp::Atomic<uint32_t> lock[default_port_count] = {0};
+
   /// Initialize the communication channels.
-  LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *lock,
-                         void *inbox, void *outbox, void *packet) {
-    *this = {port_count,
-             lane_size,
-             reinterpret_cast<cpp::Atomic<uint32_t> *>(lock),
-             reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox),
-             reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox),
-             reinterpret_cast<Packet *>(packet)};
+  LIBC_INLINE void reset(uint64_t port_count, uint32_t lane_size, void *inbox,
+                         void *outbox, void *packet) {
+    this->port_count = port_count;
+    this->lane_size = lane_size;
+    this->inbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(inbox);
+    this->outbox = reinterpret_cast<cpp::Atomic<uint32_t> *>(outbox);
+    this->packet = reinterpret_cast<Packet *>(packet);
   }
 
   /// The length of the packet is flexible because the server needs to look up

diff  --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp
index 84adb3b97527b..9761c64cb318f 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp);
 
 namespace __llvm_libc {
 
-static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0};
-
 extern "C" uintptr_t __init_array_start[];
 extern "C" uintptr_t __init_array_end[];
 extern "C" uintptr_t __fini_array_start[];
@@ -44,8 +42,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) {
   // We need to set up the RPC client first in case any of the constructors
   // require it.
   __llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count,
-                                 __llvm_libc::gpu::get_lane_size(),
-                                 &__llvm_libc::lock, in, out, buffer);
+                                 __llvm_libc::gpu::get_lane_size(), in, out,
+                                 buffer);
 
   // We want the fini array callbacks to be run after other atexit
   // callbacks are run. So, we register them before running the init

diff  --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index 1d366dc829dfb..78cdc64ed9677 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -15,8 +15,6 @@ extern "C" int main(int argc, char **argv, char **envp);
 
 namespace __llvm_libc {
 
-static cpp::Atomic<uint32_t> lock[rpc::default_port_count] = {0};
-
 extern "C" {
 // Nvidia's 'nvlink' linker does not provide these symbols. We instead need
 // to manually create them and update the globals in the loader implememtation.
@@ -48,8 +46,8 @@ _begin(int argc, char **argv, char **env, void *in, void *out, void *buffer) {
   // We need to set up the RPC client first in case any of the constructors
   // require it.
   __llvm_libc::rpc::client.reset(__llvm_libc::rpc::default_port_count,
-                                 __llvm_libc::gpu::get_lane_size(),
-                                 &__llvm_libc::lock, in, out, buffer);
+                                 __llvm_libc::gpu::get_lane_size(), in, out,
+                                 buffer);
 
   // We want the fini array callbacks to be run after other atexit
   // callbacks are run. So, we register them before running the init

diff  --git a/libc/utils/gpu/loader/Server.h b/libc/utils/gpu/loader/Server.h
index f77bf256618a4..89ef712e85963 100644
--- a/libc/utils/gpu/loader/Server.h
+++ b/libc/utils/gpu/loader/Server.h
@@ -19,9 +19,6 @@
 
 static __llvm_libc::rpc::Server server;
 
-static __llvm_libc::cpp::Atomic<uint32_t>
-    lock[__llvm_libc::rpc::default_port_count] = {0};
-
 /// Queries the RPC client at least once and performs server-side work if there
 /// are any active requests.
 void handle_server() {

diff  --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 07fa1ae7fe161..ad5e021169184 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -359,8 +359,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
   hsa_amd_agents_allow_access(1, &dev_agent, nullptr, buffer);
 
   // Initialize the RPC server's buffer for host-device communication.
-  server.reset(port_size, wavefront_size, &lock, server_inbox, server_outbox,
-               buffer);
+  server.reset(port_size, wavefront_size, server_inbox, server_outbox, buffer);
 
   // Obtain a queue with the minimum (power of two) size, used to send commands
   // to the HSA runtime and launch execution on the device.

diff  --git a/libc/utils/gpu/loader/nvptx/Loader.cpp b/libc/utils/gpu/loader/nvptx/Loader.cpp
index 314f5a8055fb9..2230f55ea24e4 100644
--- a/libc/utils/gpu/loader/nvptx/Loader.cpp
+++ b/libc/utils/gpu/loader/nvptx/Loader.cpp
@@ -260,8 +260,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
     handle_error("Failed to allocate memory the RPC client / server.");
 
   // Initialize the RPC server's buffer for host-device communication.
-  server.reset(port_size, warp_size, &lock, server_inbox, server_outbox,
-               buffer);
+  server.reset(port_size, warp_size, server_inbox, server_outbox, buffer);
 
   LaunchParameters single_threaded_params = {1, 1, 1, 1, 1, 1};
   // Call the kernel to