[libc-commits] [PATCH] D153571: [libc] Add memory fences to device-local locking calls

Jon Chesterfield via Phabricator via libc-commits libc-commits at lists.llvm.org
Thu Jun 22 09:30:15 PDT 2023


JonChesterfield created this revision.
JonChesterfield added a reviewer: jhuber6.
Herald added projects: libc-project, All.
Herald added a subscriber: libc-commits.
JonChesterfield requested review of this revision.

This makes the interface less error prone. The acquire was previously
forgotten. Release is currently missing if recv() is the last operation made
before close.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D153571

Files:
  libc/src/__support/RPC/rpc.h


Index: libc/src/__support/RPC/rpc.h
===================================================================
--- libc/src/__support/RPC/rpc.h
+++ libc/src/__support/RPC/rpc.h
@@ -184,13 +184,25 @@
     //
     // mask != packed implies at least one of the threads got the lock
     // atomic semantics of fetch_or mean at most one of the threads for the lock
-    return lane_mask != packed;
+    bool holding_lock = lane_mask != packed;
+    if (holding_lock) {
+      // Then the caller can load values knowing said loads won't move past
+      // the lock. No such guarantee is needed if the lock acquire failed.
+      // This conditional branch is expected to fold in the caller after
+      // inlining the current function.
+      atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
+    }
+
+    return holding_lock;
   }
 
   /// Unlock the lock at index. We need a lane sync to keep this function
   /// convergent, otherwise the compiler will sink the store and deadlock.
   [[clang::convergent]] LIBC_INLINE void unlock(uint64_t lane_mask,
                                                 uint64_t index) {
+    // Do not move any writes past the unlock
+    atomic_thread_fence(cpp::MemoryOrder::RELEASE);
+
     // Wait for other threads in the warp to finish using the lock
     gpu::sync_lane(lane_mask);
 
@@ -219,7 +231,8 @@
   LIBC_INLINE void invoke_rpc(cpp::function<void(Buffer *, uint32_t)> fn,
                               Packet<lane_size> &packet) {
     if constexpr (is_process_gpu()) {
-      fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id());
+
+        fn(&packet.payload.slot[gpu::get_lane_id()], gpu::get_lane_id());
     } else {
       for (uint32_t i = 0; i < lane_size; i += gpu::get_lane_size())
         if (packet.header.mask & 1ul << i)
@@ -269,6 +282,7 @@
   LIBC_INLINE Port(Port &&) = default;
   LIBC_INLINE Port &operator=(Port &&) = default;
 
+  
   friend struct Client;
   template <uint32_t U> friend struct Server;
   friend class cpp::optional<Port<T, S>>;
@@ -479,9 +493,6 @@
     if (!this->try_lock(lane_mask, index))
       continue;
 
-    // The mailbox state must be read with the lock held.
-    atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
-
     uint32_t in = this->load_inbox(index);
     uint32_t out = this->load_outbox(index);
 
@@ -528,13 +539,9 @@
 
     // Attempt to acquire the lock on this index.
     uint64_t lane_mask = gpu::get_lane_mask();
-    // Attempt to acquire the lock on this index.
     if (!this->try_lock(lane_mask, index))
       continue;
 
-    // The mailbox state must be read with the lock held.
-    atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
-
     in = this->load_inbox(index);
     out = this->load_outbox(index);
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D153571.533654.patch
Type: text/x-patch
Size: 2731 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20230622/59856b1d/attachment.bin>


More information about the libc-commits mailing list