[libc-commits] [libc] 0058952 - [libc] Simplify slab waiting in GPU memory allocator (#152872)
via libc-commits
libc-commits at lists.llvm.org
Mon Aug 11 11:11:42 PDT 2025
Author: Joseph Huber
Date: 2025-08-11T13:11:39-05:00
New Revision: 005895290d5925c5e7458418ed75d9c2dad3adf0
URL: https://github.com/llvm/llvm-project/commit/005895290d5925c5e7458418ed75d9c2dad3adf0
DIFF: https://github.com/llvm/llvm-project/commit/005895290d5925c5e7458418ed75d9c2dad3adf0.diff
LOG: [libc] Simplify slab waiting in GPU memory allocator (#152872)
Summary:
This moves the waiting inside of the `try_lock` routine instead. This
makes the logic much simpler since it's just a single loop on a load.
We should have the same effect here, and since we don't care about this
being a generic interface it shouldn't matter that it waits a bit. It
is still wait-free since it's guaranteed to make progress
*eventually*.
Added:
Modified:
libc/src/__support/GPU/allocator.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 534a309fec7b4..3da339c63ccd0 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -166,7 +166,11 @@ static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
// We use a sentinal value to indicate a failed or in-progress allocation.
template <typename T> bool is_sentinel(const T &x) {
- return x == cpp::numeric_limits<T>::max();
+ if constexpr (cpp::is_pointer_v<T>)
+ return reinterpret_cast<uintptr_t>(x) ==
+ cpp::numeric_limits<uintptr_t>::max();
+ else
+ return x == cpp::numeric_limits<T>::max();
}
} // namespace impl
@@ -446,7 +450,13 @@ struct GuardPtr {
return new (raw) Slab(cpp::forward<Args>(args)...);
}
- if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
+ // If there is a slab allocation in progress we retry a few times.
+ for (uint32_t t = 0; impl::is_sentinel(expected) && t < MAX_TRIES; ++t) {
+ sleep_briefly();
+ expected = ptr.load(cpp::MemoryOrder::RELAXED);
+ }
+
+ if (!expected || impl::is_sentinel(expected))
return nullptr;
if (!ref.acquire(n, count))
@@ -557,16 +567,6 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform,
Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
reserved, chunk_size, index);
- // If there is a slab allocation in progress we retry a few times.
- for (uint32_t retries = 0;
- !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
- retries++) {
- uint64_t lane_mask = gpu::get_lane_mask();
- slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
- chunk_size, index);
- sleep_briefly();
- }
-
// If we find a slab with a matching chunk size then we store the result.
// Otherwise, we need to free the claimed lock and continue. In the case
// of out-of-memory we receive a sentinel value and return a failure.
More information about the libc-commits
mailing list