[libc-commits] [libc] 739e997 - [libc] Remove ballot on slab find (#176606)
via libc-commits
libc-commits at lists.llvm.org
Sat Jan 17 15:40:10 PST 2026
Author: Joseph Huber
Date: 2026-01-17T17:40:06-06:00
New Revision: 739e997c3e7ba4cc8f467d61d26ae64e60e81d70
URL: https://github.com/llvm/llvm-project/commit/739e997c3e7ba4cc8f467d61d26ae64e60e81d70
DIFF: https://github.com/llvm/llvm-project/commit/739e997c3e7ba4cc8f467d61d26ae64e60e81d70.diff
LOG: [libc] Remove ballot on slab find (#176606)
Summary:
The ballot on slab find negatively impacts performance, while the other
changes in the initial PR slightly improved it. The ballot was originally
added to make Volta independent thread scheduling work, but that doesn't
seem to work correctly all the time either, so we should remove the ballot
to make slab lookup faster.
Added:
Modified:
libc/src/__support/GPU/allocator.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 7182180ab3613..f8a3b464ea220 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -498,20 +498,21 @@ struct GuardPtr {
result = gpu::shuffle(lane_mask, cpp::countr_zero(uniform), result);
count = gpu::shuffle(lane_mask, cpp::countr_zero(uniform), count);
+ if (!result)
+ return nullptr;
+
// We defer storing the newly allocated slab until now so that we can use
// multiple lanes to initialize it and release it for use.
- uint64_t slab_mask =
- gpu::ballot(lane_mask, result && impl::is_sentinel(count));
- if (slab_mask & impl::id_in_mask()) {
- result->initialize(slab_mask, uniform);
+ if (impl::is_sentinel(count)) {
+ uint64_t count_mask = gpu::get_lane_mask();
+ result->initialize(count_mask, uniform);
if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
finalize(result, cpp::popcount(uniform), count);
- count = gpu::shuffle(slab_mask, cpp::countr_zero(uniform), count);
+ count = gpu::shuffle(count_mask, cpp::countr_zero(uniform), count);
}
- if (result)
- count = count - cpp::popcount(uniform) +
- impl::lane_count(uniform, gpu::get_lane_id());
+ count = count - cpp::popcount(uniform) +
+ impl::lane_count(uniform, gpu::get_lane_id());
return result;
}
More information about the libc-commits
mailing list