[llvm] [openmp][nfc] Refactor shared/lds smartstack for spirv (PR #131905)
Jon Chesterfield via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 18 13:15:46 PDT 2025
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/131905
SPIR-V doesn't have implicit conversions between address spaces (at least at present; we might need to change that), and address-space-qualified `this` pointers are not handled well by clang. This commit changes the single instance of the smartstack to be explicitly a singleton, for fractionally simpler IR generation (no `this` pointer) and to sidestep the work-in-progress `spirv64--` OpenMP target not being able to compile the original version.
From 6c80b92649df26a14379104f04f6a7f7be1c9deb Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield at gmail.com>
Date: Tue, 18 Mar 2025 19:59:08 +0000
Subject: [PATCH] [openmp][nfc] Refactor shared/lds smartstack for spirv
---
offload/DeviceRTL/src/State.cpp | 37 +++++++++++++++++++--------------
1 file changed, 21 insertions(+), 16 deletions(-)
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index cbe9735145340..7995b37cdec67 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -75,19 +75,19 @@ extern "C" {
///
struct SharedMemorySmartStackTy {
/// Initialize the stack. Must be called by all threads.
- void init(bool IsSPMD);
+ static void init(bool IsSPMD);
/// Allocate \p Bytes on the stack for the encountering thread. Each thread
/// can call this function.
- void *push(uint64_t Bytes);
+ static void *push(uint64_t Bytes);
/// Deallocate the last allocation made by the encountering thread and pointed
/// to by \p Ptr from the stack. Each thread can call this function.
- void pop(void *Ptr, uint64_t Bytes);
+ static void pop(void *Ptr, uint64_t Bytes);
private:
/// Compute the size of the storage space reserved for a thread.
- uint32_t computeThreadStorageTotal() {
+ static uint32_t computeThreadStorageTotal() {
uint32_t NumLanesInBlock = mapping::getNumberOfThreadsInBlock();
return utils::alignDown((state::SharedScratchpadSize / NumLanesInBlock),
allocator::ALIGNMENT);
@@ -95,23 +95,28 @@ struct SharedMemorySmartStackTy {
/// Return the top address of the warp data stack, that is the first address
/// this warp will allocate memory at next.
- void *getThreadDataTop(uint32_t TId) {
- return &Data[computeThreadStorageTotal() * TId + Usage[TId]];
+ static void *getThreadDataTop(uint32_t TId) {
+ return (void *)&Data[computeThreadStorageTotal() * TId + Usage[TId]];
}
/// The actual storage, shared among all warps.
- [[gnu::aligned(
- allocator::ALIGNMENT)]] unsigned char Data[state::SharedScratchpadSize];
- [[gnu::aligned(
- allocator::ALIGNMENT)]] unsigned char Usage[mapping::MaxThreadsPerTeam];
+
+ [[gnu::aligned(allocator::ALIGNMENT)]] [[clang::loader_uninitialized]]
+ static Local<unsigned char> Data[state::SharedScratchpadSize];
+ [[gnu::aligned(allocator::ALIGNMENT)]] [[clang::loader_uninitialized]]
+ static Local<unsigned char> Usage[mapping::MaxThreadsPerTeam];
};
+Local<unsigned char>
+ SharedMemorySmartStackTy::Data[state::SharedScratchpadSize];
+Local<unsigned char>
+ SharedMemorySmartStackTy::Usage[mapping::MaxThreadsPerTeam];
+
static_assert(state::SharedScratchpadSize / mapping::MaxThreadsPerTeam <= 256,
"Shared scratchpad of this size not supported yet.");
-/// The allocation of a single shared memory scratchpad.
-[[clang::loader_uninitialized]] static Local<SharedMemorySmartStackTy>
- SharedMemorySmartStack;
+/// The single shared memory scratchpad.
+using SharedMemorySmartStack = SharedMemorySmartStackTy;
void SharedMemorySmartStackTy::init(bool IsSPMD) {
Usage[mapping::getThreadIdInBlock()] = 0;
@@ -163,11 +168,11 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
void *memory::allocShared(uint64_t Bytes, const char *Reason) {
- return SharedMemorySmartStack.push(Bytes);
+ return SharedMemorySmartStack::push(Bytes);
}
void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) {
- SharedMemorySmartStack.pop(Ptr, Bytes);
+ SharedMemorySmartStack::pop(Ptr, Bytes);
}
void *memory::allocGlobal(uint64_t Bytes, const char *Reason) {
@@ -247,7 +252,7 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
- SharedMemorySmartStack.init(IsSPMD);
+ SharedMemorySmartStack::init(IsSPMD);
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
TeamState.init(IsSPMD);
ThreadStates = nullptr;
More information about the llvm-commits mailing list