[Openmp-commits] [openmp] d571af7 - [OpenMP][FIX] Ensure thread states do not crash on the GPU
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Sat Oct 21 14:44:32 PDT 2023
Author: Johannes Doerfert
Date: 2023-10-21T14:43:09-07:00
New Revision: d571af7f627491841fab7c456f774d7b8f546159
URL: https://github.com/llvm/llvm-project/commit/d571af7f627491841fab7c456f774d7b8f546159
DIFF: https://github.com/llvm/llvm-project/commit/d571af7f627491841fab7c456f774d7b8f546159.diff
LOG: [OpenMP][FIX] Ensure thread states do not crash on the GPU
The nested parallelism causes thread states which still do not properly
work but at least don't crash anymore.
Added:
openmp/libomptarget/test/offloading/thread_state_1.c
Modified:
openmp/libomptarget/DeviceRTL/include/LibC.h
openmp/libomptarget/DeviceRTL/src/LibC.cpp
openmp/libomptarget/DeviceRTL/src/State.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/include/LibC.h b/openmp/libomptarget/DeviceRTL/include/LibC.h
index 87eed20f529a719..dde86af783af95c 100644
--- a/openmp/libomptarget/DeviceRTL/include/LibC.h
+++ b/openmp/libomptarget/DeviceRTL/include/LibC.h
@@ -17,6 +17,7 @@
extern "C" {
int memcmp(const void *lhs, const void *rhs, size_t count);
+void memset(void *dst, int C, size_t count);
int printf(const char *format, ...);
}
diff --git a/openmp/libomptarget/DeviceRTL/src/LibC.cpp b/openmp/libomptarget/DeviceRTL/src/LibC.cpp
index ae73a644b621f3f..af675b97256f6c9 100644
--- a/openmp/libomptarget/DeviceRTL/src/LibC.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/LibC.cpp
@@ -47,6 +47,12 @@ int memcmp(const void *lhs, const void *rhs, size_t count) {
return 0;
}
+void memset(void *dst, int C, size_t count) {
+ auto *dstc = reinterpret_cast<char *>(dst);
+ for (size_t I = 0; I < count; ++I)
+ dstc[I] = C;
+}
+
/// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf
int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) {
return impl::omp_vprintf(Format, Arguments, Size);
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 68fe0b383548e53..efa0502b82a5cfb 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -12,6 +12,7 @@
#include "Debug.h"
#include "Environment.h"
#include "Interface.h"
+#include "LibC.h"
#include "Mapping.h"
#include "Synchronization.h"
#include "Types.h"
@@ -263,13 +264,14 @@ void state::enterDataEnvironment(IdentTy *Ident) {
return;
unsigned TId = mapping::getThreadIdInBlock();
- ThreadStateTy *NewThreadState =
- static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
+ ThreadStateTy *NewThreadState = static_cast<ThreadStateTy *>(
+ memory::allocGlobal(sizeof(ThreadStateTy), "ThreadStates alloc"));
uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
uint32_t Bytes = sizeof(ThreadStates[0]) * mapping::getMaxTeamThreads();
void *ThreadStatesPtr =
memory::allocGlobal(Bytes, "Thread state array allocation");
+ memset(ThreadStatesPtr, '0', Bytes);
if (!atomic::cas(ThreadStatesBitsPtr, uintptr_t(0),
reinterpret_cast<uintptr_t>(ThreadStatesPtr),
atomic::seq_cst, atomic::seq_cst))
@@ -298,7 +300,7 @@ void state::resetStateForThread(uint32_t TId) {
return;
ThreadStateTy *PreviousThreadState = ThreadStates[TId]->PreviousThreadState;
- __kmpc_free_shared(ThreadStates[TId], sizeof(ThreadStateTy));
+ memory::freeGlobal(ThreadStates[TId], "ThreadStates dealloc");
ThreadStates[TId] = PreviousThreadState;
}
diff --git a/openmp/libomptarget/test/offloading/thread_state_1.c b/openmp/libomptarget/test/offloading/thread_state_1.c
new file mode 100644
index 000000000000000..87251200b907826
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/thread_state_1.c
@@ -0,0 +1,36 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+// RUN: %libomptarget-compileopt-run-and-check-generic
+
+// These are supported and work, but we compute bogus results on the GPU. For
+// now we disable the CPU and enable it once the GPU is fixed.
+//
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ // TODO: Test all ICVs
+ int lvl = 333, tid = 666, nt = 999;
+#pragma omp target teams map(tofrom : lvl, tid, nt) num_teams(2)
+ {
+ if (omp_get_team_num() == 0) {
+#pragma omp parallel num_threads(128)
+ if (omp_get_thread_num() == 17) {
+#pragma omp parallel num_threads(64)
+ if (omp_get_thread_num() == omp_get_num_threads() - 1) {
+ lvl = omp_get_level();
+ tid = omp_get_thread_num();
+ nt = omp_get_num_threads();
+ }
+ }
+ }
+ }
+ // TODO: This is wrong, but at least it doesn't crash
+ // CHECK: lvl: 333, tid: 666, nt: 999
+ printf("lvl: %i, tid: %i, nt: %i\n", lvl, tid, nt);
+ return 0;
+}
More information about the Openmp-commits
mailing list