[Openmp-commits] [llvm] [openmp] [OpenMP] Implement 'omp_alloc' on the device (PR #102526)
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Mon Aug 12 16:30:46 PDT 2024
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/102526
>From 4fb6ed900bb2a8641037ca1157cee009738998be Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 8 Aug 2024 14:21:27 -0500
Subject: [PATCH] [OpenMP] Implement 'omp_alloc' on the device
Summary:
The 'omp_alloc' function should be callable from a target region. This
patch implemets it by simply calling `malloc` for every non-default
trait value allocator. All the special access modifiers are
unimplemented and return null. The null allocator returns null as the
spec states it should not be usable from the target.
---
offload/DeviceRTL/include/Allocator.h | 5 +++++
offload/DeviceRTL/include/Types.h | 2 +-
offload/DeviceRTL/src/Misc.cpp | 28 +++++++++++++++++++++++++++
offload/DeviceRTL/src/State.cpp | 4 ++--
offload/test/api/omp_device_alloc.c | 25 ++++++++++++++++++++++++
openmp/docs/design/Runtimes.rst | 8 ++++++++
6 files changed, 69 insertions(+), 3 deletions(-)
create mode 100644 offload/test/api/omp_device_alloc.c
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h
index a28eb0fb2977ea..23e0106c80a2c8 100644
--- a/offload/DeviceRTL/include/Allocator.h
+++ b/offload/DeviceRTL/include/Allocator.h
@@ -39,6 +39,11 @@ void free(void *Ptr);
} // namespace ompx
+extern "C" {
+[[gnu::weak]] void *malloc(size_t Size);
+[[gnu::weak]] void free(void *Ptr);
+}
+
#pragma omp end declare target
#endif
diff --git a/offload/DeviceRTL/include/Types.h b/offload/DeviceRTL/include/Types.h
index 2e12d9da0353b7..cd8f925a392a80 100644
--- a/offload/DeviceRTL/include/Types.h
+++ b/offload/DeviceRTL/include/Types.h
@@ -188,7 +188,7 @@ typedef enum omp_allocator_handle_t {
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
- KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
+ KMP_ALLOCATOR_MAX_HANDLE = ~(0LU)
} omp_allocator_handle_t;
#define __PRAGMA(STR) _Pragma(#STR)
diff --git a/offload/DeviceRTL/src/Misc.cpp b/offload/DeviceRTL/src/Misc.cpp
index c24af9442d16e3..ce4a221bdb37dd 100644
--- a/offload/DeviceRTL/src/Misc.cpp
+++ b/offload/DeviceRTL/src/Misc.cpp
@@ -9,6 +9,7 @@
//
//===----------------------------------------------------------------------===//
+#include "Allocator.h"
#include "Configuration.h"
#include "Types.h"
@@ -128,6 +129,33 @@ double omp_get_wtime(void) { return ompx::impl::getWTime(); }
void *__llvm_omp_indirect_call_lookup(void *HstPtr) {
return ompx::impl::indirectCallLookup(HstPtr);
}
+
+void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
+ switch (allocator) {
+ case omp_default_mem_alloc:
+ case omp_large_cap_mem_alloc:
+ case omp_const_mem_alloc:
+ case omp_high_bw_mem_alloc:
+ case omp_low_lat_mem_alloc:
+ return malloc(size);
+ default:
+ return nullptr;
+ }
+}
+
+void omp_free(void *ptr, omp_allocator_handle_t allocator) {
+ switch (allocator) {
+ case omp_default_mem_alloc:
+ case omp_large_cap_mem_alloc:
+ case omp_const_mem_alloc:
+ case omp_high_bw_mem_alloc:
+ case omp_low_lat_mem_alloc:
+ free(ptr);
+ case omp_null_allocator:
+ default:
+ return;
+ }
+}
}
///}
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index a1e4fa2449d9a2..f43f2cedb431d0 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -53,12 +53,12 @@ namespace {
extern "C" {
#ifdef __AMDGPU__
-[[gnu::weak]] void *malloc(uint64_t Size) { return allocator::alloc(Size); }
+[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
#else
-[[gnu::weak, gnu::leaf]] void *malloc(uint64_t Size);
+[[gnu::weak, gnu::leaf]] void *malloc(size_t Size);
[[gnu::weak, gnu::leaf]] void free(void *Ptr);
#endif
diff --git a/offload/test/api/omp_device_alloc.c b/offload/test/api/omp_device_alloc.c
new file mode 100644
index 00000000000000..46153a30e2e304
--- /dev/null
+++ b/offload/test/api/omp_device_alloc.c
@@ -0,0 +1,25 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+#pragma omp target teams num_teams(4)
+#pragma omp parallel
+ {
+ int *ptr = (int *)omp_alloc(sizeof(int), omp_default_mem_alloc);
+ assert(ptr && "Ptr is (null)!");
+ *ptr = 1;
+ assert(*ptr == 1 && "Ptr is not 1");
+ omp_free(ptr, omp_default_mem_alloc);
+ }
+
+#pragma omp target
+ {
+ assert(!omp_alloc(sizeof(int), omp_null_allocator) && "Ptr is not (null)!");
+ }
+
+ // CHECK: PASS
+ printf("PASS\n");
+}
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index ed002c8cf0f807..951c651f42f29d 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -1496,6 +1496,14 @@ clause. Examples for both are given below.
$ clang++ -fopenmp --offload-arch=gfx90a -O3 shared.c
$ env ./shared
+.. _libomptarget_device_allocator:
+
+Device Allocation
+^^^^^^^^^^^^^^^^^
+
+The device runtime supports basic runtime allocation via the ``omp_alloc``
+function. Currently, this allocates global memory for all default traits. Access
+modifiers are currently not supported and return a null pointer.
.. _libomptarget_device_debugging:
More information about the Openmp-commits
mailing list