[Openmp-commits] [llvm] [openmp] [Offload] Remove handling for device memory pool (PR #163629)
via Openmp-commits
openmp-commits at lists.llvm.org
Wed Oct 15 14:12:19 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
This was a lot of code that was only used for upstream LLVM builds of
AMDGPU offloading. We have a generic and fast `malloc` in `libc` now so
just use that. Simplifies code, can be added back if we start providing
alternate forms but I don't think there's a single use-case that would
justify it yet.
---
Patch is 22.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163629.diff
15 Files Affected:
- (modified) offload/include/Shared/Environment.h (-22)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (-14)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+9-15)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+1-95)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (-5)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (-8)
- (renamed) offload/test/libc/malloc_parallel.c ()
- (modified) offload/test/mapping/lambda_mapping.cpp (+2)
- (removed) offload/test/offloading/malloc.c (-37)
- (modified) openmp/device/include/Allocator.h (-6)
- (modified) openmp/device/src/Allocator.cpp (+34-33)
- (modified) openmp/device/src/Kernel.cpp (-1)
- (modified) openmp/device/src/Misc.cpp (+2-2)
- (modified) openmp/device/src/State.cpp (+2-22)
- (modified) openmp/docs/design/Runtimes.rst (-1)
``````````diff
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index 2a283bd6fa4ed..79e45fd8e082d 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -21,7 +21,6 @@ enum class DeviceDebugKind : uint32_t {
Assertion = 1U << 0,
FunctionTracing = 1U << 1,
CommonIssues = 1U << 2,
- AllocationTracker = 1U << 3,
PGODump = 1U << 4,
};
@@ -36,27 +35,6 @@ struct DeviceEnvironmentTy {
uint64_t HardwareParallelism;
};
-struct DeviceMemoryPoolTy {
- void *Ptr;
- uint64_t Size;
-};
-
-struct DeviceMemoryPoolTrackingTy {
- uint64_t NumAllocations;
- uint64_t AllocationTotal;
- uint64_t AllocationMin;
- uint64_t AllocationMax;
-
- void combine(DeviceMemoryPoolTrackingTy &Other) {
- NumAllocations += Other.NumAllocations;
- AllocationTotal += Other.AllocationTotal;
- AllocationMin = AllocationMin > Other.AllocationMin ? Other.AllocationMin
- : AllocationMin;
- AllocationMax = AllocationMax < Other.AllocationMax ? Other.AllocationMax
- : AllocationMax;
- }
-};
-
// NOTE: Please don't change the order of those members as their indices are
// used in the middle end. Always add the new data member at the end.
// Different from KernelEnvironmentTy below, this structure contains members
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index a7723b8598815..4d827b3ad31e2 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -3057,17 +3057,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
StackSize = Value;
return Plugin::success();
}
- Error getDeviceHeapSize(uint64_t &Value) override {
- Value = DeviceMemoryPoolSize;
- return Plugin::success();
- }
- Error setDeviceHeapSize(uint64_t Value) override {
- for (DeviceImageTy *Image : LoadedImages)
- if (auto Err = setupDeviceMemoryPool(Plugin, *Image, Value))
- return Err;
- DeviceMemoryPoolSize = Value;
- return Plugin::success();
- }
Error getDeviceMemorySize(uint64_t &Value) override {
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
if (Pool->isGlobal()) {
@@ -3269,9 +3258,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
/// Reference to the host device.
AMDHostDeviceTy &HostDevice;
- /// The current size of the global device memory pool (managed by us).
- uint64_t DeviceMemoryPoolSize = 1L << 29L /*512MB=*/;
-
/// The current size of the stack that will be used in cases where it could
/// not be statically determined.
uint64_t StackSize = 16 * 1024 /* 16 KB */;
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 8c530bba3882c..26723d94199e7 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -816,10 +816,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Error unloadBinary(DeviceImageTy *Image);
virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
- /// Setup the global device memory pool, if the plugin requires one.
- Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
- uint64_t PoolSize);
-
// Setup the RPC server for this device if needed. This may not run on some
// plugins like the CPU targets. By default, it will not be executed so it is
// up to the target to override this using the shouldSetupRPCServer function.
@@ -1061,6 +1057,15 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
virtual Error getDeviceStackSize(uint64_t &V) = 0;
+ virtual Error getDeviceHeapSize(uint64_t &V) {
+ return Plugin::error(error::ErrorCode::UNSUPPORTED,
+ "%s not supported by platform", __func__);
+ }
+ virtual Error setDeviceHeapSize(uint64_t V) {
+ return Plugin::error(error::ErrorCode::UNSUPPORTED,
+ "%s not supported by platform", __func__);
+ }
+
/// Returns true if current plugin architecture is an APU
/// and unified_shared_memory was not requested by the program.
bool useAutoZeroCopy();
@@ -1149,12 +1154,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// plugin can implement the setters as no-op and setting the output
/// value to zero for the getters.
virtual Error setDeviceStackSize(uint64_t V) = 0;
- virtual Error getDeviceHeapSize(uint64_t &V) = 0;
- virtual Error setDeviceHeapSize(uint64_t V) = 0;
-
- /// Indicate whether the device should setup the global device memory pool. If
- /// false is return the value on the device will be uninitialized.
- virtual bool shouldSetupDeviceMemoryPool() const { return true; }
/// Indicate whether or not the device should setup the RPC server. This is
/// only necessary for unhosted targets like the GPU.
@@ -1175,7 +1174,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
Int32Envar OMPX_DebugKind;
UInt32Envar OMPX_SharedMemorySize;
UInt64Envar OMPX_TargetStackSize;
- UInt64Envar OMPX_TargetHeapSize;
/// Environment flag to set the minimum number of threads we use for a
/// low-trip count combined loop. Instead of using more threads we increase
@@ -1229,10 +1227,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// Internal representation for OMPT device (initialize & finalize)
std::atomic<bool> OmptInitialized;
#endif
-
-private:
- DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
- DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
};
/// Class implementing common functionalities of offload plugins. Each plugin
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index db43cbe49cc2b..b0a4113c2c3ec 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -708,7 +708,7 @@ GenericDeviceTy::GenericDeviceTy(GenericPluginTy &Plugin, int32_t DeviceId,
// Do not initialize the following two envars since they depend on the
// device initialization. These cannot be consulted until the device is
// initialized correctly. We initialize them in GenericDeviceTy::init().
- OMPX_TargetStackSize(), OMPX_TargetHeapSize(),
+ OMPX_TargetStackSize(),
// By default, the initial number of streams and events is 1.
OMPX_InitialNumStreams("LIBOMPTARGET_NUM_INITIAL_STREAMS", 1),
OMPX_InitialNumEvents("LIBOMPTARGET_NUM_INITIAL_EVENTS", 1),
@@ -758,14 +758,6 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
return StackSizeEnvarOrErr.takeError();
OMPX_TargetStackSize = std::move(*StackSizeEnvarOrErr);
- auto HeapSizeEnvarOrErr = UInt64Envar::create(
- "LIBOMPTARGET_HEAP_SIZE",
- [this](uint64_t &V) -> Error { return getDeviceHeapSize(V); },
- [this](uint64_t V) -> Error { return setDeviceHeapSize(V); });
- if (!HeapSizeEnvarOrErr)
- return HeapSizeEnvarOrErr.takeError();
- OMPX_TargetHeapSize = std::move(*HeapSizeEnvarOrErr);
-
// Update the maximum number of teams and threads after the device
// initialization sets the corresponding hardware limit.
if (OMP_NumTeams > 0)
@@ -791,19 +783,6 @@ Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
if (auto Err = callGlobalDestructors(Plugin, *Image))
return Err;
- if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
- GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
- DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
- GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
- sizeof(DeviceMemoryPoolTrackingTy),
- &ImageDeviceMemoryPoolTracking);
- if (auto Err =
- GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) {
- consumeError(std::move(Err));
- }
- DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
- }
-
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
if (!ProfOrErr)
@@ -829,22 +808,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
return Err;
LoadedImages.clear();
- if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
- // TODO: Write this by default into a file.
- printf("\n\n|-----------------------\n"
- "| Device memory tracker:\n"
- "|-----------------------\n"
- "| #Allocations: %lu\n"
- "| Byes allocated: %lu\n"
- "| Minimal allocation: %lu\n"
- "| Maximal allocation: %lu\n"
- "|-----------------------\n\n\n",
- DeviceMemoryPoolTracking.NumAllocations,
- DeviceMemoryPoolTracking.AllocationTotal,
- DeviceMemoryPoolTracking.AllocationMin,
- DeviceMemoryPoolTracking.AllocationMax);
- }
-
// Delete the memory manager before deinitializing the device. Otherwise,
// we may delete device allocations after the device is deinitialized.
if (MemoryManager)
@@ -897,18 +860,6 @@ Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
// Add the image to list.
LoadedImages.push_back(Image);
- // Setup the global device memory pool if needed.
- if (!Plugin.getRecordReplay().isReplaying() &&
- shouldSetupDeviceMemoryPool()) {
- uint64_t HeapSize;
- auto SizeOrErr = getDeviceHeapSize(HeapSize);
- if (SizeOrErr) {
- REPORT("No global device memory pool due to error: %s\n",
- toString(std::move(SizeOrErr)).data());
- } else if (auto Err = setupDeviceMemoryPool(Plugin, *Image, HeapSize))
- return std::move(Err);
- }
-
if (auto Err = setupRPCServer(Plugin, *Image))
return std::move(Err);
@@ -932,51 +883,6 @@ Expected<DeviceImageTy *> GenericDeviceTy::loadBinary(GenericPluginTy &Plugin,
return Image;
}
-Error GenericDeviceTy::setupDeviceMemoryPool(GenericPluginTy &Plugin,
- DeviceImageTy &Image,
- uint64_t PoolSize) {
- // Free the old pool, if any.
- if (DeviceMemoryPool.Ptr) {
- if (auto Err = dataDelete(DeviceMemoryPool.Ptr,
- TargetAllocTy::TARGET_ALLOC_DEVICE))
- return Err;
- }
-
- DeviceMemoryPool.Size = PoolSize;
- auto AllocOrErr = dataAlloc(PoolSize, /*HostPtr=*/nullptr,
- TargetAllocTy::TARGET_ALLOC_DEVICE);
- if (AllocOrErr) {
- DeviceMemoryPool.Ptr = *AllocOrErr;
- } else {
- auto Err = AllocOrErr.takeError();
- REPORT("Failure to allocate device memory for global memory pool: %s\n",
- toString(std::move(Err)).data());
- DeviceMemoryPool.Ptr = nullptr;
- DeviceMemoryPool.Size = 0;
- }
-
- // Create the metainfo of the device environment global.
- GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler();
- if (!GHandler.isSymbolInImage(*this, Image,
- "__omp_rtl_device_memory_pool_tracker")) {
- DP("Skip the memory pool as there is no tracker symbol in the image.");
- return Error::success();
- }
-
- GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
- sizeof(DeviceMemoryPoolTrackingTy),
- &DeviceMemoryPoolTracking);
- if (auto Err = GHandler.writeGlobalToDevice(*this, Image, TrackerGlobal))
- return Err;
-
- // Create the metainfo of the device environment global.
- GlobalTy DevEnvGlobal("__omp_rtl_device_memory_pool",
- sizeof(DeviceMemoryPoolTy), &DeviceMemoryPool);
-
- // Write device environment values to the device.
- return GHandler.writeGlobalToDevice(*this, Image, DevEnvGlobal);
-}
-
Error GenericDeviceTy::setupRPCServer(GenericPluginTy &Plugin,
DeviceImageTy &Image) {
// The plugin either does not need an RPC server or it is unavailable.
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index db94f7f2dd995..3ef724328916a 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1228,11 +1228,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
return Info;
}
- virtual bool shouldSetupDeviceMemoryPool() const override {
- /// We use the CUDA malloc for now.
- return false;
- }
-
/// Getters and setters for stack and heap sizes.
Error getDeviceStackSize(uint64_t &Value) override {
return getCtxLimit(CU_LIMIT_STACK_SIZE, Value);
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index eb4ecac9907a1..48de1fefa29d6 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -380,9 +380,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
return Info;
}
- /// This plugin should not setup the device environment or memory pool.
- virtual bool shouldSetupDeviceMemoryPool() const override { return false; };
-
/// Getters and setters for stack size and heap size not relevant.
Error getDeviceStackSize(uint64_t &Value) override {
Value = 0;
@@ -391,11 +388,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
Error setDeviceStackSize(uint64_t Value) override {
return Plugin::success();
}
- Error getDeviceHeapSize(uint64_t &Value) override {
- Value = 0;
- return Plugin::success();
- }
- Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); }
private:
/// Grid values for Generic ELF64 plugins.
diff --git a/offload/test/offloading/malloc_parallel.c b/offload/test/libc/malloc_parallel.c
similarity index 100%
rename from offload/test/offloading/malloc_parallel.c
rename to offload/test/libc/malloc_parallel.c
diff --git a/offload/test/mapping/lambda_mapping.cpp b/offload/test/mapping/lambda_mapping.cpp
index 63b1719fbbc36..8e640b7fff3aa 100644
--- a/offload/test/mapping/lambda_mapping.cpp
+++ b/offload/test/mapping/lambda_mapping.cpp
@@ -4,6 +4,8 @@
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic
// RUN: %libomptarget-compileoptxx-run-and-check-generic
+// REQUIRES: libc
+
#include <iostream>
template <typename LOOP_BODY>
diff --git a/offload/test/offloading/malloc.c b/offload/test/offloading/malloc.c
deleted file mode 100644
index 7b98e1f1110e5..0000000000000
--- a/offload/test/offloading/malloc.c
+++ /dev/null
@@ -1,37 +0,0 @@
-// RUN: %libomptarget-compile-generic && %libomptarget-run-generic
-// RUN: %libomptarget-compileopt-generic && %libomptarget-run-generic
-
-#include <stdio.h>
-#include <stdlib.h>
-
-int main() {
- long unsigned *DP = 0;
- int N = 32;
- int Threads = 64;
- int Teams = 10;
-
- // Allocate ~55MB on the device.
-#pragma omp target map(from : DP)
- DP = (long unsigned *)malloc(sizeof(long unsigned) * N * Threads * Teams);
-
-#pragma omp target teams distribute parallel for num_teams(Teams) \
- thread_limit(Threads) is_device_ptr(DP)
- for (int i = 0; i < Threads * Teams; ++i) {
- for (int j = 0; j < N; ++j) {
- DP[i * N + j] = i + j;
- }
- }
-
- long unsigned s = 0;
-#pragma omp target teams distribute parallel for num_teams(Teams) \
- thread_limit(Threads) reduction(+ : s)
- for (int i = 0; i < Threads * Teams; ++i) {
- for (int j = 0; j < N; ++j) {
- s += DP[i * N + j];
- }
- }
-
- // CHECK: Sum: 6860800
- printf("Sum: %li\n", s);
- return 0;
-}
diff --git a/openmp/device/include/Allocator.h b/openmp/device/include/Allocator.h
index dc4d029ed75f3..507ec6327126a 100644
--- a/openmp/device/include/Allocator.h
+++ b/openmp/device/include/Allocator.h
@@ -14,18 +14,12 @@
#include "DeviceTypes.h"
-// Forward declaration.
-struct KernelEnvironmentTy;
-
namespace ompx {
namespace allocator {
static uint64_t constexpr ALIGNMENT = 16;
-/// Initialize the allocator according to \p KernelEnvironment
-void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
-
/// Allocate \p Size bytes.
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
alloc(uint64_t Size);
diff --git a/openmp/device/src/Allocator.cpp b/openmp/device/src/Allocator.cpp
index aac2a6005158e..34c945c979ffb 100644
--- a/openmp/device/src/Allocator.cpp
+++ b/openmp/device/src/Allocator.cpp
@@ -18,42 +18,36 @@
#include "Synchronization.h"
using namespace ompx;
+using namespace allocator;
+
+// Provide a default implementation of malloc / free for AMDGPU platforms built
+// without 'libc' support.
+extern "C" {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+[[gnu::weak]] void *malloc(size_t Size) { return allocator::alloc(Size); }
+[[gnu::weak]] void free(void *Ptr) { allocator::free(Ptr); }
+#else
+[[gnu::leaf]] void *malloc(size_t Size);
+[[gnu::leaf]] void free(void *Ptr);
+#endif
+}
-[[gnu::used, gnu::retain, gnu::weak,
- gnu::visibility(
- "protected")]] DeviceMemoryPoolTy __omp_rtl_device_memory_pool;
-[[gnu::used, gnu::retain, gnu::weak,
- gnu::visibility("protected")]] DeviceMemoryPoolTrackingTy
- __omp_rtl_device_memory_pool_tracker;
+static constexpr uint64_t MEMORY_SIZE = /* 1 MiB */ 1024 * 1024;
+alignas(ALIGNMENT) static uint8_t Memory[MEMORY_SIZE] = {0};
-/// Stateless bump allocator that uses the __omp_rtl_device_memory_pool
-/// directly.
+// Fallback bump pointer interface for platforms without a functioning
+// allocator.
struct BumpAllocatorTy final {
+ uint64_t Offset = 0;
void *alloc(uint64_t Size) {
Size = utils::roundUp(Size, uint64_t(allocator::ALIGNMENT));
- if (config::isDebugMode(DeviceDebugKind::AllocationTracker)) {
- atomic::add(&__omp_rtl_device_memory_pool_tracker.NumAllocations, 1,
- atomic::seq_cst);
- atomic::add(&__omp_rtl_device_memory_pool_tracker.AllocationTotal, Size,
- atomic::seq_cst);
- atomic::min(&__omp_rtl_device_memory_pool_tracker.AllocationMin, Size,
- atomic::seq_cst);
- atomic::max(&__omp_rtl_device_memory_pool_tracker.AllocationMax, Size,
- atomic::seq_cst);
- }
-
- uint64_t *Data =
- reinterpret_cast<uint64_t *>(&__omp_rtl_device_memory_pool.Ptr);
- uint64_t End =
- reinterpret_cast<uint64_t>(Data) + __omp_rtl_device_memory_pool.Size;
-
- uint64_t OldData = atomic::add(Data, Size, atomic::seq_cst);
- if (OldData + Size > End)
+ uint64_t OldData = atomic::add(&Offset, Size, atomic::seq_cst);
+ if (OldData + Size >= MEMORY_SIZE)
__builtin_trap();
- return reinterpret_cast<void *>(OldData);
+ return &Memory[OldData];
}
void free(void *) {}
@@ -65,13 +59,20 @@ BumpAllocatorTy BumpAllocator;
///
///{
-void allocator::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment) {
- // TODO: Check KernelEnvironment for an allocator choice as soon as we have
- // more than one.
+void *allocator::alloc(uint64_t Size) {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+ return BumpAllocator.alloc(Size);
+#else
+ return ::malloc(Size);
+#endif
}
-void *allocator::alloc(uint64_t Size) { return BumpAllocator.alloc(Size); }
-
-void allocator::free(void *Ptr) { BumpAllocator.free(Ptr); }
+void allocator::free(void *Ptr) {
+#if defined(__AMDGPU__) && !defined(OMPTARGET_HAS_LIBC)
+ BumpAllocator.free(Ptr);
+#else
+ ::free(Ptr);
+#endif
+}
///}
diff --git a/openmp/device/src/Kernel.cpp b/openmp/device/src/Kernel.cpp
index 8c2828b270419..05af35d242ac5 100644
--- a/openmp/device/src/Kernel.cpp
+++ b/openmp/device/src/Kernel.cpp
@@ -41,7 +41,6 @@ inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
state::init(IsSPMD, KernelEnvironment, KernelLaunchEnvironment);
- allocator::init(IsSPMD, KernelEnvironment);
workshare::init(IsSPMD);
}
diff --git a/openmp/device/src/Misc.cpp b/openmp/device/src/Misc.cpp
index 563f674d166e5..a53fb4302fdb5 100644
--- a/openmp/device/src/Misc.cpp
+++ b/openmp/device/src/Misc.cpp
@@ -100,7 +100,7 @@ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
cas...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/163629
More information about the Openmp-commits
mailing list