[Openmp-commits] [openmp] 23bc343 - [Libomptarget] Change device free routines to accept the allocation kind

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Wed Sep 14 10:14:27 PDT 2022


Author: Joseph Huber
Date: 2022-09-14T12:14:07-05:00
New Revision: 23bc343855fdf6fb7668abadf2b064034b207981

URL: https://github.com/llvm/llvm-project/commit/23bc343855fdf6fb7668abadf2b064034b207981
DIFF: https://github.com/llvm/llvm-project/commit/23bc343855fdf6fb7668abadf2b064034b207981.diff

LOG: [Libomptarget] Change device free routines to accept the allocation kind

Previous support for device memory allocators used a single free
routine and did not provide the original kind of the allocation. This is
problematic as some of these memory types required different handling.
Previously this was worked around using a map in runtime to record the
original kind of each pointer. Instead, this patch introduces new free
routines similar to the existing allocation routines. This allows us to
avoid a map traversal every time we free a device pointer.

The only interfaces defined by the standard are `omp_target_alloc` and
`omp_target_free`, these do not take a kind as `omp_alloc` does. The
standard dictates the following:

"The omp_target_alloc routine returns a device pointer that references
the device address of a storage location of size bytes. The storage
location is dynamically allocated in the device data environment of the
device specified by device_num."

Which suggests that these routines only allocate the default device
memory for the kind. So this has been changed to reflect this. This
change is somewhat breaking if users were using `omp_target_free` as
previously shown in the tests.

Reviewed By: JonChesterfield, tianshilei1992

Differential Revision: https://reviews.llvm.org/D133053

Added: 
    

Modified: 
    openmp/libomptarget/include/device.h
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/include/omptargetplugin.h
    openmp/libomptarget/include/rtl.h
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
    openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h
    openmp/libomptarget/plugins/cuda/src/rtl.cpp
    openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
    openmp/libomptarget/plugins/remote/src/rtl.cpp
    openmp/libomptarget/plugins/ve/src/rtl.cpp
    openmp/libomptarget/src/api.cpp
    openmp/libomptarget/src/device.cpp
    openmp/libomptarget/src/exports
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/src/private.h
    openmp/libomptarget/test/api/omp_device_managed_memory.c
    openmp/libomptarget/test/api/omp_host_pinned_memory.c
    openmp/runtime/src/kmp_alloc.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index 5141899a19fa9..8a38634e4f777 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -411,8 +411,9 @@ struct DeviceTy {
   void *allocData(int64_t Size, void *HstPtr = nullptr,
                   int32_t Kind = TARGET_ALLOC_DEFAULT);
   /// Deallocates memory which \p TgtPtrBegin points at and returns
-  /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails.
-  int32_t deleteData(void *TgtPtrBegin);
+  /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails. p Kind dictates what
+  /// allocator should be used (host, shared, device).
+  int32_t deleteData(void *TgtPtrBegin, int32_t Kind = TARGET_ALLOC_DEFAULT);
 
   // Data transfer. When AsyncInfo is nullptr, the transfer will be
   // synchronous.

diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index 8d4adc7a0d0ac..c805ce79367c2 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -247,6 +247,12 @@ void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
 void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
 void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);
 
+/// Explicit target memory deallocators
+/// Using the llvm_ prefix until they become part of the OpenMP standard.
+void llvm_omp_target_free_device(void *DevicePtr, int DeviceNum);
+void llvm_omp_target_free_host(void *DevicePtr, int DeviceNum);
+void llvm_omp_target_free_shared(void *DevicePtr, int DeviceNum);
+
 /// Dummy target so we have a symbol for generating host fallback.
 void *llvm_omp_target_dynamic_shared_alloc();
 

diff  --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
index c036eb7ced756..476f459bb608d 100644
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -117,8 +117,9 @@ int32_t __tgt_rtl_data_exchange_async(int32_t SrcID, void *SrcPtr,
                                       __tgt_async_info *AsyncInfo);
 
 // De-allocate the data referenced by target ptr on the device. In case of
-// success, return zero. Otherwise, return an error code.
-int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr);
+// success, return zero. Otherwise, return an error code. Kind dictates what
+// allocator to use (e.g. shared, host, device).
+int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr, int32_t Kind);
 
 // Transfer control to the offloaded entry Entry on the target device.
 // Args and Offsets are arrays of NumArgs size of target addresses and

diff  --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
index 186b37cde6bd6..4a0aec66b29a8 100644
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -49,7 +49,7 @@ struct RTLInfoTy {
   typedef int32_t(data_exchange_ty)(int32_t, void *, int32_t, void *, int64_t);
   typedef int32_t(data_exchange_async_ty)(int32_t, void *, int32_t, void *,
                                           int64_t, __tgt_async_info *);
-  typedef int32_t(data_delete_ty)(int32_t, void *);
+  typedef int32_t(data_delete_ty)(int32_t, void *, int32_t);
   typedef int32_t(run_region_ty)(int32_t, void *, void **, ptr
diff _t *,
                                  int32_t);
   typedef int32_t(run_region_async_ty)(int32_t, void *, void **, ptr
diff _t *,

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index da738713ef6b5..3fcb8a74dc3b6 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -2606,7 +2606,7 @@ int32_t __tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr,
   return dataRetrieve(DeviceId, HstPtr, TgtPtr, Size, AsyncInfo);
 }
 
-int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr) {
+int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr, int32_t) {
   assert(DeviceId < DeviceInfo().NumberOfDevices && "Device ID too large");
   // HSA can free pointers allocated from 
diff erent types of memory pool.
   hsa_status_t Err;

diff  --git a/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h b/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h
index f4b63f301c204..8e97fe5c3a5c4 100644
--- a/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h
+++ b/openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h
@@ -31,10 +31,11 @@ class DeviceAllocatorTy {
 
   /// Allocate a memory of size \p Size . \p HstPtr is used to assist the
   /// allocation.
-  virtual void *allocate(size_t Size, void *HstPtr, TargetAllocTy Kind) = 0;
+  virtual void *allocate(size_t Size, void *HstPtr,
+                         TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
 
   /// Delete the pointer \p TgtPtr on the device
-  virtual int free(void *TgtPtr) = 0;
+  virtual int free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0;
 };
 
 /// Class of memory manager. The memory manager is per-device by using

diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index a52e03000bee0..5f249ce9abb3a 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -20,7 +20,6 @@
 #include <memory>
 #include <mutex>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
 #include "Debug.h"
@@ -366,8 +365,6 @@ class DeviceRTLTy {
   /// A class responsible for interacting with device native runtime library to
   /// allocate and free memory.
   class CUDADeviceAllocatorTy : public DeviceAllocatorTy {
-    std::unordered_map<void *, TargetAllocTy> HostPinnedAllocs;
-
   public:
     void *allocate(size_t Size, void *, TargetAllocTy Kind) override {
       if (Size == 0)
@@ -390,7 +387,6 @@ class DeviceRTLTy {
         MemAlloc = HostPtr;
         if (!checkResult(Err, "Error returned from cuMemAllocHost\n"))
           return nullptr;
-        HostPinnedAllocs[MemAlloc] = Kind;
         break;
       case TARGET_ALLOC_SHARED:
         CUdeviceptr SharedPtr;
@@ -404,13 +400,9 @@ class DeviceRTLTy {
       return MemAlloc;
     }
 
-    int free(void *TgtPtr) override {
+    int free(void *TgtPtr, TargetAllocTy Kind) override {
       CUresult Err;
       // Host pinned memory must be freed 
diff erently.
-      TargetAllocTy Kind =
-          (HostPinnedAllocs.find(TgtPtr) == HostPinnedAllocs.end())
-              ? TARGET_ALLOC_DEFAULT
-              : TARGET_ALLOC_HOST;
       switch (Kind) {
       case TARGET_ALLOC_DEFAULT:
       case TARGET_ALLOC_DEVICE:
@@ -1102,11 +1094,23 @@ class DeviceRTLTy {
     return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
   }
 
-  int dataDelete(const int DeviceId, void *TgtPtr) {
-    if (UseMemoryManager)
-      return MemoryManagers[DeviceId]->free(TgtPtr);
+  int dataDelete(const int DeviceId, void *TgtPtr, TargetAllocTy Kind) {
+    switch (Kind) {
+    case TARGET_ALLOC_DEFAULT:
+    case TARGET_ALLOC_DEVICE:
+      if (UseMemoryManager)
+        return MemoryManagers[DeviceId]->free(TgtPtr);
+      else
+        return DeviceAllocators[DeviceId].free(TgtPtr, Kind);
+    case TARGET_ALLOC_HOST:
+    case TARGET_ALLOC_SHARED:
+      return DeviceAllocators[DeviceId].free(TgtPtr, Kind);
+    }
 
-    return DeviceAllocators[DeviceId].free(TgtPtr);
+    REPORT("Invalid target data allocation kind or requested allocator not "
+           "implemented yet\n");
+
+    return OFFLOAD_FAIL;
   }
 
   int runTargetTeamRegion(const int DeviceId, void *TgtEntryPtr, void **TgtArgs,
@@ -1699,13 +1703,13 @@ int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr,
   return __tgt_rtl_synchronize(SrcDevId, &AsyncInfo);
 }
 
-int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
+int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t Kind) {
   assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
 
   if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
     return OFFLOAD_FAIL;
 
-  return DeviceRTL.dataDelete(DeviceId, TgtPtr);
+  return DeviceRTL.dataDelete(DeviceId, TgtPtr, (TargetAllocTy)Kind);
 }
 
 int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,

diff  --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
index 7987b142cfbd7..9622278428c0f 100644
--- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
@@ -232,7 +232,7 @@ int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
   return OFFLOAD_SUCCESS;
 }
 
-int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
+int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t) {
   free(TgtPtr);
   return OFFLOAD_SUCCESS;
 }

diff  --git a/openmp/libomptarget/plugins/remote/src/rtl.cpp b/openmp/libomptarget/plugins/remote/src/rtl.cpp
index e80f55c6245b7..bafb7afbdf92b 100644
--- a/openmp/libomptarget/plugins/remote/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/remote/src/rtl.cpp
@@ -93,7 +93,7 @@ int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
   return Manager->dataRetrieve(DeviceId, HstPtr, TgtPtr, Size);
 }
 
-int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
+int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr, int32_t) {
   return Manager->dataDelete(DeviceId, TgtPtr);
 }
 

diff  --git a/openmp/libomptarget/plugins/ve/src/rtl.cpp b/openmp/libomptarget/plugins/ve/src/rtl.cpp
index 8772f60005548..76bd93e4fa53f 100644
--- a/openmp/libomptarget/plugins/ve/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/ve/src/rtl.cpp
@@ -392,7 +392,7 @@ int32_t __tgt_rtl_data_retrieve(int32_t ID, void *HostPtr, void *TargetPtr,
 
 // De-allocate the data referenced by target ptr on the device. In case of
 // success, return zero. Otherwise, return an error code.
-int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr) {
+int32_t __tgt_rtl_data_delete(int32_t ID, void *TargetPtr, int32_t) {
   int ret = veo_free_mem(DeviceInfo.ProcHandles[ID], (uint64_t)TargetPtr);
 
   if (ret != 0) {

diff  --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp
index 10bf242db0f99..d65ca0b02864d 100644
--- a/openmp/libomptarget/src/api.cpp
+++ b/openmp/libomptarget/src/api.cpp
@@ -62,34 +62,25 @@ EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) {
   return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__);
 }
 
-EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; }
-EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
-
-EXTERN void omp_target_free(void *DevicePtr, int DeviceNum) {
-  TIMESCOPE();
-  DP("Call to omp_target_free for device %d and address " DPxMOD "\n",
-     DeviceNum, DPxPTR(DevicePtr));
-
-  if (!DevicePtr) {
-    DP("Call to omp_target_free with NULL ptr\n");
-    return;
-  }
+EXTERN void omp_target_free(void *Ptr, int DeviceNum) {
+  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
+}
 
-  if (DeviceNum == omp_get_initial_device()) {
-    free(DevicePtr);
-    DP("omp_target_free deallocated host ptr\n");
-    return;
-  }
+EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) {
+  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
+}
 
-  if (!deviceIsReady(DeviceNum)) {
-    DP("omp_target_free returns, nothing to do\n");
-    return;
-  }
+EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) {
+  return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_HOST, __func__);
+}
 
-  PM->Devices[DeviceNum]->deleteData(DevicePtr);
-  DP("omp_target_free deallocated device ptr\n");
+EXTERN void llvm_omp_target_free_shared(void *Ptre, int DeviceNum) {
+  return targetFreeExplicit(Ptre, DeviceNum, TARGET_ALLOC_SHARED, __func__);
 }
 
+EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; }
+EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
+
 EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
   TIMESCOPE();
   DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",

diff  --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 43e9d13a6e3ce..1dffc76a0947f 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -530,8 +530,8 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
   return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
 }
 
-int32_t DeviceTy::deleteData(void *TgtPtrBegin) {
-  return RTL->data_delete(RTLDeviceID, TgtPtrBegin);
+int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) {
+  return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind);
 }
 
 // Submit data to device

diff  --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index 35a665a98c27a..24f81b6384649 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -43,6 +43,9 @@ VERS1.0 {
     llvm_omp_target_alloc_host;
     llvm_omp_target_alloc_shared;
     llvm_omp_target_alloc_device;
+    llvm_omp_target_free_host;
+    llvm_omp_target_free_shared;
+    llvm_omp_target_free_device;
     llvm_omp_target_dynamic_shared_alloc;
     __tgt_set_info_flag;
     __tgt_print_device_info;

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 865d3a6412f9f..e8b7a594cf633 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -368,6 +368,32 @@ void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
   return Rc;
 }
 
+void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
+                        const char *Name) {
+  TIMESCOPE();
+  DP("Call to %s for device %d and address " DPxMOD "\n", Name, DeviceNum,
+     DPxPTR(DevicePtr));
+
+  if (!DevicePtr) {
+    DP("Call to %s with NULL ptr\n", Name);
+    return;
+  }
+
+  if (DeviceNum == omp_get_initial_device()) {
+    free(DevicePtr);
+    DP("%s deallocated host ptr\n", Name);
+    return;
+  }
+
+  if (!deviceIsReady(DeviceNum)) {
+    DP("%s returns, nothing to do\n", Name);
+    return;
+  }
+
+  PM->Devices[DeviceNum]->deleteData(DevicePtr, Kind);
+  DP("omp_target_free deallocated device ptr\n");
+}
+
 /// Call the user-defined mapper function followed by the appropriate
 // targetData* function (targetData{Begin,End,Update}).
 int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,

diff  --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 5adadf98dc899..df2ce399f6614 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -49,6 +49,8 @@ extern void handleTargetOutcome(bool Success, ident_t *Loc);
 extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);
 extern void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
                                  const char *Name);
+extern void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
+                               const char *Name);
 
 // This structure stores information of a mapped memory region.
 struct MapComponentInfoTy {

diff  --git a/openmp/libomptarget/test/api/omp_device_managed_memory.c b/openmp/libomptarget/test/api/omp_device_managed_memory.c
index e3114d85894f1..80b3559466100 100644
--- a/openmp/libomptarget/test/api/omp_device_managed_memory.c
+++ b/openmp/libomptarget/test/api/omp_device_managed_memory.c
@@ -5,6 +5,7 @@
 #include <stdio.h>
 
 void *llvm_omp_target_alloc_shared(size_t, int);
+void llvm_omp_target_free_shared(void *, int);
 
 int main() {
   const int N = 64;
@@ -12,8 +13,8 @@ int main() {
 
   int *shared_ptr = llvm_omp_target_alloc_shared(N * sizeof(int), device);
 
-#pragma omp target teams distribute parallel for device(device) \
-            is_device_ptr(shared_ptr)
+#pragma omp target teams distribute parallel for device(device)                \
+    is_device_ptr(shared_ptr)
   for (int i = 0; i < N; ++i) {
     shared_ptr[i] = 1;
   }
@@ -22,8 +23,8 @@ int main() {
   for (int i = 0; i < N; ++i)
     sum += shared_ptr[i];
 
-  omp_target_free(shared_ptr, device);
+  llvm_omp_target_free_shared(shared_ptr, device);
   // CHECK: PASS
   if (sum == N)
-    printf ("PASS\n");
+    printf("PASS\n");
 }

diff  --git a/openmp/libomptarget/test/api/omp_host_pinned_memory.c b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
index 1ca4a6a921af9..8531dad190102 100644
--- a/openmp/libomptarget/test/api/omp_host_pinned_memory.c
+++ b/openmp/libomptarget/test/api/omp_host_pinned_memory.c
@@ -5,6 +5,7 @@
 
 // Allocate pinned memory on the host
 void *llvm_omp_target_alloc_host(size_t, int);
+void llvm_omp_target_free_host(void *, int);
 
 int main() {
   const int N = 64;
@@ -25,7 +26,7 @@ int main() {
   for (int i = 0; i < N; ++i)
     sum += hst_ptr[i];
 
-  omp_target_free(hst_ptr, device);
+  llvm_omp_target_free_host(hst_ptr, device);
   // CHECK: PASS
   if (sum == N)
     printf ("PASS\n");

diff  --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 222cd16b8b88e..fc806cdc554be 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1245,7 +1245,9 @@ static void **mk_dax_kmem_preferred;
 static void *(*kmp_target_alloc_host)(size_t size, int device);
 static void *(*kmp_target_alloc_shared)(size_t size, int device);
 static void *(*kmp_target_alloc_device)(size_t size, int device);
-static void *(*kmp_target_free)(void *ptr, int device);
+static void *(*kmp_target_free_host)(void *ptr, int device);
+static void *(*kmp_target_free_shared)(void *ptr, int device);
+static void *(*kmp_target_free_device)(void *ptr, int device);
 static bool __kmp_target_mem_available;
 #define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
   (MS == llvm_omp_target_host_mem_space ||                                     \
@@ -1358,10 +1360,15 @@ void __kmp_init_target_mem() {
       KMP_DLSYM("llvm_omp_target_alloc_shared");
   *(void **)(&kmp_target_alloc_device) =
       KMP_DLSYM("llvm_omp_target_alloc_device");
-  *(void **)(&kmp_target_free) = KMP_DLSYM("omp_target_free");
-  __kmp_target_mem_available = kmp_target_alloc_host &&
-                               kmp_target_alloc_shared &&
-                               kmp_target_alloc_device && kmp_target_free;
+  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
+  *(void **)(&kmp_target_free_shared) =
+      KMP_DLSYM("llvm_omp_target_free_shared");
+  *(void **)(&kmp_target_free_device) =
+      KMP_DLSYM("llvm_omp_target_free_device");
+  __kmp_target_mem_available =
+      kmp_target_alloc_host && kmp_target_alloc_shared &&
+      kmp_target_alloc_device && kmp_target_free_host &&
+      kmp_target_free_shared && kmp_target_free_device;
 }
 
 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
@@ -1774,13 +1781,18 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
   kmp_mem_desc_t desc;
   kmp_uintptr_t addr_align; // address to return to caller
   kmp_uintptr_t addr_descr; // address of memory block descriptor
-  if (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
-      (allocator > kmp_max_mem_alloc &&
-       KMP_IS_TARGET_MEM_SPACE(al->memspace))) {
-    KMP_DEBUG_ASSERT(kmp_target_free);
+  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
+                                     (allocator > kmp_max_mem_alloc &&
+                                      KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
     kmp_int32 device =
         __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
-    kmp_target_free(ptr, device);
+    if (allocator == llvm_omp_target_host_mem_alloc) {
+      kmp_target_free_host(ptr, device);
+    } else if (allocator == llvm_omp_target_shared_mem_alloc) {
+      kmp_target_free_shared(ptr, device);
+    } else if (allocator == llvm_omp_target_device_mem_alloc) {
+      kmp_target_free_device(ptr, device);
+    }
     return;
   }
 


        


More information about the Openmp-commits mailing list