[Openmp-commits] [openmp] 4fc3286 - [AMDGPU][Libomptarget][NFC] Split host and device malloc

Pushpinder Singh via Openmp-commits openmp-commits at lists.llvm.org
Mon May 31 05:09:29 PDT 2021


Author: Pushpinder Singh
Date: 2021-05-31T12:09:18Z
New Revision: 4fc328695143d29a87d45ab5095da7372a366c42

URL: https://github.com/llvm/llvm-project/commit/4fc328695143d29a87d45ab5095da7372a366c42
DIFF: https://github.com/llvm/llvm-project/commit/4fc328695143d29a87d45ab5095da7372a366c42.diff

LOG: [AMDGPU][Libomptarget][NFC] Split host and device malloc

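This patch splits the code path for host and device malloc. Callers now use
core::Runtime::HostMalloc or core::Runtime::DeviceMalloc instead of atmi_malloc,
and core::Runtime::Memfree instead of atmi_free; the atmi_malloc/atmi_free
wrappers are removed and core::Runtime::Malloc becomes private.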

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D103389

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
    openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
    openmp/libomptarget/plugins/amdgpu/impl/data.cpp
    openmp/libomptarget/plugins/amdgpu/impl/rt.h
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
index db044159663cd..a7066a0e1d8dc 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
@@ -50,7 +50,7 @@ static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
 
 struct atmiFreePtrDeletor {
   void operator()(void *p) {
-    atmi_free(p); // ignore failure to free
+    core::Runtime::Memfree(p); // ignore failure to free
   }
 };
 
@@ -67,10 +67,9 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
   }
 
   void *tempHostPtr;
-  hsa_status_t ret =
-      atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
+  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
   if (ret != HSA_STATUS_SUCCESS) {
-    DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
+    DEBUG_PRINT("HostMalloc: Unable to alloc %d bytes for temp scratch\n",
                 size);
     return ret;
   }
@@ -98,10 +97,9 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
 
   void *tempHostPtr;
 
-  hsa_status_t ret =
-      atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
+  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
   if (ret != HSA_STATUS_SUCCESS) {
-    DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
+    DEBUG_PRINT("HostMalloc: Unable to alloc %d bytes for temp scratch\n",
                 size);
     return ret;
   }
@@ -115,10 +113,3 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
   memcpy(dest, tempHostPtr, size);
   return HSA_STATUS_SUCCESS;
 }
-
-hsa_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); }
-
-hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
-                         atmi_devtype_t DeviceType) {
-  return core::Runtime::Malloc(ptr, size, DeviceId, DeviceType);
-}

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
index b0de144240351..92e2cc382bac2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
@@ -75,48 +75,6 @@ hsa_status_t atmi_module_register_from_memory_to_place(
 atmi_machine_t *atmi_machine_get_info();
 /** @} */
 
-/** \defgroup memory_functions ATMI Data Management
- * @{
- */
-/**
- * @brief Allocate memory from the specified memory place.
- *
- * @detail This function allocates memory from the specified memory place. If
- * the memory
- * place belongs primarily to the CPU, then the memory will be accessible by
- * other GPUs and CPUs in the system. If the memory place belongs primarily to a
- * GPU,
- * then it cannot be accessed by other devices in the system.
- *
- * @param[in] ptr The pointer to the memory that will be allocated.
- *
- * @param[in] size The size of the allocation in bytes.
- *
- * @param[in] place The memory place in the system to perform the allocation.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR The function encountered errors.
- *
- */
-hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
-                         atmi_devtype_t DeviceType);
-/**
- * @brief Frees memory that was previously allocated.
- *
- * @detail This function frees memory that was previously allocated by calling
- * @p atmi_malloc. It throws an error otherwise. It is illegal to access a
- * pointer after a call to this function.
- *
- * @param[in] ptr The pointer to the memory that has to be freed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR The function encountered errors.
- *
- */
-hsa_status_t atmi_free(void *ptr);
-
 hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
                              const void *hostSrc, size_t size,
                              hsa_agent_t agent);

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
index b96db1ef9d987..070145a56d3a7 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -47,6 +47,14 @@ hsa_status_t register_allocation(void *ptr, size_t size,
     return HSA_STATUS_SUCCESS;
 }
 
+hsa_status_t Runtime::DeviceMalloc(void **ptr, size_t size, int DeviceId) {
+  return Runtime::Malloc(ptr, size, DeviceId, ATMI_DEVTYPE_GPU);
+}
+
+hsa_status_t Runtime::HostMalloc(void **ptr, size_t size) {
+  return Runtime::Malloc(ptr, size, 0, ATMI_DEVTYPE_CPU);
+}
+
 hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId,
                              atmi_devtype_t DeviceType) {
   hsa_amd_memory_pool_t pool =

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
index 4e6f02800f4d2..fbde60f948292 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -61,12 +61,16 @@ class Runtime final {
   // data
   static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
   static hsa_status_t Memfree(void *);
-  static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
-                             atmi_devtype_t DeviceType);
+  static hsa_status_t HostMalloc(void **ptr, size_t size);
+  static hsa_status_t DeviceMalloc(void **ptr, size_t size, int DeviceId);
 
   int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
   int getDebugMode() const { return env_.getDebugMode(); }
 
+private:
+  static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
+                             atmi_devtype_t DeviceType);
+
 protected:
   Runtime() = default;
   ~Runtime() = default;

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 84830a301671a..c464f0449921e 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -351,7 +351,7 @@ class RTLDeviceInfoTy {
 
   struct atmiFreePtrDeletor {
     void operator()(void *p) {
-      atmi_free(p); // ignore failure to free
+      core::Runtime::Memfree(p); // ignore failure to free
     }
   };
 
@@ -1175,7 +1175,7 @@ struct device_environment {
 static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
   uint64_t rounded = 4 * ((size + 3) / 4);
   void *ptr;
-  hsa_status_t err = atmi_malloc(&ptr, rounded, DeviceId, ATMI_DEVTYPE_GPU);
+  hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, rounded, DeviceId);
   if (err != HSA_STATUS_SUCCESS) {
     return err;
   }
@@ -1183,7 +1183,7 @@ static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
   hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, rounded / 4);
   if (rc != HSA_STATUS_SUCCESS) {
     fprintf(stderr, "zero fill device_state failed with %u\n", rc);
-    atmi_free(ptr);
+    core::Runtime::Memfree(ptr);
     return HSA_STATUS_ERROR;
   }
 
@@ -1570,7 +1570,7 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
     return NULL;
   }
 
-  hsa_status_t err = atmi_malloc(&ptr, size, device_id, ATMI_DEVTYPE_GPU);
+  hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, size, device_id);
   DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
      (long long unsigned)(Elf64_Addr)ptr);
   ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;
@@ -1623,7 +1623,7 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
   assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
   hsa_status_t err;
   DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)tgt_ptr);
-  err = atmi_free(tgt_ptr);
+  err = core::Runtime::Memfree(tgt_ptr);
   if (err != HSA_STATUS_SUCCESS) {
     DP("Error when freeing CUDA memory\n");
     return OFFLOAD_FAIL;


        


More information about the Openmp-commits mailing list