[Openmp-commits] [openmp] 4fc3286 - [AMDGPU][Libomptarget][NFC] Split host and device malloc
Pushpinder Singh via Openmp-commits
openmp-commits at lists.llvm.org
Mon May 31 05:09:29 PDT 2021
Author: Pushpinder Singh
Date: 2021-05-31T12:09:18Z
New Revision: 4fc328695143d29a87d45ab5095da7372a366c42
URL: https://github.com/llvm/llvm-project/commit/4fc328695143d29a87d45ab5095da7372a366c42
DIFF: https://github.com/llvm/llvm-project/commit/4fc328695143d29a87d45ab5095da7372a366c42.diff
LOG: [AMDGPU][Libomptarget][NFC] Split host and device malloc
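This patch splits the allocation code path into separate host and device entry points (core::Runtime::HostMalloc and core::Runtime::DeviceMalloc), makes the generic core::Runtime::Malloc private, and removes the atmi_malloc/atmi_free wrappers in favour of direct calls into core::Runtime.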
Reviewed By: JonChesterfield
Differential Revision: https://reviews.llvm.org/D103389
Added: (none)
Modified:
openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
openmp/libomptarget/plugins/amdgpu/impl/data.cpp
openmp/libomptarget/plugins/amdgpu/impl/rt.h
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed: (none)
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
index db044159663cd..a7066a0e1d8dc 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
@@ -50,7 +50,7 @@ static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
struct atmiFreePtrDeletor {
void operator()(void *p) {
- atmi_free(p); // ignore failure to free
+ core::Runtime::Memfree(p); // ignore failure to free
}
};
@@ -67,10 +67,9 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
}
void *tempHostPtr;
- hsa_status_t ret =
- atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
+ hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
if (ret != HSA_STATUS_SUCCESS) {
- DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
+ DEBUG_PRINT("HostMalloc: Unable to alloc %d bytes for temp scratch\n",
size);
return ret;
}
@@ -98,10 +97,9 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
void *tempHostPtr;
- hsa_status_t ret =
- atmi_malloc(&tempHostPtr, size, 0 /* DeviceId */, ATMI_DEVTYPE_CPU);
+ hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
if (ret != HSA_STATUS_SUCCESS) {
- DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n",
+ DEBUG_PRINT("HostMalloc: Unable to alloc %d bytes for temp scratch\n",
size);
return ret;
}
@@ -115,10 +113,3 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
memcpy(dest, tempHostPtr, size);
return HSA_STATUS_SUCCESS;
}
-
-hsa_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); }
-
-hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
- atmi_devtype_t DeviceType) {
- return core::Runtime::Malloc(ptr, size, DeviceId, DeviceType);
-}
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
index b0de144240351..92e2cc382bac2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
@@ -75,48 +75,6 @@ hsa_status_t atmi_module_register_from_memory_to_place(
atmi_machine_t *atmi_machine_get_info();
/** @} */
-/** \defgroup memory_functions ATMI Data Management
- * @{
- */
-/**
- * @brief Allocate memory from the specified memory place.
- *
- * @detail This function allocates memory from the specified memory place. If
- * the memory
- * place belongs primarily to the CPU, then the memory will be accessible by
- * other GPUs and CPUs in the system. If the memory place belongs primarily to a
- * GPU,
- * then it cannot be accessed by other devices in the system.
- *
- * @param[in] ptr The pointer to the memory that will be allocated.
- *
- * @param[in] size The size of the allocation in bytes.
- *
- * @param[in] place The memory place in the system to perform the allocation.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR The function encountered errors.
- *
- */
-hsa_status_t atmi_malloc(void **ptr, size_t size, int DeviceId,
- atmi_devtype_t DeviceType);
-/**
- * @brief Frees memory that was previously allocated.
- *
- * @detail This function frees memory that was previously allocated by calling
- * @p atmi_malloc. It throws an error otherwise. It is illegal to access a
- * pointer after a call to this function.
- *
- * @param[in] ptr The pointer to the memory that has to be freed.
- *
- * @retval ::HSA_STATUS_SUCCESS The function has executed successfully.
- *
- * @retval ::HSA_STATUS_ERROR The function encountered errors.
- *
- */
-hsa_status_t atmi_free(void *ptr);
-
hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
const void *hostSrc, size_t size,
hsa_agent_t agent);
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
index b96db1ef9d987..070145a56d3a7 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -47,6 +47,14 @@ hsa_status_t register_allocation(void *ptr, size_t size,
return HSA_STATUS_SUCCESS;
}
+hsa_status_t Runtime::DeviceMalloc(void **ptr, size_t size, int DeviceId) {
+ return Runtime::Malloc(ptr, size, DeviceId, ATMI_DEVTYPE_GPU);
+}
+
+hsa_status_t Runtime::HostMalloc(void **ptr, size_t size) {
+ return Runtime::Malloc(ptr, size, 0, ATMI_DEVTYPE_CPU);
+}
+
hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId,
atmi_devtype_t DeviceType) {
hsa_amd_memory_pool_t pool =
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
index 4e6f02800f4d2..fbde60f948292 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -61,12 +61,16 @@ class Runtime final {
// data
static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
static hsa_status_t Memfree(void *);
- static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
- atmi_devtype_t DeviceType);
+ static hsa_status_t HostMalloc(void **ptr, size_t size);
+ static hsa_status_t DeviceMalloc(void **ptr, size_t size, int DeviceId);
int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
int getDebugMode() const { return env_.getDebugMode(); }
+private:
+ static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
+ atmi_devtype_t DeviceType);
+
protected:
Runtime() = default;
~Runtime() = default;
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 84830a301671a..c464f0449921e 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -351,7 +351,7 @@ class RTLDeviceInfoTy {
struct atmiFreePtrDeletor {
void operator()(void *p) {
- atmi_free(p); // ignore failure to free
+ core::Runtime::Memfree(p); // ignore failure to free
}
};
@@ -1175,7 +1175,7 @@ struct device_environment {
static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
uint64_t rounded = 4 * ((size + 3) / 4);
void *ptr;
- hsa_status_t err = atmi_malloc(&ptr, rounded, DeviceId, ATMI_DEVTYPE_GPU);
+ hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, rounded, DeviceId);
if (err != HSA_STATUS_SUCCESS) {
return err;
}
@@ -1183,7 +1183,7 @@ static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, rounded / 4);
if (rc != HSA_STATUS_SUCCESS) {
fprintf(stderr, "zero fill device_state failed with %u\n", rc);
- atmi_free(ptr);
+ core::Runtime::Memfree(ptr);
return HSA_STATUS_ERROR;
}
@@ -1570,7 +1570,7 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
return NULL;
}
- hsa_status_t err = atmi_malloc(&ptr, size, device_id, ATMI_DEVTYPE_GPU);
+ hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, size, device_id);
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
(long long unsigned)(Elf64_Addr)ptr);
ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;
@@ -1623,7 +1623,7 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
hsa_status_t err;
DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)tgt_ptr);
- err = atmi_free(tgt_ptr);
+ err = core::Runtime::Memfree(tgt_ptr);
if (err != HSA_STATUS_SUCCESS) {
DP("Error when freeing CUDA memory\n");
return OFFLOAD_FAIL;
More information about the Openmp-commits mailing list