[Openmp-commits] [openmp] 14ff611 - Revert "[OpenMP][AMDGPU] Switch host-device memory copy to asynchronous version"
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Wed Dec 8 00:23:40 PST 2021
Author: Jon Chesterfield
Date: 2021-12-08T08:23:12Z
New Revision: 14ff611fe12f84324febbf94cb1d93de7a5eb95d
URL: https://github.com/llvm/llvm-project/commit/14ff611fe12f84324febbf94cb1d93de7a5eb95d
DIFF: https://github.com/llvm/llvm-project/commit/14ff611fe12f84324febbf94cb1d93de7a5eb95d.diff
LOG: Revert "[OpenMP][AMDGPU] Switch host-device memory copy to asynchronous version"
This reverts commit 6de698bf10996b532632bb9dfa9fd420c5af62af.
It didn't build in the dynamic_hsa configuration
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
index d41ca9713623d..7f841aaa8f0d2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
@@ -5,6 +5,9 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "hsa_api.h"
+#include "impl_runtime.h"
+#include "internal.h"
#include "rt.h"
#include <memory>
@@ -12,27 +15,29 @@
* Data
*/
-// host pointer (either src or dest) must be locked via hsa_amd_memory_lock
-static hsa_status_t invoke_hsa_copy(hsa_signal_t signal, void *dest,
- hsa_agent_t agent, const void *src,
- size_t size) {
+static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
+ const void *src, size_t size,
+ hsa_agent_t agent) {
const hsa_signal_value_t init = 1;
const hsa_signal_value_t success = 0;
- hsa_signal_store_screlease(signal, init);
+ hsa_signal_store_screlease(sig, init);
- hsa_status_t err = hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0,
- nullptr, signal);
- if (err != HSA_STATUS_SUCCESS)
+ hsa_status_t err =
+ hsa_amd_memory_async_copy(dest, agent, src, agent, size, 0, NULL, sig);
+ if (err != HSA_STATUS_SUCCESS) {
return err;
+ }
// async_copy reports success by decrementing and failure by setting to < 0
hsa_signal_value_t got = init;
- while (got == init)
- got = hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_NE, init,
+ while (got == init) {
+ got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init,
UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
+ }
- if (got != success)
+ if (got != success) {
return HSA_STATUS_ERROR;
+ }
return err;
}
@@ -43,58 +48,19 @@ struct implFreePtrDeletor {
}
};
-enum CopyDirection { H2D, D2H };
-
-static hsa_status_t locking_async_memcpy(enum CopyDirection direction,
- hsa_signal_t signal, void *dest,
- hsa_agent_t agent, void *src,
- void *lockingPtr, size_t size) {
- hsa_status_t err;
-
- void *lockedPtr = nullptr;
- err = hsa_amd_memory_lock(lockingPtr, size, nullptr, 0, (void **)&lockedPtr);
- if (err != HSA_STATUS_SUCCESS)
- return err;
-
- switch (direction) {
- case H2D:
- err = invoke_hsa_copy(signal, dest, agent, lockedPtr, size);
- break;
- case D2H:
- err = invoke_hsa_copy(signal, lockedPtr, agent, src, size);
- break;
- default:
- err = HSA_STATUS_ERROR; // fall into unlock before returning
- }
-
- if (err != HSA_STATUS_SUCCESS) {
- // do not leak locked host pointers, but discard potential error message
- hsa_amd_memory_unlock(lockingPtr);
- return err;
- }
-
- err = hsa_amd_memory_unlock(lockingPtr);
- if (err != HSA_STATUS_SUCCESS)
- return err;
-
- return HSA_STATUS_SUCCESS;
-}
-
hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
- void *hostSrc, size_t size,
- hsa_agent_t device_agent,
+ const void *hostSrc, size_t size,
+ hsa_agent_t agent,
hsa_amd_memory_pool_t MemoryPool) {
- hsa_status_t err;
-
- err = locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
- device_agent, hostSrc, hostSrc, size);
-
- if (err == HSA_STATUS_SUCCESS)
- return err;
+ hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size);
- // async memcpy sometimes fails in situations where
+ // hsa_memory_copy sometimes fails in situations where
// allocate + copy succeeds. Looks like it might be related to
// locking part of a read only segment. Fall back for now.
+ if (rc == HSA_STATUS_SUCCESS) {
+ return HSA_STATUS_SUCCESS;
+ }
+
void *tempHostPtr;
hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
if (ret != HSA_STATUS_SUCCESS) {
@@ -104,26 +70,26 @@ hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
memcpy(tempHostPtr, hostSrc, size);
- return locking_async_memcpy(CopyDirection::H2D, signal, deviceDest,
- device_agent, tempHostPtr, tempHostPtr, size);
+ if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) !=
+ HSA_STATUS_SUCCESS) {
+ return HSA_STATUS_ERROR;
+ }
+ return HSA_STATUS_SUCCESS;
}
-hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *hostDest,
- void *deviceSrc, size_t size,
- hsa_agent_t deviceAgent,
+hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *dest,
+ const void *deviceSrc, size_t size,
+ hsa_agent_t agent,
hsa_amd_memory_pool_t MemoryPool) {
- hsa_status_t err;
-
- // device has always visibility over both pointers, so use that
- err = locking_async_memcpy(CopyDirection::D2H, signal, hostDest, deviceAgent,
- deviceSrc, hostDest, size);
-
- if (err == HSA_STATUS_SUCCESS)
- return err;
+ hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size);
// hsa_memory_copy sometimes fails in situations where
// allocate + copy succeeds. Looks like it might be related to
// locking part of a read only segment. Fall back for now.
+ if (rc == HSA_STATUS_SUCCESS) {
+ return HSA_STATUS_SUCCESS;
+ }
+
void *tempHostPtr;
hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
if (ret != HSA_STATUS_SUCCESS) {
@@ -132,11 +98,11 @@ hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *hostDest,
}
std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
- err = locking_async_memcpy(CopyDirection::D2H, signal, tempHostPtr,
- deviceAgent, deviceSrc, tempHostPtr, size);
- if (err != HSA_STATUS_SUCCESS)
+ if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) !=
+ HSA_STATUS_SUCCESS) {
return HSA_STATUS_ERROR;
+ }
- memcpy(hostDest, tempHostPtr, size);
+ memcpy(dest, tempHostPtr, size);
return HSA_STATUS_SUCCESS;
}
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h
index c99b3e752ccec..52efaffb515f0 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h
@@ -19,12 +19,13 @@ hsa_status_t impl_module_register_from_memory_to_place(
void *cb_state);
hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
- void *hostSrc, size_t size,
- hsa_agent_t device_agent,
+ const void *hostSrc, size_t size,
+ hsa_agent_t agent,
hsa_amd_memory_pool_t MemoryPool);
-hsa_status_t impl_memcpy_d2h(hsa_signal_t sig, void *hostDest, void *deviceSrc,
- size_t size, hsa_agent_t device_agent,
+hsa_status_t impl_memcpy_d2h(hsa_signal_t sig, void *hostDest,
+ const void *deviceSrc, size_t size,
+ hsa_agent_t agent,
hsa_amd_memory_pool_t MemoryPool);
}
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 2da012daadc43..5434692d0119d 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -464,9 +464,10 @@ class RTLDeviceInfoTy {
"");
static const int Default_WG_Size = getGridValue<64>().GV_Default_WG_Size;
- using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, void *, size_t size,
- hsa_agent_t, hsa_amd_memory_pool_t);
- hsa_status_t freesignalpool_memcpy(void *dest, void *src, size_t size,
+ using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
+ size_t size, hsa_agent_t,
+ hsa_amd_memory_pool_t);
+ hsa_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size,
MemcpyFunc Func, int32_t deviceId) {
hsa_agent_t agent = HSAAgents[deviceId];
hsa_signal_t s = FreeSignalPool.pop();
@@ -478,13 +479,13 @@ class RTLDeviceInfoTy {
return r;
}
- hsa_status_t freesignalpool_memcpy_d2h(void *dest, void *src, size_t size,
- int32_t deviceId) {
+ hsa_status_t freesignalpool_memcpy_d2h(void *dest, const void *src,
+ size_t size, int32_t deviceId) {
return freesignalpool_memcpy(dest, src, size, impl_memcpy_d2h, deviceId);
}
- hsa_status_t freesignalpool_memcpy_h2d(void *dest, void *src, size_t size,
- int32_t deviceId) {
+ hsa_status_t freesignalpool_memcpy_h2d(void *dest, const void *src,
+ size_t size, int32_t deviceId) {
return freesignalpool_memcpy(dest, src, size, impl_memcpy_h2d, deviceId);
}
More information about the Openmp-commits
mailing list