[Openmp-commits] [openmp] 8e4836b - [OpenMP] Use IsHostPtr where needed for targetDataEnd
Joel E. Denny via Openmp-commits
openmp-commits at lists.llvm.org
Wed Sep 1 14:38:08 PDT 2021
Author: Joel E. Denny
Date: 2021-09-01T17:31:42-04:00
New Revision: 8e4836b2a296e4e78cc86f52014c48d9ad5aaf1a
URL: https://github.com/llvm/llvm-project/commit/8e4836b2a296e4e78cc86f52014c48d9ad5aaf1a
DIFF: https://github.com/llvm/llvm-project/commit/8e4836b2a296e4e78cc86f52014c48d9ad5aaf1a.diff
LOG: [OpenMP] Use IsHostPtr where needed for targetDataEnd
As discussed in D105990, without this patch, `targetDataEnd`
determines whether to transfer data or delete a device mapping (as
opposed to assuming it's in shared memory) using two different
conditions, each of which is broken for some cases:
1. `!(UNIFIED_SHARED_MEMORY && TgtPtrBegin == HstPtrBegin)`: The
broken case is rare: the device and host might happen to use the
same address for their mapped allocations. I don't know how to
write a test that's likely to reveal this case, but this patch does
fix it, as discussed below.
2. `!UNIFIED_SHARED_MEMORY || HasCloseModifier`: There are at least
two broken cases:
1. The `close` modifier might have been specified on an `omp
target enter data` but not the corresponding `omp target exit
data`, which thus might falsely assume a mapping is in shared
memory. The test `unified_shared_memory/close_enter_exit.c`
already has a missing deletion as a result, and this patch adds
a check for that. This patch also adds the new test
`close_member.c` to reveal a missing transfer and deletion.
2. Use of discrete memory might have been forced by
`omp_target_associate_ptr`, as in the test
`unified_shared_memory/api.c`. In the current `targetDataEnd`
implementation, this condition turns out not be used for this
case: because the reference count is infinite, a transfer is
possible only with an `always` modifier, and this condition is
never used in that case. To ensure it's never used for that
case in the future, this patch adds the test
`unified_shared_memory/associate_ptr.c`.
Fortunately, `DeviceTy::getTgtPtrBegin` already has a solution: it
reports whether the allocation was found in shared memory via the
variable `IsHostPtr`.
After this patch, `HasCloseModifier` is no longer used in
`targetDataEnd`, and I wonder if the `close` modifier is ever useful
on an `omp target data end`.
Reviewed By: grokos
Differential Revision: https://reviews.llvm.org/D107925
Added:
openmp/libomptarget/test/unified_shared_memory/associate_ptr.c
openmp/libomptarget/test/unified_shared_memory/close_member.c
Modified:
openmp/libomptarget/src/device.cpp
openmp/libomptarget/src/device.h
openmp/libomptarget/src/omptarget.cpp
openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
Removed:
################################################################################
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index ff5b2882b46f6..c70d9e8975726 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -387,10 +387,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
}
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size,
- bool HasCloseModifier, bool HasHoldModifier) {
- if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
- !HasCloseModifier)
- return OFFLOAD_SUCCESS;
+ bool HasHoldModifier) {
// Check if the pointer is contained in any sub-nodes.
int rc;
DataMapMtx.lock();
diff --git a/openmp/libomptarget/src/device.h b/openmp/libomptarget/src/device.h
index 78a8e274857a8..ea87a4b270f0d 100644
--- a/openmp/libomptarget/src/device.h
+++ b/openmp/libomptarget/src/device.h
@@ -283,8 +283,14 @@ struct DeviceTy {
bool UpdateRefCount, bool UseHoldRefCount,
bool &IsHostPtr, bool MustContain = false,
bool ForceDelete = false);
- int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool HasCloseModifier,
- bool HasHoldModifier);
+ /// For the map entry for \p HstPtrBegin, decrement the reference count
+ /// specified by \p HasHoldModifier and, if the the total reference count is
+ /// then zero, deallocate the corresponding device storage and remove the map
+ /// entry. Return \c OFFLOAD_SUCCESS if the map entry existed, and return
+ /// \c OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
+ /// this function if the map entry is not expected to exist because
+ /// \p HstPtrBegin uses shared memory.
+ int deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool HasHoldModifier);
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
int disassociatePtr(void *HstPtrBegin);
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 086503a6be6d1..380e37e3cf15c 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -609,15 +609,11 @@ struct DeallocTgtPtrInfo {
void *HstPtrBegin;
/// Size of the data
int64_t DataSize;
- /// Whether it has \p close modifier
- bool HasCloseModifier;
/// Whether it has \p ompx_hold modifier
bool HasHoldModifier;
- DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasCloseModifier,
- bool HasHoldModifier)
- : HstPtrBegin(HstPtr), DataSize(Size), HasCloseModifier(HasCloseModifier),
- HasHoldModifier(HasHoldModifier) {}
+ DeallocTgtPtrInfo(void *HstPtr, int64_t Size, bool HasHoldModifier)
+ : HstPtrBegin(HstPtr), DataSize(Size), HasHoldModifier(HasHoldModifier) {}
};
} // namespace
@@ -682,7 +678,6 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
(ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&
!(FromMapper && I == 0);
bool ForceDelete = ArgTypes[I] & OMP_TGT_MAPTYPE_DELETE;
- bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE;
bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT;
bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD;
@@ -743,15 +738,12 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) {
bool Always = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
bool CopyMember = false;
- if (!(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
- HasCloseModifier) {
+ if (!IsHostPtr) {
if (IsLast)
CopyMember = true;
}
- if ((DelEntry || Always || CopyMember) &&
- !(PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
- TgtPtrBegin == HstPtrBegin)) {
+ if ((DelEntry || Always || CopyMember) && !IsHostPtr) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize,
@@ -805,9 +797,8 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
Device.ShadowMtx.unlock();
// Add pointer to the buffer for later deallocation
- if (DelEntry)
- DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasCloseModifier,
- HasHoldModifier);
+ if (DelEntry && !IsHostPtr)
+ DeallocTgtPtrs.emplace_back(HstPtrBegin, DataSize, HasHoldModifier);
}
}
@@ -824,7 +815,7 @@ int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum,
if (FromMapperBase && FromMapperBase == Info.HstPtrBegin)
continue;
Ret = Device.deallocTgtPtr(Info.HstPtrBegin, Info.DataSize,
- Info.HasCloseModifier, Info.HasHoldModifier);
+ Info.HasHoldModifier);
if (Ret != OFFLOAD_SUCCESS) {
REPORT("Deallocating data from device failed.\n");
return OFFLOAD_FAIL;
diff --git a/openmp/libomptarget/test/unified_shared_memory/associate_ptr.c b/openmp/libomptarget/test/unified_shared_memory/associate_ptr.c
new file mode 100644
index 0000000000000..7911046f5f3b1
--- /dev/null
+++ b/openmp/libomptarget/test/unified_shared_memory/associate_ptr.c
@@ -0,0 +1,36 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+// REQUIRES: unified_shared_memory
+// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
+
+// Fails on amdgcn with error: GPU Memory Error
+// XFAIL: amdgcn-amd-amdhsa
+
+#include <assert.h>
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp requires unified_shared_memory
+
+int main(int argc, char *argv[]) {
+ int dev = omp_get_default_device();
+ int x = 10;
+ int *x_dev = (int *)omp_target_alloc(sizeof x, dev);
+ assert(x_dev && "expected omp_target_alloc to succeed");
+ int rc = omp_target_associate_ptr(&x, x_dev, sizeof x, 0, dev);
+ assert(!rc && "expected omp_target_associate_ptr to succeed");
+
+ // To determine whether x needs to be transfered, the runtime cannot simply
+ // check whether unified shared memory is enabled and the 'close' modifier is
+ // specified. It must check whether x was previously placed in device memory
+ // by, for example, omp_target_associate_ptr.
+ #pragma omp target map(always, tofrom: x)
+ x = 20;
+
+ // CHECK: x=20
+ printf("x=%d\n", x);
+ // CHECK: present: 1
+ printf("present: %d\n", omp_target_is_present(&x, dev));
+
+ return 0;
+}
diff --git a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
index 006fd39e6bd32..e159ed82c25cc 100644
--- a/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
+++ b/openmp/libomptarget/test/unified_shared_memory/close_enter_exit.c
@@ -17,6 +17,7 @@ int main(int argc, char *argv[]) {
int fails;
void *host_alloc = 0, *device_alloc = 0;
int *a = (int *)malloc(N * sizeof(int));
+ int dev = omp_get_default_device();
// Init
for (int i = 0; i < N; ++i) {
@@ -79,14 +80,25 @@ int main(int argc, char *argv[]) {
#pragma omp target enter data map(close, to : a[ : N])
#pragma omp target map(from : device_alloc)
- { device_alloc = &a[0]; }
+ {
+ device_alloc = &a[0];
+ a[0] = 99;
+ }
+ // 'close' is missing, so the runtime must check whether s is actually in
+ // shared memory in order to determine whether to transfer data and delete the
+ // allocation.
#pragma omp target exit data map(from : a[ : N])
// CHECK: a has been mapped to the device.
if (device_alloc != host_alloc)
printf("a has been mapped to the device.\n");
+ // CHECK: a[0]=99
+ // CHECK: a is present: 0
+ printf("a[0]=%d\n", a[0]);
+ printf("a is present: %d\n", omp_target_is_present(a, dev));
+
free(a);
// CHECK: Done!
diff --git a/openmp/libomptarget/test/unified_shared_memory/close_member.c b/openmp/libomptarget/test/unified_shared_memory/close_member.c
new file mode 100644
index 0000000000000..1b23c4a630dd8
--- /dev/null
+++ b/openmp/libomptarget/test/unified_shared_memory/close_member.c
@@ -0,0 +1,44 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+
+// REQUIRES: unified_shared_memory
+// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
+
+// Fails on amdgcn with error: GPU Memory Error
+// XFAIL: amdgcn-amd-amdhsa
+
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp requires unified_shared_memory
+
+struct S {
+ int x;
+ int y;
+};
+
+int main(int argc, char *argv[]) {
+ int dev = omp_get_default_device();
+ struct S s = {10, 20};
+
+ #pragma omp target enter data map(close, to: s)
+ #pragma omp target map(alloc: s)
+ {
+ s.x = 11;
+ s.y = 21;
+ }
+ // To determine whether x needs to be transfered or deleted, the runtime
+ // cannot simply check whether unified shared memory is enabled and the
+ // 'close' modifier is specified. It must check whether x was previously
+ // placed in device memory by, for example, a 'close' modifier that isn't
+ // specified here. The following struct member case checks a special code
+ // path in the runtime implementation where members are transferred before
+ // deletion of the struct.
+ #pragma omp target exit data map(from: s.x, s.y)
+
+ // CHECK: s.x=11, s.y=21
+ printf("s.x=%d, s.y=%d\n", s.x, s.y);
+ // CHECK: present: 0
+ printf("present: %d\n", omp_target_is_present(&s, dev));
+
+ return 0;
+}
More information about the Openmp-commits
mailing list