[Openmp-commits] [openmp] r368488 - [OpenMP][libomptarget] Add support for close map modifier
Gheorghe-Teodor Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Fri Aug 9 14:32:58 PDT 2019
Author: gbercea
Date: Fri Aug 9 14:32:57 2019
New Revision: 368488
URL: http://llvm.org/viewvc/llvm-project?rev=368488&view=rev
Log:
[OpenMP][libomptarget] Add support for close map modifier
Summary:
This patch adds support for the close map modifier.
The close map modifier will overwrite the unified shared memory requirement and create a device copy of the data.
Reviewers: ABataev, Hahnfeld, caomhin, grokos, jdoerfert, AlexEichenberger
Reviewed By: Hahnfeld, AlexEichenberger
Subscribers: guansong, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D65340
Added:
openmp/trunk/libomptarget/test/unified_shared_memory/close_enter_exit.c
openmp/trunk/libomptarget/test/unified_shared_memory/close_manual.c
openmp/trunk/libomptarget/test/unified_shared_memory/close_modifier.c
Modified:
openmp/trunk/libomptarget/include/omptarget.h
openmp/trunk/libomptarget/src/device.cpp
openmp/trunk/libomptarget/src/device.h
openmp/trunk/libomptarget/src/omptarget.cpp
Modified: openmp/trunk/libomptarget/include/omptarget.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/include/omptarget.h?rev=368488&r1=368487&r2=368488&view=diff
==============================================================================
--- openmp/trunk/libomptarget/include/omptarget.h (original)
+++ openmp/trunk/libomptarget/include/omptarget.h Fri Aug 9 14:32:57 2019
@@ -47,6 +47,8 @@ enum tgt_map_type {
OMP_TGT_MAPTYPE_LITERAL = 0x100,
// mapping is implicit
OMP_TGT_MAPTYPE_IMPLICIT = 0x200,
+ // copy data to device
+ OMP_TGT_MAPTYPE_CLOSE = 0x400,
// member of struct, member given by [16 MSBs] - 1
OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000
};
Modified: openmp/trunk/libomptarget/src/device.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/device.cpp?rev=368488&r1=368487&r2=368488&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/device.cpp (original)
+++ openmp/trunk/libomptarget/src/device.cpp Fri Aug 9 14:32:57 2019
@@ -158,7 +158,7 @@ LookupResult DeviceTy::lookupMapping(voi
// to do an illegal mapping.
void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
- bool UpdateRefCount) {
+ bool UpdateRefCount, bool HasCloseModifier) {
void *rc = NULL;
IsHostPtr = false;
DataMapMtx.lock();
@@ -192,9 +192,9 @@ void *DeviceTy::getOrAllocTgtPtr(void *H
// privatized use host address. Any explicitly mapped variables also use
// host address where correctness is not impeded. In all other cases
// maps are respected.
- // TODO: In addition to the mapping rules above, when the close map
- // modifier is implemented, foce the mapping of the variable to the device.
- if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
+ // In addition to the mapping rules above, the close map
+ // modifier forces the mapping of the variable to the device.
+ if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier) {
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
IsHostPtr = true;
@@ -204,8 +204,8 @@ void *DeviceTy::getOrAllocTgtPtr(void *H
IsNew = true;
uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
- "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
- DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
+ "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
+ DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
(uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
rc = (void *)tp;
@@ -269,8 +269,9 @@ void *DeviceTy::getTgtPtrBegin(void *Hst
return NULL;
}
-int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
- if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
+int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
+ bool HasCloseModifier) {
+ if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && !HasCloseModifier)
return OFFLOAD_SUCCESS;
// Check if the pointer is contained in any sub-nodes.
int rc;
Modified: openmp/trunk/libomptarget/src/device.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/device.h?rev=368488&r1=368487&r2=368488&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/device.h (original)
+++ openmp/trunk/libomptarget/src/device.h Fri Aug 9 14:32:57 2019
@@ -132,11 +132,13 @@ struct DeviceTy {
long getMapEntryRefCnt(void *HstPtrBegin);
LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
- bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true);
+ bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true,
+ bool HasCloseModifier = false);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
bool UpdateRefCount, bool &IsHostPtr);
- int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete);
+ int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete,
+ bool HasCloseModifier = false);
int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
int disassociatePtr(void *HstPtrBegin);
Modified: openmp/trunk/libomptarget/src/omptarget.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptarget.cpp?rev=368488&r1=368487&r2=368488&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/omptarget.cpp (original)
+++ openmp/trunk/libomptarget/src/omptarget.cpp Fri Aug 9 14:32:57 2019
@@ -244,6 +244,9 @@ int target_data_begin(DeviceTy &Device,
bool IsNew, Pointer_IsNew;
bool IsHostPtr = false;
bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
+ // Force the creation of a device side copy of the data when:
+ // a close map modifier was associated with a map that contained a to.
+ bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
// UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
// have reached this point via __tgt_target_data_begin and not __tgt_target
// then no argument is marked as TARGET_PARAM ("omp target data map" is not
@@ -254,7 +257,8 @@ int target_data_begin(DeviceTy &Device,
DP("Has a pointer entry: \n");
// base is address of pointer.
Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
- sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef);
+ sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef,
+ HasCloseModifier);
if (!Pointer_TgtPtrBegin) {
DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
"illegal mapping).\n");
@@ -270,7 +274,7 @@ int target_data_begin(DeviceTy &Device,
}
void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
- data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef);
+ data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef, HasCloseModifier);
if (!TgtPtrBegin && data_size) {
// If data_size==0, then the argument could be a zero-length pointer to
// NULL, so getOrAlloc() returning NULL is not an error.
@@ -290,7 +294,8 @@ int target_data_begin(DeviceTy &Device,
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
bool copy = false;
- if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
+ if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
+ HasCloseModifier) {
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
copy = true;
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
@@ -370,6 +375,7 @@ int target_data_end(DeviceTy &Device, in
bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
+ bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
// If PTR_AND_OBJ, HstPtrBegin is address of pointee
void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
@@ -390,7 +396,8 @@ int target_data_end(DeviceTy &Device, in
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
bool CopyMember = false;
- if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
+ if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
+ HasCloseModifier) {
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
// Copy data only if the "parent" struct has RefCount==1.
@@ -455,7 +462,8 @@ int target_data_end(DeviceTy &Device, in
// Deallocate map
if (DelEntry) {
- int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete);
+ int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete,
+ HasCloseModifier);
if (rt != OFFLOAD_SUCCESS) {
DP("Deallocating data from device failed.\n");
return OFFLOAD_FAIL;
Added: openmp/trunk/libomptarget/test/unified_shared_memory/close_enter_exit.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/test/unified_shared_memory/close_enter_exit.c?rev=368488&view=auto
==============================================================================
--- openmp/trunk/libomptarget/test/unified_shared_memory/close_enter_exit.c (added)
+++ openmp/trunk/libomptarget/test/unified_shared_memory/close_enter_exit.c Fri Aug 9 14:32:57 2019
@@ -0,0 +1,95 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
+
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp requires unified_shared_memory
+
+#define N 1024
+
+int main(int argc, char *argv[]) {
+ int fails;
+ void *host_alloc = 0, *device_alloc = 0;
+ int *a = (int *)malloc(N * sizeof(int));
+
+ // Init
+ for (int i = 0; i < N; ++i) {
+ a[i] = 10;
+ }
+ host_alloc = &a[0];
+
+ //
+ // map + target no close
+ //
+#pragma omp target data map(tofrom : a[ : N]) map(tofrom : device_alloc)
+ {
+#pragma omp target map(tofrom : device_alloc)
+ { device_alloc = &a[0]; }
+ }
+
+ // CHECK: a used from unified memory.
+ if (device_alloc == host_alloc)
+ printf("a used from unified memory.\n");
+
+ //
+ // map + target with close
+ //
+ device_alloc = 0;
+#pragma omp target data map(close, tofrom : a[ : N]) map(tofrom : device_alloc)
+ {
+#pragma omp target map(tofrom : device_alloc)
+ { device_alloc = &a[0]; }
+ }
+ // CHECK: a copied to device.
+ if (device_alloc != host_alloc)
+ printf("a copied to device.\n");
+
+ //
+ // map + use_device_ptr no close
+ //
+ device_alloc = 0;
+#pragma omp target data map(tofrom : a[ : N]) use_device_ptr(a)
+ { device_alloc = &a[0]; }
+
+ // CHECK: a used from unified memory with use_device_ptr.
+ if (device_alloc == host_alloc)
+ printf("a used from unified memory with use_device_ptr.\n");
+
+ //
+ // map + use_device_ptr close
+ //
+ device_alloc = 0;
+#pragma omp target data map(close, tofrom : a[ : N]) use_device_ptr(a)
+ { device_alloc = &a[0]; }
+
+ // CHECK: a used from device memory with use_device_ptr.
+ if (device_alloc != host_alloc)
+ printf("a used from device memory with use_device_ptr.\n");
+
+ //
+ // map enter/exit + close
+ //
+ device_alloc = 0;
+#pragma omp target enter data map(close, to : a[ : N])
+
+#pragma omp target map(from : device_alloc)
+ { device_alloc = &a[0]; }
+
+#pragma omp target exit data map(from : a[ : N])
+
+ // CHECK: a has been mapped to the device.
+ if (device_alloc != host_alloc)
+ printf("a has been mapped to the device.\n");
+
+ free(a);
+
+ // CHECK: Done!
+ printf("Done!\n");
+
+ return 0;
+}
Added: openmp/trunk/libomptarget/test/unified_shared_memory/close_manual.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/test/unified_shared_memory/close_manual.c?rev=368488&view=auto
==============================================================================
--- openmp/trunk/libomptarget/test/unified_shared_memory/close_manual.c (added)
+++ openmp/trunk/libomptarget/test/unified_shared_memory/close_manual.c Fri Aug 9 14:32:57 2019
@@ -0,0 +1,86 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <omp.h>
+#include <stdio.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+extern void __tgt_register_requires(int64_t);
+
+extern void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types);
+
+extern void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types);
+
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+
+#pragma omp requires unified_shared_memory
+
+#define N 1024
+
+int main(int argc, char *argv[]) {
+ int fails;
+ void *host_alloc = 0, *device_alloc = 0;
+ int *a = (int *)malloc(N * sizeof(int));
+
+ // Manual registration of requires flags for Clang versions
+ // that do not support requires.
+ __tgt_register_requires(8);
+
+ // Init
+ for (int i = 0; i < N; ++i) {
+ a[i] = 10;
+ }
+ host_alloc = &a[0];
+
+// Dummy target region that ensures the runtime library is loaded when
+// the target data begin/end functions are manually called below.
+#pragma omp target
+ {}
+
+ // Manual calls
+ int device_id = omp_get_default_device();
+ int arg_num = 1;
+ void **args_base = (void **)&a;
+ void **args = (void **)&a;
+ int64_t arg_sizes[arg_num];
+
+ arg_sizes[0] = sizeof(int) * N;
+
+ int64_t arg_types[arg_num];
+
+ // Ox400 enables the CLOSE map type in the runtime:
+ // OMP_TGT_MAPTYPE_CLOSE = 0x400
+ // OMP_TGT_MAPTYPE_TO = 0x001
+ arg_types[0] = 0x400 | 0x001;
+
+ device_alloc = host_alloc;
+
+ __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
+ arg_types);
+
+#pragma omp target data use_device_ptr(a)
+ { device_alloc = a; }
+
+ __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
+ arg_types);
+
+ // CHECK: a was copied to the device
+ if (device_alloc != host_alloc)
+ printf("a was copied to the device\n");
+
+ free(a);
+
+ // CHECK: Done!
+ printf("Done!\n");
+
+ return 0;
+}
Added: openmp/trunk/libomptarget/test/unified_shared_memory/close_modifier.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/test/unified_shared_memory/close_modifier.c?rev=368488&view=auto
==============================================================================
--- openmp/trunk/libomptarget/test/unified_shared_memory/close_modifier.c (added)
+++ openmp/trunk/libomptarget/test/unified_shared_memory/close_modifier.c Fri Aug 9 14:32:57 2019
@@ -0,0 +1,135 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+// UNSUPPORTED: clang-6, clang-7, clang-8, clang-9
+
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp requires unified_shared_memory
+
+#define N 1024
+
+int main(int argc, char *argv[]) {
+ int fails;
+ void *host_alloc, *device_alloc;
+ void *host_data, *device_data;
+ int *alloc = (int *)malloc(N * sizeof(int));
+ int data[N];
+
+ for (int i = 0; i < N; ++i) {
+ alloc[i] = 10;
+ data[i] = 1;
+ }
+
+ host_data = &data[0];
+ host_alloc = &alloc[0];
+
+//
+// Test that updates on the device are not visible to host
+// when only a TO mapping is used.
+//
+#pragma omp target map(tofrom \
+ : device_data, device_alloc) map(close, to \
+ : alloc[:N], data \
+ [:N])
+ {
+ device_data = &data[0];
+ device_alloc = &alloc[0];
+
+ for (int i = 0; i < N; i++) {
+ alloc[i] += 1;
+ data[i] += 1;
+ }
+ }
+
+ // CHECK: Address of alloc on device different from host address.
+ if (device_alloc != host_alloc)
+ printf("Address of alloc on device different from host address.\n");
+
+ // CHECK: Address of data on device different from host address.
+ if (device_data != host_data)
+ printf("Address of data on device different from host address.\n");
+
+ // On the host, check that the arrays have been updated.
+ // CHECK: Alloc host values not updated: Succeeded
+ fails = 0;
+ for (int i = 0; i < N; i++) {
+ if (alloc[i] != 10)
+ fails++;
+ }
+ printf("Alloc host values not updated: %s\n",
+ (fails == 0) ? "Succeeded" : "Failed");
+
+ // CHECK: Data host values not updated: Succeeded
+ fails = 0;
+ for (int i = 0; i < N; i++) {
+ if (data[i] != 1)
+ fails++;
+ }
+ printf("Data host values not updated: %s\n",
+ (fails == 0) ? "Succeeded" : "Failed");
+
+ //
+ // Test that updates on the device are visible on host
+ // when a from is used.
+ //
+
+ for (int i = 0; i < N; i++) {
+ alloc[i] += 1;
+ data[i] += 1;
+ }
+
+#pragma omp target map(close, tofrom : alloc[:N], data[:N])
+ {
+ // CHECK: Alloc device values are correct: Succeeded
+ fails = 0;
+ for (int i = 0; i < N; i++) {
+ if (alloc[i] != 11)
+ fails++;
+ }
+ printf("Alloc device values are correct: %s\n",
+ (fails == 0) ? "Succeeded" : "Failed");
+ // CHECK: Data device values are correct: Succeeded
+ fails = 0;
+ for (int i = 0; i < N; i++) {
+ if (data[i] != 2)
+ fails++;
+ }
+ printf("Data device values are correct: %s\n",
+ (fails == 0) ? "Succeeded" : "Failed");
+
+ // Update values on the device
+ for (int i = 0; i < N; i++) {
+ alloc[i] += 1;
+ data[i] += 1;
+ }
+ }
+
+ // CHECK: Alloc host values updated: Succeeded
+ fails = 0;
+ for (int i = 0; i < N; i++) {
+ if (alloc[i] != 12)
+ fails++;
+ }
+ printf("Alloc host values updated: %s\n",
+ (fails == 0) ? "Succeeded" : "Failed");
+
+ // CHECK: Data host values updated: Succeeded
+ fails = 0;
+ for (int i = 0; i < N; i++) {
+ if (data[i] != 3)
+ fails++;
+ }
+ printf("Data host values updated: %s\n",
+ (fails == 0) ? "Succeeded" : "Failed");
+
+ free(alloc);
+
+ // CHECK: Done!
+ printf("Done!\n");
+
+ return 0;
+}
More information about the Openmp-commits
mailing list