[Openmp-commits] [openmp] b6c2f53 - [OpenMP] Add allocator support for target memory
Hansang Bae via Openmp-commits
openmp-commits at lists.llvm.org
Tue Mar 2 14:46:12 PST 2021
Author: Hansang Bae
Date: 2021-03-02T16:45:12-06:00
New Revision: b6c2f538b22b4053ce10cfa6cf60c5244df202ac
URL: https://github.com/llvm/llvm-project/commit/b6c2f538b22b4053ce10cfa6cf60c5244df202ac
DIFF: https://github.com/llvm/llvm-project/commit/b6c2f538b22b4053ce10cfa6cf60c5244df202ac.diff
LOG: [OpenMP] Add allocator support for target memory
This is a preview of allocator support for target memory. It depends on the
offload runtime API listed below, which allocates memory as described.
llvm_omp_target_alloc_host(size_t size, int device_num);
-- Returns non-migratable memory owned by host.
-- Memory is accessible by host and device(s).
llvm_omp_target_alloc_shared(size_t size, int device_num);
-- Returns migratable memory owned by host and device.
-- Memory is accessible by host and device.
llvm_omp_target_alloc_device(size_t size, int device_num);
-- Returns memory owned by device.
-- Memory is only accessible by device.
New memory space and predefined allocator names are:
-- llvm_omp_target_host_mem_space
-- llvm_omp_target_shared_mem_space
-- llvm_omp_target_device_mem_space
-- llvm_omp_target_host_mem_alloc
-- llvm_omp_target_shared_mem_alloc
-- llvm_omp_target_device_mem_alloc
Differential Revision: https://reviews.llvm.org/D96669
Added:
Modified:
openmp/runtime/src/dllexports
openmp/runtime/src/include/omp.h.var
openmp/runtime/src/include/omp_lib.f90.var
openmp/runtime/src/include/omp_lib.h.var
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_alloc.cpp
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_stub.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 362550d047a5..961bf24a9f3b 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -555,12 +555,20 @@ kmp_set_disp_num_buffers 890
omp_cgroup_mem_alloc DATA
omp_pteam_mem_alloc DATA
omp_thread_mem_alloc DATA
+ # Preview of target memory support
+ llvm_omp_target_host_mem_alloc DATA
+ llvm_omp_target_shared_mem_alloc DATA
+ llvm_omp_target_device_mem_alloc DATA
omp_default_mem_space DATA
omp_large_cap_mem_space DATA
omp_const_mem_space DATA
omp_high_bw_mem_space DATA
omp_low_lat_mem_space DATA
+ # Preview of target memory support
+ llvm_omp_target_host_mem_space DATA
+ llvm_omp_target_shared_mem_space DATA
+ llvm_omp_target_device_mem_space DATA
%ifndef stub
# Ordinals between 900 and 999 are reserved
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eff94ca21029..28e925948263 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -357,12 +357,21 @@
extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc;
extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc;
+ /* Preview of target memory support */
+ extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
+ extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
+ extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
+
typedef omp_uintptr_t omp_memspace_handle_t;
extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space;
+ /* Preview of target memory support */
+ extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space;
+ extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
+ extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space;
# else
# if __cplusplus >= 201103
typedef enum omp_allocator_handle_t : omp_uintptr_t
@@ -379,6 +388,10 @@
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
+ /* Preview of target memory support */
+ llvm_omp_target_host_mem_alloc = 100,
+ llvm_omp_target_shared_mem_alloc = 101,
+ llvm_omp_target_device_mem_alloc = 102,
KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX
} omp_allocator_handle_t;
# if __cplusplus >= 201103
@@ -392,6 +405,10 @@
omp_const_mem_space = 2,
omp_high_bw_mem_space = 3,
omp_low_lat_mem_space = 4,
+ /* Preview of target memory support */
+ llvm_omp_target_host_mem_space = 100,
+ llvm_omp_target_shared_mem_space = 101,
+ llvm_omp_target_device_mem_space = 102,
KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX
} omp_memspace_handle_t;
# endif
diff --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.f90.var
index ef2d656924ca..4b39f1af54a0 100644
--- a/openmp/runtime/src/include/omp_lib.f90.var
+++ b/openmp/runtime/src/include/omp_lib.f90.var
@@ -137,12 +137,20 @@
integer (kind=omp_allocator_handle_kind), parameter :: omp_cgroup_mem_alloc = 6
integer (kind=omp_allocator_handle_kind), parameter :: omp_pteam_mem_alloc = 7
integer (kind=omp_allocator_handle_kind), parameter :: omp_thread_mem_alloc = 8
+ ! Preview of target memory support
+ integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_host_mem_alloc = 100
+ integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_shared_mem_alloc = 101
+ integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_device_mem_alloc = 102
integer (kind=omp_memspace_handle_kind), parameter :: omp_default_mem_space = 0
integer (kind=omp_memspace_handle_kind), parameter :: omp_large_cap_mem_space = 1
integer (kind=omp_memspace_handle_kind), parameter :: omp_const_mem_space = 2
integer (kind=omp_memspace_handle_kind), parameter :: omp_high_bw_mem_space = 3
integer (kind=omp_memspace_handle_kind), parameter :: omp_low_lat_mem_space = 4
+ ! Preview of target memory support
+ integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_host_mem_space = 100
+ integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_shared_mem_space = 101
+ integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_device_mem_space = 102
integer (kind=omp_pause_resource_kind), parameter :: omp_pause_resume = 0
integer (kind=omp_pause_resource_kind), parameter :: omp_pause_soft = 1
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index 7dcd21afa192..50c8ecb9d1e3 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -214,6 +214,13 @@
parameter(omp_pteam_mem_alloc=7)
integer(kind=omp_allocator_handle_kind)omp_thread_mem_alloc
parameter(omp_thread_mem_alloc=8)
+ ! Preview of target memory support
+ integer(kind=omp_allocator_handle_kind)llvm_omp_target_host_mem_alloc
+ parameter(llvm_omp_target_host_mem_alloc=100)
+ integer(kind=omp_allocator_handle_kind)llvm_omp_target_shared_mem_alloc
+ parameter(llvm_omp_target_shared_mem_alloc=101)
+ integer(kind=omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
+ parameter(llvm_omp_target_device_mem_alloc=102)
integer(kind=omp_memspace_handle_kind)omp_default_mem_space
parameter(omp_default_mem_space=0)
@@ -225,6 +232,13 @@
parameter(omp_high_bw_mem_space=3)
integer(kind=omp_memspace_handle_kind)omp_low_lat_mem_space
parameter(omp_low_lat_mem_space=4)
+ ! Preview of target memory support
+ integer(kind=omp_memspace_handle_kind)llvm_omp_target_host_mem_space
+ parameter(llvm_omp_target_host_mem_space=100)
+ integer(kind=omp_memspace_handle_kind)llvm_omp_target_shared_mem_space
+ parameter(llvm_omp_target_shared_mem_space=101)
+ integer(kind=omp_memspace_handle_kind)llvm_omp_target_device_mem_space
+ parameter(llvm_omp_target_device_mem_space=102)
integer(kind=omp_pause_resource_kind)omp_pause_resume
parameter(omp_pause_resume=0)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index d5b314c91468..aa0ed7e12def 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -958,6 +958,10 @@ extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;
+// Preview of target memory support
+extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
typedef struct {
omp_alloctrait_key_t key;
@@ -974,6 +978,10 @@ extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;
+// Preview of target memory support
+extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
extern omp_allocator_handle_t const kmp_max_mem_alloc;
extern omp_allocator_handle_t __kmp_def_allocator;
@@ -1011,6 +1019,7 @@ extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
+extern void __kmp_init_target_mem();
/* ------------------------------------------------------------------------ */
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 4bb3d2030e6c..857855cf12d6 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1242,6 +1242,20 @@ static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
+// Preview of target memory support
+static void *(*kmp_target_alloc_host)(size_t size, int device);
+static void *(*kmp_target_alloc_shared)(size_t size, int device);
+static void *(*kmp_target_alloc_device)(size_t size, int device);
+static void *(*kmp_target_free)(void *ptr, int device);
+static bool __kmp_target_mem_available;
+#define KMP_IS_TARGET_MEM_SPACE(MS) \
+ (MS == llvm_omp_target_host_mem_space || \
+ MS == llvm_omp_target_shared_mem_space || \
+ MS == llvm_omp_target_device_mem_space)
+#define KMP_IS_TARGET_MEM_ALLOC(MA) \
+ (MA == llvm_omp_target_host_mem_alloc || \
+ MA == llvm_omp_target_shared_mem_alloc || \
+ MA == llvm_omp_target_device_mem_alloc)
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
static inline void chk_kind(void ***pkind) {
@@ -1338,6 +1352,18 @@ void __kmp_fini_memkind() {
mk_dax_kmem_preferred = NULL;
#endif
}
+// Preview of target memory support
+void __kmp_init_target_mem() {
+ *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
+ *(void **)(&kmp_target_alloc_shared) =
+ KMP_DLSYM("llvm_omp_target_alloc_shared");
+ *(void **)(&kmp_target_alloc_device) =
+ KMP_DLSYM("llvm_omp_target_alloc_device");
+ *(void **)(&kmp_target_free) = KMP_DLSYM("omp_target_free");
+ __kmp_target_mem_available = kmp_target_alloc_host &&
+ kmp_target_alloc_shared &&
+ kmp_target_alloc_device && kmp_target_free;
+}
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
int ntraits,
@@ -1345,7 +1371,7 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
// OpenMP 5.0 only allows predefined memspaces
KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
- ms == omp_high_bw_mem_space);
+ ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
kmp_allocator_t *al;
int i;
al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
@@ -1423,6 +1449,9 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
al->memkind = mk_default;
}
}
+ } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
+ __kmp_free(al);
+ return omp_null_allocator;
} else {
if (ms == omp_high_bw_mem_space) {
// cannot detect HBW memory presence without memkind library
@@ -1543,6 +1572,22 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
}
}
} else if (allocator < kmp_max_mem_alloc) {
+ if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
+ // Use size input directly as the memory may not be accessible on host.
+ // Use default device for now.
+ if (__kmp_target_mem_available) {
+ kmp_int32 device =
+ __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
+ if (allocator == llvm_omp_target_host_mem_alloc)
+ ptr = kmp_target_alloc_host(size, device);
+ else if (allocator == llvm_omp_target_shared_mem_alloc)
+ ptr = kmp_target_alloc_shared(size, device);
+ else // allocator == llvm_omp_target_device_mem_alloc
+ ptr = kmp_target_alloc_device(size, device);
+ }
+ return ptr;
+ }
+
// pre-defined allocator
if (allocator == omp_high_bw_mem_alloc) {
// ptr = NULL;
@@ -1551,6 +1596,18 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
} else {
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
}
+ } else if (KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
+ if (__kmp_target_mem_available) {
+ kmp_int32 device =
+ __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
+ if (al->memspace == llvm_omp_target_host_mem_space)
+ ptr = kmp_target_alloc_host(size, device);
+ else if (al->memspace == llvm_omp_target_shared_mem_space)
+ ptr = kmp_target_alloc_shared(size, device);
+ else // al->memspace == llvm_omp_target_device_mem_space
+ ptr = kmp_target_alloc_device(size, device);
+ }
+ return ptr;
} else if (al->pool_size > 0) {
// custom allocator with pool size requested
kmp_uint64 used =
@@ -1685,6 +1742,15 @@ void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
kmp_mem_desc_t desc;
kmp_uintptr_t addr_align; // address to return to caller
kmp_uintptr_t addr_descr; // address of memory block descriptor
+ if (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
+ (allocator > kmp_max_mem_alloc &&
+ KMP_IS_TARGET_MEM_SPACE(al->memspace))) {
+ KMP_DEBUG_ASSERT(kmp_target_free);
+ kmp_int32 device =
+ __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
+ kmp_target_free(ptr, device);
+ return;
+ }
addr_align = (kmp_uintptr_t)ptr;
addr_descr = addr_align - sizeof(kmp_mem_desc_t);
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 0b6d19a2572c..034e3ee2acc5 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -319,6 +319,13 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
(omp_allocator_handle_t const)7;
omp_allocator_handle_t const omp_thread_mem_alloc =
(omp_allocator_handle_t const)8;
+// Preview of target memory support
+omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
+ (omp_allocator_handle_t const)100;
+omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
+ (omp_allocator_handle_t const)101;
+omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
+ (omp_allocator_handle_t const)102;
omp_allocator_handle_t const kmp_max_mem_alloc =
(omp_allocator_handle_t const)1024;
omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc;
@@ -333,6 +340,13 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
(omp_memspace_handle_t const)3;
omp_memspace_handle_t const omp_low_lat_mem_space =
(omp_memspace_handle_t const)4;
+// Preview of target memory support
+omp_memspace_handle_t const llvm_omp_target_host_mem_space =
+ (omp_memspace_handle_t const)100;
+omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
+ (omp_memspace_handle_t const)101;
+omp_memspace_handle_t const llvm_omp_target_device_mem_space =
+ (omp_memspace_handle_t const)102;
/* This check ensures that the compiler is passing the correct data type for the
flags formal parameter of the function kmpc_omp_task_alloc(). If the type is
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 57e3b7dc862a..f77196faa11d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -540,7 +540,10 @@ static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
"%s_%d.t_disp_buffer", header, team_id);
}
-static void __kmp_init_allocator() { __kmp_init_memkind(); }
+static void __kmp_init_allocator() {
+ __kmp_init_memkind();
+ __kmp_init_target_mem();
+}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
/* ------------------------------------------------------------------------ */
diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp
index 58add6b6ae3c..87e5388ca9db 100644
--- a/openmp/runtime/src/kmp_stub.cpp
+++ b/openmp/runtime/src/kmp_stub.cpp
@@ -350,6 +350,13 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
(omp_allocator_handle_t const)7;
omp_allocator_handle_t const omp_thread_mem_alloc =
(omp_allocator_handle_t const)8;
+// Preview of target memory support
+omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
+ (omp_allocator_handle_t const)100;
+omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
+ (omp_allocator_handle_t const)101;
+omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
+ (omp_allocator_handle_t const)102;
omp_memspace_handle_t const omp_default_mem_space =
(omp_memspace_handle_t const)0;
@@ -361,6 +368,13 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
(omp_memspace_handle_t const)3;
omp_memspace_handle_t const omp_low_lat_mem_space =
(omp_memspace_handle_t const)4;
+// Preview of target memory support
+omp_memspace_handle_t const llvm_omp_target_host_mem_space =
+ (omp_memspace_handle_t const)100;
+omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
+ (omp_memspace_handle_t const)101;
+omp_memspace_handle_t const llvm_omp_target_device_mem_space =
+ (omp_memspace_handle_t const)102;
#endif /* KMP_OS_WINDOWS */
void *omp_alloc(size_t size, const omp_allocator_handle_t allocator) {
i;
More information about the Openmp-commits mailing list