[Openmp-commits] [openmp] b6c2f53 - [OpenMP] Add allocator support for target memory

Hansang Bae via Openmp-commits openmp-commits at lists.llvm.org
Tue Mar 2 14:46:12 PST 2021


Author: Hansang Bae
Date: 2021-03-02T16:45:12-06:00
New Revision: b6c2f538b22b4053ce10cfa6cf60c5244df202ac

URL: https://github.com/llvm/llvm-project/commit/b6c2f538b22b4053ce10cfa6cf60c5244df202ac
DIFF: https://github.com/llvm/llvm-project/commit/b6c2f538b22b4053ce10cfa6cf60c5244df202ac.diff

LOG: [OpenMP] Add allocator support for target memory

This is a preview of allocator support for target memory. It depends on the
following offload runtime API functions, which allocate memory as described.

void *llvm_omp_target_alloc_host(size_t size, int device_num);
-- Returns non-migratable memory owned by host.
-- Memory is accessible by host and device(s).

void *llvm_omp_target_alloc_shared(size_t size, int device_num);
-- Returns migratable memory owned by host and device.
-- Memory is accessible by host and device.

void *llvm_omp_target_alloc_device(size_t size, int device_num);
-- Returns memory owned by device.
-- Memory is only accessible by device.

The new memory space and predefined allocator names are:
-- llvm_omp_target_host_mem_space
-- llvm_omp_target_shared_mem_space
-- llvm_omp_target_device_mem_space
-- llvm_omp_target_host_mem_alloc
-- llvm_omp_target_shared_mem_alloc
-- llvm_omp_target_device_mem_alloc

Differential Revision: https://reviews.llvm.org/D96669

Added: 
    

Modified: 
    openmp/runtime/src/dllexports
    openmp/runtime/src/include/omp.h.var
    openmp/runtime/src/include/omp_lib.f90.var
    openmp/runtime/src/include/omp_lib.h.var
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_alloc.cpp
    openmp/runtime/src/kmp_global.cpp
    openmp/runtime/src/kmp_runtime.cpp
    openmp/runtime/src/kmp_stub.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 362550d047a5..961bf24a9f3b 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -555,12 +555,20 @@ kmp_set_disp_num_buffers                    890
     omp_cgroup_mem_alloc                   DATA
     omp_pteam_mem_alloc                    DATA
     omp_thread_mem_alloc                   DATA
+    # Preview of target memory support
+    llvm_omp_target_host_mem_alloc         DATA
+    llvm_omp_target_shared_mem_alloc       DATA
+    llvm_omp_target_device_mem_alloc       DATA
 
     omp_default_mem_space                  DATA
     omp_large_cap_mem_space                DATA
     omp_const_mem_space                    DATA
     omp_high_bw_mem_space                  DATA
     omp_low_lat_mem_space                  DATA
+    # Preview of target memory support
+    llvm_omp_target_host_mem_space         DATA
+    llvm_omp_target_shared_mem_space       DATA
+    llvm_omp_target_device_mem_space       DATA
 
 %ifndef stub
     # Ordinals between 900 and 999 are reserved

diff  --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eff94ca21029..28e925948263 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -357,12 +357,21 @@
     extern __KMP_IMP omp_allocator_handle_t const omp_cgroup_mem_alloc;
     extern __KMP_IMP omp_allocator_handle_t const omp_pteam_mem_alloc;
     extern __KMP_IMP omp_allocator_handle_t const omp_thread_mem_alloc;
+    /* Preview of target memory support */
+    extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
+    extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
+    extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
+
     typedef omp_uintptr_t omp_memspace_handle_t;
     extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space;
     extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space;
     extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space;
     extern __KMP_IMP omp_memspace_handle_t const omp_high_bw_mem_space;
     extern __KMP_IMP omp_memspace_handle_t const omp_low_lat_mem_space;
+    /* Preview of target memory support */
+    extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_host_mem_space;
+    extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
+    extern __KMP_IMP omp_memspace_handle_t const llvm_omp_target_device_mem_space;
 #   else
 #       if __cplusplus >= 201103
     typedef enum omp_allocator_handle_t : omp_uintptr_t
@@ -379,6 +388,10 @@
       omp_cgroup_mem_alloc = 6,
       omp_pteam_mem_alloc = 7,
       omp_thread_mem_alloc = 8,
+      /* Preview of target memory support */
+      llvm_omp_target_host_mem_alloc = 100,
+      llvm_omp_target_shared_mem_alloc = 101,
+      llvm_omp_target_device_mem_alloc = 102,
       KMP_ALLOCATOR_MAX_HANDLE = UINTPTR_MAX
     } omp_allocator_handle_t;
 #       if __cplusplus >= 201103
@@ -392,6 +405,10 @@
       omp_const_mem_space = 2,
       omp_high_bw_mem_space = 3,
       omp_low_lat_mem_space = 4,
+      /* Preview of target memory support */
+      llvm_omp_target_host_mem_space = 100,
+      llvm_omp_target_shared_mem_space = 101,
+      llvm_omp_target_device_mem_space = 102,
       KMP_MEMSPACE_MAX_HANDLE = UINTPTR_MAX
     } omp_memspace_handle_t;
 #   endif

diff  --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.f90.var
index ef2d656924ca..4b39f1af54a0 100644
--- a/openmp/runtime/src/include/omp_lib.f90.var
+++ b/openmp/runtime/src/include/omp_lib.f90.var
@@ -137,12 +137,20 @@
         integer (kind=omp_allocator_handle_kind), parameter :: omp_cgroup_mem_alloc = 6
         integer (kind=omp_allocator_handle_kind), parameter :: omp_pteam_mem_alloc = 7
         integer (kind=omp_allocator_handle_kind), parameter :: omp_thread_mem_alloc = 8
+        ! Preview of target memory support
+        integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_host_mem_alloc = 100
+        integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_shared_mem_alloc = 101
+        integer (kind=omp_allocator_handle_kind), parameter :: llvm_omp_target_device_mem_alloc = 102
 
         integer (kind=omp_memspace_handle_kind), parameter :: omp_default_mem_space = 0
         integer (kind=omp_memspace_handle_kind), parameter :: omp_large_cap_mem_space = 1
         integer (kind=omp_memspace_handle_kind), parameter :: omp_const_mem_space = 2
         integer (kind=omp_memspace_handle_kind), parameter :: omp_high_bw_mem_space = 3
         integer (kind=omp_memspace_handle_kind), parameter :: omp_low_lat_mem_space = 4
+        ! Preview of target memory support
+        integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_host_mem_space = 100
+        integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_shared_mem_space = 101
+        integer (kind=omp_memspace_handle_kind), parameter :: llvm_omp_target_device_mem_space = 102
 
         integer (kind=omp_pause_resource_kind), parameter :: omp_pause_resume = 0
         integer (kind=omp_pause_resource_kind), parameter :: omp_pause_soft = 1

diff  --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index 7dcd21afa192..50c8ecb9d1e3 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -214,6 +214,13 @@
       parameter(omp_pteam_mem_alloc=7)
       integer(kind=omp_allocator_handle_kind)omp_thread_mem_alloc
       parameter(omp_thread_mem_alloc=8)
+      ! Preview of target memory support
+      integer(kind=omp_allocator_handle_kind)llvm_omp_target_host_mem_alloc
+      parameter(llvm_omp_target_host_mem_alloc=100)
+      integer(kind=omp_allocator_handle_kind)llvm_omp_target_shared_mem_alloc
+      parameter(llvm_omp_target_shared_mem_alloc=101)
+      integer(kind=omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
+      parameter(llvm_omp_target_device_mem_alloc=102)
 
       integer(kind=omp_memspace_handle_kind)omp_default_mem_space
       parameter(omp_default_mem_space=0)
@@ -225,6 +232,13 @@
       parameter(omp_high_bw_mem_space=3)
       integer(kind=omp_memspace_handle_kind)omp_low_lat_mem_space
       parameter(omp_low_lat_mem_space=4)
+      ! Preview of target memory support
+      integer(kind=omp_memspace_handle_kind)llvm_omp_target_host_mem_space
+      parameter(llvm_omp_target_host_mem_space=100)
+      integer(kind=omp_memspace_handle_kind)llvm_omp_target_shared_mem_space
+      parameter(llvm_omp_target_shared_mem_space=101)
+      integer(kind=omp_memspace_handle_kind)llvm_omp_target_device_mem_space
+      parameter(llvm_omp_target_device_mem_space=102)
 
       integer(kind=omp_pause_resource_kind)omp_pause_resume
       parameter(omp_pause_resume=0)

diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index d5b314c91468..aa0ed7e12def 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -958,6 +958,10 @@ extern omp_memspace_handle_t const omp_large_cap_mem_space;
 extern omp_memspace_handle_t const omp_const_mem_space;
 extern omp_memspace_handle_t const omp_high_bw_mem_space;
 extern omp_memspace_handle_t const omp_low_lat_mem_space;
+// Preview of target memory support
+extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
 
 typedef struct {
   omp_alloctrait_key_t key;
@@ -974,6 +978,10 @@ extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
 extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
 extern omp_allocator_handle_t const omp_pteam_mem_alloc;
 extern omp_allocator_handle_t const omp_thread_mem_alloc;
+// Preview of target memory support
+extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
 extern omp_allocator_handle_t const kmp_max_mem_alloc;
 extern omp_allocator_handle_t __kmp_def_allocator;
 
@@ -1011,6 +1019,7 @@ extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
 
 extern void __kmp_init_memkind();
 extern void __kmp_fini_memkind();
+extern void __kmp_init_target_mem();
 
 /* ------------------------------------------------------------------------ */
 

diff  --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 4bb3d2030e6c..857855cf12d6 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1242,6 +1242,20 @@ static void **mk_hbw_preferred_hugetlb;
 static void **mk_dax_kmem;
 static void **mk_dax_kmem_all;
 static void **mk_dax_kmem_preferred;
+// Preview of target memory support; pointers are set in __kmp_init_target_mem.
+static void *(*kmp_target_alloc_host)(size_t size, int device);
+static void *(*kmp_target_alloc_shared)(size_t size, int device);
+static void *(*kmp_target_alloc_device)(size_t size, int device);
+static void *(*kmp_target_free)(void *ptr, int device);
+static bool __kmp_target_mem_available;
+#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
+  ((MS) == llvm_omp_target_host_mem_space ||                                   \
+   (MS) == llvm_omp_target_shared_mem_space ||                                 \
+   (MS) == llvm_omp_target_device_mem_space)
+#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
+  ((MA) == llvm_omp_target_host_mem_alloc ||                                   \
+   (MA) == llvm_omp_target_shared_mem_alloc ||                                 \
+   (MA) == llvm_omp_target_device_mem_alloc)
 
 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB
 static inline void chk_kind(void ***pkind) {
@@ -1338,6 +1352,18 @@ void __kmp_fini_memkind() {
   mk_dax_kmem_preferred = NULL;
 #endif
 }
+// Preview of target memory support: available iff all 4 pointers are non-null.
+void __kmp_init_target_mem() {
+  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
+  *(void **)(&kmp_target_alloc_shared) =
+      KMP_DLSYM("llvm_omp_target_alloc_shared");
+  *(void **)(&kmp_target_alloc_device) =
+      KMP_DLSYM("llvm_omp_target_alloc_device");
+  *(void **)(&kmp_target_free) = KMP_DLSYM("omp_target_free");
+  __kmp_target_mem_available = kmp_target_alloc_host &&
+                               kmp_target_alloc_shared &&
+                               kmp_target_alloc_device && kmp_target_free;
+}
 
 omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                              int ntraits,
@@ -1345,7 +1371,7 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
   // OpenMP 5.0 only allows predefined memspaces
   KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                    ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
-                   ms == omp_high_bw_mem_space);
+                   ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
   kmp_allocator_t *al;
   int i;
   al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
@@ -1423,6 +1449,9 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
         al->memkind = mk_default;
       }
     }
+  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
+    __kmp_free(al);
+    return omp_null_allocator;
   } else {
     if (ms == omp_high_bw_mem_space) {
       // cannot detect HBW memory presence without memkind library
@@ -1543,6 +1572,22 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
       }
     }
   } else if (allocator < kmp_max_mem_alloc) {
+    if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
+      // Use size input directly as the memory may not be accessible on host.
+      // Use default device for now.
+      if (__kmp_target_mem_available) {
+        kmp_int32 device =
+            __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
+        if (allocator == llvm_omp_target_host_mem_alloc)
+          ptr = kmp_target_alloc_host(size, device);
+        else if (allocator == llvm_omp_target_shared_mem_alloc)
+          ptr = kmp_target_alloc_shared(size, device);
+        else // allocator == llvm_omp_target_device_mem_alloc
+          ptr = kmp_target_alloc_device(size, device);
+      }
+      return ptr;
+    }
+
     // pre-defined allocator
     if (allocator == omp_high_bw_mem_alloc) {
       // ptr = NULL;
@@ -1551,6 +1596,18 @@ void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
     } else {
       ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
     }
+  } else if (KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
+    if (__kmp_target_mem_available) {
+      kmp_int32 device =
+          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
+      if (al->memspace == llvm_omp_target_host_mem_space)
+        ptr = kmp_target_alloc_host(size, device);
+      else if (al->memspace == llvm_omp_target_shared_mem_space)
+        ptr = kmp_target_alloc_shared(size, device);
+      else // al->memspace == llvm_omp_target_device_mem_space
+        ptr = kmp_target_alloc_device(size, device);
+    }
+    return ptr;
   } else if (al->pool_size > 0) {
     // custom allocator with pool size requested
     kmp_uint64 used =
@@ -1685,6 +1742,15 @@ void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
   kmp_mem_desc_t desc;
   kmp_uintptr_t addr_align; // address to return to caller
   kmp_uintptr_t addr_descr; // address of memory block descriptor
+  if (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
+      (allocator > kmp_max_mem_alloc &&
+       KMP_IS_TARGET_MEM_SPACE(al->memspace))) {
+    KMP_DEBUG_ASSERT(kmp_target_free);
+    kmp_int32 device =
+        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
+    kmp_target_free(ptr, device);
+    return;
+  }
 
   addr_align = (kmp_uintptr_t)ptr;
   addr_descr = addr_align - sizeof(kmp_mem_desc_t);

diff  --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 0b6d19a2572c..034e3ee2acc5 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -319,6 +319,13 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
     (omp_allocator_handle_t const)7;
 omp_allocator_handle_t const omp_thread_mem_alloc =
     (omp_allocator_handle_t const)8;
+// Preview of target memory support
+omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
+    (omp_allocator_handle_t const)100;
+omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
+    (omp_allocator_handle_t const)101;
+omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
+    (omp_allocator_handle_t const)102;
 omp_allocator_handle_t const kmp_max_mem_alloc =
     (omp_allocator_handle_t const)1024;
 omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc;
@@ -333,6 +340,13 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
     (omp_memspace_handle_t const)3;
 omp_memspace_handle_t const omp_low_lat_mem_space =
     (omp_memspace_handle_t const)4;
+// Preview of target memory support
+omp_memspace_handle_t const llvm_omp_target_host_mem_space =
+    (omp_memspace_handle_t const)100;
+omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
+    (omp_memspace_handle_t const)101;
+omp_memspace_handle_t const llvm_omp_target_device_mem_space =
+    (omp_memspace_handle_t const)102;
 
 /* This check ensures that the compiler is passing the correct data type for the
    flags formal parameter of the function kmpc_omp_task_alloc(). If the type is

diff  --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 57e3b7dc862a..f77196faa11d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -540,7 +540,10 @@ static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                "%s_%d.t_disp_buffer", header, team_id);
 }
 
-static void __kmp_init_allocator() { __kmp_init_memkind(); }
+static void __kmp_init_allocator() {
+  __kmp_init_memkind();
+  __kmp_init_target_mem(); // preview of target memory support
+}
 static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
 
 /* ------------------------------------------------------------------------ */

diff  --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp
index 58add6b6ae3c..87e5388ca9db 100644
--- a/openmp/runtime/src/kmp_stub.cpp
+++ b/openmp/runtime/src/kmp_stub.cpp
@@ -350,6 +350,13 @@ omp_allocator_handle_t const omp_pteam_mem_alloc =
     (omp_allocator_handle_t const)7;
 omp_allocator_handle_t const omp_thread_mem_alloc =
     (omp_allocator_handle_t const)8;
+// Preview of target memory support
+omp_allocator_handle_t const llvm_omp_target_host_mem_alloc =
+    (omp_allocator_handle_t const)100;
+omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
+    (omp_allocator_handle_t const)101;
+omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
+    (omp_allocator_handle_t const)102;
 
 omp_memspace_handle_t const omp_default_mem_space =
     (omp_memspace_handle_t const)0;
@@ -361,6 +368,13 @@ omp_memspace_handle_t const omp_high_bw_mem_space =
     (omp_memspace_handle_t const)3;
 omp_memspace_handle_t const omp_low_lat_mem_space =
     (omp_memspace_handle_t const)4;
+// Preview of target memory support
+omp_memspace_handle_t const llvm_omp_target_host_mem_space =
+    (omp_memspace_handle_t const)100;
+omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
+    (omp_memspace_handle_t const)101;
+omp_memspace_handle_t const llvm_omp_target_device_mem_space =
+    (omp_memspace_handle_t const)102;
 #endif /* KMP_OS_WINDOWS */
 void *omp_alloc(size_t size, const omp_allocator_handle_t allocator) {
   i;


        


More information about the Openmp-commits mailing list