[Openmp-commits] [openmp] [OpenMP] 6.0 (TR11) Memory Management Update (PR #97106)
Hansang Bae via Openmp-commits
openmp-commits at lists.llvm.org
Fri Jun 28 13:11:21 PDT 2024
https://github.com/hansangbae created https://github.com/llvm/llvm-project/pull/97106
TR11 introduced changes to support target memory management in a unified way by defining a series of API routines and additional traits. Host runtime is oblivious to how actual memory resources are mapped when using the new API routines, so it can only support how the composed memory space is maintained, and the offload backend must handle which memory resources are actually used to allocate memory from the memory space.
Here is a summary of the implementation.
* Implemented 12 API routines to get/manipulate memory space/allocator.
* Memory space composed with a list of devices has a state with resource description, and runtime is responsible for maintaining the allocated memory space objects.
* Defined interface with offload runtime to access memory resource list, and to redirect calls to omp_alloc/omp_free since it requires backend-specific information.
* Value of omp_default_mem_space changed from 0 to 99, and omp_null_mem_space took the value 0 as defined in the language.
* New allocator traits were introduced, but how to use them is up to the offload backend.
* Added basic tests for the new API routines.
>From 1e76fad8d423ff435328d3c7382c5db22b0e52c0 Mon Sep 17 00:00:00 2001
From: Hansang Bae <hansang.bae at intel.com>
Date: Fri, 28 Jun 2024 14:58:09 -0500
Subject: [PATCH] [OpenMP] 6.0 (TR11) Memory Management Update
TR11 introduced changes to support target memory management in a unified
way by defining a series of API routines and additional traits.
Host runtime is oblivious to how actual memory resources are mapped when
using the new API routines, so it can only support how the composed
memory space is maintained, and the offload backend must handle which
memory resources are actually used to allocate memory from the memory
space.
Here is a summary of the implementation.
* Implemented 12 API routines to get/manipulate memory space/allocator.
* Memory space composed with a list of devices has a state with resource
description, and runtime is responsible for maintaining the allocated
memory space objects.
* Defined interface with offload runtime to access memory resource list,
and to redirect calls to omp_alloc/omp_free since it requires
backend-specific information.
* Value of omp_default_mem_space changed from 0 to 99, and
omp_null_mem_space took the value 0 as defined in the language.
* New allocator traits were introduced, but how to use them is up to the
offload backend.
* Added basic tests for the new API routines.
---
openmp/runtime/src/dllexports | 13 +
openmp/runtime/src/include/omp.h.var | 34 +-
openmp/runtime/src/include/omp_lib.F90.var | 106 ++++++-
openmp/runtime/src/include/omp_lib.h.var | 120 ++++++-
openmp/runtime/src/kmp.h | 48 ++-
openmp/runtime/src/kmp_alloc.cpp | 293 +++++++++++++++++-
openmp/runtime/src/kmp_ftn_entry.h | 123 ++++++++
openmp/runtime/src/kmp_ftn_os.h | 48 +++
openmp/runtime/src/kmp_global.cpp | 5 +-
openmp/runtime/src/kmp_runtime.cpp | 5 +-
openmp/runtime/src/kmp_stub.cpp | 3 +-
.../runtime/test/api/omp60_memory_routines.c | 228 ++++++++++++++
12 files changed, 1006 insertions(+), 20 deletions(-)
create mode 100644 openmp/runtime/test/api/omp60_memory_routines.c
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 747b828093156..d904ba8bddda1 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -532,6 +532,18 @@ kmp_set_disp_num_buffers 890
omp_get_device_num 896
omp_init_allocator 897
omp_destroy_allocator 898
+ omp_get_devices_memspace 810
+ omp_get_device_memspace 811
+ omp_get_devices_and_host_memspace 812
+ omp_get_device_and_host_memspace 813
+ omp_get_devices_all_memspace 814
+ omp_get_devices_allocator 815
+ omp_get_device_allocator 816
+ omp_get_devices_and_host_allocator 817
+ omp_get_device_and_host_allocator 818
+ omp_get_devices_all_allocator 819
+ omp_get_memspace_num_resources 820
+ omp_get_submemspace 821
%ifndef stub
__kmpc_set_default_allocator
__kmpc_get_default_allocator
@@ -592,6 +604,7 @@ kmp_set_disp_num_buffers 890
llvm_omp_target_host_mem_space DATA
llvm_omp_target_shared_mem_space DATA
llvm_omp_target_device_mem_space DATA
+ omp_null_mem_space DATA
%ifndef stub
# Ordinals between 900 and 999 are reserved
diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var
index eb3ab7778606a..83726ac59a8e5 100644
--- a/openmp/runtime/src/include/omp.h.var
+++ b/openmp/runtime/src/include/omp.h.var
@@ -333,7 +333,13 @@
omp_atk_fallback = 5,
omp_atk_fb_data = 6,
omp_atk_pinned = 7,
- omp_atk_partition = 8
+ omp_atk_partition = 8,
+ omp_atk_pin_device = 9,
+ omp_atk_preferred_device = 10,
+ omp_atk_device_access = 11,
+ omp_atk_target_access = 12,
+ omp_atk_atomic_scope = 13,
+ omp_atk_part_size = 14
} omp_alloctrait_key_t;
typedef enum {
@@ -344,7 +350,7 @@
omp_atv_serialized = 5,
omp_atv_sequential = omp_atv_serialized, // (deprecated)
omp_atv_private = 6,
- omp_atv_all = 7,
+ omp_atv_device = 7,
omp_atv_thread = 8,
omp_atv_pteam = 9,
omp_atv_cgroup = 10,
@@ -355,7 +361,11 @@
omp_atv_environment = 15,
omp_atv_nearest = 16,
omp_atv_blocked = 17,
- omp_atv_interleaved = 18
+ omp_atv_interleaved = 18,
+ omp_atv_all = 19,
+ omp_atv_single = 20,
+ omp_atv_multiple = 21,
+ omp_atv_memspace = 22
} omp_alloctrait_value_t;
#define omp_atv_default ((omp_uintptr_t)-1)
@@ -381,6 +391,7 @@
extern __KMP_IMP omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
typedef omp_uintptr_t omp_memspace_handle_t;
+ extern __KMP_IMP omp_memspace_handle_t const omp_null_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_default_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_large_cap_mem_space;
extern __KMP_IMP omp_memspace_handle_t const omp_const_mem_space;
@@ -416,7 +427,8 @@
typedef enum omp_memspace_handle_t
# endif
{
- omp_default_mem_space = 0,
+ omp_null_mem_space = 0,
+ omp_default_mem_space = 99,
omp_large_cap_mem_space = 1,
omp_const_mem_space = 2,
omp_high_bw_mem_space = 3,
@@ -457,6 +469,20 @@
extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, omp_allocator_handle_t a);
# endif
+ /* OpenMP TR11 routines to get memory spaces and allocators */
+ extern omp_memspace_handle_t omp_get_devices_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace);
+ extern omp_memspace_handle_t omp_get_device_memspace(int dev, omp_memspace_handle_t memspace);
+ extern omp_memspace_handle_t omp_get_devices_and_host_memspace(int ndevs, const int *devs, omp_memspace_handle_t memspace);
+ extern omp_memspace_handle_t omp_get_device_and_host_memspace(int dev, omp_memspace_handle_t memspace);
+ extern omp_memspace_handle_t omp_get_devices_all_memspace(omp_memspace_handle_t memspace);
+ extern omp_allocator_handle_t omp_get_devices_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace);
+ extern omp_allocator_handle_t omp_get_device_allocator(int dev, omp_memspace_handle_t memspace);
+ extern omp_allocator_handle_t omp_get_devices_and_host_allocator(int ndevs, const int *devs, omp_memspace_handle_t memspace);
+ extern omp_allocator_handle_t omp_get_device_and_host_allocator(int dev, omp_memspace_handle_t memspace);
+ extern omp_allocator_handle_t omp_get_devices_all_allocator(omp_memspace_handle_t memspace);
+ extern int omp_get_memspace_num_resources(omp_memspace_handle_t memspace);
+ extern omp_memspace_handle_t omp_get_submemspace(omp_memspace_handle_t memspace, int num_resources, int *resources);
+
/* OpenMP 5.0 Affinity Format */
extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);
extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);
diff --git a/openmp/runtime/src/include/omp_lib.F90.var b/openmp/runtime/src/include/omp_lib.F90.var
index 63a3c93b8d929..7182e71962479 100644
--- a/openmp/runtime/src/include/omp_lib.F90.var
+++ b/openmp/runtime/src/include/omp_lib.F90.var
@@ -145,6 +145,12 @@
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_fb_data = 6
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_pinned = 7
integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_partition = 8
+ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_pin_device = 9
+ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_preferred_device = 10
+ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_device_access = 11
+ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_target_access = 12
+ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_atomic_scope = 13
+ integer (kind=omp_alloctrait_key_kind), parameter, public :: omp_atk_part_size = 14
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_default = -1
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_false = 0
@@ -154,7 +160,7 @@
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_serialized = 5
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_sequential = omp_atv_serialized
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_private = 6
- integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_all = 7
+ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_device = 7
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_thread = 8
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_pteam = 9
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_cgroup = 10
@@ -166,6 +172,10 @@
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_nearest = 16
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_blocked = 17
integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_interleaved = 18
+ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_all = 19
+ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_single = 20
+ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_multiple = 21
+ integer (kind=omp_alloctrait_val_kind), parameter, public :: omp_atv_memspace = 22
integer (kind=omp_allocator_handle_kind), parameter, public :: omp_null_allocator = 0
integer (kind=omp_allocator_handle_kind), parameter, public :: omp_default_mem_alloc = 1
@@ -180,7 +190,8 @@
integer (kind=omp_allocator_handle_kind), parameter, public :: llvm_omp_target_shared_mem_alloc = 101
integer (kind=omp_allocator_handle_kind), parameter, public :: llvm_omp_target_device_mem_alloc = 102
- integer (kind=omp_memspace_handle_kind), parameter, public :: omp_default_mem_space = 0
+ integer (kind=omp_memspace_handle_kind), parameter, public :: omp_null_mem_space = 0
+ integer (kind=omp_memspace_handle_kind), parameter, public :: omp_default_mem_space = 99
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_large_cap_mem_space = 1
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_const_mem_space = 2
integer (kind=omp_memspace_handle_kind), parameter, public :: omp_high_bw_mem_space = 3
@@ -801,6 +812,97 @@
logical (kind=omp_logical_kind) omp_in_explicit_task
end function omp_in_explicit_task
+ function omp_get_devices_memspace(ndevs, devs, memspace)
+ use omp_lib_kinds
+ integer(omp_memspace_handle_kind) :: omp_get_devices_memspace
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_memspace
+
+ function omp_get_device_memspace(dev, memspace)
+ use omp_lib_kinds
+ integer(omp_memspace_handle_kind) :: omp_get_device_memspace
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_memspace
+
+ function omp_get_devices_and_host_memspace(ndevs, devs, memspace)
+ use omp_lib_kinds
+ integer(omp_memspace_handle_kind) :: &
+ omp_get_devices_and_host_memspace
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_and_host_memspace
+
+ function omp_get_device_and_host_memspace(dev, memspace)
+ use omp_lib_kinds
+ integer(omp_memspace_handle_kind) :: &
+ omp_get_device_and_host_memspace
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_and_host_memspace
+
+ function omp_get_devices_all_memspace(memspace)
+ use omp_lib_kinds
+ integer(omp_memspace_handle_kind) :: omp_get_devices_all_memspace
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_all_memspace
+
+ function omp_get_devices_allocator(ndevs, devs, memspace)
+ use omp_lib_kinds
+ integer(omp_allocator_handle_kind) :: omp_get_devices_allocator
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_allocator
+
+ function omp_get_device_allocator(dev, memspace)
+ use omp_lib_kinds
+ integer(omp_allocator_handle_kind) :: omp_get_device_allocator
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_allocator
+
+ function omp_get_devices_and_host_allocator(ndevs, devs, memspace)
+ use omp_lib_kinds
+ integer(omp_allocator_handle_kind) :: &
+ omp_get_devices_and_host_allocator
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_and_host_allocator
+
+ function omp_get_device_and_host_allocator(dev, memspace)
+ use omp_lib_kinds
+ integer(omp_allocator_handle_kind) :: &
+ omp_get_device_and_host_allocator
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_and_host_allocator
+
+ function omp_get_devices_all_allocator(memspace)
+ use omp_lib_kinds
+ integer(omp_allocator_handle_kind) :: &
+ omp_get_devices_all_allocator
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_all_allocator
+
+ function omp_get_memspace_num_resources(memspace)
+ use omp_lib_kinds
+ integer omp_get_memspace_num_resources
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_memspace_num_resources
+
+ function omp_get_submemspace(memspace, num_resources, resources)
+ use omp_lib_kinds
+ integer(omp_memspace_handle_kind) omp_get_submemspace
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ integer, intent(in) :: num_resources
+ integer, intent(in) :: resources(*)
+ end function omp_get_submemspace
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/include/omp_lib.h.var b/openmp/runtime/src/include/omp_lib.h.var
index a709a2f298f8c..dc4742ac9e63a 100644
--- a/openmp/runtime/src/include/omp_lib.h.var
+++ b/openmp/runtime/src/include/omp_lib.h.var
@@ -151,6 +151,18 @@
parameter(omp_atk_pinned=7)
integer(kind=omp_alloctrait_key_kind)omp_atk_partition
parameter(omp_atk_partition=8)
+ integer(kind=omp_alloctrait_key_kind)omp_atk_pin_device
+ parameter(omp_atk_pin_device=9)
+ integer(kind=omp_alloctrait_key_kind)omp_atk_preferred_device
+ parameter(omp_atk_preferred_device=10)
+ integer(kind=omp_alloctrait_key_kind)omp_atk_device_access
+ parameter(omp_atk_device_access=11)
+ integer(kind=omp_alloctrait_key_kind)omp_atk_target_access
+ parameter(omp_atk_target_access=12)
+ integer(kind=omp_alloctrait_key_kind)omp_atk_atomic_scope
+ parameter(omp_atk_atomic_scope=13)
+ integer(kind=omp_alloctrait_key_kind)omp_atk_part_size
+ parameter(omp_atk_part_size=14)
integer(kind=omp_alloctrait_val_kind)omp_atv_default
parameter(omp_atv_default=-1)
@@ -170,8 +182,8 @@
parameter(omp_atv_sequential=5)
integer(kind=omp_alloctrait_val_kind)omp_atv_private
parameter(omp_atv_private=6)
- integer(kind=omp_alloctrait_val_kind)omp_atv_all
- parameter(omp_atv_all=7)
+ integer(kind=omp_alloctrait_val_kind)omp_atv_device
+ parameter(omp_atv_device=7)
integer(kind=omp_alloctrait_val_kind)omp_atv_thread
parameter(omp_atv_thread=8)
integer(kind=omp_alloctrait_val_kind)omp_atv_pteam
@@ -194,6 +206,14 @@
parameter(omp_atv_blocked=17)
integer(kind=omp_alloctrait_val_kind)omp_atv_interleaved
parameter(omp_atv_interleaved=18)
+ integer(kind=omp_alloctrait_val_kind)omp_atv_all
+ parameter(omp_atv_all=19)
+ integer(kind=omp_alloctrait_val_kind)omp_atv_single
+ parameter(omp_atv_single=20)
+ integer(kind=omp_alloctrait_val_kind)omp_atv_multiple
+ parameter(omp_atv_multiple=21)
+ integer(kind=omp_alloctrait_val_kind)omp_atv_memspace
+ parameter(omp_atv_memspace=22)
type omp_alloctrait
integer (kind=omp_alloctrait_key_kind) key
@@ -225,8 +245,10 @@
integer(omp_allocator_handle_kind)llvm_omp_target_device_mem_alloc
parameter(llvm_omp_target_device_mem_alloc=102)
+ integer(kind=omp_memspace_handle_kind)omp_null_mem_space
+ parameter(omp_null_mem_space=0)
integer(kind=omp_memspace_handle_kind)omp_default_mem_space
- parameter(omp_default_mem_space=0)
+ parameter(omp_default_mem_space=99)
integer(kind=omp_memspace_handle_kind)omp_large_cap_mem_space
parameter(omp_large_cap_mem_space=1)
integer(kind=omp_memspace_handle_kind)omp_const_mem_space
@@ -861,6 +883,98 @@
logical (kind=omp_logical_kind) omp_in_explicit_task
end function omp_in_explicit_task
+ function omp_get_devices_memspace(ndevs, devs, memspace)
+ import
+ integer(omp_memspace_handle_kind) :: omp_get_devices_memspace
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_memspace
+
+ function omp_get_device_memspace(dev, memspace)
+ import
+ integer(omp_memspace_handle_kind) :: omp_get_device_memspace
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_memspace
+
+ function omp_get_devices_and_host_memspace(ndevs,devs,memspace)
+ import
+ integer(omp_memspace_handle_kind) :: &
+ & omp_get_devices_and_host_memspace
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_and_host_memspace
+
+ function omp_get_device_and_host_memspace(dev, memspace)
+ import
+ integer(omp_memspace_handle_kind) :: &
+ & omp_get_device_and_host_memspace
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_and_host_memspace
+
+ function omp_get_devices_all_memspace(memspace)
+ import
+ integer(omp_memspace_handle_kind)::omp_get_devices_all_memspace
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_all_memspace
+
+ function omp_get_devices_allocator(ndevs, devs, memspace)
+ import
+ integer(omp_allocator_handle_kind)::omp_get_devices_allocator
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_allocator
+
+ function omp_get_device_allocator(dev, memspace)
+ import
+ integer(omp_allocator_handle_kind) :: omp_get_device_allocator
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_allocator
+
+ function omp_get_devices_and_host_allocator(ndevs,devs,memspace)
+ import
+ integer(omp_allocator_handle_kind) :: &
+ & omp_get_devices_and_host_allocator
+ integer, intent(in) :: ndevs
+ integer, intent(in) :: devs(*)
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_and_host_allocator
+
+ function omp_get_device_and_host_allocator(dev, memspace)
+ import
+ integer(omp_allocator_handle_kind) :: &
+ & omp_get_device_and_host_allocator
+ integer, intent(in) :: dev
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_device_and_host_allocator
+
+ function omp_get_devices_all_allocator(memspace)
+ import
+ integer(omp_allocator_handle_kind) :: &
+ & omp_get_devices_all_allocator
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_devices_all_allocator
+
+ function omp_get_memspace_num_resources(memspace)
+ import
+ integer omp_get_memspace_num_resources
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ end function omp_get_memspace_num_resources
+
+ function omp_get_submemspace(memspace, num_resources, resources)
+ import
+ integer(omp_memspace_handle_kind) omp_get_submemspace
+ integer(omp_memspace_handle_kind), intent(in) :: memspace
+ integer, intent(in) :: num_resources
+ integer, intent(in) :: resources(*)
+ end function omp_get_submemspace
+
+
! ***
! *** kmp_* entry points
! ***
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index c8d821b12ff0c..2564926169e65 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1053,7 +1053,13 @@ typedef enum {
omp_atk_fallback = 5,
omp_atk_fb_data = 6,
omp_atk_pinned = 7,
- omp_atk_partition = 8
+ omp_atk_partition = 8,
+ omp_atk_pin_device = 9,
+ omp_atk_preferred_device = 10,
+ omp_atk_device_access = 11,
+ omp_atk_target_access = 12,
+ omp_atk_atomic_scope = 13,
+ omp_atk_part_size = 14
} omp_alloctrait_key_t;
typedef enum {
@@ -1064,7 +1070,7 @@ typedef enum {
omp_atv_serialized = 5,
omp_atv_sequential = omp_atv_serialized, // (deprecated)
omp_atv_private = 6,
- omp_atv_all = 7,
+ omp_atv_device = 7,
omp_atv_thread = 8,
omp_atv_pteam = 9,
omp_atv_cgroup = 10,
@@ -1075,11 +1081,16 @@ typedef enum {
omp_atv_environment = 15,
omp_atv_nearest = 16,
omp_atv_blocked = 17,
- omp_atv_interleaved = 18
+ omp_atv_interleaved = 18,
+ omp_atv_all = 19,
+ omp_atv_single = 20,
+ omp_atv_multiple = 21,
+ omp_atv_memspace = 22
} omp_alloctrait_value_t;
#define omp_atv_default ((omp_uintptr_t)-1)
typedef void *omp_memspace_handle_t;
+extern omp_memspace_handle_t const omp_null_mem_space;
extern omp_memspace_handle_t const omp_default_mem_space;
extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
@@ -1088,6 +1099,7 @@ extern omp_memspace_handle_t const omp_low_lat_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
+extern omp_memspace_handle_t const kmp_max_mem_space;
typedef struct {
omp_alloctrait_key_t key;
@@ -1115,8 +1127,15 @@ extern omp_allocator_handle_t __kmp_def_allocator;
extern int __kmp_memkind_available;
-typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
+/// Memory space information is shared with offload runtime.
+typedef struct kmp_memspace_t {
+ omp_memspace_handle_t memspace; // predefined input memory space
+ int num_resources = 0; // number of available resources
+ int *resources = nullptr; // available resources
+ kmp_memspace_t *next = nullptr; // next memory space handle
+} kmp_memspace_t;
+/// Memory allocator information is shared with offload runtime.
typedef struct kmp_allocator_t {
omp_memspace_handle_t memspace;
void **memkind; // pointer to memkind
@@ -1126,6 +1145,12 @@ typedef struct kmp_allocator_t {
kmp_uint64 pool_size;
kmp_uint64 pool_used;
bool pinned;
+ omp_alloctrait_value_t partition;
+ int pin_device;
+ int preferred_device;
+ omp_alloctrait_value_t target_access;
+ omp_alloctrait_value_t atomic_scope;
+ size_t part_size;
} kmp_allocator_t;
extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
@@ -1158,6 +1183,21 @@ extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
extern void __kmp_init_target_mem();
+extern void __kmp_fini_target_mem();
+
+// OpenMP 6.0 (TR11) Memory Management support
+extern omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs,
+ const int *devs,
+ omp_memspace_handle_t,
+ int host);
+extern omp_allocator_handle_t __kmp_get_devices_allocator(int ndevs,
+ const int *devs,
+ omp_memspace_handle_t,
+ int host);
+extern int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace);
+extern omp_memspace_handle_t
+__kmp_get_submemspace(omp_memspace_handle_t memspace, int num_resources,
+ int *resources);
/* ------------------------------------------------------------------------ */
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index fb1b0eb5f0fe5..11893a9d6cd4a 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1251,15 +1251,190 @@ static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
static bool __kmp_target_mem_available;
+
#define KMP_IS_TARGET_MEM_SPACE(MS) \
(MS == llvm_omp_target_host_mem_space || \
MS == llvm_omp_target_shared_mem_space || \
MS == llvm_omp_target_device_mem_space)
+
#define KMP_IS_TARGET_MEM_ALLOC(MA) \
(MA == llvm_omp_target_host_mem_alloc || \
MA == llvm_omp_target_shared_mem_alloc || \
MA == llvm_omp_target_device_mem_alloc)
+#define KMP_IS_PREDEF_MEM_SPACE(MS) \
+ (MS == omp_null_mem_space || MS == omp_default_mem_space || \
+ MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \
+ MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \
+ KMP_IS_TARGET_MEM_SPACE(MS))
+
+/// Support OMP 6.0 target memory management
+/// Expected offload runtime entries.
+///
+/// Returns number of resources and list of unique resource IDs in "resources".
+/// Runtime needs to invoke this twice to get the number of resources, allocate
+/// space for the resource IDs, and finally let offload runtime write resource
+/// IDs in "resources".
+/// int __tgt_get_mem_resources(int num_devices, const int *devices,
+/// int host_access, omp_memspace_handle_t memspace,
+/// int *resources);
+///
+/// Redirects omp_alloc call to offload runtime.
+/// void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator);
+///
+/// Redirects omp_free call to offload runtime.
+/// void __tgt_omp_free(void *ptr, omp_allocator_handle_t);
+class kmp_tgt_allocator_t {
+ bool supported = false;
+ using get_mem_resources_t = int (*)(int, const int *, int,
+ omp_memspace_handle_t, int *);
+ using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t);
+ using omp_free_t = void (*)(void *, omp_allocator_handle_t);
+ get_mem_resources_t tgt_get_mem_resources = nullptr;
+ omp_alloc_t tgt_omp_alloc = nullptr;
+ omp_free_t tgt_omp_free = nullptr;
+
+public:
+ /// Initialize interface with offload runtime
+ void init() {
+ tgt_get_mem_resources =
+ (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
+ tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
+ tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
+ supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
+ }
+ /// Obtain resource information from offload runtime. We assume offload
+ /// runtime backends maintain a list of unique resource IDs.
+ int get_mem_resources(int ndevs, const int *devs, int host,
+ omp_memspace_handle_t memspace, int *resources) {
+ if (supported)
+ return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
+ return 0;
+ }
+ /// Invoke offload runtime's memory allocation routine
+ void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
+ if (supported)
+ return tgt_omp_alloc(size, allocator);
+ return nullptr;
+ }
+ /// Invoke offload runtime's memory deallocation routine
+ void omp_free(void *ptr, omp_allocator_handle_t allocator) {
+ if (supported)
+ tgt_omp_free(ptr, allocator);
+ }
+} __kmp_tgt_allocator;
+
+extern "C" int omp_get_num_devices(void);
+
+/// Maintain a list of target memory spaces that are identified with the
+/// requested information. There will be only one unique memory space object
+/// that matches the input.
+class kmp_tgt_memspace_list_t {
+ kmp_memspace_t *memspace_list = nullptr;
+ KMP_LOCK_INIT(mtx);
+ /// Find memory space that matches the provided input
+ kmp_memspace_t *find(int num_resources, const int *resources,
+ omp_memspace_handle_t memspace) {
+ kmp_memspace_t *ms = memspace_list;
+ while (ms) {
+ if (ms->num_resources == num_resources && ms->memspace == memspace &&
+ !memcmp(ms->resources, resources, sizeof(int) * num_resources))
+ break;
+ ms = ms->next;
+ }
+ return ms;
+ }
+ /// Return memory space for the provided input. It tries to find existing
+ /// memory space that exactly matches the provided input or create one if
+ /// not found.
+ omp_memspace_handle_t get(int num_resources, const int *resources,
+ omp_memspace_handle_t memspace) {
+ int gtid = __kmp_entry_gtid();
+ __kmp_acquire_lock(&mtx, gtid);
+ // Sort absolute IDs in the resource list
+ int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
+ KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
+ qsort(sorted_resources, (size_t)num_resources, sizeof(int),
+ [](const void *a, const void *b) {
+ const int val_a = *(const int *)a;
+ const int val_b = *(const int *)b;
+ return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
+ });
+ kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
+ if (ms) {
+ __kmp_free(sorted_resources);
+ __kmp_release_lock(&mtx, gtid);
+ return ms;
+ }
+ ms = (kmp_memspace_t *)__kmp_allocate(sizeof(kmp_memspace_t));
+ ms->memspace = memspace;
+ ms->num_resources = num_resources;
+ ms->resources = sorted_resources;
+ ms->next = memspace_list;
+ memspace_list = ms;
+ __kmp_release_lock(&mtx, gtid);
+ return ms;
+ }
+
+public:
+ /// Initialize memory space list
+ void init() { __kmp_init_lock(&mtx); }
+ /// Release resources for the memory space list
+ void fini() {
+ kmp_memspace_t *ms = memspace_list;
+ while (ms) {
+ if (ms->resources)
+ __kmp_free(ms->resources);
+ kmp_memspace_t *tmp = ms;
+ ms = ms->next;
+ __kmp_free(tmp);
+ }
+ __kmp_destroy_lock(&mtx);
+ }
+ /// Return memory space for the provided input
+ omp_memspace_handle_t get_memspace(int num_devices, const int *devices,
+ int host_access,
+ omp_memspace_handle_t memspace) {
+ int actual_num_devices = num_devices;
+ int *actual_devices = const_cast<int *>(devices);
+ if (actual_num_devices == 0) {
+ actual_num_devices = omp_get_num_devices();
+ if (actual_num_devices <= 0)
+ return omp_null_mem_space;
+ }
+ if (actual_devices == NULL) {
+ // Prepare list of all devices in this case.
+ actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
+ for (int i = 0; i < actual_num_devices; i++)
+ actual_devices[i] = i;
+ }
+ // Get the number of available resources first
+ int num_resources = __kmp_tgt_allocator.get_mem_resources(
+ actual_num_devices, actual_devices, host_access, memspace, NULL);
+ if (num_resources <= 0)
+ return omp_null_mem_space; // No available resources
+
+ omp_memspace_handle_t ms = omp_null_mem_space;
+ if (num_resources > 0) {
+ int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
+ // Let offload runtime write the resource IDs
+ num_resources = __kmp_tgt_allocator.get_mem_resources(
+ actual_num_devices, actual_devices, host_access, memspace, resources);
+ ms = get(num_resources, resources, memspace);
+ __kmp_free(resources);
+ }
+ if (!devices && actual_devices)
+ __kmp_free(actual_devices);
+ return ms;
+ }
+ /// Return sub memory space from the parent memory space
+ omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
+ omp_memspace_handle_t parent) {
+ kmp_memspace_t *ms = (kmp_memspace_t *)parent;
+ return get(num_resources, resources, ms->memspace);
+ }
+} __kmp_tgt_memspace_list;
+
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
static inline void chk_kind(void ***pkind) {
KMP_DEBUG_ASSERT(pkind);
@@ -1374,19 +1549,30 @@ void __kmp_init_target_mem() {
// lock/pin and unlock/unpin target calls
*(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
*(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
+ __kmp_tgt_allocator.init();
+ __kmp_tgt_memspace_list.init();
}
+/// Tear down target memory support by finalizing __kmp_tgt_memspace_list.
+void __kmp_fini_target_mem() {
+  __kmp_tgt_memspace_list.fini();
+}
+
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
int ntraits,
omp_alloctrait_t traits[]) {
- // OpenMP 5.0 only allows predefined memspaces
- KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
- ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
- ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
kmp_allocator_t *al;
int i;
al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
al->memspace = ms; // not used currently
+
+ // Assign default values if applicable
+ al->alignment = 1;
+ al->pinned = false;
+ al->partition = omp_atv_environment;
+ al->pin_device = -1;
+ al->preferred_device = -1;
+ al->target_access = omp_atv_single;
+ al->atomic_scope = omp_atv_device;
+
for (i = 0; i < ntraits; ++i) {
switch (traits[i].key) {
case omp_atk_sync_hint:
@@ -1414,10 +1600,33 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
case omp_atk_partition:
al->memkind = RCAST(void **, traits[i].value);
break;
+ case omp_atk_pin_device:
+ __kmp_type_convert(traits[i].value, &(al->pin_device));
+ break;
+ case omp_atk_preferred_device:
+ __kmp_type_convert(traits[i].value, &(al->preferred_device));
+ break;
+ case omp_atk_target_access:
+ al->target_access = (omp_alloctrait_value_t)traits[i].value;
+ break;
+ case omp_atk_atomic_scope:
+ al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
+ break;
+ case omp_atk_part_size:
+ __kmp_type_convert(traits[i].value, &(al->part_size));
+ break;
default:
KMP_ASSERT2(0, "Unexpected allocator trait");
}
}
+
+ if (al->memspace > kmp_max_mem_space) {
+ // Memory space has been allocated for targets.
+ return (omp_allocator_handle_t)al;
+ }
+
+ KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
+
if (al->fb == 0) {
// set default allocator
al->fb = omp_atv_default_mem_fb;
@@ -1490,6 +1699,71 @@ omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
return __kmp_threads[gtid]->th.th_def_allocator;
}
+/// Return the memory space for the given devices, or omp_null_mem_space when
+/// the device description is invalid or \p memspace is not a predefined one.
+omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs,
+                                                 omp_memspace_handle_t memspace,
+                                                 int host) {
+  if (!__kmp_init_serial)
+    __kmp_serial_initialize();
+
+  // Negative device count is never valid.
+  if (ndevs < 0)
+    return omp_null_mem_space;
+  // A non-empty device list must actually be provided.
+  if (ndevs > 0 && devs == NULL)
+    return omp_null_mem_space;
+  // Only predefined memory spaces are accepted as input.
+  if (memspace > kmp_max_mem_space)
+    return omp_null_mem_space;
+
+  return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
+}
+
+/// Return an allocator with no traits on the memory space obtained for the
+/// given devices, or omp_null_allocator when the input is invalid or no memory
+/// space is available.
+omp_allocator_handle_t
+__kmp_get_devices_allocator(int ndevs, const int *devs,
+                            omp_memspace_handle_t memspace, int host) {
+  if (!__kmp_init_serial)
+    __kmp_serial_initialize();
+
+  // Only accept valid device description and predefined memory space
+  bool bad_devices = ndevs < 0 || (ndevs > 0 && devs == NULL);
+  bool bad_memspace = memspace > kmp_max_mem_space;
+  if (bad_devices || bad_memspace)
+    return omp_null_allocator;
+
+  omp_memspace_handle_t target_ms =
+      __kmp_get_devices_memspace(ndevs, devs, memspace, host);
+  if (target_ms != omp_null_mem_space)
+    return __kmpc_init_allocator(__kmp_entry_gtid(), target_ms, 0, NULL);
+
+  return omp_null_allocator;
+}
+
+/// Return the number of resources in \p memspace: 0 for omp_null_mem_space,
+/// 1 for a predefined memory space, and the stored resource count for a
+/// memory space that was composed for targets.
+int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
+  if (!__kmp_init_serial)
+    __kmp_serial_initialize();
+  if (memspace == omp_null_mem_space)
+    return 0;
+  // Only handles strictly above kmp_max_mem_space are treated as target
+  // memory space objects elsewhere, so the bound itself must not be cast to
+  // a kmp_memspace_t pointer here.
+  if (memspace <= kmp_max_mem_space)
+    return 1; // return 1 for predefined memory space
+  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
+  return ms->num_resources;
+}
+
+/// Return a sub memory space of \p memspace made of the selected resources.
+///
+/// \param memspace      parent memory space handle.
+/// \param num_resources number of entries in \p resources.
+/// \param resources     indices into the parent's resource list.
+/// \return \p memspace itself when it is null or predefined;
+///         omp_null_mem_space when the request is empty, larger than the
+///         parent, or names an index outside the parent's resource list;
+///         otherwise the handle returned by the memory space list.
+omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
+                                            int num_resources, int *resources) {
+  if (!__kmp_init_serial)
+    __kmp_serial_initialize();
+  // Only handles strictly above kmp_max_mem_space are target memory space
+  // objects; anything else is returned as is.
+  if (memspace == omp_null_mem_space || memspace <= kmp_max_mem_space)
+    return memspace; // return input memory space for predefined memory space
+  kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
+  // A negative count would otherwise be turned into a huge allocation size.
+  if (num_resources <= 0 || ms->num_resources < num_resources || !resources)
+    return omp_null_mem_space; // input memory space cannot satisfy the request
+
+  // The stored resource ID is an absolute ID only known to the offload backend,
+  // and the returned memory space will still keep the property.
+  int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
+
+  // Collect absolute resource ID from the relative ID
+  for (int i = 0; i < num_resources; i++) {
+    int rel = resources[i];
+    if (rel < 0 || rel >= ms->num_resources) {
+      // Relative ID does not name a resource of the parent memory space.
+      __kmp_free(resources_abs);
+      return omp_null_mem_space;
+    }
+    resources_abs[i] = ms->resources[rel];
+  }
+
+  omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
+      num_resources, resources_abs, memspace);
+  __kmp_free(resources_abs);
+
+  return submemspace;
+}
+
typedef struct kmp_mem_desc { // Memory block descriptor
void *ptr_alloc; // Pointer returned by allocator
size_t size_a; // Size of allocated memory block (initial+descriptor+align)
@@ -1576,6 +1850,11 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
// Use default allocator if libmemkind is not available
int use_default_allocator = (__kmp_memkind_available) ? false : true;
+ if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
+ // Memspace has been allocated for targets.
+ return __kmp_tgt_allocator.omp_alloc(size, allocator);
+ }
+
if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
// Use size input directly as the memory may not be accessible on host.
// Use default device for now.
@@ -1831,6 +2110,12 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
kmp_mem_desc_t desc;
kmp_uintptr_t addr_align; // address to return to caller
kmp_uintptr_t addr_descr; // address of memory block descriptor
+
+ if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
+ __kmp_tgt_allocator.omp_free(ptr, allocator);
+ return;
+ }
+
if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
(allocator > kmp_max_mem_alloc &&
KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 713561734c481..8d84d4ab643f6 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -428,6 +428,129 @@ omp_allocator_handle_t FTN_STDCALL FTN_GET_DEFAULT_ALLOCATOR(void) {
#endif
}
+/* OpenMP 6.0 (TR11) Memory Management support */
+// Entry point: memory space for a list of devices, requested with host = 0.
+omp_memspace_handle_t FTN_STDCALL
+FTN_GET_DEVICES_MEMSPACE(int KMP_DEREF ndevs, const int *devs,
+                         omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  const int host = 0;
+  return __kmp_get_devices_memspace(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
+                                    host);
+#endif
+}
+
+// Entry point: memory space for a single device, requested with host = 0.
+omp_memspace_handle_t FTN_STDCALL FTN_GET_DEVICE_MEMSPACE(
+    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  // Wrap the single device ID into a one-element list.
+  int single_dev[1];
+  single_dev[0] = KMP_DEREF dev;
+  return __kmp_get_devices_memspace(1, single_dev, KMP_DEREF memspace, 0);
+#endif
+}
+
+// Entry point: memory space for a list of devices, requested with host = 1.
+omp_memspace_handle_t FTN_STDCALL
+FTN_GET_DEVICES_AND_HOST_MEMSPACE(int KMP_DEREF ndevs, const int *devs,
+                                  omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  const int host = 1;
+  return __kmp_get_devices_memspace(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
+                                    host);
+#endif
+}
+
+// Entry point: memory space for a single device, requested with host = 1.
+omp_memspace_handle_t FTN_STDCALL FTN_GET_DEVICE_AND_HOST_MEMSPACE(
+    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  // Wrap the single device ID into a one-element list.
+  int single_dev[1];
+  single_dev[0] = KMP_DEREF dev;
+  return __kmp_get_devices_memspace(1, single_dev, KMP_DEREF memspace, 1);
+#endif
+}
+
+// Entry point: memory space requested with an empty device description
+// (count 0, no list) and host = 1.
+omp_memspace_handle_t FTN_STDCALL
+FTN_GET_DEVICES_ALL_MEMSPACE(omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  const int *no_device_list = NULL;
+  return __kmp_get_devices_memspace(0, no_device_list, KMP_DEREF memspace, 1);
+#endif
+}
+
+// Entry point: allocator for a list of devices, requested with host = 0.
+// The memspace argument is a memory space handle (it is forwarded as one),
+// so it is declared with the memory space handle type.
+omp_allocator_handle_t FTN_STDCALL
+FTN_GET_DEVICES_ALLOCATOR(int KMP_DEREF ndevs, const int *devs,
+                          omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  return __kmp_get_devices_allocator(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
+                                     0 /* host */);
+#endif
+}
+
+// Entry point: allocator for a single device, requested with host = 0.
+// The memspace argument is a memory space handle (it is forwarded as one),
+// so it is declared with the memory space handle type.
+omp_allocator_handle_t FTN_STDCALL FTN_GET_DEVICE_ALLOCATOR(
+    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  // Wrap the single device ID into a one-element list.
+  int dev_num = KMP_DEREF dev;
+  return __kmp_get_devices_allocator(1, &dev_num, KMP_DEREF memspace, 0);
+#endif
+}
+
+// Entry point: allocator for a list of devices, requested with host = 1.
+// The memspace argument is a memory space handle (it is forwarded as one),
+// so it is declared with the memory space handle type.
+omp_allocator_handle_t FTN_STDCALL
+FTN_GET_DEVICES_AND_HOST_ALLOCATOR(int KMP_DEREF ndevs, const int *devs,
+                                   omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  return __kmp_get_devices_allocator(KMP_DEREF ndevs, devs, KMP_DEREF memspace,
+                                     1 /* host */);
+#endif
+}
+
+// Entry point: allocator for a single device, requested with host = 1.
+// The memspace argument is a memory space handle (it is forwarded as one),
+// so it is declared with the memory space handle type.
+omp_allocator_handle_t FTN_STDCALL FTN_GET_DEVICE_AND_HOST_ALLOCATOR(
+    int KMP_DEREF dev, omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  // Wrap the single device ID into a one-element list.
+  int dev_num = KMP_DEREF dev;
+  return __kmp_get_devices_allocator(1, &dev_num, KMP_DEREF memspace, 1);
+#endif
+}
+
+// Entry point: allocator requested with an empty device description
+// (count 0, no list) and host = 1.
+// The memspace argument is a memory space handle (it is forwarded as one),
+// so it is declared with the memory space handle type.
+omp_allocator_handle_t FTN_STDCALL
+FTN_GET_DEVICES_ALL_ALLOCATOR(omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+  return NULL;
+#else
+  return __kmp_get_devices_allocator(0, NULL, KMP_DEREF memspace, 1);
+#endif
+}
+
+// Entry point: number of resources in the given memory space.
+// The stub build always reports 0; otherwise the query is forwarded to
+// __kmp_get_memspace_num_resources().
+int FTN_STDCALL
+FTN_GET_MEMSPACE_NUM_RESOURCES(omp_memspace_handle_t KMP_DEREF memspace) {
+#ifdef KMP_STUB
+ return 0;
+#else
+ return __kmp_get_memspace_num_resources(KMP_DEREF memspace);
+#endif
+}
+
+// Entry point: sub memory space built from num_resources entries of
+// resources. The stub build always returns NULL; otherwise the request is
+// forwarded to __kmp_get_submemspace().
+omp_memspace_handle_t FTN_STDCALL
+FTN_GET_SUBMEMSPACE(omp_memspace_handle_t KMP_DEREF memspace,
+ int KMP_DEREF num_resources, int *resources) {
+#ifdef KMP_STUB
+ return NULL;
+#else
+ return __kmp_get_submemspace(KMP_DEREF memspace, KMP_DEREF num_resources,
+ resources);
+#endif
+}
+
/* OpenMP 5.0 affinity format support */
#ifndef KMP_STUB
static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size,
diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index 7d595b947f4a9..ae0ed067235e5 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -127,6 +127,18 @@
#define FTN_DESTROY_ALLOCATOR omp_destroy_allocator
#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator
#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator
+/* OpenMP 6.0 (TR11) memory management entries: lower case, no suffix */
+#define FTN_GET_DEVICES_MEMSPACE omp_get_devices_memspace
+#define FTN_GET_DEVICE_MEMSPACE omp_get_device_memspace
+#define FTN_GET_DEVICES_AND_HOST_MEMSPACE omp_get_devices_and_host_memspace
+#define FTN_GET_DEVICE_AND_HOST_MEMSPACE omp_get_device_and_host_memspace
+#define FTN_GET_DEVICES_ALL_MEMSPACE omp_get_devices_all_memspace
+#define FTN_GET_DEVICES_ALLOCATOR omp_get_devices_allocator
+#define FTN_GET_DEVICE_ALLOCATOR omp_get_device_allocator
+#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR omp_get_devices_and_host_allocator
+#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR omp_get_device_and_host_allocator
+#define FTN_GET_DEVICES_ALL_ALLOCATOR omp_get_devices_all_allocator
+#define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources
+#define FTN_GET_SUBMEMSPACE omp_get_submemspace
#define FTN_GET_DEVICE_NUM omp_get_device_num
#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format
#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format
@@ -262,6 +274,18 @@
#define FTN_DESTROY_ALLOCATOR omp_destroy_allocator_
#define FTN_SET_DEFAULT_ALLOCATOR omp_set_default_allocator_
#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_
+/* OpenMP 6.0 (TR11) memory management entries: lower case, trailing '_' */
+#define FTN_GET_DEVICES_MEMSPACE omp_get_devices_memspace_
+#define FTN_GET_DEVICE_MEMSPACE omp_get_device_memspace_
+#define FTN_GET_DEVICES_AND_HOST_MEMSPACE omp_get_devices_and_host_memspace_
+#define FTN_GET_DEVICE_AND_HOST_MEMSPACE omp_get_device_and_host_memspace_
+#define FTN_GET_DEVICES_ALL_MEMSPACE omp_get_devices_all_memspace_
+#define FTN_GET_DEVICES_ALLOCATOR omp_get_devices_allocator_
+#define FTN_GET_DEVICE_ALLOCATOR omp_get_device_allocator_
+#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR omp_get_devices_and_host_allocator_
+#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR omp_get_device_and_host_allocator_
+#define FTN_GET_DEVICES_ALL_ALLOCATOR omp_get_devices_all_allocator_
+#define FTN_GET_MEMSPACE_NUM_RESOURCES omp_get_memspace_num_resources_
+#define FTN_GET_SUBMEMSPACE omp_get_submemspace_
#define FTN_ALLOC omp_alloc_
#define FTN_FREE omp_free_
#define FTN_GET_DEVICE_NUM omp_get_device_num_
@@ -399,6 +423,18 @@
#define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR
#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR
#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR
+/* OpenMP 6.0 (TR11) memory management entries: upper case, no suffix */
+#define FTN_GET_DEVICES_MEMSPACE OMP_GET_DEVICES_MEMSPACE
+#define FTN_GET_DEVICE_MEMSPACE OMP_GET_DEVICE_MEMSPACE
+#define FTN_GET_DEVICES_AND_HOST_MEMSPACE OMP_GET_DEVICES_AND_HOST_MEMSPACE
+#define FTN_GET_DEVICE_AND_HOST_MEMSPACE OMP_GET_DEVICE_AND_HOST_MEMSPACE
+#define FTN_GET_DEVICES_ALL_MEMSPACE OMP_GET_DEVICES_ALL_MEMSPACE
+#define FTN_GET_DEVICES_ALLOCATOR OMP_GET_DEVICES_ALLOCATOR
+#define FTN_GET_DEVICE_ALLOCATOR OMP_GET_DEVICE_ALLOCATOR
+#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR OMP_GET_DEVICES_AND_HOST_ALLOCATOR
+#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR OMP_GET_DEVICE_AND_HOST_ALLOCATOR
+#define FTN_GET_DEVICES_ALL_ALLOCATOR OMP_GET_DEVICES_ALL_ALLOCATOR
+#define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES
+#define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE
#define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM
#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT
#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT
@@ -534,6 +570,18 @@
#define FTN_DESTROY_ALLOCATOR OMP_DESTROY_ALLOCATOR_
#define FTN_SET_DEFAULT_ALLOCATOR OMP_SET_DEFAULT_ALLOCATOR_
#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_
+/* OpenMP 6.0 (TR11) memory management entries: upper case, trailing '_' */
+#define FTN_GET_DEVICES_MEMSPACE OMP_GET_DEVICES_MEMSPACE_
+#define FTN_GET_DEVICE_MEMSPACE OMP_GET_DEVICE_MEMSPACE_
+#define FTN_GET_DEVICES_AND_HOST_MEMSPACE OMP_GET_DEVICES_AND_HOST_MEMSPACE_
+#define FTN_GET_DEVICE_AND_HOST_MEMSPACE OMP_GET_DEVICE_AND_HOST_MEMSPACE_
+#define FTN_GET_DEVICES_ALL_MEMSPACE OMP_GET_DEVICES_ALL_MEMSPACE_
+#define FTN_GET_DEVICES_ALLOCATOR OMP_GET_DEVICES_ALLOCATOR_
+#define FTN_GET_DEVICE_ALLOCATOR OMP_GET_DEVICE_ALLOCATOR_
+#define FTN_GET_DEVICES_AND_HOST_ALLOCATOR OMP_GET_DEVICES_AND_HOST_ALLOCATOR_
+#define FTN_GET_DEVICE_AND_HOST_ALLOCATOR OMP_GET_DEVICE_AND_HOST_ALLOCATOR_
+#define FTN_GET_DEVICES_ALL_ALLOCATOR OMP_GET_DEVICES_ALL_ALLOCATOR_
+#define FTN_GET_MEMSPACE_NUM_RESOURCES OMP_GET_MEMSPACE_NUM_RESOURCES_
+#define FTN_GET_SUBMEMSPACE OMP_GET_SUBMEMSPACE_
#define FTN_ALLOC OMP_ALLOC_
#define FTN_FREE OMP_FREE_
#define FTN_GET_DEVICE_NUM OMP_GET_DEVICE_NUM_
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 5017cd3de4be5..87c23dc9edc7d 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -328,8 +328,9 @@ omp_allocator_handle_t const kmp_max_mem_alloc =
(omp_allocator_handle_t const)1024;
omp_allocator_handle_t __kmp_def_allocator = omp_default_mem_alloc;
+omp_memspace_handle_t const omp_null_mem_space = (omp_memspace_handle_t const)0;
omp_memspace_handle_t const omp_default_mem_space =
- (omp_memspace_handle_t const)0;
+ (omp_memspace_handle_t const)99;
omp_memspace_handle_t const omp_large_cap_mem_space =
(omp_memspace_handle_t const)1;
omp_memspace_handle_t const omp_const_mem_space =
@@ -344,6 +345,8 @@ omp_memspace_handle_t const llvm_omp_target_shared_mem_space =
(omp_memspace_handle_t const)101;
omp_memspace_handle_t const llvm_omp_target_device_mem_space =
(omp_memspace_handle_t const)102;
+omp_memspace_handle_t const kmp_max_mem_space =
+ (omp_memspace_handle_t const)1024;
/* This check ensures that the compiler is passing the correct data type for the
flags formal parameter of the function kmpc_omp_task_alloc(). If the type is
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index b49c44f348d6b..7f65e476e7038 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -579,7 +579,10 @@ static void __kmp_init_allocator() {
__kmp_init_memkind();
__kmp_init_target_mem();
}
-static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
+/* Finalize allocator support: target memory first, then memkind. */
+static void __kmp_fini_allocator() {
+ __kmp_fini_target_mem();
+ __kmp_fini_memkind();
+}
/* ------------------------------------------------------------------------ */
diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp
index f25e24f09a03d..06276d1bed1c7 100644
--- a/openmp/runtime/src/kmp_stub.cpp
+++ b/openmp/runtime/src/kmp_stub.cpp
@@ -357,8 +357,9 @@ omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc =
omp_allocator_handle_t const llvm_omp_target_device_mem_alloc =
(omp_allocator_handle_t const)102;
+omp_memspace_handle_t const omp_null_mem_space = (omp_memspace_handle_t const)0;
omp_memspace_handle_t const omp_default_mem_space =
- (omp_memspace_handle_t const)0;
+ (omp_memspace_handle_t const)99;
omp_memspace_handle_t const omp_large_cap_mem_space =
(omp_memspace_handle_t const)1;
omp_memspace_handle_t const omp_const_mem_space =
diff --git a/openmp/runtime/test/api/omp60_memory_routines.c b/openmp/runtime/test/api/omp60_memory_routines.c
new file mode 100644
index 0000000000000..5b2f3b78da429
--- /dev/null
+++ b/openmp/runtime/test/api/omp60_memory_routines.c
@@ -0,0 +1,228 @@
+// RUN: %libomp-compile -Wl,--export-dynamic && %libomp-run
+
+// REQUIRES: linux
+
+// Test OpenMP 6.0 memory management routines.
+// Test host runtime's basic support with an emulated offload runtime.
+
+#include <stdlib.h>
+#include <omp.h>
+
+#define NUM_DEVICES 4
+
+//
+// Required offload runtime interfaces
+//
+// Emulated backend: report a fixed number of devices.
+extern int __tgt_get_num_devices(void) {
+  return NUM_DEVICES;
+}
+
+// Emulated backend: one resource per requested device.
+// When resources is non-NULL it receives the resource IDs; the return value
+// is always the number of resources (= num_devices).
+extern int __tgt_get_mem_resources(int num_devices, const int *devices,
+                                   int host, omp_memspace_handle_t memspace,
+                                   int *resources) {
+  // We expect valid inputs within this test.
+  if (resources != NULL) {
+    // Simple resource ID mapping example in the backend (=device ID).
+    // This does not represent any real backend.
+    int idx = 0;
+    while (idx < num_devices) {
+      resources[idx] = devices[idx];
+      idx++;
+    }
+  }
+  return num_devices;
+}
+
+// Emulated backend: serve allocations from the host heap; the allocator
+// handle is ignored.
+extern void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator) {
+  void *block = malloc(size);
+  return block;
+}
+
+// Emulated backend: release memory with free(); the allocator handle is
+// ignored.
+extern void __tgt_omp_free(void *ptr, omp_allocator_handle_t allocator) {
+  free(ptr);
+  return;
+}
+
+// Code above is also used by the corresponding Fortran test
+
+// Return EXIT_FAILURE from the enclosing function when Expr is false.
+// Only usable inside functions returning int.
+#define CHECK_OR_RET_FAIL(Expr) \
+ do { \
+ if (!(Expr)) \
+ return EXIT_FAILURE; \
+ } while(0)
+
+// Build an allocator with no traits on memory space ms, then allocate and
+// free 1024 bytes through it before destroying it.
+// Returns EXIT_SUCCESS if both the allocator and the allocation are non-null.
+static int test_user_allocator(omp_memspace_handle_t ms) {
+  omp_allocator_handle_t allocator = omp_init_allocator(ms, 0, NULL);
+  CHECK_OR_RET_FAIL(allocator != omp_null_allocator);
+
+  void *block = omp_alloc(1024, allocator);
+  CHECK_OR_RET_FAIL(block != NULL);
+
+  omp_free(block, allocator);
+  omp_destroy_allocator(allocator);
+  return EXIT_SUCCESS;
+}
+
+// Allocate and free 1024 bytes through allocator al, then destroy al.
+// Returns EXIT_SUCCESS if the allocation is non-null.
+static int test_allocator(omp_allocator_handle_t al) {
+  void *block = omp_alloc(1024, al);
+  CHECK_OR_RET_FAIL(block != NULL);
+
+  omp_free(block, al);
+  omp_destroy_allocator(al);
+  return EXIT_SUCCESS;
+}
+
+// Exercise the memory space query routines for every single device and for
+// every contiguous run of devices, both with and without host.
+// Returns EXIT_SUCCESS when all checks pass, EXIT_FAILURE otherwise.
+static int test_mem_space(void) {
+  int i, count;
+  int num_devices = omp_get_num_devices();
+  CHECK_OR_RET_FAIL(num_devices == NUM_DEVICES);
+
+  int *all_devices = (int *)malloc(sizeof(int) * num_devices);
+  // Fail the test instead of writing through NULL if the allocation failed.
+  CHECK_OR_RET_FAIL(all_devices != NULL);
+  for (i = 0; i < num_devices; i++)
+    all_devices[i] = i;
+
+  omp_memspace_handle_t predef = omp_default_mem_space;
+  omp_memspace_handle_t ms1 = omp_null_mem_space;
+  omp_memspace_handle_t ms2 = omp_null_mem_space;
+
+  // Test the following API routines.
+  // * omp_get_device_memspace
+  // * omp_get_device_and_host_memspace
+  // * omp_get_devices_memspace
+  // * omp_get_devices_and_host_memspace
+  // Test if runtime returns the same memory space handle for the same input.
+  // Test if we can use the memory space to initialize allocator.
+  for (i = 0; i < num_devices; i++) {
+    ms1 = omp_get_device_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+    ms2 = omp_get_device_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 == ms2);
+    CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+    ms1 = ms2 = omp_null_mem_space;
+
+    ms1 = omp_get_device_and_host_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+    ms2 = omp_get_device_and_host_memspace(i, predef);
+    CHECK_OR_RET_FAIL(ms1 == ms2);
+    CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+    ms1 = ms2 = omp_null_mem_space;
+
+    // Device lists all_devices[i .. i+count-1] for every valid count.
+    for (count = 1; i + count <= num_devices; count++) {
+      int *devices = &all_devices[i];
+      ms1 = omp_get_devices_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+      ms2 = omp_get_devices_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 == ms2);
+      CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+      ms1 = ms2 = omp_null_mem_space;
+
+      ms1 = omp_get_devices_and_host_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+      ms2 = omp_get_devices_and_host_memspace(count, devices, predef);
+      CHECK_OR_RET_FAIL(ms1 == ms2);
+      CHECK_OR_RET_FAIL(test_user_allocator(ms1) == EXIT_SUCCESS);
+      ms1 = ms2 = omp_null_mem_space;
+    }
+  }
+
+  // Test the following API routines.
+  // * omp_get_devices_all_memspace
+  // Test if runtime returns the same memory space handle for the same input.
+  ms1 = omp_get_devices_all_memspace(predef);
+  CHECK_OR_RET_FAIL(ms1 != omp_null_mem_space);
+  ms2 = omp_get_devices_all_memspace(predef);
+  CHECK_OR_RET_FAIL(ms1 == ms2);
+
+  free(all_devices);
+
+  return EXIT_SUCCESS;
+}
+
+// Exercise the allocator query routines for every single device and for every
+// contiguous run of devices, both with and without host. Each returned
+// allocator is used once and destroyed by test_allocator().
+// Returns EXIT_SUCCESS when all checks pass, EXIT_FAILURE otherwise.
+static int test_mem_allocator(void) {
+  int i, count;
+  int num_devices = omp_get_num_devices();
+  CHECK_OR_RET_FAIL(num_devices == NUM_DEVICES);
+
+  int *all_devices = (int *)malloc(sizeof(int) * num_devices);
+  // Fail the test instead of writing through NULL if the allocation failed.
+  CHECK_OR_RET_FAIL(all_devices != NULL);
+  for (i = 0; i < num_devices; i++)
+    all_devices[i] = i;
+
+  omp_memspace_handle_t predef = omp_default_mem_space;
+  omp_allocator_handle_t al = omp_null_allocator;
+
+  // Test the following API routines.
+  // * omp_get_device_allocator
+  // * omp_get_device_and_host_allocator
+  // * omp_get_devices_allocator
+  // * omp_get_devices_and_host_allocator
+  for (i = 0; i < num_devices; i++) {
+    al = omp_get_device_allocator(i, predef);
+    CHECK_OR_RET_FAIL(al != omp_null_allocator);
+    CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+    al = omp_null_allocator;
+
+    al = omp_get_device_and_host_allocator(i, predef);
+    CHECK_OR_RET_FAIL(al != omp_null_allocator);
+    CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+    al = omp_null_allocator;
+
+    // Device lists all_devices[i .. i+count-1] for every valid count.
+    for (count = 1; i + count <= num_devices; count++) {
+      int *devices = &all_devices[i];
+      al = omp_get_devices_allocator(count, devices, predef);
+      CHECK_OR_RET_FAIL(al != omp_null_allocator);
+      CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+      al = omp_null_allocator;
+
+      al = omp_get_devices_and_host_allocator(count, devices, predef);
+      CHECK_OR_RET_FAIL(al != omp_null_allocator);
+      CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+      al = omp_null_allocator;
+    }
+  }
+
+  // Test the following API routines.
+  // * omp_get_devices_all_allocator
+  al = omp_get_devices_all_allocator(predef);
+  CHECK_OR_RET_FAIL(al != omp_null_allocator);
+  CHECK_OR_RET_FAIL(test_allocator(al) == EXIT_SUCCESS);
+
+  free(all_devices);
+
+  return EXIT_SUCCESS;
+}
+
+// Just test what we can expect from the emulated backend.
+// Checks that the all-devices memory space reports NUM_DEVICES resources,
+// that each single-resource sub memory space is a distinct handle with one
+// resource, and that selecting every resource yields the parent handle.
+// Returns EXIT_SUCCESS when all checks pass, EXIT_FAILURE otherwise.
+static int test_sub_mem_space(void) {
+  int i;
+  omp_memspace_handle_t ms = omp_null_mem_space;
+  ms = omp_get_devices_all_memspace(omp_default_mem_space);
+  CHECK_OR_RET_FAIL(ms != omp_null_mem_space);
+  int num_resources = omp_get_memspace_num_resources(ms);
+  CHECK_OR_RET_FAIL(num_resources == NUM_DEVICES);
+
+  // Check if single-resource sub memspace is correctly returned.
+  for (i = 0; i < num_resources; i++) {
+    omp_memspace_handle_t sub = omp_get_submemspace(ms, 1, &i);
+    CHECK_OR_RET_FAIL(sub != omp_null_mem_space);
+    CHECK_OR_RET_FAIL(sub != ms);
+    int num_sub_resources = omp_get_memspace_num_resources(sub);
+    CHECK_OR_RET_FAIL(num_sub_resources == 1);
+  }
+
+  // Check if all-resource sub memspace is correctly returned.
+  int *resources = (int *)malloc(sizeof(int) * num_resources);
+  // Fail the test instead of writing through NULL if the allocation failed.
+  CHECK_OR_RET_FAIL(resources != NULL);
+  for (i = 0; i < num_resources; i++)
+    resources[i] = i;
+  omp_memspace_handle_t sub = omp_get_submemspace(ms, num_resources, resources);
+  // The index list is only needed for the call above; release it before the
+  // checks so that no exit path leaks it.
+  free(resources);
+  CHECK_OR_RET_FAIL(sub != omp_null_mem_space);
+  CHECK_OR_RET_FAIL(sub == ms);
+
+  return EXIT_SUCCESS;
+}
+
+// Run the three test groups in order and stop at the first failure.
+int main() {
+  if (test_mem_space() != EXIT_SUCCESS)
+    return EXIT_FAILURE;
+
+  if (test_mem_allocator() != EXIT_SUCCESS)
+    return EXIT_FAILURE;
+
+  if (test_sub_mem_space() != EXIT_SUCCESS)
+    return EXIT_FAILURE;
+
+  return EXIT_SUCCESS;
+}
More information about the Openmp-commits
mailing list