[Openmp-commits] [openmp] [OpenMP] Add memory allocation using hwloc (PR #132843)
via Openmp-commits
openmp-commits at lists.llvm.org
Tue Apr 1 15:29:37 PDT 2025
https://github.com/nawrinsu updated https://github.com/llvm/llvm-project/pull/132843
>From f80b1cc2a2b140b437737ed3ab8ae97aa056d9a0 Mon Sep 17 00:00:00 2001
From: Nawrin Sultana <nawrin.sultana at intel.com>
Date: Mon, 24 Mar 2025 18:19:30 -0500
Subject: [PATCH 1/3] [OpenMP] Add memory allocation using hwloc
This patch adds support for memory allocation using hwloc. To enable
memory allocation using hwloc, the environment variable
KMP_TOPOLOGY_METHOD=hwloc must be set. If hwloc is not
supported/available, allocation will fall back to the default path.
---
openmp/runtime/src/kmp.h | 4 +
openmp/runtime/src/kmp_affinity.cpp | 1 +
openmp/runtime/src/kmp_alloc.cpp | 390 ++++++++++++++++------
openmp/runtime/src/kmp_global.cpp | 1 +
openmp/runtime/src/kmp_settings.cpp | 4 +-
openmp/runtime/test/api/omp_alloc_hwloc.c | 25 ++
6 files changed, 317 insertions(+), 108 deletions(-)
create mode 100644 openmp/runtime/test/api/omp_alloc_hwloc.c
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 9b8c6102dbee2..5a89b8e2d9e5c 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -1107,6 +1107,7 @@ extern omp_allocator_handle_t __kmp_def_allocator;
#endif
extern int __kmp_memkind_available;
+extern bool __kmp_hwloc_available;
typedef omp_memspace_handle_t kmp_memspace_t; // placeholder
@@ -1119,6 +1120,9 @@ typedef struct kmp_allocator_t {
kmp_uint64 pool_size;
kmp_uint64 pool_used;
bool pinned;
+#if KMP_USE_HWLOC
+ omp_alloctrait_value_t membind;
+#endif
} kmp_allocator_t;
extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index c3d5ecf1345e8..f2520db145552 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1444,6 +1444,7 @@ void KMPAffinity::pick_api() {
if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
__kmp_affinity.type != affinity_disabled) {
affinity_dispatch = new KMPHwlocAffinity();
+ __kmp_hwloc_available = true;
} else
#endif
{
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index fb1b0eb5f0fe5..8306dc5add2d3 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1356,6 +1356,62 @@ void __kmp_fini_memkind() {
#endif
}
+#if KMP_USE_HWLOC
+static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
+ const hwloc_topology_support *support;
+ support = hwloc_topology_get_support(__kmp_hwloc_topology);
+ if (support) {
+ if (policy == HWLOC_MEMBIND_BIND)
+ return (support->membind->alloc_membind &&
+ support->membind->bind_membind);
+ if (policy == HWLOC_MEMBIND_INTERLEAVE)
+ return (support->membind->alloc_membind &&
+ support->membind->interleave_membind);
+ }
+ return false;
+}
+
+void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
+ hwloc_membind_policy_t policy) {
+ void *ptr = NULL;
+ hwloc_obj_t node;
+ struct hwloc_location initiator;
+ int ret;
+ // TODO: We should make this more efficient by getting rid of the OS syscall
+ // 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity and instead
+ // use th_affin_mask field when it's capable of getting the underlying
+ // mask implementation.
+ hwloc_cpuset_t mask = hwloc_bitmap_alloc();
+ ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+ if (ret < 0) {
+ hwloc_bitmap_free(mask);
+ return ptr;
+ }
+ initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
+ initiator.location.cpuset = mask;
+ ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
+ &node, NULL);
+ if (ret < 0) {
+ return ptr;
+ }
+ return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
+ HWLOC_MEMBIND_BYNODESET);
+}
+
+void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
+ hwloc_membind_policy_t policy) {
+ void *ptr = NULL;
+ if (ms == omp_high_bw_mem_space) {
+ ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
+ } else if (ms == omp_large_cap_mem_space) {
+ ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
+ } else {
+ ptr = hwloc_alloc(__kmp_hwloc_topology, size);
+ }
+ return ptr;
+}
+#endif
+
void __kmp_init_target_mem() {
*(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
*(void **)(&kmp_target_alloc_shared) =
@@ -1412,6 +1468,13 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
break;
case omp_atk_partition:
+#if KMP_USE_HWLOC
+ al->membind = (omp_alloctrait_value_t)traits[i].value;
+ KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
+ al->membind == omp_atv_nearest ||
+ al->membind == omp_atv_blocked ||
+ al->membind == omp_atv_interleaved);
+#endif
al->memkind = RCAST(void **, traits[i].value);
break;
default:
@@ -1466,7 +1529,8 @@ omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
__kmp_free(al);
return omp_null_allocator;
} else {
- if (ms == omp_high_bw_mem_space) {
+ if (!__kmp_hwloc_available &&
+ (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
// cannot detect HBW memory presence without memkind library
__kmp_free(al);
return omp_null_allocator;
@@ -1573,8 +1637,9 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
if (allocator > kmp_max_mem_alloc)
is_pinned = al->pinned;
- // Use default allocator if libmemkind is not available
- int use_default_allocator = (__kmp_memkind_available) ? false : true;
+ // Use default allocator if hwloc and libmemkind are not available
+ int use_default_allocator =
+ (!__kmp_hwloc_available && !__kmp_memkind_available);
if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
// Use size input directly as the memory may not be accessible on host.
@@ -1610,38 +1675,152 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
}
}
- if (__kmp_memkind_available) {
- if (allocator < kmp_max_mem_alloc) {
- // pre-defined allocator
- if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
- ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
- } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
- ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
+#if KMP_USE_HWLOC
+ if (__kmp_hwloc_available) {
+ if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
+ if (allocator < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (allocator == omp_high_bw_mem_alloc) {
+ ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
+ desc.size_a, HWLOC_MEMBIND_BIND);
+ if (ptr == NULL)
+ use_default_allocator = true;
+ } else if (allocator == omp_large_cap_mem_alloc) {
+ ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
+ desc.size_a, HWLOC_MEMBIND_BIND);
+ if (ptr == NULL)
+ use_default_allocator = true;
+ } else {
+ use_default_allocator = true;
+ }
+ if (use_default_allocator) {
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ }
+ } else if (al->pool_size > 0) {
+ // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+ if (used + desc.size_a > al->pool_size) {
+ // not enough space, need to go fallback path
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ if (al->fb == omp_atv_default_mem_fb) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ } else if (al->fb == omp_atv_abort_fb) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == omp_atv_allocator_fb) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
+ } // else ptr == NULL;
+ } else {
+ // pool has enough space
+ if (al->membind == omp_atv_interleaved) {
+ if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
+ ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
+ HWLOC_MEMBIND_INTERLEAVE);
+ }
+ } else if (al->membind == omp_atv_environment) {
+ ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
+ HWLOC_MEMBIND_DEFAULT);
+ } else {
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ }
+ if (ptr == NULL) {
+ if (al->fb == omp_atv_default_mem_fb) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ } else if (al->fb == omp_atv_abort_fb) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == omp_atv_allocator_fb) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
+ }
+ }
+ }
} else {
- ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ // custom allocator, pool size not requested
+ if (al->membind == omp_atv_interleaved) {
+ if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
+ ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
+ HWLOC_MEMBIND_INTERLEAVE);
+ }
+ } else if (al->membind == omp_atv_environment) {
+ ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
+ HWLOC_MEMBIND_DEFAULT);
+ } else {
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ }
+ if (ptr == NULL) {
+ if (al->fb == omp_atv_default_mem_fb) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ } else if (al->fb == omp_atv_abort_fb) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == omp_atv_allocator_fb) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
+ }
+ }
}
- } else if (al->pool_size > 0) {
- // custom allocator with pool size requested
- kmp_uint64 used =
- KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
- if (used + desc.size_a > al->pool_size) {
- // not enough space, need to go fallback path
- KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
- if (al->fb == omp_atv_default_mem_fb) {
- al = (kmp_allocator_t *)omp_default_mem_alloc;
+ } else { // alloc membind not supported, use hwloc_alloc
+ ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
+ }
+ } else {
+#endif
+ if (__kmp_memkind_available) {
+ if (allocator < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
+ ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
+ } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
+ ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
+ } else {
ptr = kmp_mk_alloc(*mk_default, desc.size_a);
- } else if (al->fb == omp_atv_abort_fb) {
- KMP_ASSERT(0); // abort fallback requested
- } else if (al->fb == omp_atv_allocator_fb) {
- KMP_ASSERT(al != al->fb_data);
- al = al->fb_data;
- ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
- if (is_pinned && kmp_target_lock_mem)
- kmp_target_lock_mem(ptr, size, default_device);
- return ptr;
- } // else ptr == NULL;
+ }
+ } else if (al->pool_size > 0) {
+ // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+ if (used + desc.size_a > al->pool_size) {
+ // not enough space, need to go fallback path
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ if (al->fb == omp_atv_default_mem_fb) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ } else if (al->fb == omp_atv_abort_fb) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == omp_atv_allocator_fb) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
+ if (is_pinned && kmp_target_lock_mem)
+ kmp_target_lock_mem(ptr, size, default_device);
+ return ptr;
+ } // else ptr == NULL;
+ } else {
+ // pool has enough space
+ ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
+ if (ptr == NULL) {
+ if (al->fb == omp_atv_default_mem_fb) {
+ al = (kmp_allocator_t *)omp_default_mem_alloc;
+ ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ } else if (al->fb == omp_atv_abort_fb) {
+ KMP_ASSERT(0); // abort fallback requested
+ } else if (al->fb == omp_atv_allocator_fb) {
+ KMP_ASSERT(al != al->fb_data);
+ al = al->fb_data;
+ ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
+ if (is_pinned && kmp_target_lock_mem)
+ kmp_target_lock_mem(ptr, size, default_device);
+ return ptr;
+ }
+ }
+ }
} else {
- // pool has enough space
+ // custom allocator, pool size not requested
ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
if (ptr == NULL) {
if (al->fb == omp_atv_default_mem_fb) {
@@ -1659,13 +1838,39 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
}
}
}
- } else {
- // custom allocator, pool size not requested
- ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
- if (ptr == NULL) {
+ } else if (allocator < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (allocator == omp_high_bw_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
+ } else if (allocator == omp_large_cap_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
+ } else if (allocator == omp_const_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
+ } else if (allocator == omp_low_lat_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
+ } else if (allocator == omp_cgroup_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
+ } else if (allocator == omp_pteam_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
+ } else if (allocator == omp_thread_mem_alloc) {
+ KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
+ } else { // default allocator requested
+ use_default_allocator = true;
+ }
+ if (use_default_allocator) {
+ ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
+ use_default_allocator = false;
+ }
+ } else if (al->pool_size > 0) {
+ // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
+ if (used + desc.size_a > al->pool_size) {
+ // not enough space, need to go fallback path
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
if (al->fb == omp_atv_default_mem_fb) {
al = (kmp_allocator_t *)omp_default_mem_alloc;
- ptr = kmp_mk_alloc(*mk_default, desc.size_a);
+ ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
} else if (al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} else if (al->fb == omp_atv_allocator_fb) {
@@ -1675,66 +1880,25 @@ void *__kmp_alloc(int gtid, size_t algn, size_t size,
if (is_pinned && kmp_target_lock_mem)
kmp_target_lock_mem(ptr, size, default_device);
return ptr;
- }
- }
- }
- } else if (allocator < kmp_max_mem_alloc) {
- // pre-defined allocator
- if (allocator == omp_high_bw_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
- } else if (allocator == omp_large_cap_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
- } else if (allocator == omp_const_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
- } else if (allocator == omp_low_lat_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
- } else if (allocator == omp_cgroup_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
- } else if (allocator == omp_pteam_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
- } else if (allocator == omp_thread_mem_alloc) {
- KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
- } else { // default allocator requested
- use_default_allocator = true;
- }
- if (use_default_allocator) {
- ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
- use_default_allocator = false;
- }
- } else if (al->pool_size > 0) {
- // custom allocator with pool size requested
- kmp_uint64 used =
- KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
- if (used + desc.size_a > al->pool_size) {
- // not enough space, need to go fallback path
- KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
- if (al->fb == omp_atv_default_mem_fb) {
- al = (kmp_allocator_t *)omp_default_mem_alloc;
+ } // else ptr == NULL
+ } else {
+ // pool has enough space
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
- } else if (al->fb == omp_atv_abort_fb) {
- KMP_ASSERT(0); // abort fallback requested
- } else if (al->fb == omp_atv_allocator_fb) {
- KMP_ASSERT(al != al->fb_data);
- al = al->fb_data;
- ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
- if (is_pinned && kmp_target_lock_mem)
- kmp_target_lock_mem(ptr, size, default_device);
- return ptr;
- } // else ptr == NULL;
+ if (ptr == NULL && al->fb == omp_atv_abort_fb) {
+ KMP_ASSERT(0); // abort fallback requested
+ } // no sense to look for another fallback because of same internal
+ // alloc
+ }
} else {
- // pool has enough space
+ // custom allocator, pool size not requested
ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
if (ptr == NULL && al->fb == omp_atv_abort_fb) {
KMP_ASSERT(0); // abort fallback requested
} // no sense to look for another fallback because of same internal alloc
}
- } else {
- // custom allocator, pool size not requested
- ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
- if (ptr == NULL && al->fb == omp_atv_abort_fb) {
- KMP_ASSERT(0); // abort fallback requested
- } // no sense to look for another fallback because of same internal alloc
+#if KMP_USE_HWLOC
}
+#endif
KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
if (ptr == NULL)
return NULL;
@@ -1864,34 +2028,48 @@ void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
kmp_target_unlock_mem(desc.ptr_alloc, device);
}
- if (__kmp_memkind_available) {
- if (oal < kmp_max_mem_alloc) {
- // pre-defined allocator
- if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
- kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
- } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
- kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
+#if KMP_USE_HWLOC
+ if (__kmp_hwloc_available) {
+ if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ (void)used; // to suppress compiler warning
+ KMP_DEBUG_ASSERT(used >= desc.size_a);
+ }
+ hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
+ } else {
+#endif
+ if (__kmp_memkind_available) {
+ if (oal < kmp_max_mem_alloc) {
+ // pre-defined allocator
+ if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
+ kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
+ } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
+ kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
+ } else {
+ kmp_mk_free(*mk_default, desc.ptr_alloc);
+ }
} else {
- kmp_mk_free(*mk_default, desc.ptr_alloc);
+ if (al->pool_size > 0) { // custom allocator with pool size requested
+ kmp_uint64 used =
+ KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
+ (void)used; // to suppress compiler warning
+ KMP_DEBUG_ASSERT(used >= desc.size_a);
+ }
+ kmp_mk_free(*al->memkind, desc.ptr_alloc);
}
} else {
- if (al->pool_size > 0) { // custom allocator with pool size requested
+ if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
kmp_uint64 used =
KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
(void)used; // to suppress compiler warning
KMP_DEBUG_ASSERT(used >= desc.size_a);
}
- kmp_mk_free(*al->memkind, desc.ptr_alloc);
+ __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
}
- } else {
- if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
- kmp_uint64 used =
- KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
- (void)used; // to suppress compiler warning
- KMP_DEBUG_ASSERT(used >= desc.size_a);
- }
- __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
+#if KMP_USE_HWLOC
}
+#endif
}
/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 52e0fdbdfb1da..eb077bca4ce21 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -296,6 +296,7 @@ kmp_int32 __kmp_max_task_priority = 0;
kmp_uint64 __kmp_taskloop_min_tasks = 0;
int __kmp_memkind_available = 0;
+bool __kmp_hwloc_available = false;
omp_allocator_handle_t const omp_null_allocator = NULL;
omp_allocator_handle_t const omp_default_mem_alloc =
(omp_allocator_handle_t const)1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 8b6092cb1085c..392a02ebbd9aa 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3767,7 +3767,7 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
if (__kmp_match_str("omp_high_bw_mem_alloc", scan, &next)) {
SKIP_WS(next);
if (is_memalloc) {
- if (__kmp_memkind_available) {
+ if (__kmp_hwloc_available || __kmp_memkind_available) {
__kmp_def_allocator = omp_high_bw_mem_alloc;
return;
} else {
@@ -3780,7 +3780,7 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
} else if (__kmp_match_str("omp_large_cap_mem_alloc", scan, &next)) {
SKIP_WS(next);
if (is_memalloc) {
- if (__kmp_memkind_available) {
+ if (__kmp_hwloc_available || __kmp_memkind_available) {
__kmp_def_allocator = omp_large_cap_mem_alloc;
return;
} else {
diff --git a/openmp/runtime/test/api/omp_alloc_hwloc.c b/openmp/runtime/test/api/omp_alloc_hwloc.c
new file mode 100644
index 0000000000000..7cdcae9b36e8a
--- /dev/null
+++ b/openmp/runtime/test/api/omp_alloc_hwloc.c
@@ -0,0 +1,25 @@
+// RUN: %libomp-compile && env KMP_TOPOLOGY_METHOD=hwloc %libomp-run
+// REQUIRES: hwloc
+
+#include <stdio.h>
+#include <omp.h>
+
+int main() {
+ void *p[2];
+#pragma omp parallel num_threads(2)
+ {
+ int i = omp_get_thread_num();
+ p[i] = omp_alloc(1024 * 1024, omp_get_default_allocator());
+#pragma omp barrier
+ printf("th %d, ptr %p\n", i, p[i]);
+ omp_free(p[i], omp_get_default_allocator());
+ }
+ // Both pointers should be non-NULL
+ if (p[0] != NULL && p[1] != NULL) {
+ printf("passed\n");
+ return 0;
+ } else {
+ printf("failed: pointers %p %p\n", p[0], p[1]);
+ return 1;
+ }
+}
>From e90eb18666ed12be76741995342f2e09727f5ed3 Mon Sep 17 00:00:00 2001
From: Nawrin Sultana <nawrin.sultana at intel.com>
Date: Tue, 1 Apr 2025 15:07:12 -0500
Subject: [PATCH 2/3] [OpenMP] Add hwloc API version check
---
openmp/runtime/src/kmp_alloc.cpp | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 8306dc5add2d3..83a4c066f5a27 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -1358,6 +1358,7 @@ void __kmp_fini_memkind() {
#if KMP_USE_HWLOC
static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
+#if HWLOC_API_VERSION >= 0x00020400
const hwloc_topology_support *support;
support = hwloc_topology_get_support(__kmp_hwloc_topology);
if (support) {
@@ -1369,10 +1370,14 @@ static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
support->membind->interleave_membind);
}
return false;
+#else
+ return false;
+#endif
}
void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
hwloc_membind_policy_t policy) {
+#if HWLOC_API_VERSION >= 0x00020400
void *ptr = NULL;
hwloc_obj_t node;
struct hwloc_location initiator;
@@ -1396,10 +1401,14 @@ void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
}
return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
HWLOC_MEMBIND_BYNODESET);
+#else
+ return NULL;
+#endif
}
void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
hwloc_membind_policy_t policy) {
+#if HWLOC_API_VERSION >= 0x00020400
void *ptr = NULL;
if (ms == omp_high_bw_mem_space) {
ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
@@ -1409,8 +1418,11 @@ void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
ptr = hwloc_alloc(__kmp_hwloc_topology, size);
}
return ptr;
-}
+#else
+ return NULL;
#endif
+}
+#endif // KMP_USE_HWLOC
void __kmp_init_target_mem() {
*(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
>From 41c4b57e16fda8653d60a60e25e98bd3828485bc Mon Sep 17 00:00:00 2001
From: Nawrin Sultana <nawrin.sultana at intel.com>
Date: Tue, 1 Apr 2025 17:29:02 -0500
Subject: [PATCH 3/3] [OpenMP] Fix hwloc version
---
openmp/runtime/src/kmp_alloc.cpp | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp
index 83a4c066f5a27..783d9ffe88aa3 100644
--- a/openmp/runtime/src/kmp_alloc.cpp
+++ b/openmp/runtime/src/kmp_alloc.cpp
@@ -14,6 +14,20 @@
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"
+#if KMP_USE_HWLOC
+#if HWLOC_API_VERSION > 0x00020300
+#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
+#elif HWLOC_API_VERSION == 0x00020300
+#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
+ hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
+#else
+enum hwloc_memattr_id_e {
+ HWLOC_MEMATTR_ID_BANDWIDTH,
+ HWLOC_MEMATTR_ID_CAPACITY
+};
+#endif
+#endif // KMP_USE_HWLOC
+
// Disable bget when it is not used
#if KMP_USE_BGET
@@ -1358,7 +1372,7 @@ void __kmp_fini_memkind() {
#if KMP_USE_HWLOC
static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
-#if HWLOC_API_VERSION >= 0x00020400
+#if HWLOC_API_VERSION >= 0x00020300
const hwloc_topology_support *support;
support = hwloc_topology_get_support(__kmp_hwloc_topology);
if (support) {
@@ -1377,7 +1391,7 @@ static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
hwloc_membind_policy_t policy) {
-#if HWLOC_API_VERSION >= 0x00020400
+#if HWLOC_API_VERSION >= 0x00020300
void *ptr = NULL;
hwloc_obj_t node;
struct hwloc_location initiator;
@@ -1392,7 +1406,7 @@ void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
hwloc_bitmap_free(mask);
return ptr;
}
- initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
+ initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
initiator.location.cpuset = mask;
ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
&node, NULL);
@@ -1408,7 +1422,7 @@ void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
hwloc_membind_policy_t policy) {
-#if HWLOC_API_VERSION >= 0x00020400
+#if HWLOC_API_VERSION >= 0x00020300
void *ptr = NULL;
if (ms == omp_high_bw_mem_space) {
ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
More information about the Openmp-commits
mailing list