[Openmp-commits] [openmp] r273278 - Improvements to process affinity mask setting
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jun 21 08:54:38 PDT 2016
Author: jlpeyton
Date: Tue Jun 21 10:54:38 2016
New Revision: 273278
URL: http://llvm.org/viewvc/llvm-project?rev=273278&view=rev
Log:
Improvements to process affinity mask setting
A couple of improvements:
1) Add ability to limit fullMask size when KMP_HW_SUBSET limits resources.
2) Make KMP_HW_SUBSET work for affinity_none, and only limit fullMask in this case.
Patch by Andrey Churbanov.
Differential Revision: http://reviews.llvm.org/D21528
Modified:
openmp/trunk/runtime/src/kmp_affinity.cpp
Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=273278&r1=273277&r2=273278&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Tue Jun 21 10:54:38 2016
@@ -249,6 +249,7 @@ static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
+static int *__kmp_pu_os_idx = NULL;
//
// __kmp_affinity_uniform_topology() doesn't work when called from
@@ -421,6 +422,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
// Allocate the data structure to be returned.
//
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
//
// When affinity is off, this routine will still be called to set
@@ -464,6 +466,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
addr.labels[1] = core_identifier; // core
addr.labels[2] = pu_identifier; // pu
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
+ __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
nActiveThreads++;
++num_active_threads; // count active threads per core
}
@@ -668,7 +671,16 @@ __kmp_affinity_create_flat_map(AddrUnsPa
KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
__kmp_nThreadsPerCore, __kmp_ncores);
}
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
if (__kmp_affinity_type == affinity_none) {
+ int avail_ct = 0;
+ unsigned int i;
+ KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+ if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
+ continue;
+ __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
+ }
return 0;
}
@@ -686,7 +698,7 @@ __kmp_affinity_create_flat_map(AddrUnsPa
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
continue;
}
-
+ __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
Address addr(1);
addr.labels[0] = i;
(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
@@ -742,6 +754,8 @@ __kmp_affinity_create_proc_group_map(Add
//
*address2os = (AddrUnsPair*)
__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
int avail_ct = 0;
int i;
KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
@@ -751,7 +765,7 @@ __kmp_affinity_create_proc_group_map(Add
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
continue;
}
-
+ __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
Address addr(2);
addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
@@ -1267,7 +1281,12 @@ __kmp_affinity_create_apicid_map(AddrUns
__kmp_nThreadsPerCore, __kmp_ncores);
}
-
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (i = 0; i < nApics; ++i) {
+ __kmp_pu_os_idx[i] = threadInfo[i].osId;
+ }
if (__kmp_affinity_type == affinity_none) {
__kmp_free(threadInfo);
KMP_CPU_FREE(oldMask);
@@ -1745,7 +1764,12 @@ __kmp_affinity_create_x2apicid_map(AddrU
__kmp_str_buf_free(&buf);
}
-
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (proc = 0; (int)proc < nApics; ++proc) {
+ __kmp_pu_os_idx[proc] = retval[proc].second;
+ }
if (__kmp_affinity_type == affinity_none) {
__kmp_free(last);
__kmp_free(maxCt);
@@ -2491,6 +2515,13 @@ __kmp_affinity_create_cpuinfo_map(AddrUn
}
# endif // KMP_MIC && REDUCE_TEAM_SIZE
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (i = 0; i < num_avail; ++i) { // fill the os indices
+ __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
+ }
+
if (__kmp_affinity_type == affinity_none) {
__kmp_free(lastId);
__kmp_free(totals);
@@ -3391,10 +3422,11 @@ __kmp_affinity_process_placelist(kmp_aff
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
+ int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
if (__kmp_place_num_sockets == 0 &&
__kmp_place_num_cores == 0 &&
__kmp_place_num_threads_per_core == 0 )
- return; // no topology limiting actions requested, exit
+ goto _exit; // no topology limiting actions requested, exit
if (__kmp_place_num_sockets == 0)
__kmp_place_num_sockets = nPackages; // use all available sockets
if (__kmp_place_num_cores == 0)
@@ -3405,42 +3437,66 @@ __kmp_apply_thread_places(AddrUnsPair **
if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffHWSubsetNonUniform );
- return; // don't support non-uniform topology
+ goto _exit; // don't support non-uniform topology
}
- if ( depth != 3 ) {
+ if ( depth > 3 ) {
KMP_WARNING( AffHWSubsetNonThreeLevel );
- return; // don't support not-3-level topology
+ goto _exit; // don't support not-3-level topology
}
if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
KMP_WARNING(AffHWSubsetManySockets);
- return;
+ goto _exit;
}
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
KMP_WARNING( AffHWSubsetManyCores );
- return;
+ goto _exit;
}
- AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
- __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
+ AddrUnsPair *newAddr;
+ if (pAddr) // pAddr is NULL in case of affinity_none
+ newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
+ __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
- int i, j, k, n_old = 0, n_new = 0;
- for (i = 0; i < nPackages; ++i)
+ for (i = 0; i < nPackages; ++i) {
if (i < __kmp_place_socket_offset ||
- i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
+ i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
- else
- for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
+ if (__kmp_pu_os_idx != NULL) {
+ for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
+ for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
if (j < __kmp_place_core_offset ||
- j >= __kmp_place_core_offset + __kmp_place_num_cores)
+ j >= __kmp_place_core_offset + __kmp_place_num_cores) {
n_old += __kmp_nThreadsPerCore; // skip not-requested core
- else
+ if (__kmp_pu_os_idx != NULL) {
+ for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
+ }
+ }
+ } else {
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
if (k < __kmp_place_num_threads_per_core) {
- newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
+ if (pAddr)
+ newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
n_new++;
+ } else {
+ if (__kmp_pu_os_idx != NULL)
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
n_old++;
+ ++proc_num;
}
+ }
+ }
+ }
+ }
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
__kmp_place_num_threads_per_core);
@@ -3451,8 +3507,15 @@ __kmp_apply_thread_places(AddrUnsPair **
__kmp_avail_proc = n_new; // correct avail_proc
__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
- __kmp_free( *pAddr );
- *pAddr = newAddr; // replace old topology with new one
+ if (pAddr) {
+ __kmp_free( *pAddr );
+ *pAddr = newAddr; // replace old topology with new one
+ }
+_exit:
+ if (__kmp_pu_os_idx != NULL) {
+ __kmp_free(__kmp_pu_os_idx);
+ __kmp_pu_os_idx = NULL;
+ }
}
@@ -3460,6 +3523,12 @@ static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;
+#define KMP_EXIT_AFF_NONE \
+ KMP_ASSERT(__kmp_affinity_type == affinity_none); \
+ KMP_ASSERT(address2os == NULL); \
+ __kmp_apply_thread_places(NULL, 0); \
+ return;
+
static void
__kmp_aux_affinity_initialize(void)
{
@@ -3536,9 +3605,7 @@ __kmp_aux_affinity_initialize(void)
if(!__kmp_hwloc_error) {
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
} else if(depth < 0 && __kmp_affinity_verbose) {
KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
}
@@ -3558,9 +3625,7 @@ __kmp_aux_affinity_initialize(void)
file_name = NULL;
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
@@ -3577,9 +3642,7 @@ __kmp_aux_affinity_initialize(void)
file_name = NULL;
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
}
@@ -3607,9 +3670,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
fclose(f);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
}
@@ -3646,9 +3707,7 @@ __kmp_aux_affinity_initialize(void)
file_name = "";
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
KMP_ASSERT(depth > 0);
KMP_ASSERT(address2os != NULL);
@@ -3671,9 +3730,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
@@ -3688,9 +3745,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
@@ -3748,8 +3803,7 @@ __kmp_aux_affinity_initialize(void)
}
if (__kmp_affinity_type == affinity_none) {
KMP_ASSERT(depth == 0);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
@@ -3777,9 +3831,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
// should not fail
KMP_ASSERT(depth > 0);
@@ -3793,9 +3845,7 @@ __kmp_aux_affinity_initialize(void)
}
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
# endif // KMP_USE_HWLOC
@@ -4014,6 +4064,7 @@ __kmp_aux_affinity_initialize(void)
__kmp_free(osId2Mask);
machine_hierarchy.init(address2os, __kmp_avail_proc);
}
+#undef KMP_EXIT_AFF_NONE
void
More information about the Openmp-commits
mailing list