[Openmp-commits] [openmp] r273278 - Improvements to process affinity mask setting
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jun 21 08:54:38 PDT 2016
Author: jlpeyton
Date: Tue Jun 21 10:54:38 2016
New Revision: 273278
URL: http://llvm.org/viewvc/llvm-project?rev=273278&view=rev
Log:
Improvements to process affinity mask setting
A couple of improvements:
1) Add ability to limit fullMask size when KMP_HW_SUBSET limits resources.
2) Make KMP_HW_SUBSET work for affinity_none, and only limit fullMask in this case.
Patch by Andrey Churbanov.
Differential Revision: http://reviews.llvm.org/D21528
Modified:
openmp/trunk/runtime/src/kmp_affinity.cpp
Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=273278&r1=273277&r2=273278&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Tue Jun 21 10:54:38 2016
@@ -249,6 +249,7 @@ static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
+static int *__kmp_pu_os_idx = NULL;
//
// __kmp_affinity_uniform_topology() doesn't work when called from
@@ -421,6 +422,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
// Allocate the data structure to be returned.
//
AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
//
// When affinity is off, this routine will still be called to set
@@ -464,6 +466,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
addr.labels[1] = core_identifier; // core
addr.labels[2] = pu_identifier; // pu
retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
+ __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
nActiveThreads++;
++num_active_threads; // count active threads per core
}
@@ -668,7 +671,16 @@ __kmp_affinity_create_flat_map(AddrUnsPa
KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
__kmp_nThreadsPerCore, __kmp_ncores);
}
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
if (__kmp_affinity_type == affinity_none) {
+ int avail_ct = 0;
+ unsigned int i;
+ KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+ if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
+ continue;
+ __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
+ }
return 0;
}
@@ -686,7 +698,7 @@ __kmp_affinity_create_flat_map(AddrUnsPa
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
continue;
}
-
+ __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
Address addr(1);
addr.labels[0] = i;
(*address2os)[avail_ct++] = AddrUnsPair(addr,i);
@@ -742,6 +754,8 @@ __kmp_affinity_create_proc_group_map(Add
//
*address2os = (AddrUnsPair*)
__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
int avail_ct = 0;
int i;
KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
@@ -751,7 +765,7 @@ __kmp_affinity_create_proc_group_map(Add
if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
continue;
}
-
+ __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
Address addr(2);
addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
@@ -1267,7 +1281,12 @@ __kmp_affinity_create_apicid_map(AddrUns
__kmp_nThreadsPerCore, __kmp_ncores);
}
-
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (i = 0; i < nApics; ++i) {
+ __kmp_pu_os_idx[i] = threadInfo[i].osId;
+ }
if (__kmp_affinity_type == affinity_none) {
__kmp_free(threadInfo);
KMP_CPU_FREE(oldMask);
@@ -1745,7 +1764,12 @@ __kmp_affinity_create_x2apicid_map(AddrU
__kmp_str_buf_free(&buf);
}
-
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (proc = 0; (int)proc < nApics; ++proc) {
+ __kmp_pu_os_idx[proc] = retval[proc].second;
+ }
if (__kmp_affinity_type == affinity_none) {
__kmp_free(last);
__kmp_free(maxCt);
@@ -2491,6 +2515,13 @@ __kmp_affinity_create_cpuinfo_map(AddrUn
}
# endif // KMP_MIC && REDUCE_TEAM_SIZE
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (i = 0; i < num_avail; ++i) { // fill the os indices
+ __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
+ }
+
if (__kmp_affinity_type == affinity_none) {
__kmp_free(lastId);
__kmp_free(totals);
@@ -3391,10 +3422,11 @@ __kmp_affinity_process_placelist(kmp_aff
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
+ int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
if (__kmp_place_num_sockets == 0 &&
__kmp_place_num_cores == 0 &&
__kmp_place_num_threads_per_core == 0 )
- return; // no topology limiting actions requested, exit
+ goto _exit; // no topology limiting actions requested, exit
if (__kmp_place_num_sockets == 0)
__kmp_place_num_sockets = nPackages; // use all available sockets
if (__kmp_place_num_cores == 0)
@@ -3405,42 +3437,66 @@ __kmp_apply_thread_places(AddrUnsPair **
if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffHWSubsetNonUniform );
- return; // don't support non-uniform topology
+ goto _exit; // don't support non-uniform topology
}
- if ( depth != 3 ) {
+ if ( depth > 3 ) {
KMP_WARNING( AffHWSubsetNonThreeLevel );
- return; // don't support not-3-level topology
+ goto _exit; // don't support not-3-level topology
}
if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
KMP_WARNING(AffHWSubsetManySockets);
- return;
+ goto _exit;
}
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
KMP_WARNING( AffHWSubsetManyCores );
- return;
+ goto _exit;
}
- AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
- __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
+ AddrUnsPair *newAddr;
+ if (pAddr) // pAddr is NULL in case of affinity_none
+ newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
+ __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
- int i, j, k, n_old = 0, n_new = 0;
- for (i = 0; i < nPackages; ++i)
+ for (i = 0; i < nPackages; ++i) {
if (i < __kmp_place_socket_offset ||
- i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
+ i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
- else
- for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
+ if (__kmp_pu_os_idx != NULL) {
+ for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
+ for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
if (j < __kmp_place_core_offset ||
- j >= __kmp_place_core_offset + __kmp_place_num_cores)
+ j >= __kmp_place_core_offset + __kmp_place_num_cores) {
n_old += __kmp_nThreadsPerCore; // skip not-requested core
- else
+ if (__kmp_pu_os_idx != NULL) {
+ for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
+ }
+ }
+ } else {
for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
if (k < __kmp_place_num_threads_per_core) {
- newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
+ if (pAddr)
+ newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
n_new++;
+ } else {
+ if (__kmp_pu_os_idx != NULL)
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
n_old++;
+ ++proc_num;
}
+ }
+ }
+ }
+ }
KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
__kmp_place_num_threads_per_core);
@@ -3451,8 +3507,15 @@ __kmp_apply_thread_places(AddrUnsPair **
__kmp_avail_proc = n_new; // correct avail_proc
__kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
- __kmp_free( *pAddr );
- *pAddr = newAddr; // replace old topology with new one
+ if (pAddr) {
+ __kmp_free( *pAddr );
+ *pAddr = newAddr; // replace old topology with new one
+ }
+_exit:
+ if (__kmp_pu_os_idx != NULL) {
+ __kmp_free(__kmp_pu_os_idx);
+ __kmp_pu_os_idx = NULL;
+ }
}
@@ -3460,6 +3523,12 @@ static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;
+#define KMP_EXIT_AFF_NONE \
+ KMP_ASSERT(__kmp_affinity_type == affinity_none); \
+ KMP_ASSERT(address2os == NULL); \
+ __kmp_apply_thread_places(NULL, 0); \
+ return;
+
static void
__kmp_aux_affinity_initialize(void)
{
@@ -3536,9 +3605,7 @@ __kmp_aux_affinity_initialize(void)
if(!__kmp_hwloc_error) {
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
} else if(depth < 0 && __kmp_affinity_verbose) {
KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
}
@@ -3558,9 +3625,7 @@ __kmp_aux_affinity_initialize(void)
file_name = NULL;
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
@@ -3577,9 +3642,7 @@ __kmp_aux_affinity_initialize(void)
file_name = NULL;
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
}
@@ -3607,9 +3670,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
fclose(f);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
}
@@ -3646,9 +3707,7 @@ __kmp_aux_affinity_initialize(void)
file_name = "";
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
KMP_ASSERT(depth > 0);
KMP_ASSERT(address2os != NULL);
@@ -3671,9 +3730,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
@@ -3688,9 +3745,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
@@ -3748,8 +3803,7 @@ __kmp_aux_affinity_initialize(void)
}
if (__kmp_affinity_type == affinity_none) {
KMP_ASSERT(depth == 0);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
@@ -3777,9 +3831,7 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
// should not fail
KMP_ASSERT(depth > 0);
@@ -3793,9 +3845,7 @@ __kmp_aux_affinity_initialize(void)
}
depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
+ KMP_EXIT_AFF_NONE;
}
}
# endif // KMP_USE_HWLOC
@@ -4014,6 +4064,7 @@ __kmp_aux_affinity_initialize(void)
__kmp_free(osId2Mask);
machine_hierarchy.init(address2os, __kmp_avail_proc);
}
+#undef KMP_EXIT_AFF_NONE
void
More information about the Openmp-commits
mailing list