[Openmp-commits] [openmp] r272952 - Change hwloc discovery algorithm to print topology only for accessible resources

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Thu Jun 16 13:31:20 PDT 2016


Author: jlpeyton
Date: Thu Jun 16 15:31:19 2016
New Revision: 272952

URL: http://llvm.org/viewvc/llvm-project?rev=272952&view=rev
Log:
Change hwloc discovery algorithm to print topology only for accessible resources

Change the hwloc discovery algorithm to print the topology for only accessible
resources, and to report uniformity correspondingly, similar to what the other
topology discovery algorithms do. This fixes a minor inconsistency between the
total topology reported and the resources used for thread binding when hwloc is
used.

Patch by Andrey Churbanov.

Differential Revision: http://reviews.llvm.org/D21389

Modified:
    openmp/trunk/runtime/src/kmp_affinity.cpp

Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=272952&r1=272951&r2=272952&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Thu Jun 16 15:31:19 2016
@@ -389,9 +389,6 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
     int pkgLevel = 0;
     int coreLevel = 1;
     int threadLevel = 2;
-    nPackages = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_root_obj(__kmp_hwloc_topology), HWLOC_OBJ_SOCKET);
-    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
-    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
 
     if (! KMP_AFFINITY_CAPABLE())
     {
@@ -401,6 +398,8 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
         //
         KMP_ASSERT(__kmp_affinity_type == affinity_none);
 
+        nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0), HWLOC_OBJ_CORE);
+        __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
         __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
         nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
         if (__kmp_affinity_verbose) {
@@ -423,23 +422,34 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
     //
     AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
 
+    //
+    // When affinity is off, this routine will still be called to set
+    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
+    // correctly, and return if affinity is not enabled.
+    //
+
     hwloc_obj_t pu;
     hwloc_obj_t core;
     hwloc_obj_t socket;
     int nActiveThreads = 0;
     int socket_identifier = 0;
+    // re-calculate globals to count only accessible resources
+    __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
     for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, 0);
         socket != NULL;
         socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET, socket),
         socket_identifier++)
     {
         int core_identifier = 0;
+        int num_active_cores = 0;
         for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
             core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
             core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
             core_identifier++)
         {
             int pu_identifier = 0;
+            int num_active_threads = 0;
             for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
                 pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
                 pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
@@ -447,7 +457,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
             {
                 Address addr(3);
                 if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
-                    continue;
+                    continue;         // skip inactive (inaccessible) unit
                 KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                     socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
                 addr.labels[0] = socket_identifier; // package
@@ -455,13 +465,26 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
                 addr.labels[2] = pu_identifier; // pu
                 retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
                 nActiveThreads++;
+                ++num_active_threads; // count active threads per core
+            }
+            if (num_active_threads) { // were there any active threads on the core?
+                ++__kmp_ncores;       // count total active cores
+                ++num_active_cores;   // count active cores per socket
+                if (num_active_threads > __kmp_nThreadsPerCore)
+                    __kmp_nThreadsPerCore = num_active_threads; // calc maximum
             }
         }
+        if (num_active_cores) {       // were there any active cores on the socket?
+            ++nPackages;              // count total active packages
+            if (num_active_cores > nCoresPerPkg)
+                nCoresPerPkg = num_active_cores; // calc maximum
+        }
     }
 
     //
     // If there's only one thread context to bind to, return now.
     //
+    KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
     KMP_ASSERT(nActiveThreads > 0);
     if (nActiveThreads == 1) {
         __kmp_ncores = nPackages = 1;
@@ -514,20 +537,9 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
     qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
 
     //
-    // When affinity is off, this routine will still be called to set
-    // __kmp_ncores, as well as __kmp_nThreadsPerCore,
-    // nCoresPerPkg, & nPackages.  Make sure all these vars are set
-    // correctly, and return if affinity is not enabled.
-    //
-    __kmp_ncores = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE);
-
-    //
     // Check to see if the machine topology is uniform
     //
-    unsigned npackages = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_SOCKET);
-    unsigned ncores = __kmp_ncores;
-    unsigned nthreads = hwloc_get_nbobjs_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU);
-    unsigned uniform = (npackages * nCoresPerPkg * __kmp_nThreadsPerCore == nthreads);
+    unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
 
     //
     // Print the machine topology summary.
@@ -552,7 +564,7 @@ __kmp_affinity_create_hwloc_map(AddrUnsP
         kmp_str_buf_t buf;
         __kmp_str_buf_init(&buf);
 
-        __kmp_str_buf_print(&buf, "%d", npackages);
+        __kmp_str_buf_print(&buf, "%d", nPackages);
         //for (level = 1; level <= pkgLevel; level++) {
         //    __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        // }




More information about the Openmp-commits mailing list