[Openmp-commits] [openmp] r247326 - Cleanup of affinity hierarchy code.

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Thu Sep 10 12:22:08 PDT 2015


Author: jlpeyton
Date: Thu Sep 10 14:22:07 2015
New Revision: 247326

URL: http://llvm.org/viewvc/llvm-project?rev=247326&view=rev
Log:
Cleanup of affinity hierarchy code.

Some of these are code improvements suggested by Hal Finkel. There are four changes:
1. Clean up the hierarchy code so it handles all hierarchy cases, whether or not affinity is available.
2. Move the hierarchy class, the other affinity classes, and common functions out to a header file.
3. Add a destructor-like fini() function for the hierarchy, and call it from __kmp_cleanup.
4. Remove some redundant code that is hopefully no longer needed.

Differential Revision: http://reviews.llvm.org/D12449

Added:
    openmp/trunk/runtime/src/kmp_affinity.h   (with props)
Modified:
    openmp/trunk/runtime/src/kmp.h
    openmp/trunk/runtime/src/kmp_affinity.cpp
    openmp/trunk/runtime/src/kmp_runtime.c

Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=247326&r1=247325&r2=247326&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Sep 10 14:22:07 2015
@@ -2984,6 +2984,7 @@ extern int __kmp_aux_get_affinity_mask_p
 extern void __kmp_balanced_affinity( int tid, int team_size );
 #endif /* KMP_AFFINITY_SUPPORTED */
 
+extern void __kmp_cleanup_hierarchy();
 extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
 
 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)

Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=247326&r1=247325&r2=247326&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Thu Sep 10 14:22:07 2015
@@ -18,6 +18,34 @@
 #include "kmp_io.h"
 #include "kmp_str.h"
 #include "kmp_wrapper_getpid.h"
+#include "kmp_affinity.h"
+
+// Store the real or imagined machine hierarchy here
+static hierarchy_info machine_hierarchy;
+
+void __kmp_cleanup_hierarchy() { // Release the arrays owned by machine_hierarchy; called from __kmp_cleanup
+    machine_hierarchy.fini();
+}
+
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { // Fill thr_bar's depth, base_leaf_kids and skip_per_level from machine_hierarchy
+    kmp_uint32 depth;
+    // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
+    if (TCR_1(machine_hierarchy.uninitialized))
+        machine_hierarchy.init(NULL, nproc);
+    // Grow the hierarchy when nproc exceeds the thread count it was last sized for
+    if (nproc > machine_hierarchy.base_num_threads)
+        machine_hierarchy.resize(nproc);
+
+    depth = machine_hierarchy.depth;
+    KMP_DEBUG_ASSERT(depth > 0);
+    // After a resize, machine_hierarchy.depth is unchanged; walk up until a level's skip covers nproc
+    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
+        depth++;
+
+    thr_bar->depth = depth;
+    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
+    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; // NOTE(review): resize() frees the old array, so a pointer stored here earlier may dangle — confirm callers refresh it
+}
 
 #if KMP_AFFINITY_SUPPORTED
 
@@ -108,393 +136,6 @@ __kmp_affinity_entire_machine_mask(kmp_a
     }
 }
 
-
-//
-// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
-// functions.
-//
-// The icc codegen emits sections with extremely long names, of the form
-// ".gnu.linkonce.<mangled_name>".  There seems to have been a linker bug
-// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
-// some sort of memory corruption or table overflow that is triggered by
-// these long strings.  I checked the latest version of the linker -
-// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
-// fixed.
-//
-// Unfortunately, my attempts to reproduce it in a smaller example have
-// failed - I'm not sure what the prospects are of getting it fixed
-// properly - but we need a reproducer smaller than all of libomp.
-//
-// Work around the problem by avoiding inline constructors in such builds.
-// We do this for all platforms, not just Linux* OS - non-inline functions are
-// more debuggable and provide better coverage into than inline functions.
-// Use inline functions in shipping libs, for performance.
-//
-
-# if !defined(KMP_DEBUG) && !defined(COVER)
-
-class Address {
-public:
-    static const unsigned maxDepth = 32;
-    unsigned labels[maxDepth];
-    unsigned childNums[maxDepth];
-    unsigned depth;
-    unsigned leader;
-    Address(unsigned _depth)
-      : depth(_depth), leader(FALSE) {
-    }
-    Address &operator=(const Address &b) {
-        depth = b.depth;
-        for (unsigned i = 0; i < depth; i++) {
-            labels[i] = b.labels[i];
-            childNums[i] = b.childNums[i];
-        }
-        leader = FALSE;
-        return *this;
-    }
-    bool operator==(const Address &b) const {
-        if (depth != b.depth)
-            return false;
-        for (unsigned i = 0; i < depth; i++)
-            if(labels[i] != b.labels[i])
-                return false;
-        return true;
-    }
-    bool isClose(const Address &b, int level) const {
-        if (depth != b.depth)
-            return false;
-        if ((unsigned)level >= depth)
-            return true;
-        for (unsigned i = 0; i < (depth - level); i++)
-            if(labels[i] != b.labels[i])
-                return false;
-        return true;
-    }
-    bool operator!=(const Address &b) const {
-        return !operator==(b);
-    }
-};
-
-class AddrUnsPair {
-public:
-    Address first;
-    unsigned second;
-    AddrUnsPair(Address _first, unsigned _second)
-      : first(_first), second(_second) {
-    }
-    AddrUnsPair &operator=(const AddrUnsPair &b)
-    {
-        first = b.first;
-        second = b.second;
-        return *this;
-    }
-};
-
-# else
-
-class Address {
-public:
-    static const unsigned maxDepth = 32;
-    unsigned labels[maxDepth];
-    unsigned childNums[maxDepth];
-    unsigned depth;
-    unsigned leader;
-    Address(unsigned _depth);
-    Address &operator=(const Address &b);
-    bool operator==(const Address &b) const;
-    bool isClose(const Address &b, int level) const;
-    bool operator!=(const Address &b) const;
-};
-
-Address::Address(unsigned _depth)
-{
-    depth = _depth;
-    leader = FALSE;
-}
-
-Address &Address::operator=(const Address &b) {
-    depth = b.depth;
-    for (unsigned i = 0; i < depth; i++) {
-        labels[i] = b.labels[i];
-        childNums[i] = b.childNums[i];
-    }
-    leader = FALSE;
-    return *this;
-}
-
-bool Address::operator==(const Address &b) const {
-    if (depth != b.depth)
-        return false;
-    for (unsigned i = 0; i < depth; i++)
-        if(labels[i] != b.labels[i])
-            return false;
-    return true;
-}
-
-bool Address::isClose(const Address &b, int level) const {
-    if (depth != b.depth)
-        return false;
-    if ((unsigned)level >= depth)
-        return true;
-    for (unsigned i = 0; i < (depth - level); i++)
-        if(labels[i] != b.labels[i])
-            return false;
-    return true;
-}
-
-bool Address::operator!=(const Address &b) const {
-    return !operator==(b);
-}
-
-class AddrUnsPair {
-public:
-    Address first;
-    unsigned second;
-    AddrUnsPair(Address _first, unsigned _second);
-    AddrUnsPair &operator=(const AddrUnsPair &b);
-};
-
-AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
-  : first(_first), second(_second)
-{
-}
-
-AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
-{
-    first = b.first;
-    second = b.second;
-    return *this;
-}
-
-# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
-
-
-static int
-__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
-{
-    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
-      ->first);
-    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
-      ->first);
-    unsigned depth = aa->depth;
-    unsigned i;
-    KMP_DEBUG_ASSERT(depth == bb->depth);
-    for (i  = 0; i < depth; i++) {
-        if (aa->labels[i] < bb->labels[i]) return -1;
-        if (aa->labels[i] > bb->labels[i]) return 1;
-    }
-    return 0;
-}
-
-
-static int
-__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
-{
-    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
-      ->first);
-    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
-      ->first);
-    unsigned depth = aa->depth;
-    unsigned i;
-    KMP_DEBUG_ASSERT(depth == bb->depth);
-    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
-    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
-    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
-        int j = depth - i - 1;
-        if (aa->childNums[j] < bb->childNums[j]) return -1;
-        if (aa->childNums[j] > bb->childNums[j]) return 1;
-    }
-    for (; i < depth; i++) {
-        int j = i - __kmp_affinity_compact;
-        if (aa->childNums[j] < bb->childNums[j]) return -1;
-        if (aa->childNums[j] > bb->childNums[j]) return 1;
-    }
-    return 0;
-}
-
-/** A structure for holding machine-specific hierarchy info to be computed once at init.
-    This structure represents a mapping of threads to the actual machine hierarchy, or to
-    our best guess at what the hierarchy might be, for the purpose of performing an
-    efficient barrier.  In the worst case, when there is no machine hierarchy information,
-    it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
-class hierarchy_info {
-public:
-    /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
-    or socket, packages/node, nodes/machine, etc.  We don't want to get specific with
-    nomenclature.  When the machine is oversubscribed we add levels to duplicate the
-    hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
-    kmp_uint32 maxLevels;
-
-    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
-        number of levels along the longest path from root to any leaf. It corresponds to the
-        number of entries in numPerLevel if we exclude all but one trailing 1. */
-    kmp_uint32 depth;
-    kmp_uint32 base_num_threads;
-    volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
-    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
-
-    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
-        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
-        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
-    kmp_uint32 *numPerLevel;
-    kmp_uint32 *skipPerLevel;
-
-    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
-        int hier_depth = adr2os[0].first.depth;
-        int level = 0;
-        for (int i=hier_depth-1; i>=0; --i) {
-            int max = -1;
-            for (int j=0; j<num_addrs; ++j) {
-                int next = adr2os[j].first.childNums[i];
-                if (next > max) max = next;
-            }
-            numPerLevel[level] = max+1;
-            ++level;
-        }
-    }
-
-    hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}
-
-    // TO FIX: This destructor causes a segfault in the library at shutdown.
-    //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
-
-    void init(AddrUnsPair *adr2os, int num_addrs)
-    {
-        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
-        if (bool_result == 0) { // Wait for initialization
-            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
-            return;
-        }
-        KMP_DEBUG_ASSERT(bool_result==1);
-
-        /* Added explicit initialization of the data fields here to prevent usage of dirty value
-           observed when static library is re-initialized multiple times (e.g. when
-           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
-        depth = 1;
-        resizing = 0;
-        maxLevels = 7;
-        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
-        skipPerLevel = &(numPerLevel[maxLevels]);
-        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-            numPerLevel[i] = 1;
-            skipPerLevel[i] = 1;
-        }
-
-        // Sort table by physical ID
-        if (adr2os) {
-            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
-            deriveLevels(adr2os, num_addrs);
-        }
-        else {
-            numPerLevel[0] = 4;
-            numPerLevel[1] = num_addrs/4;
-            if (num_addrs%4) numPerLevel[1]++;
-        }
-
-        base_num_threads = num_addrs;
-        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
-            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
-                depth++;
-
-        kmp_uint32 branch = 4;
-        if (numPerLevel[0] == 1) branch = num_addrs/4;
-        if (branch<4) branch=4;
-        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
-            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
-                if (numPerLevel[d] & 1) numPerLevel[d]++;
-                numPerLevel[d] = numPerLevel[d] >> 1;
-                if (numPerLevel[d+1] == 1) depth++;
-                numPerLevel[d+1] = numPerLevel[d+1] << 1;
-            }
-            if(numPerLevel[0] == 1) {
-                branch = branch >> 1;
-                if (branch<4) branch = 4;
-            }
-        }
-
-        for (kmp_uint32 i=1; i<depth; ++i)
-            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
-        // Fill in hierarchy in the case of oversubscription
-        for (kmp_uint32 i=depth; i<maxLevels; ++i)
-            skipPerLevel[i] = 2*skipPerLevel[i-1];
-
-        uninitialized = 0; // One writer
-
-    }
-
-    void resize(kmp_uint32 nproc)
-    {
-        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
-        if (bool_result == 0) { // Someone else is resizing
-            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
-            return;
-        }
-        KMP_DEBUG_ASSERT(bool_result!=0);
-        KMP_DEBUG_ASSERT(nproc > base_num_threads);
-
-        // Calculate new max_levels
-        kmp_uint32 old_sz = skipPerLevel[depth-1];
-        kmp_uint32 incs = 0, old_maxLevels= maxLevels;
-        while (nproc > old_sz) {
-            old_sz *=2;
-            incs++;
-        }
-        maxLevels += incs;
-
-        // Resize arrays
-        kmp_uint32 *old_numPerLevel = numPerLevel;
-        kmp_uint32 *old_skipPerLevel = skipPerLevel;
-        numPerLevel = skipPerLevel = NULL;
-        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
-        skipPerLevel = &(numPerLevel[maxLevels]);
-
-        // Copy old elements from old arrays
-        for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-            numPerLevel[i] = old_numPerLevel[i];
-            skipPerLevel[i] = old_skipPerLevel[i];
-        }
-
-        // Init new elements in arrays to 1
-        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-            numPerLevel[i] = 1;
-            skipPerLevel[i] = 1;
-        }
-
-        // Free old arrays
-        __kmp_free(old_numPerLevel);
-
-        // Fill in oversubscription levels of hierarchy
-        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
-            skipPerLevel[i] = 2*skipPerLevel[i-1];
-
-        base_num_threads = nproc;
-        resizing = 0; // One writer
-
-    }
-};
-
-static hierarchy_info machine_hierarchy;
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
-    kmp_uint32 depth;
-    // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
-    if (TCR_1(machine_hierarchy.uninitialized))
-        machine_hierarchy.init(NULL, nproc);
-    // Adjust the hierarchy in case num threads exceeds original 
-    if (nproc > machine_hierarchy.base_num_threads)
-        machine_hierarchy.resize(nproc);
-
-    depth = machine_hierarchy.depth;
-    KMP_DEBUG_ASSERT(depth > 0);
-    // The loop below adjusts the depth in the case of a resize
-    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
-        depth++;
-
-    thr_bar->depth = depth;
-    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
-    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
-}
-
 //
 // When sorting by labels, __kmp_affinity_assign_child_nums() must first be
 // called to renumber the labels from [0..n] and place them into the child_num
@@ -4683,73 +4324,4 @@ void __kmp_balanced_affinity( int tid, i
     }
 }
 
-#else
-    // affinity not supported
-
-static const kmp_uint32 noaff_maxLevels=7;
-kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
-kmp_uint32 noaff_depth;
-kmp_uint8 noaff_leaf_kids;
-kmp_int8 noaff_uninitialized=1;
-
-void noaff_init(int nprocs)
-{
-    kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
-    if (result == 0) return; // Already initialized
-    else if (result == 2) { // Someone else is initializing
-        while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
-        return;
-    }
-    KMP_DEBUG_ASSERT(result==1);
-
-    kmp_uint32 numPerLevel[noaff_maxLevels];
-    noaff_depth = 1;
-    for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
-        numPerLevel[i] = 1;
-        noaff_skipPerLevel[i] = 1;
-    }
-
-    numPerLevel[0] = 4;
-    numPerLevel[1] = nprocs/4;
-    if (nprocs%4) numPerLevel[1]++;
-
-    for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
-        if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
-            noaff_depth++;
-
-    kmp_uint32 branch = 4;
-    if (numPerLevel[0] == 1) branch = nprocs/4;
-    if (branch<4) branch=4;
-    for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
-        while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
-            if (numPerLevel[d] & 1) numPerLevel[d]++;
-            numPerLevel[d] = numPerLevel[d] >> 1;
-            if (numPerLevel[d+1] == 1) noaff_depth++;
-            numPerLevel[d+1] = numPerLevel[d+1] << 1;
-        }
-        if(numPerLevel[0] == 1) {
-            branch = branch >> 1;
-            if (branch<4) branch = 4;
-        }
-    }
-
-    for (kmp_uint32 i=1; i<noaff_depth; ++i)
-        noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
-    // Fill in hierarchy in the case of oversubscription
-    for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
-        noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
-    noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
-    noaff_uninitialized = 0; // One writer
-
-}
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
-    if (noaff_uninitialized)
-        noaff_init(nproc);
-
-    thr_bar->depth = noaff_depth;
-    thr_bar->base_leaf_kids = noaff_leaf_kids;
-    thr_bar->skip_per_level = noaff_skipPerLevel;
-}
-
 #endif // KMP_AFFINITY_SUPPORTED

Added: openmp/trunk/runtime/src/kmp_affinity.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.h?rev=247326&view=auto
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.h (added)
+++ openmp/trunk/runtime/src/kmp_affinity.h Thu Sep 10 14:22:07 2015
@@ -0,0 +1,280 @@
+/*
+ * kmp_affinity.h -- header for affinity management
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_AFFINITY_H
+#define KMP_AFFINITY_H
+
+extern int __kmp_affinity_compact; /* Affinity 'compact' value */
+
+class Address {
+public:
+    static const unsigned maxDepth = 32;
+    unsigned labels[maxDepth];
+    unsigned childNums[maxDepth];
+    unsigned depth;
+    unsigned leader;
+    Address(unsigned _depth)
+      : depth(_depth), leader(FALSE) {
+    }
+    Address &operator=(const Address &b) {
+        depth = b.depth;
+        for (unsigned i = 0; i < depth; i++) {
+            labels[i] = b.labels[i];
+            childNums[i] = b.childNums[i];
+        }
+        leader = FALSE;
+        return *this;
+    }
+    bool operator==(const Address &b) const {
+        if (depth != b.depth)
+            return false;
+        for (unsigned i = 0; i < depth; i++)
+            if(labels[i] != b.labels[i])
+                return false;
+        return true;
+    }
+    bool isClose(const Address &b, int level) const {
+        if (depth != b.depth)
+            return false;
+        if ((unsigned)level >= depth)
+            return true;
+        for (unsigned i = 0; i < (depth - level); i++)
+            if(labels[i] != b.labels[i])
+                return false;
+        return true;
+    }
+    bool operator!=(const Address &b) const {
+        return !operator==(b);
+    }
+};
+
+class AddrUnsPair {
+public:
+    Address first;
+    unsigned second;
+    AddrUnsPair(Address _first, unsigned _second)
+      : first(_first), second(_second) {
+    }
+    AddrUnsPair &operator=(const AddrUnsPair &b)
+    {
+        first = b.first;
+        second = b.second;
+        return *this;
+    }
+};
+
+
+static int
+__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
+{
+    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
+      ->first);
+    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
+      ->first);
+    unsigned depth = aa->depth;
+    unsigned i;
+    KMP_DEBUG_ASSERT(depth == bb->depth);
+    for (i  = 0; i < depth; i++) {
+        if (aa->labels[i] < bb->labels[i]) return -1;
+        if (aa->labels[i] > bb->labels[i]) return 1;
+    }
+    return 0;
+}
+
+
+static int
+__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
+{
+    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
+      ->first);
+    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
+      ->first);
+    unsigned depth = aa->depth;
+    unsigned i;
+    KMP_DEBUG_ASSERT(depth == bb->depth);
+    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
+    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
+    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
+        int j = depth - i - 1;
+        if (aa->childNums[j] < bb->childNums[j]) return -1;
+        if (aa->childNums[j] > bb->childNums[j]) return 1;
+    }
+    for (; i < depth; i++) {
+        int j = i - __kmp_affinity_compact;
+        if (aa->childNums[j] < bb->childNums[j]) return -1;
+        if (aa->childNums[j] > bb->childNums[j]) return 1;
+    }
+    return 0;
+}
+
+
+/** A structure for holding machine-specific hierarchy info to be computed once at init. */
+class hierarchy_info {
+public:
+    /** Good default values for number of leaves and branching factor, given no affinity information.
+	Behaves a bit like hyper barrier. */
+    static const kmp_uint32 maxLeaves=4;
+    static const kmp_uint32 minBranch=4;
+    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
+        etc.  We don't want to get specific with nomenclature */
+    kmp_uint32 maxLevels;
+
+    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
+        number of levels along the longest path from root to any leaf. It corresponds to the
+        number of entries in numPerLevel if we exclude all but one trailing 1. */
+    kmp_uint32 depth;
+    kmp_uint32 base_num_threads;
+    enum init_status { initialized=0, not_initialized=1, initializing=2 };
+    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
+    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
+
+    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
+        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
+        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
+    kmp_uint32 *numPerLevel;
+    kmp_uint32 *skipPerLevel;
+
+    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
+        int hier_depth = adr2os[0].first.depth;
+        int level = 0;
+        for (int i=hier_depth-1; i>=0; --i) {
+            int max = -1;
+            for (int j=0; j<num_addrs; ++j) {
+                int next = adr2os[j].first.childNums[i];
+                if (next > max) max = next;
+            }
+            numPerLevel[level] = max+1;
+            ++level;
+        }
+    }
+
+    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
+
+    void fini() { if (!uninitialized && numPerLevel) { __kmp_free(numPerLevel); numPerLevel = skipPerLevel = NULL; uninitialized = not_initialized; } } // free the arrays and reset state so a later init() reallocates instead of reusing freed memory
+
+    void init(AddrUnsPair *adr2os, int num_addrs) // Build numPerLevel/skipPerLevel from adr2os, or a default tree for num_addrs threads when adr2os is NULL
+    {
+        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
+        if (bool_result == 0) { // Lost the race (or already done): wait until the state reads initialized
+            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
+            return;
+        }
+        KMP_DEBUG_ASSERT(bool_result==1);
+
+        /* Added explicit initialization of the data fields here to prevent usage of dirty value
+           observed when static library is re-initialized multiple times (e.g. when
+           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
+        depth = 1;
+        resizing = 0;
+        maxLevels = 7;
+        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); // one allocation holds both arrays
+        skipPerLevel = &(numPerLevel[maxLevels]);
+        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+            numPerLevel[i] = 1;
+            skipPerLevel[i] = 1;
+        }
+
+        // Sort table by physical ID
+        if (adr2os) {
+            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
+            deriveLevels(adr2os, num_addrs);
+        }
+        else { // no topology table: maxLeaves leaves per parent, enough parents to cover num_addrs
+            numPerLevel[0] = maxLeaves;
+            numPerLevel[1] = num_addrs/maxLeaves;
+            if (num_addrs%maxLeaves) numPerLevel[1]++;
+        }
+
+        base_num_threads = num_addrs;
+        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
+            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
+                depth++;
+
+        kmp_uint32 branch = minBranch;
+        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
+        if (branch<minBranch) branch=minBranch;
+        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
+            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // at most maxLeaves on level 0
+                if (numPerLevel[d] & 1) numPerLevel[d]++;
+                numPerLevel[d] = numPerLevel[d] >> 1;
+                if (numPerLevel[d+1] == 1) depth++;
+                numPerLevel[d+1] = numPerLevel[d+1] << 1;
+            }
+            if(numPerLevel[0] == 1) {
+                branch = branch >> 1;
+                if (branch<minBranch) branch = minBranch;
+            }
+        }
+
+        for (kmp_uint32 i=1; i<depth; ++i)
+            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
+        // Fill in hierarchy in the case of oversubscription
+        for (kmp_uint32 i=depth; i<maxLevels; ++i)
+            skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+        uninitialized = initialized; // One writer
+
+    }
+
+    void resize(kmp_uint32 nproc) // Grow the arrays with extra doubling levels so the hierarchy covers nproc threads
+    {
+        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+        if (bool_result == 0) { // Someone else is resizing
+            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
+            return;
+        }
+        KMP_DEBUG_ASSERT(bool_result!=0);
+        KMP_DEBUG_ASSERT(nproc > base_num_threads);
+
+        // Calculate new maxLevels: one extra level per doubling needed to reach nproc
+        kmp_uint32 old_sz = skipPerLevel[depth-1];
+        kmp_uint32 incs = 0, old_maxLevels= maxLevels;
+        while (nproc > old_sz) {
+            old_sz *=2;
+            incs++;
+        }
+        maxLevels += incs;
+
+        // Resize arrays
+        kmp_uint32 *old_numPerLevel = numPerLevel;
+        kmp_uint32 *old_skipPerLevel = skipPerLevel;
+        numPerLevel = skipPerLevel = NULL;
+        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+        skipPerLevel = &(numPerLevel[maxLevels]);
+
+        // Copy old elements from old arrays
+        for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // carry over the existing levels unchanged
+            numPerLevel[i] = old_numPerLevel[i];
+            skipPerLevel[i] = old_skipPerLevel[i];
+        }
+
+        // Init new elements in arrays to 1
+        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // new levels start at 1 item per level
+            numPerLevel[i] = 1;
+            skipPerLevel[i] = 1;
+        }
+
+        // Free old arrays (old skipPerLevel lives in the same allocation as old numPerLevel)
+        __kmp_free(old_numPerLevel);
+
+        // Fill in oversubscription levels of hierarchy
+        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
+            skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+        base_num_threads = nproc;
+        resizing = 0; // One writer
+
+    }
+};
+#endif // KMP_AFFINITY_H

Propchange: openmp/trunk/runtime/src/kmp_affinity.h
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: openmp/trunk/runtime/src/kmp_affinity.h
------------------------------------------------------------------------------
    svn:keywords = Author Date Id Rev URL

Propchange: openmp/trunk/runtime/src/kmp_affinity.h
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=247326&r1=247325&r2=247326&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Thu Sep 10 14:22:07 2015
@@ -7286,6 +7286,7 @@ __kmp_cleanup( void )
 #if KMP_AFFINITY_SUPPORTED
         __kmp_affinity_uninitialize();
 #endif /* KMP_AFFINITY_SUPPORTED */
+        __kmp_cleanup_hierarchy();
         TCW_4(__kmp_init_middle, FALSE);
     }
 




More information about the Openmp-commits mailing list