[Openmp-commits] [openmp] r247326 - Cleanup of affinity hierarchy code.
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Thu Sep 10 12:22:08 PDT 2015
Author: jlpeyton
Date: Thu Sep 10 14:22:07 2015
New Revision: 247326
URL: http://llvm.org/viewvc/llvm-project?rev=247326&view=rev
Log:
Cleanup of affinity hierarchy code.
Some of these changes are improvements to the code suggested by Hal Finkel. Four changes here:
1. Cleanup of hierarchy code to handle all hierarchy cases, whether affinity is available or not
2. Separated this and other classes and common functions out into a header file
3. Added a destructor-like fini function for the hierarchy (and a call to it in __kmp_cleanup)
4. Removed some redundant code that is hopefully no longer needed
Differential Revision: http://reviews.llvm.org/D12449
Added:
openmp/trunk/runtime/src/kmp_affinity.h (with props)
Modified:
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_affinity.cpp
openmp/trunk/runtime/src/kmp_runtime.c
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=247326&r1=247325&r2=247326&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Sep 10 14:22:07 2015
@@ -2984,6 +2984,7 @@ extern int __kmp_aux_get_affinity_mask_p
extern void __kmp_balanced_affinity( int tid, int team_size );
#endif /* KMP_AFFINITY_SUPPORTED */
+extern void __kmp_cleanup_hierarchy();
extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=247326&r1=247325&r2=247326&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Thu Sep 10 14:22:07 2015
@@ -18,6 +18,34 @@
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
+#include "kmp_affinity.h"
+
+// Store the real or imagined machine hierarchy here
+static hierarchy_info machine_hierarchy;
+
+// Tear down the file-local machine hierarchy by running its fini() routine.
+// Called from __kmp_cleanup() during runtime shutdown.
+void __kmp_cleanup_hierarchy()
+{
+    machine_hierarchy.fini();
+}
+
+// Fill in the hierarchical-barrier fields of *thr_bar (depth, base_leaf_kids,
+// skip_per_level) from the shared machine hierarchy, for a team of nproc threads.
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar)
+{
+    // Nothing has built the hierarchy yet: build it on first use. Passing NULL
+    // for the address table makes init() fall back to its default layout.
+    if (TCR_1(machine_hierarchy.uninitialized)) {
+        machine_hierarchy.init(NULL, nproc);
+    }
+    // More threads requested than the hierarchy was sized for: grow it.
+    if (nproc > machine_hierarchy.base_num_threads) {
+        machine_hierarchy.resize(nproc);
+    }
+
+    kmp_uint32 levels = machine_hierarchy.depth;
+    KMP_DEBUG_ASSERT(levels > 0);
+    // The stored depth is not updated by a resize, so extend the local copy
+    // until the topmost level used spans at least nproc threads.
+    while (machine_hierarchy.skipPerLevel[levels-1] < nproc) {
+        ++levels;
+    }
+
+    thr_bar->depth = levels;
+    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
+    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
+}
#if KMP_AFFINITY_SUPPORTED
@@ -108,393 +136,6 @@ __kmp_affinity_entire_machine_mask(kmp_a
}
}
-
-//
-// In Linux* OS debug & cover (-O0) builds, we need to avoid inline member
-// functions.
-//
-// The icc codegen emits sections with extremely long names, of the form
-// ".gnu.linkonce.<mangled_name>". There seems to have been a linker bug
-// introduced between GNU ld version 2.14.90.0.4 and 2.15.92.0.2 involving
-// some sort of memory corruption or table overflow that is triggered by
-// these long strings. I checked the latest version of the linker -
-// GNU ld (Linux* OS/GNU Binutils) 2.18.50.0.7.20080422 - and the bug is not
-// fixed.
-//
-// Unfortunately, my attempts to reproduce it in a smaller example have
-// failed - I'm not sure what the prospects are of getting it fixed
-// properly - but we need a reproducer smaller than all of libomp.
-//
-// Work around the problem by avoiding inline constructors in such builds.
-// We do this for all platforms, not just Linux* OS - non-inline functions are
-// more debuggable and provide better coverage into than inline functions.
-// Use inline functions in shipping libs, for performance.
-//
-
-# if !defined(KMP_DEBUG) && !defined(COVER)
-
-class Address {
-public:
- static const unsigned maxDepth = 32;
- unsigned labels[maxDepth];
- unsigned childNums[maxDepth];
- unsigned depth;
- unsigned leader;
- Address(unsigned _depth)
- : depth(_depth), leader(FALSE) {
- }
- Address &operator=(const Address &b) {
- depth = b.depth;
- for (unsigned i = 0; i < depth; i++) {
- labels[i] = b.labels[i];
- childNums[i] = b.childNums[i];
- }
- leader = FALSE;
- return *this;
- }
- bool operator==(const Address &b) const {
- if (depth != b.depth)
- return false;
- for (unsigned i = 0; i < depth; i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
- }
- bool isClose(const Address &b, int level) const {
- if (depth != b.depth)
- return false;
- if ((unsigned)level >= depth)
- return true;
- for (unsigned i = 0; i < (depth - level); i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
- }
- bool operator!=(const Address &b) const {
- return !operator==(b);
- }
-};
-
-class AddrUnsPair {
-public:
- Address first;
- unsigned second;
- AddrUnsPair(Address _first, unsigned _second)
- : first(_first), second(_second) {
- }
- AddrUnsPair &operator=(const AddrUnsPair &b)
- {
- first = b.first;
- second = b.second;
- return *this;
- }
-};
-
-# else
-
-class Address {
-public:
- static const unsigned maxDepth = 32;
- unsigned labels[maxDepth];
- unsigned childNums[maxDepth];
- unsigned depth;
- unsigned leader;
- Address(unsigned _depth);
- Address &operator=(const Address &b);
- bool operator==(const Address &b) const;
- bool isClose(const Address &b, int level) const;
- bool operator!=(const Address &b) const;
-};
-
-Address::Address(unsigned _depth)
-{
- depth = _depth;
- leader = FALSE;
-}
-
-Address &Address::operator=(const Address &b) {
- depth = b.depth;
- for (unsigned i = 0; i < depth; i++) {
- labels[i] = b.labels[i];
- childNums[i] = b.childNums[i];
- }
- leader = FALSE;
- return *this;
-}
-
-bool Address::operator==(const Address &b) const {
- if (depth != b.depth)
- return false;
- for (unsigned i = 0; i < depth; i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
-}
-
-bool Address::isClose(const Address &b, int level) const {
- if (depth != b.depth)
- return false;
- if ((unsigned)level >= depth)
- return true;
- for (unsigned i = 0; i < (depth - level); i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
-}
-
-bool Address::operator!=(const Address &b) const {
- return !operator==(b);
-}
-
-class AddrUnsPair {
-public:
- Address first;
- unsigned second;
- AddrUnsPair(Address _first, unsigned _second);
- AddrUnsPair &operator=(const AddrUnsPair &b);
-};
-
-AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
- : first(_first), second(_second)
-{
-}
-
-AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
-{
- first = b.first;
- second = b.second;
- return *this;
-}
-
-# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
-
-
-static int
-__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
-{
- const Address *aa = (const Address *)&(((AddrUnsPair *)a)
- ->first);
- const Address *bb = (const Address *)&(((AddrUnsPair *)b)
- ->first);
- unsigned depth = aa->depth;
- unsigned i;
- KMP_DEBUG_ASSERT(depth == bb->depth);
- for (i = 0; i < depth; i++) {
- if (aa->labels[i] < bb->labels[i]) return -1;
- if (aa->labels[i] > bb->labels[i]) return 1;
- }
- return 0;
-}
-
-
-static int
-__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
-{
- const Address *aa = (const Address *)&(((AddrUnsPair *)a)
- ->first);
- const Address *bb = (const Address *)&(((AddrUnsPair *)b)
- ->first);
- unsigned depth = aa->depth;
- unsigned i;
- KMP_DEBUG_ASSERT(depth == bb->depth);
- KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
- KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
- for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
- int j = depth - i - 1;
- if (aa->childNums[j] < bb->childNums[j]) return -1;
- if (aa->childNums[j] > bb->childNums[j]) return 1;
- }
- for (; i < depth; i++) {
- int j = i - __kmp_affinity_compact;
- if (aa->childNums[j] < bb->childNums[j]) return -1;
- if (aa->childNums[j] > bb->childNums[j]) return 1;
- }
- return 0;
-}
-
-/** A structure for holding machine-specific hierarchy info to be computed once at init.
- This structure represents a mapping of threads to the actual machine hierarchy, or to
- our best guess at what the hierarchy might be, for the purpose of performing an
- efficient barrier. In the worst case, when there is no machine hierarchy information,
- it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
-class hierarchy_info {
-public:
- /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
- or socket, packages/node, nodes/machine, etc. We don't want to get specific with
- nomenclature. When the machine is oversubscribed we add levels to duplicate the
- hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
- kmp_uint32 maxLevels;
-
- /** This is specifically the depth of the machine configuration hierarchy, in terms of the
- number of levels along the longest path from root to any leaf. It corresponds to the
- number of entries in numPerLevel if we exclude all but one trailing 1. */
- kmp_uint32 depth;
- kmp_uint32 base_num_threads;
- volatile kmp_int8 uninitialized; // 0=initialized, 1=uninitialized, 2=initialization in progress
- volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
-
- /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
- node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
- and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
- kmp_uint32 *numPerLevel;
- kmp_uint32 *skipPerLevel;
-
- void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
- int hier_depth = adr2os[0].first.depth;
- int level = 0;
- for (int i=hier_depth-1; i>=0; --i) {
- int max = -1;
- for (int j=0; j<num_addrs; ++j) {
- int next = adr2os[j].first.childNums[i];
- if (next > max) max = next;
- }
- numPerLevel[level] = max+1;
- ++level;
- }
- }
-
- hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}
-
- // TO FIX: This destructor causes a segfault in the library at shutdown.
- //~hierarchy_info() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
-
- void init(AddrUnsPair *adr2os, int num_addrs)
- {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
- if (bool_result == 0) { // Wait for initialization
- while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
- return;
- }
- KMP_DEBUG_ASSERT(bool_result==1);
-
- /* Added explicit initialization of the data fields here to prevent usage of dirty value
- observed when static library is re-initialized multiple times (e.g. when
- non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
- depth = 1;
- resizing = 0;
- maxLevels = 7;
- numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
- for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
-
- // Sort table by physical ID
- if (adr2os) {
- qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
- deriveLevels(adr2os, num_addrs);
- }
- else {
- numPerLevel[0] = 4;
- numPerLevel[1] = num_addrs/4;
- if (num_addrs%4) numPerLevel[1]++;
- }
-
- base_num_threads = num_addrs;
- for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
- if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
- depth++;
-
- kmp_uint32 branch = 4;
- if (numPerLevel[0] == 1) branch = num_addrs/4;
- if (branch<4) branch=4;
- for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
- while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
- if (numPerLevel[d] & 1) numPerLevel[d]++;
- numPerLevel[d] = numPerLevel[d] >> 1;
- if (numPerLevel[d+1] == 1) depth++;
- numPerLevel[d+1] = numPerLevel[d+1] << 1;
- }
- if(numPerLevel[0] == 1) {
- branch = branch >> 1;
- if (branch<4) branch = 4;
- }
- }
-
- for (kmp_uint32 i=1; i<depth; ++i)
- skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
- // Fill in hierarchy in the case of oversubscription
- for (kmp_uint32 i=depth; i<maxLevels; ++i)
- skipPerLevel[i] = 2*skipPerLevel[i-1];
-
- uninitialized = 0; // One writer
-
- }
-
- void resize(kmp_uint32 nproc)
- {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- if (bool_result == 0) { // Someone else is resizing
- while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
- return;
- }
- KMP_DEBUG_ASSERT(bool_result!=0);
- KMP_DEBUG_ASSERT(nproc > base_num_threads);
-
- // Calculate new max_levels
- kmp_uint32 old_sz = skipPerLevel[depth-1];
- kmp_uint32 incs = 0, old_maxLevels= maxLevels;
- while (nproc > old_sz) {
- old_sz *=2;
- incs++;
- }
- maxLevels += incs;
-
- // Resize arrays
- kmp_uint32 *old_numPerLevel = numPerLevel;
- kmp_uint32 *old_skipPerLevel = skipPerLevel;
- numPerLevel = skipPerLevel = NULL;
- numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
-
- // Copy old elements from old arrays
- for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = old_numPerLevel[i];
- skipPerLevel[i] = old_skipPerLevel[i];
- }
-
- // Init new elements in arrays to 1
- for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
-
- // Free old arrays
- __kmp_free(old_numPerLevel);
-
- // Fill in oversubscription levels of hierarchy
- for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
- skipPerLevel[i] = 2*skipPerLevel[i-1];
-
- base_num_threads = nproc;
- resizing = 0; // One writer
-
- }
-};
-
-static hierarchy_info machine_hierarchy;
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
- kmp_uint32 depth;
- // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
- if (TCR_1(machine_hierarchy.uninitialized))
- machine_hierarchy.init(NULL, nproc);
- // Adjust the hierarchy in case num threads exceeds original
- if (nproc > machine_hierarchy.base_num_threads)
- machine_hierarchy.resize(nproc);
-
- depth = machine_hierarchy.depth;
- KMP_DEBUG_ASSERT(depth > 0);
- // The loop below adjusts the depth in the case of a resize
- while (nproc > machine_hierarchy.skipPerLevel[depth-1])
- depth++;
-
- thr_bar->depth = depth;
- thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
- thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
-}
-
//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
@@ -4683,73 +4324,4 @@ void __kmp_balanced_affinity( int tid, i
}
}
-#else
- // affinity not supported
-
-static const kmp_uint32 noaff_maxLevels=7;
-kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
-kmp_uint32 noaff_depth;
-kmp_uint8 noaff_leaf_kids;
-kmp_int8 noaff_uninitialized=1;
-
-void noaff_init(int nprocs)
-{
- kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
- if (result == 0) return; // Already initialized
- else if (result == 2) { // Someone else is initializing
- while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
- return;
- }
- KMP_DEBUG_ASSERT(result==1);
-
- kmp_uint32 numPerLevel[noaff_maxLevels];
- noaff_depth = 1;
- for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- noaff_skipPerLevel[i] = 1;
- }
-
- numPerLevel[0] = 4;
- numPerLevel[1] = nprocs/4;
- if (nprocs%4) numPerLevel[1]++;
-
- for (int i=noaff_maxLevels-1; i>=0; --i) // count non-empty levels to get depth
- if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
- noaff_depth++;
-
- kmp_uint32 branch = 4;
- if (numPerLevel[0] == 1) branch = nprocs/4;
- if (branch<4) branch=4;
- for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
- while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
- if (numPerLevel[d] & 1) numPerLevel[d]++;
- numPerLevel[d] = numPerLevel[d] >> 1;
- if (numPerLevel[d+1] == 1) noaff_depth++;
- numPerLevel[d+1] = numPerLevel[d+1] << 1;
- }
- if(numPerLevel[0] == 1) {
- branch = branch >> 1;
- if (branch<4) branch = 4;
- }
- }
-
- for (kmp_uint32 i=1; i<noaff_depth; ++i)
- noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
- // Fill in hierarchy in the case of oversubscription
- for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
- noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
- noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
- noaff_uninitialized = 0; // One writer
-
-}
-
-void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
- if (noaff_uninitialized)
- noaff_init(nproc);
-
- thr_bar->depth = noaff_depth;
- thr_bar->base_leaf_kids = noaff_leaf_kids;
- thr_bar->skip_per_level = noaff_skipPerLevel;
-}
-
#endif // KMP_AFFINITY_SUPPORTED
Added: openmp/trunk/runtime/src/kmp_affinity.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.h?rev=247326&view=auto
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.h (added)
+++ openmp/trunk/runtime/src/kmp_affinity.h Thu Sep 10 14:22:07 2015
@@ -0,0 +1,280 @@
+/*
+ * kmp_affinity.h -- header for affinity management
+ */
+
+
+//===----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_AFFINITY_H
+#define KMP_AFFINITY_H
+
+extern int __kmp_affinity_compact; /* Affinity 'compact' value */
+
+/** A multi-level address: up to maxDepth labels and child numbers, of which only
+    the first `depth` entries are meaningful. */
+class Address {
+public:
+    static const unsigned maxDepth = 32;
+    unsigned labels[maxDepth];
+    unsigned childNums[maxDepth];
+    unsigned depth;
+    unsigned leader;
+
+    // Sets depth and clears leader; labels/childNums are left for the caller to fill in.
+    Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
+
+    // Copies depth plus the first b.depth labels/childNums. Note that leader is
+    // reset to FALSE rather than copied from b.
+    Address &operator=(const Address &b) {
+        leader = FALSE;
+        depth = b.depth;
+        for (unsigned k = 0; k != depth; ++k) {
+            childNums[k] = b.childNums[k];
+            labels[k] = b.labels[k];
+        }
+        return *this;
+    }
+
+    // Equal when the depths match and all `depth` labels match (childNums and
+    // leader do not take part in the comparison).
+    bool operator==(const Address &b) const {
+        if (depth != b.depth) return false;
+        unsigned k = 0;
+        while (k < depth && labels[k] == b.labels[k])
+            ++k;
+        return k == depth;
+    }
+
+    // True when both addresses have the same depth and agree on the first
+    // (depth - level) labels; a level at or beyond depth always matches.
+    bool isClose(const Address &b, int level) const {
+        if (depth != b.depth) return false;
+        if ((unsigned)level >= depth) return true;
+        const unsigned prefix = depth - level;
+        for (unsigned k = 0; k < prefix; ++k) {
+            if (labels[k] != b.labels[k]) return false;
+        }
+        return true;
+    }
+
+    bool operator!=(const Address &b) const {
+        return !(*this == b);
+    }
+};
+
+/** Pairs an Address with an unsigned value (presumably the OS processor id,
+    judging by the `adr2os` naming used by callers -- TODO confirm). */
+class AddrUnsPair {
+public:
+    Address first;
+    unsigned second;
+
+    AddrUnsPair(Address _first, unsigned _second) : first(_first), second(_second) {}
+
+    // Member-wise assignment; `first` goes through Address::operator=.
+    AddrUnsPair &operator=(const AddrUnsPair &b) {
+        second = b.second;
+        first = b.first;
+        return *this;
+    }
+};
+
+
+// qsort() comparator over AddrUnsPair elements: orders them lexicographically
+// by the labels of their Address. Both addresses are expected to have the same depth.
+static int
+__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
+{
+    const Address &lhs = ((const AddrUnsPair *)a)->first;
+    const Address &rhs = ((const AddrUnsPair *)b)->first;
+    const unsigned depth = lhs.depth;
+    KMP_DEBUG_ASSERT(depth == rhs.depth);
+    for (unsigned k = 0; k < depth; ++k) {
+        if (lhs.labels[k] != rhs.labels[k])
+            return (lhs.labels[k] < rhs.labels[k]) ? -1 : 1;
+    }
+    return 0;
+}
+
+
+// Comparator over AddrUnsPair elements keyed on Address::childNums. The first
+// __kmp_affinity_compact keys are taken from the end of childNums going backwards
+// (index depth-1, depth-2, ...); the remaining keys are taken from index 0 upwards.
+static int
+__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
+{
+    const Address &lhs = ((const AddrUnsPair *)a)->first;
+    const Address &rhs = ((const AddrUnsPair *)b)->first;
+    const unsigned depth = lhs.depth;
+    KMP_DEBUG_ASSERT(depth == rhs.depth);
+    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
+    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
+
+    unsigned k = 0;
+    // Keys taken from the tail of childNums.
+    for (; k < (unsigned)__kmp_affinity_compact; ++k) {
+        const int j = depth - k - 1;
+        if (lhs.childNums[j] != rhs.childNums[j])
+            return (lhs.childNums[j] < rhs.childNums[j]) ? -1 : 1;
+    }
+    // Remaining keys taken from the head of childNums.
+    for (; k < depth; ++k) {
+        const int j = k - __kmp_affinity_compact;
+        if (lhs.childNums[j] != rhs.childNums[j])
+            return (lhs.childNums[j] < rhs.childNums[j]) ? -1 : 1;
+    }
+    return 0;
+}
+
+
+/** A structure for holding machine-specific hierarchy info to be computed once at init.
+    init() builds the per-level tables either from an address table or, when none is
+    supplied, from the default leaf/branch sizes below; resize() extends the tables when
+    more threads are requested than they were sized for; fini() releases the tables and
+    returns the object to the not-initialized state so that it can be built again. */
+class hierarchy_info {
+public:
+    /** Good default values for number of leaves and branching factor, given no affinity information.
+        Behaves a bit like hyper barrier. */
+    static const kmp_uint32 maxLeaves=4;
+    static const kmp_uint32 minBranch=4;
+    /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
+        etc. We don't want to get specific with nomenclature */
+    kmp_uint32 maxLevels;
+
+    /** This is specifically the depth of the machine configuration hierarchy, in terms of the
+        number of levels along the longest path from root to any leaf. It corresponds to the
+        number of entries in numPerLevel if we exclude all but one trailing 1. */
+    kmp_uint32 depth;
+    /** Thread count the tables were last sized for (set by init() and resize()). */
+    kmp_uint32 base_num_threads;
+    enum init_status { initialized=0, not_initialized=1, initializing=2 };
+    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
+    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
+
+    /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
+        node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
+        and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
+    kmp_uint32 *numPerLevel;
+    /** skipPerLevel[i] is the product of numPerLevel[0..i-1] (doubled per level beyond depth).
+        It points into the second half of the numPerLevel allocation, so it is never freed
+        on its own. */
+    kmp_uint32 *skipPerLevel;
+
+    /** Fill numPerLevel from an address table: for each address level, walking from the last
+        childNums entry to the first, store (largest child number seen) + 1.
+        NOTE(review): there is no check of the address depth against maxLevels here. */
+    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
+        int hier_depth = adr2os[0].first.depth;
+        int level = 0;
+        for (int i=hier_depth-1; i>=0; --i) {
+            int max = -1;
+            for (int j=0; j<num_addrs; ++j) {
+                int next = adr2os[j].first.childNums[i];
+                if (next > max) max = next;
+            }
+            numPerLevel[level] = max+1;
+            ++level;
+        }
+    }
+
+    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
+
+    /** Destructor-like cleanup. Frees the level tables (one allocation backs both arrays),
+        clears the pointers, and marks the object as not initialized so that the next user
+        rebuilds the hierarchy through init() instead of reading the freed tables. */
+    void fini() {
+        if (!uninitialized && numPerLevel) {
+            __kmp_free(numPerLevel);
+            numPerLevel = NULL;
+            skipPerLevel = NULL;
+            uninitialized = not_initialized;
+        }
+    }
+
+    /** Build the hierarchy for num_addrs threads.
+        adr2os: address table to derive the levels from (it is sorted in place), or NULL to
+        use the default layout of maxLeaves leaves per parent.
+        Only the caller that wins the not_initialized -> initializing transition does the
+        work; every other caller spins until the state reads initialized and then returns. */
+    void init(AddrUnsPair *adr2os, int num_addrs)
+    {
+        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
+        if (bool_result == 0) { // Wait for initialization
+            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
+            return;
+        }
+        KMP_DEBUG_ASSERT(bool_result==1);
+
+        /* Added explicit initialization of the data fields here to prevent usage of dirty value
+           observed when static library is re-initialized multiple times (e.g. when
+           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
+        depth = 1;
+        resizing = 0;
+        maxLevels = 7;
+        // One allocation holds both tables: numPerLevel first, skipPerLevel right after it.
+        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+        skipPerLevel = &(numPerLevel[maxLevels]);
+        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+            numPerLevel[i] = 1;
+            skipPerLevel[i] = 1;
+        }
+
+        // Sort table by physical ID
+        if (adr2os) {
+            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
+            deriveLevels(adr2os, num_addrs);
+        }
+        else {
+            // No topology available: maxLeaves leaves per parent, and enough parents
+            // (rounded up) to hold num_addrs threads.
+            numPerLevel[0] = maxLeaves;
+            numPerLevel[1] = num_addrs/maxLeaves;
+            if (num_addrs%maxLeaves) numPerLevel[1]++;
+        }
+
+        base_num_threads = num_addrs;
+        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
+            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
+                depth++;
+
+        kmp_uint32 branch = minBranch;
+        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
+        if (branch<minBranch) branch=minBranch;
+        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
+            // Halve an over-wide level (rounding up) and double the level above it,
+            // adding a level to depth when the level above was still empty.
+            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
+                if (numPerLevel[d] & 1) numPerLevel[d]++;
+                numPerLevel[d] = numPerLevel[d] >> 1;
+                if (numPerLevel[d+1] == 1) depth++;
+                numPerLevel[d+1] = numPerLevel[d+1] << 1;
+            }
+            if(numPerLevel[0] == 1) {
+                branch = branch >> 1;
+                if (branch<minBranch) branch = minBranch;
+            }
+        }
+
+        for (kmp_uint32 i=1; i<depth; ++i)
+            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
+        // Fill in hierarchy in the case of oversubscription
+        for (kmp_uint32 i=depth; i<maxLevels; ++i)
+            skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+        uninitialized = initialized; // One writer
+
+    }
+
+    /** Grow the tables so the hierarchy can cover nproc threads (nproc must exceed
+        base_num_threads). Only the caller that flips `resizing` from 0 to 1 does the work;
+        any other caller spins until the flag drops back to 0 and then returns. */
+    void resize(kmp_uint32 nproc)
+    {
+        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+        if (bool_result == 0) { // Someone else is resizing
+            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
+            return;
+        }
+        KMP_DEBUG_ASSERT(bool_result!=0);
+        KMP_DEBUG_ASSERT(nproc > base_num_threads);
+
+        // Calculate new max_levels: one extra level per doubling needed to reach nproc
+        kmp_uint32 old_sz = skipPerLevel[depth-1];
+        kmp_uint32 incs = 0, old_maxLevels= maxLevels;
+        while (nproc > old_sz) {
+            old_sz *=2;
+            incs++;
+        }
+        maxLevels += incs;
+
+        // Resize arrays
+        kmp_uint32 *old_numPerLevel = numPerLevel;
+        kmp_uint32 *old_skipPerLevel = skipPerLevel;
+        numPerLevel = skipPerLevel = NULL;
+        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
+        skipPerLevel = &(numPerLevel[maxLevels]);
+
+        // Copy old elements from old arrays
+        for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
+            numPerLevel[i] = old_numPerLevel[i];
+            skipPerLevel[i] = old_skipPerLevel[i];
+        }
+
+        // Init new elements in arrays to 1
+        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
+            numPerLevel[i] = 1;
+            skipPerLevel[i] = 1;
+        }
+
+        // Free old arrays (old_skipPerLevel lives inside the same allocation)
+        __kmp_free(old_numPerLevel);
+
+        // Fill in oversubscription levels of hierarchy
+        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
+            skipPerLevel[i] = 2*skipPerLevel[i-1];
+
+        base_num_threads = nproc;
+        resizing = 0; // One writer
+
+    }
+};
+#endif // KMP_AFFINITY_H
Propchange: openmp/trunk/runtime/src/kmp_affinity.h
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: openmp/trunk/runtime/src/kmp_affinity.h
------------------------------------------------------------------------------
svn:keywords = Author Date Id Rev URL
Propchange: openmp/trunk/runtime/src/kmp_affinity.h
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: openmp/trunk/runtime/src/kmp_runtime.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.c?rev=247326&r1=247325&r2=247326&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.c (original)
+++ openmp/trunk/runtime/src/kmp_runtime.c Thu Sep 10 14:22:07 2015
@@ -7286,6 +7286,7 @@ __kmp_cleanup( void )
#if KMP_AFFINITY_SUPPORTED
__kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
+ __kmp_cleanup_hierarchy();
TCW_4(__kmp_init_middle, FALSE);
}
More information about the Openmp-commits
mailing list