[Openmp-commits] [openmp] r300220 - KMP_HW_SUBSET extended with NUMA support when HWLOC enabled
Andrey Churbanov via Openmp-commits
openmp-commits at lists.llvm.org
Thu Apr 13 10:15:08 PDT 2017
Author: achurbanov
Date: Thu Apr 13 12:15:07 2017
New Revision: 300220
URL: http://llvm.org/viewvc/llvm-project?rev=300220&view=rev
Log:
KMP_HW_SUBSET extended with NUMA support when HWLOC enabled
Differential Revision: https://reviews.llvm.org/D31600
Modified:
openmp/trunk/runtime/src/dllexports
openmp/trunk/runtime/src/i18n/en_US.txt
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_affinity.cpp
openmp/trunk/runtime/src/kmp_csupport.cpp
openmp/trunk/runtime/src/kmp_global.cpp
openmp/trunk/runtime/src/kmp_settings.cpp
Modified: openmp/trunk/runtime/src/dllexports
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/dllexports?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/dllexports (original)
+++ openmp/trunk/runtime/src/dllexports Thu Apr 13 12:15:07 2017
@@ -351,7 +351,7 @@ kmpc_set_defaults
%ifdef OMP_30
__kmpc_omp_taskyield 235
%endif # OMP_30
- __kmpc_place_threads 236
+# __kmpc_place_threads 236
%endif
# OpenMP 4.0 entry points
Modified: openmp/trunk/runtime/src/i18n/en_US.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/i18n/en_US.txt?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/i18n/en_US.txt (original)
+++ openmp/trunk/runtime/src/i18n/en_US.txt Thu Apr 13 12:15:07 2017
@@ -38,7 +38,7 @@ Language "English"
Country "USA"
LangId "1033"
Version "2"
-Revision "20160714"
+Revision "20161216"
@@ -388,8 +388,8 @@ OBSOLETE "%1$s: gran
EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
AffGranUsing "%1$s: granularity=%2$s will be used."
-AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"Ns[@N],Nc[@N],Nt "
- "(nSockets at offset, nCores at offset, nTthreads per core)\"."
+AffHWSubsetInvalid "%1$s: invalid value \"%2$s\", valid format is \"N<item>[@N][,...][,Nt] "
+ "(<item> can be S, N, L2, C, T for Socket, NUMA Node, L2 Cache, Core, Thread)\"."
AffHWSubsetUnsupported "KMP_HW_SUBSET ignored: unsupported architecture."
AffHWSubsetManyCores "KMP_HW_SUBSET ignored: too many cores requested."
SyntaxErrorUsing "%1$s: syntax error, using %2$s."
@@ -411,6 +411,10 @@ AffHwlocErrorOccurred "%1$s: Hwlo
EnvSerialWarn "%1$s must be set prior to OpenMP runtime library initialization; ignored."
EnvVarDeprecated "%1$s variable deprecated, please use %2$s instead."
RedMethodNotSupported "KMP_FORCE_REDUCTION: %1$s method is not supported; using critical."
+AffHWSubsetNoHWLOC "KMP_HW_SUBSET ignored: unsupported item requested for non-HWLOC topology method (KMP_TOPOLOGY_METHOD)"
+AffHWSubsetManyNodes "KMP_HW_SUBSET ignored: too many NUMA Nodes requested."
+AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requested."
+AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested."
# --------------------------------------------------------------------------------------------------
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Apr 13 12:15:07 2017
@@ -774,11 +774,19 @@ typedef enum kmp_cancel_kind_t {
} kmp_cancel_kind_t;
#endif // OMP_40_ENABLED
-extern int __kmp_place_num_sockets;
-extern int __kmp_place_socket_offset;
-extern int __kmp_place_num_cores;
-extern int __kmp_place_core_offset;
-extern int __kmp_place_num_threads_per_core;
+// KMP_HW_SUBSET support:
+typedef struct kmp_hws_item {
+ int num;
+ int offset;
+} kmp_hws_item_t;
+
+extern kmp_hws_item_t __kmp_hws_socket;
+extern kmp_hws_item_t __kmp_hws_node;
+extern kmp_hws_item_t __kmp_hws_tile;
+extern kmp_hws_item_t __kmp_hws_core;
+extern kmp_hws_item_t __kmp_hws_proc;
+extern int __kmp_hws_requested;
+extern int __kmp_hws_abs_flag; // absolute or per-item number requested
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -3494,9 +3502,6 @@ KMP_EXPORT kmp_int32 __kmp_get_reduce_me
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
-// this function exported for testing of KMP_PLACE_THREADS functionality
-KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
-
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Thu Apr 13 12:15:07 2017
@@ -3405,102 +3405,665 @@ __kmp_affinity_process_placelist(kmp_aff
#undef ADD_MASK
#undef ADD_MASK_OSID
+#if KMP_USE_HWLOC
+static int
+__kmp_hwloc_count_children_by_type(
+ hwloc_topology_t t, hwloc_obj_t o, hwloc_obj_type_t type, hwloc_obj_t* f)
+{
+ if (!hwloc_compare_types(o->type, type)) {
+ if (*f == NULL)
+ *f = o; // output first descendant found
+ return 1;
+ }
+ int sum = 0;
+ for (unsigned i = 0; i < o->arity; i++)
+ sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
+ return sum; // will be 0 if no one found (as PU arity is 0)
+}
+
+static int
+__kmp_hwloc_count_children_by_depth(
+ hwloc_topology_t t, hwloc_obj_t o, unsigned depth, hwloc_obj_t* f)
+{
+ if (o->depth == depth) {
+ if (*f == NULL)
+ *f = o; // output first descendant found
+ return 1;
+ }
+ int sum = 0;
+ for (unsigned i = 0; i < o->arity; i++)
+ sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
+ return sum; // will be 0 if no one found (as PU arity is 0)
+}
+
+static int
+__kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o)
+{ // skip PUs descendants of the object o
+ int skipped = 0;
+ hwloc_obj_t hT = NULL;
+ int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
+ for (int i = 0; i < N; ++i) {
+ KMP_DEBUG_ASSERT(hT);
+ unsigned idx = hT->os_index;
+ if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ ++skipped;
+ }
+ hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
+ }
+ return skipped; // count number of skipped units
+}
+
+static int
+__kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o)
+{ // check if obj has PUs present in fullMask
+ hwloc_obj_t hT = NULL;
+ int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
+ for (int i = 0; i < N; ++i) {
+ KMP_DEBUG_ASSERT(hT);
+ unsigned idx = hT->os_index;
+ if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
+ return 1; // found PU
+ hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
+ }
+ return 0; // no PUs found
+}
+#endif // KMP_USE_HWLOC
+
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
- int i, j, k, n_old = 0, n_new = 0, proc_num = 0;
- if (__kmp_place_num_sockets == 0 &&
- __kmp_place_num_cores == 0 &&
- __kmp_place_num_threads_per_core == 0 )
- goto _exit; // no topology limiting actions requested, exit
- if (__kmp_place_num_sockets == 0)
- __kmp_place_num_sockets = nPackages; // use all available sockets
- if (__kmp_place_num_cores == 0)
- __kmp_place_num_cores = nCoresPerPkg; // use all available cores
- if (__kmp_place_num_threads_per_core == 0 ||
- __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
- __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
+ AddrUnsPair *newAddr;
+ if (__kmp_hws_requested == 0)
+ goto _exit; // no topology limiting actions requested, exit
+#if KMP_USE_HWLOC
+ if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
+ // Number of subobjects calculated dynamically, this works fine for
+ // any non-uniform topology.
+ // L2 cache objects are determined by depth, other objects - by type.
+ hwloc_topology_t tp = __kmp_hwloc_topology;
+ int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
+ int nCr=0, nTr=0; // number of requested units
+ int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
+ hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
+ int L2depth, idx;
- if ( !__kmp_affinity_uniform_topology() ) {
+ // check support of extensions ----------------------------------
+ int numa_support = 0, tile_support = 0;
+ if (__kmp_pu_os_idx)
+ hT = hwloc_get_pu_obj_by_os_index(
+ tp, __kmp_pu_os_idx[__kmp_avail_proc - 1]);
+ else
+ hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
+ if (hT == NULL) { // something's gone wrong
+ KMP_WARNING(AffHWSubsetUnsupported);
+ goto _exit;
+ }
+ // check NUMA node
+ hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
+ hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
+ if (hN != NULL && hN->depth > hS->depth) {
+ numa_support = 1; // 1 in case socket includes node(s)
+ } else if (__kmp_hws_node.num > 0) {
+ // don't support sockets inside NUMA node (no such HW found for testing)
+ KMP_WARNING(AffHWSubsetUnsupported);
+ goto _exit;
+ }
+ // check L2 cache, get object by depth because of multiple caches
+ L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
+ hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
+ if (hL != NULL && __kmp_hwloc_count_children_by_type(
+ tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
+ tile_support = 1; // no sense to count L2 if it includes single core
+ } else if (__kmp_hws_tile.num > 0) {
+ if (__kmp_hws_core.num == 0) {
+ __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
+ __kmp_hws_tile.num = 0;
+ } else {
+ // L2 and core are both requested, but represent same object
+ KMP_WARNING(AffHWSubsetInvalid);
+ goto _exit;
+ }
+ }
+ // end of check of extensions -----------------------------------
+
+ // fill in unset items, validate settings -----------------------
+ if (__kmp_hws_socket.num == 0)
+ __kmp_hws_socket.num = nPackages; // use all available sockets
+ if (__kmp_hws_socket.offset >= nPackages) {
+ KMP_WARNING(AffHWSubsetManySockets);
+ goto _exit;
+ }
+ if (numa_support) {
+ int NN = __kmp_hwloc_count_children_by_type(
+ tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in socket
+ if (__kmp_hws_node.num == 0)
+ __kmp_hws_node.num = NN; // use all available nodes
+ if (__kmp_hws_node.offset >= NN) {
+ KMP_WARNING(AffHWSubsetManyNodes);
+ goto _exit;
+ }
+ if (tile_support) {
+ // get num tiles in node
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
+ if (__kmp_hws_tile.num == 0) {
+ __kmp_hws_tile.num = NL + 1;
+ } // use all available tiles, some node may have more tiles, thus +1
+ if (__kmp_hws_tile.offset >= NL) {
+ KMP_WARNING(AffHWSubsetManyTiles);
+ goto _exit;
+ }
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
+ goto _exit;
+ }
+ } else { // tile_support
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in node
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
+ goto _exit;
+ }
+ } // tile_support
+ } else { // numa_support
+ if (tile_support) {
+ // get num tiles in socket
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
+ if (__kmp_hws_tile.num == 0)
+ __kmp_hws_tile.num = NL; // use all available tiles
+ if (__kmp_hws_tile.offset >= NL) {
+ KMP_WARNING(AffHWSubsetManyTiles);
+ goto _exit;
+ }
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
+ goto _exit;
+ }
+ } else { // tile_support
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
+ goto _exit;
+ }
+ } // tile_support
+ }
+ if (__kmp_hws_proc.num == 0)
+ __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
+ if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
+ KMP_WARNING(AffHWSubsetManyProcs);
+ goto _exit;
+ }
+ // end of validation --------------------------------------------
+
+ if (pAddr) // pAddr is NULL in case of affinity_none
+ newAddr = (AddrUnsPair *)__kmp_allocate(
+ sizeof(AddrUnsPair) * __kmp_avail_proc); // max size
+ // main loop to form HW subset ----------------------------------
+ hS = NULL;
+ int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
+ for (int s = 0; s < NP; ++s) {
+ // Check Socket -----------------------------------------------
+ hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
+ if (!__kmp_hwloc_obj_has_PUs(tp, hS))
+ continue; // skip socket if all PUs are out of fullMask
+ ++nS; // only count objects that have PUs in the affinity mask
+ if (nS <= __kmp_hws_socket.offset ||
+ nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
+ continue; // move to next socket
+ }
+ nCr = 0; // count number of cores per socket
+ // socket requested, go down the topology tree
+ // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
+ if (numa_support) {
+ nN = 0;
+ hN = NULL;
+ int NN = __kmp_hwloc_count_children_by_type(
+ tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in current socket
+ for (int n = 0; n < NN; ++n) {
+ // Check NUMA Node ----------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
+ hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
+ continue; // skip node if all PUs are out of fullMask
+ }
+ ++nN;
+ if (nN <= __kmp_hws_node.offset ||
+ nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
+ // skip node as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
+ hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
+ continue; // move to next node
+ }
+ // node requested, go down the topology tree
+ if (tile_support) {
+ nL = 0;
+ hL = NULL;
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
+ for (int l = 0; l < NL; ++l) {
+ // Check L2 (tile) ------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ continue; // skip tile if all PUs are out of fullMask
+ }
+ ++nL;
+ if (nL <= __kmp_hws_tile.offset ||
+ nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
+ // skip tile as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ continue; // move to next tile
+ }
+ // tile requested, go down the topology tree
+ nC = 0;
+ hC = NULL;
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in current tile
+ for (int c = 0; c < NC; ++c) {
+ // Check Core ---------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // skip core if all PUs are out of fullMask
+ }
+ ++nC;
+ if (nC <= __kmp_hws_core.offset ||
+ nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
+ // skip core as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // move to next core
+ }
+ // core requested, go down to PUs
+ nT = 0;
+ nTr = 0;
+ hT = NULL;
+ int NT = __kmp_hwloc_count_children_by_type(
+ tp, hC, HWLOC_OBJ_PU, &hT); // num procs in current core
+ for (int t = 0; t < NT; ++t) {
+ // Check PU ---------------------------------------
+ idx = hT->os_index;
+ if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // skip PU if not in fullMask
+ }
+ ++nT;
+ if (nT <= __kmp_hws_proc.offset ||
+ nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
+ // skip PU
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ ++n_old;
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // move to next PU
+ }
+ ++nTr;
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ ++n_new;
+ ++n_old;
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ } // threads loop
+ if (nTr > 0) {
+ ++nCr; // num cores per socket
+ ++nCo; // total num cores
+ if (nTr > nTpC)
+ nTpC = nTr; // calc max threads per core
+ }
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ } // cores loop
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ } // tiles loop
+ } else { // tile_support
+ // no tiles, check cores
+ nC = 0;
+ hC = NULL;
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in current node
+ for (int c = 0; c < NC; ++c) {
+ // Check Core ---------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // skip core if all PUs are out of fullMask
+ }
+ ++nC;
+ if (nC <= __kmp_hws_core.offset ||
+ nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
+ // skip core as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // move to next core
+ }
+ // core requested, go down to PUs
+ nT = 0;
+ nTr = 0;
+ hT = NULL;
+ int NT = __kmp_hwloc_count_children_by_type(
+ tp, hC, HWLOC_OBJ_PU, &hT);
+ for (int t = 0; t < NT; ++t) {
+ // Check PU ---------------------------------------
+ idx = hT->os_index;
+ if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // skip PU if not in fullMask
+ }
+ ++nT;
+ if (nT <= __kmp_hws_proc.offset ||
+ nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
+ // skip PU
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ ++n_old;
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // move to next PU
+ }
+ ++nTr;
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ ++n_new;
+ ++n_old;
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ } // threads loop
+ if (nTr > 0) {
+ ++nCr; // num cores per socket
+ ++nCo; // total num cores
+ if (nTr > nTpC)
+ nTpC = nTr; // calc max threads per core
+ }
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ } // cores loop
+ } // tiles support
+ hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
+ } // nodes loop
+ } else { // numa_support
+ // no NUMA support
+ if (tile_support) {
+ nL = 0;
+ hL = NULL;
+ int NL = __kmp_hwloc_count_children_by_depth(
+ tp, hS, L2depth, &hL); // num tiles in current socket
+ for (int l = 0; l < NL; ++l) {
+ // Check L2 (tile) ------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ continue; // skip tile if all PUs are out of fullMask
+ }
+ ++nL;
+ if (nL <= __kmp_hws_tile.offset ||
+ nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
+ // skip tile as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ continue; // move to next tile
+ }
+ // tile requested, go down the topology tree
+ nC = 0;
+ hC = NULL;
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hL, HWLOC_OBJ_CORE, &hC); // num cores per tile
+ for (int c = 0; c < NC; ++c) {
+ // Check Core ---------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // skip core if all PUs are out of fullMask
+ }
+ ++nC;
+ if (nC <= __kmp_hws_core.offset ||
+ nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
+ // skip core as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // move to next core
+ }
+ // core requested, go down to PUs
+ nT = 0;
+ nTr = 0;
+ hT = NULL;
+ int NT = __kmp_hwloc_count_children_by_type(
+ tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
+ for (int t = 0; t < NT; ++t) {
+ // Check PU ---------------------------------------
+ idx = hT->os_index;
+ if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // skip PU if not in fullMask
+ }
+ ++nT;
+ if (nT <= __kmp_hws_proc.offset ||
+ nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
+ // skip PU
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ ++n_old;
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // move to next PU
+ }
+ ++nTr;
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ ++n_new;
+ ++n_old;
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ } // threads loop
+ if (nTr > 0) {
+ ++nCr; // num cores per socket
+ ++nCo; // total num cores
+ if (nTr > nTpC)
+ nTpC = nTr; // calc max threads per core
+ }
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ } // cores loop
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ } // tiles loop
+ } else { // tile_support
+ // no tiles, check cores
+ nC = 0;
+ hC = NULL;
+ int NC = __kmp_hwloc_count_children_by_type(
+ tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
+ for (int c = 0; c < NC; ++c) {
+ // Check Core -------------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // skip core if all PUs are out of fullMask
+ }
+ ++nC;
+ if (nC <= __kmp_hws_core.offset ||
+ nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
+ // skip core as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // move to next core
+ }
+ // core requested, go down to PUs
+ nT = 0;
+ nTr = 0;
+ hT = NULL;
+ int NT = __kmp_hwloc_count_children_by_type(
+ tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
+ for (int t = 0; t < NT; ++t) {
+ // Check PU ---------------------------------------
+ idx = hT->os_index;
+ if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // skip PU if not in fullMask
+ }
+ ++nT;
+ if (nT <= __kmp_hws_proc.offset ||
+ nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
+ // skip PU
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ ++n_old;
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // move to next PU
+ }
+ ++nTr;
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ ++n_new;
+ ++n_old;
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ } // threads loop
+ if (nTr > 0) {
+ ++nCr; // num cores per socket
+ ++nCo; // total num cores
+ if (nTr > nTpC)
+ nTpC = nTr; // calc max threads per core
+ }
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ } // cores loop
+ } // tiles support
+ } // numa_support
+ if (nCr > 0) { // found cores?
+ ++nPkg; // num sockets
+ if (nCr > nCpP)
+ nCpP = nCr; // calc max cores per socket
+ }
+ } // sockets loop
+
+ // check the subset is valid
+ KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
+ KMP_DEBUG_ASSERT(nPkg > 0);
+ KMP_DEBUG_ASSERT(nCpP > 0);
+ KMP_DEBUG_ASSERT(nTpC > 0);
+ KMP_DEBUG_ASSERT(nCo > 0);
+ KMP_DEBUG_ASSERT(nPkg <= nPackages);
+ KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
+ KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
+ KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
+
+ nPackages = nPkg; // correct num sockets
+ nCoresPerPkg = nCpP; // correct num cores per socket
+ __kmp_nThreadsPerCore = nTpC; // correct num threads per core
+ __kmp_avail_proc = n_new; // correct num procs
+ __kmp_ncores = nCo; // correct num cores
+ // hwloc topology method end
+ } else
+#endif // KMP_USE_HWLOC
+ {
+ int n_old = 0, n_new = 0, proc_num = 0;
+ if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
+ KMP_WARNING(AffHWSubsetNoHWLOC);
+ goto _exit;
+ }
+ if (__kmp_hws_socket.num == 0)
+ __kmp_hws_socket.num = nPackages; // use all available sockets
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = nCoresPerPkg; // use all available cores
+ if (__kmp_hws_proc.num == 0 ||
+ __kmp_hws_proc.num > __kmp_nThreadsPerCore)
+ __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
+ if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffHWSubsetNonUniform );
goto _exit; // don't support non-uniform topology
- }
- if ( depth > 3 ) {
+ }
+ if ( depth > 3 ) {
KMP_WARNING( AffHWSubsetNonThreeLevel );
goto _exit; // don't support not-3-level topology
- }
- if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
+ }
+ if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
KMP_WARNING(AffHWSubsetManySockets);
goto _exit;
- }
- if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
+ }
+ if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
KMP_WARNING( AffHWSubsetManyCores );
goto _exit;
- }
-
- AddrUnsPair *newAddr;
- if (pAddr) // pAddr is NULL in case of affinity_none
+ }
+ // Form the requested subset
+ if (pAddr) // pAddr is NULL in case of affinity_none
newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
- __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
-
- for (i = 0; i < nPackages; ++i) {
- if (i < __kmp_place_socket_offset ||
- i >= __kmp_place_socket_offset + __kmp_place_num_sockets) {
- n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
- if (__kmp_pu_os_idx != NULL) {
- for (j = 0; j < nCoresPerPkg; ++j) { // walk through skipped socket
- for (k = 0; k < __kmp_nThreadsPerCore; ++k) {
- KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
- ++proc_num;
- }
- }
+ __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num);
+ for (int i = 0; i < nPackages; ++i) {
+ if (i < __kmp_hws_socket.offset ||
+ i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
+ // skip not-requested socket
+ n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
+ if (__kmp_pu_os_idx != NULL) {
+ // walk through skipped socket
+ for (int j = 0; j < nCoresPerPkg; ++j) {
+ for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
+ }
}
+ }
} else {
- for (j = 0; j < nCoresPerPkg; ++j) { // walk through requested socket
- if (j < __kmp_place_core_offset ||
- j >= __kmp_place_core_offset + __kmp_place_num_cores) {
- n_old += __kmp_nThreadsPerCore; // skip not-requested core
- if (__kmp_pu_os_idx != NULL) {
- for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through skipped core
- KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
- ++proc_num;
- }
- }
+ // walk through requested socket
+ for (int j = 0; j < nCoresPerPkg; ++j) {
+ if (j < __kmp_hws_core.offset ||
+ j >= __kmp_hws_core.offset + __kmp_hws_core.num)
+ { // skip not-requested core
+ n_old += __kmp_nThreadsPerCore;
+ if (__kmp_pu_os_idx != NULL) {
+ for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
+ }
+ }
+ } else {
+ // walk through requested core
+ for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ if (k < __kmp_hws_proc.num) {
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ n_new++;
} else {
- for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
- if (k < __kmp_place_num_threads_per_core) {
- if (pAddr)
- newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
- n_new++;
- } else {
- if (__kmp_pu_os_idx != NULL)
- KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
- }
- n_old++;
- ++proc_num;
- }
+ if (__kmp_pu_os_idx != NULL)
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
+ n_old++;
+ ++proc_num;
+ }
}
+ }
}
- }
- KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
- KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
- __kmp_place_num_threads_per_core);
-
- nPackages = __kmp_place_num_sockets; // correct nPackages
- nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
- __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
- __kmp_avail_proc = n_new; // correct avail_proc
- __kmp_ncores = nPackages * __kmp_place_num_cores; // correct ncores
-
+ }
+ KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
+ KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
+ __kmp_hws_proc.num);
+ nPackages = __kmp_hws_socket.num; // correct nPackages
+ nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
+ __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
+ __kmp_avail_proc = n_new; // correct avail_proc
+ __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
+ } // non-hwloc topology method
if (pAddr) {
- __kmp_free( *pAddr );
- *pAddr = newAddr; // replace old topology with new one
+ __kmp_free( *pAddr );
+ *pAddr = newAddr; // replace old topology with new one
+ }
+ if (__kmp_affinity_verbose) {
+ char m[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_str_buf_print(&buf, "%d", nPackages);
+ KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ __kmp_str_buf_free(&buf);
}
_exit:
if (__kmp_pu_os_idx != NULL) {
- __kmp_free(__kmp_pu_os_idx);
- __kmp_pu_os_idx = NULL;
+ __kmp_free(__kmp_pu_os_idx);
+ __kmp_pu_os_idx = NULL;
}
}
Modified: openmp/trunk/runtime/src/kmp_csupport.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.cpp?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.cpp (original)
+++ openmp/trunk/runtime/src/kmp_csupport.cpp Thu Apr 13 12:15:07 2017
@@ -3038,18 +3038,6 @@ __kmpc_get_parent_taskid() {
} // __kmpc_get_parent_taskid
-void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
-{
- if ( ! __kmp_init_serial ) {
- __kmp_serial_initialize();
- }
- __kmp_place_num_sockets = nS;
- __kmp_place_socket_offset = sO;
- __kmp_place_num_cores = nC;
- __kmp_place_core_offset = cO;
- __kmp_place_num_threads_per_core = nT;
-}
-
#if OMP_45_ENABLED
/*!
@ingroup WORK_SHARING
Modified: openmp/trunk/runtime/src/kmp_global.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.cpp?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.cpp (original)
+++ openmp/trunk/runtime/src/kmp_global.cpp Thu Apr 13 12:15:07 2017
@@ -264,11 +264,13 @@ kmp_nested_proc_bind_t __kmp_nested_proc
int __kmp_affinity_num_places = 0;
#endif
-int __kmp_place_num_sockets = 0;
-int __kmp_place_socket_offset = 0;
-int __kmp_place_num_cores = 0;
-int __kmp_place_core_offset = 0;
-int __kmp_place_num_threads_per_core = 0;
+kmp_hws_item_t __kmp_hws_socket = {0, 0};
+kmp_hws_item_t __kmp_hws_node = {0, 0};
+kmp_hws_item_t __kmp_hws_tile = {0, 0};
+kmp_hws_item_t __kmp_hws_core = {0, 0};
+kmp_hws_item_t __kmp_hws_proc = {0, 0};
+int __kmp_hws_requested = 0;
+int __kmp_hws_abs_flag = 0; // absolute or per-item number requested
#if OMP_40_ENABLED
kmp_int32 __kmp_default_device = 0;
Modified: openmp/trunk/runtime/src/kmp_settings.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_settings.cpp?rev=300220&r1=300219&r2=300220&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_settings.cpp (original)
+++ openmp/trunk/runtime/src/kmp_settings.cpp Thu Apr 13 12:15:07 2017
@@ -24,6 +24,7 @@
#include "kmp_lock.h"
#include "kmp_io.h"
#include "kmp_affinity.h"
+#include <ctype.h> // toupper()
static int __kmp_env_toPrint( char const * name, int flag );
@@ -3108,6 +3109,12 @@ __kmp_stg_print_topology_method( kmp_str
break;
# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+# if KMP_USE_HWLOC
+ case affinity_top_method_hwloc:
+ value = "hwloc";
+ break;
+# endif
+
case affinity_top_method_cpuinfo:
value = "cpuinfo";
break;
@@ -4297,275 +4304,152 @@ __kmp_stg_print_speculative_statsfile( k
// KMP_HW_SUBSET (was KMP_PLACE_THREADS)
// -------------------------------------------------------------------------------------------------
+// The longest observable sequence of items is
+// Socket-Node-Tile-Core-Thread
+// So, let's limit to 5 levels for now
+// The input string is usually short enough, let's use 512 limit for now
+#define MAX_T_LEVEL 5
+#define MAX_STR_LEN 512
static void
__kmp_stg_parse_hw_subset( char const * name, char const * value, void * data ) {
- // Value example: 5Cx2Tx15O
- // Which means "use 5 cores with offset 15, 2 threads per core"
- // AC: extended to sockets level, examples of
- // "use 2 sockets with offset 6, 2 cores with offset 2 per socket, 2 threads per core":
- // 2s,6o,2c,2o,2t; 2s,6o,2c,2t,2o; 2s@6,2c@2,2t
- // To not break legacy code core-offset can be last;
- // postfix "o" or prefix @ can be offset designator.
- // Note: not all syntax errors are analyzed, some may be skipped.
-#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == 'x')
- static int parsed = 0;
- int num;
- int single_warning = 0;
- int flagS = 0, flagC = 0, flagT = 0, flagSO = 0, flagCO = 0;
- const char *next = value;
- const char *prev;
-
- if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
- KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
- if( parsed == 1 ) {
- return; // already parsed KMP_HW_SUBSET
- }
- }
- parsed = 1;
-
- SKIP_WS(next); // skip white spaces
- if (*next == '\0')
- return; // no data provided, retain default values
- if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
- KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
- if( parsed == 1 ) {
- return; // already parsed KMP_HW_SUBSET
- }
- }
- parsed = 1;
-
- SKIP_WS(next); // skip white spaces
- if (*next == '\0')
- return; // no data provided, retain default values
- // Get num_sockets first (or whatever specified)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 's' || *next == 'S') { // e.g. "2s"
- __kmp_place_num_sockets = num;
- flagS = 1; // got num sockets
- next++;
- if (*next == '@') { // socket offset, e.g. "2s@4"
- flagSO = 1;
- prev = ++next; // don't allow spaces for simplicity
- if (!(*next >= '0' && *next <= '9')) {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- __kmp_place_socket_offset = num;
- }
- } else if (*next == 'c' || *next == 'C') {
- __kmp_place_num_cores = num;
- flagS = flagC = 1; // sockets were not specified - use default
- next++;
- if (*next == '@') { // core offset, e.g. "2c@6"
- flagCO = 1;
- prev = ++next; // don't allow spaces for simplicity
- if (!(*next >= '0' && *next <= '9')) {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- __kmp_place_core_offset = num;
- }
- } else if (CHECK_DELIM(next)) {
- __kmp_place_num_cores = num; // no letter-designator - num cores
- flagS = flagC = 1; // sockets were not specified - use default
- next++;
- } else if (*next == 't' || *next == 'T') {
- __kmp_place_num_threads_per_core = num;
- // sockets, cores were not specified - use default
- return; // we ignore offset value in case all cores are used
- } else if (*next == '\0') {
- __kmp_place_num_cores = num;
- return; // the only value provided - set num cores
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
- SKIP_WS(next);
- if (*next == '\0')
- return; // " n " - something like this
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get second value (could be offset, num_cores, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 'c' || *next == 'C') {
- KMP_DEBUG_ASSERT(flagC == 0);
- __kmp_place_num_cores = num;
- flagC = 1;
- next++;
- if (*next == '@') { // core offset, e.g. "2c@6"
- flagCO = 1;
- prev = ++next; // don't allow spaces for simplicity
- if (!(*next >= '0' && *next <= '9')) {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- __kmp_place_core_offset = num;
- }
- } else if (*next == 'o' || *next == 'O') { // offset specified
- KMP_WARNING(AffHWSubsetDeprecated);
- single_warning = 1;
- if (flagC) { // whether num_cores already specified (sockets skipped)
- KMP_DEBUG_ASSERT(!flagCO); // either "o" or @, not both
- __kmp_place_core_offset = num;
- } else {
- KMP_DEBUG_ASSERT(!flagSO); // either "o" or @, not both
- __kmp_place_socket_offset = num;
- }
- next++;
- } else if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- flagC = 1; // num_cores could be skipped ?
- flagT = 1;
- next++; // can have core-offset specified after num threads
- } else if (*next == '\0') {
- KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
- __kmp_place_num_threads_per_core = num;
- return; // two values provided without letter-designator
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- SKIP_WS(next);
- if (*next == '\0')
- return; // " Ns,Nc " - something like this
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get third value (could be core-offset, num_cores, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- if (flagC == 0)
- return; // num_cores could be skipped (e.g. 2s,4o,2t)
- flagT = 1;
- next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
- } else if (*next == 'c' || *next == 'C') {
- KMP_DEBUG_ASSERT(flagC == 0);
- __kmp_place_num_cores = num;
- flagC = 1;
- next++;
- //KMP_DEBUG_ASSERT(*next != '@'); // socket offset used "o" designator
- } else if (*next == 'o' || *next == 'O') {
- KMP_WARNING(AffHWSubsetDeprecated);
- single_warning = 1;
- KMP_DEBUG_ASSERT(flagC);
- //KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
- __kmp_place_core_offset = num;
- next++;
+ // Value example: 1s,5c@3,2T
+ // Which means "use 1 socket, 5 cores with offset 3, 2 threads per core"
+ static int parsed = 0;
+ if( strcmp(name, "KMP_PLACE_THREADS") == 0 ) {
+ KMP_INFORM(EnvVarDeprecated,name,"KMP_HW_SUBSET");
+ if( parsed == 1 ) {
+ return; // already parsed KMP_HW_SUBSET
+ }
+ }
+ parsed = 1;
+
+ char *components[MAX_T_LEVEL];
+ char const *digits = "0123456789";
+ char input[MAX_STR_LEN];
+ size_t len = 0, mlen = MAX_STR_LEN;
+ int level = 0;
+ // Canonize the string (remove spaces, unify delimiters, etc.)
+ char *pos = (char *)value;
+ while (*pos && mlen) {
+ if (*pos != ' ') { // skip spaces
+ if (len == 0 && *pos == ':') {
+ __kmp_hws_abs_flag = 1; // if the first symbol is ":", skip it
+ } else {
+ input[len] = toupper(*pos);
+ if (input[len] == 'X')
+ input[len] = ','; // unify delimiters of levels
+ if (input[len] == 'O' && strchr(digits, *(pos + 1)))
+ input[len] = '@'; // unify delimiters of offset
+ len++;
+ }
+ }
+ mlen--;
+ pos++;
+ }
+ if (len == 0 || mlen == 0)
+ goto err; // contents is either empty or too long
+ input[len] = '\0';
+ __kmp_hws_requested = 1; // mark that subset requested
+ // Split by delimiter
+ pos = input;
+ components[level++] = pos;
+ while (pos = strchr(pos, ',')) {
+ *pos = '\0'; // modify input and avoid more copying
+ components[level++] = ++pos; // expect something after ","
+ if (level > MAX_T_LEVEL)
+ goto err; // too many components provided
+ }
+ // Check each component
+ for (int i = 0; i < level; ++i) {
+ int offset = 0;
+ int num = atoi(components[i]); // each component should start with a number
+ if ((pos = strchr(components[i], '@'))) {
+ offset = atoi(pos + 1); // save offset
+ *pos = '\0'; // cut the offset from the component
+ }
+ pos = components[i] + strspn(components[i], digits);
+ if (pos == components[i])
+ goto err;
+ // detect the component type
+ switch (*pos) {
+ case 'S': // Socket
+ if (__kmp_hws_socket.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_socket.num = num;
+ __kmp_hws_socket.offset = offset;
+ break;
+ case 'N': // NUMA Node
+ if (__kmp_hws_node.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_node.num = num;
+ __kmp_hws_node.offset = offset;
+ break;
+ case 'L': // Cache
+ if (*(pos + 1) == '2') { // L2 - Tile
+ if (__kmp_hws_tile.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_tile.num = num;
+ __kmp_hws_tile.offset = offset;
+ } else if (*(pos + 1) == '3') { // L3 - Socket
+ if (__kmp_hws_socket.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_socket.num = num;
+ __kmp_hws_socket.offset = offset;
+ } else if (*(pos + 1) == '1') { // L1 - Core
+ if (__kmp_hws_core.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_core.num = num;
+ __kmp_hws_core.offset = offset;
+ }
+ break;
+ case 'C': // Core (or Cache?)
+ if (*(pos + 1) != 'A') {
+ if (__kmp_hws_core.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_core.num = num;
+ __kmp_hws_core.offset = offset;
+ } else { // Cache
+ char *d = pos + strcspn(pos, digits); // find digit
+ if (*d == '2') { // L2 - Tile
+ if (__kmp_hws_tile.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_tile.num = num;
+ __kmp_hws_tile.offset = offset;
+ } else if (*d == '3') { // L3 - Socket
+ if (__kmp_hws_socket.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_socket.num = num;
+ __kmp_hws_socket.offset = offset;
+ } else if (*d == '1') { // L1 - Core
+ if (__kmp_hws_core.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_core.num = num;
+ __kmp_hws_core.offset = offset;
} else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
+ goto err;
}
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- KMP_DEBUG_ASSERT(flagC);
- SKIP_WS(next);
- if ( *next == '\0' )
- return;
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get 4-th value (could be core-offset, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 'o' || *next == 'O') {
- if (!single_warning) { // warn once
- KMP_WARNING(AffHWSubsetDeprecated);
- }
- KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
- __kmp_place_core_offset = num;
- next++;
- } else if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- flagT = 1;
- next++; // can have core-offset specified after num threads
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- return;
- }
- SKIP_WS(next);
- if ( *next == '\0' )
- return;
- if (CHECK_DELIM(next)) {
- next++; // skip delimiter
- SKIP_WS(next);
- }
-
- // Get 5-th value (could be core-offset, num_threads)
- if (*next >= '0' && *next <= '9') {
- prev = next;
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(prev, *next);
- SKIP_WS(next);
- if (*next == 'o' || *next == 'O') {
- if (!single_warning) { // warn once
- KMP_WARNING(AffHWSubsetDeprecated);
- }
- KMP_DEBUG_ASSERT(flagT);
- KMP_DEBUG_ASSERT(!flagSO); // socket offset couldn't use @ designator
- __kmp_place_core_offset = num;
- } else if (*next == 't' || *next == 'T') {
- KMP_DEBUG_ASSERT(flagT == 0);
- __kmp_place_num_threads_per_core = num;
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
- }
- } else {
- KMP_WARNING(AffHWSubsetInvalid, name, value);
+ }
+ break;
+ case 'T': // Thread
+ if (__kmp_hws_proc.num > 0)
+ goto err; // duplicate is not allowed
+ __kmp_hws_proc.num = num;
+ __kmp_hws_proc.offset = offset;
+ break;
+ default:
+ goto err;
}
- return;
-#undef CHECK_DELIM
+ }
+ return;
+err:
+ KMP_WARNING(AffHWSubsetInvalid, name, value);
+ __kmp_hws_requested = 0; // mark that subset not requested
+ return;
}
static void
__kmp_stg_print_hw_subset( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
+ if (__kmp_hws_requested) {
int comma = 0;
kmp_str_buf_t buf;
__kmp_str_buf_init(&buf);
@@ -4573,26 +4457,34 @@ __kmp_stg_print_hw_subset( kmp_str_buf_t
KMP_STR_BUF_PRINT_NAME_EX(name);
else
__kmp_str_buf_print(buffer, " %s='", name);
- if (__kmp_place_num_sockets) {
- __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
- if (__kmp_place_socket_offset)
- __kmp_str_buf_print(&buf, "@%d", __kmp_place_socket_offset);
+ if (__kmp_hws_socket.num) {
+ __kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num);
+ if (__kmp_hws_socket.offset)
+ __kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset);
comma = 1;
}
- if (__kmp_place_num_cores) {
- __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
- if (__kmp_place_core_offset)
- __kmp_str_buf_print(&buf, "@%d", __kmp_place_core_offset);
+ if (__kmp_hws_node.num) {
+ __kmp_str_buf_print(&buf, "%s%dn", comma?",":"", __kmp_hws_node.num);
+ if (__kmp_hws_node.offset)
+ __kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset);
comma = 1;
}
- if (__kmp_place_num_threads_per_core)
- __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
+ if (__kmp_hws_tile.num) {
+ __kmp_str_buf_print(&buf, "%s%dL2", comma?",":"", __kmp_hws_tile.num);
+ if (__kmp_hws_tile.offset)
+ __kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset);
+ comma = 1;
+ }
+ if (__kmp_hws_core.num) {
+ __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_hws_core.num);
+ if (__kmp_hws_core.offset)
+ __kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset);
+ comma = 1;
+ }
+ if (__kmp_hws_proc.num)
+ __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_hws_proc.num);
__kmp_str_buf_print(buffer, "%s'\n", buf.str );
__kmp_str_buf_free(&buf);
-/*
- } else {
- __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) );
-*/
}
}
More information about the Openmp-commits mailing list