[Openmp-commits] [openmp] r249708 - Added sockets to the syntax of KMP_PLACE_THREADS environment variable.
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Thu Oct 8 10:55:54 PDT 2015
Author: jlpeyton
Date: Thu Oct 8 12:55:54 2015
New Revision: 249708
URL: http://llvm.org/viewvc/llvm-project?rev=249708&view=rev
Log:
Added sockets to the syntax of KMP_PLACE_THREADS environment variable.
Added (optional) sockets to the syntax of the KMP_PLACE_THREADS environment variable.
Some limitations:
* The number of sockets, followed by its optional offset, should be specified first (before other parameters).
* The letter designation is mandatory for sockets and then for other parameters.
* If the number of cores is specified first, then the number of sockets defaults to all sockets on the machine; also, the old syntax is partially supported if sockets are skipped.
* If the number of threads per core is specified first, then the number of sockets and the number of cores per socket default to all sockets and all cores per socket, respectively.
* The number of cores per socket cannot be specified before sockets or after threads per core.
* The number of threads per core can be specified before or after the core offset (the old syntax required the thread count to come before the core offset).
* The parameter delimiter can be: empty, comma, or lower-case x (the parser's CHECK_DELIM macro also accepts '@').
* Spaces are allowed around numbers, around letters, around delimiter.
Approximate shorthand specification:
KMP_PLACE_THREADS="[num_sockets(S|s)[[delim]offset(O|o)][delim]][num_cores_per_socket(C|c)[[delim]offset(O|o)][delim]][num_threads_per_core(T|t)]"
Differential Revision: http://reviews.llvm.org/D13175
Modified:
openmp/trunk/runtime/src/i18n/en_US.txt
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_affinity.cpp
openmp/trunk/runtime/src/kmp_csupport.c
openmp/trunk/runtime/src/kmp_global.c
openmp/trunk/runtime/src/kmp_settings.c
Modified: openmp/trunk/runtime/src/i18n/en_US.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/i18n/en_US.txt?rev=249708&r1=249707&r2=249708&view=diff
==============================================================================
--- openmp/trunk/runtime/src/i18n/en_US.txt (original)
+++ openmp/trunk/runtime/src/i18n/en_US.txt Thu Oct 8 12:55:54 2015
@@ -388,7 +388,8 @@ OBSOLETE "%1$s: gran
EnvLockWarn "%1$s must be set prior to first OMP lock call or critical section; ignored."
FutexNotSupported "futex system call not supported; %1$s=%2$s ignored."
AffGranUsing "%1$s: granularity=%2$s will be used."
-AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"nC,mT[,kO]\"."
+AffThrPlaceInvalid "%1$s: invalid value \"%2$s\", valid format is \"nS[,nO],nC[,nO],nT "
+ "(nSockets at offset, nCores at offset, nTthreads per core)\"."
AffThrPlaceUnsupported "KMP_PLACE_THREADS ignored: unsupported architecture."
AffThrPlaceManyCores "KMP_PLACE_THREADS ignored: too many cores requested."
SyntaxErrorUsing "%1$s: syntax error, using %2$s."
@@ -402,6 +403,7 @@ AffThrPlaceNonUniform "KMP_PLACE_
AffThrPlaceNonThreeLevel "KMP_PLACE_THREADS ignored: only three-level topology is supported."
AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_TOPOLOGY_METHOD=group. Using \"granularity=fine\"."
AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
+AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested."
# --------------------------------------------------------------------------------------------------
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=249708&r1=249707&r2=249708&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Oct 8 12:55:54 2015
@@ -788,9 +788,11 @@ typedef enum kmp_cancel_kind_t {
} kmp_cancel_kind_t;
#endif // OMP_40_ENABLED
+extern int __kmp_place_num_sockets;
+extern int __kmp_place_socket_offset;
extern int __kmp_place_num_cores;
-extern int __kmp_place_num_threads_per_core;
extern int __kmp_place_core_offset;
+extern int __kmp_place_num_threads_per_core;
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
@@ -3388,7 +3390,8 @@ KMP_EXPORT kmp_int32 __kmp_get_reduce_me
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
-KMP_EXPORT void __kmpc_place_threads(int,int,int);
+// this function exported for testing of KMP_PLACE_THREADS functionality
+KMP_EXPORT void __kmpc_place_threads(int,int,int,int,int);
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=249708&r1=249707&r2=249708&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Thu Oct 8 12:55:54 2015
@@ -3055,12 +3055,18 @@ __kmp_affinity_process_placelist(kmp_aff
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
- if ( __kmp_place_num_cores == 0 ) {
- if ( __kmp_place_num_threads_per_core == 0 ) {
- return; // no cores limiting actions requested, exit
- }
+ if (__kmp_place_num_sockets == 0 &&
+ __kmp_place_num_cores == 0 &&
+ __kmp_place_num_threads_per_core == 0 )
+ return; // no topology limiting actions requested, exit
+ if (__kmp_place_num_sockets == 0)
+ __kmp_place_num_sockets = nPackages; // use all available sockets
+ if (__kmp_place_num_cores == 0)
__kmp_place_num_cores = nCoresPerPkg; // use all available cores
- }
+ if (__kmp_place_num_threads_per_core == 0 ||
+ __kmp_place_num_threads_per_core > __kmp_nThreadsPerCore)
+ __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
+
if ( !__kmp_affinity_uniform_topology() ) {
KMP_WARNING( AffThrPlaceNonUniform );
return; // don't support non-uniform topology
@@ -3069,8 +3075,9 @@ __kmp_apply_thread_places(AddrUnsPair **
KMP_WARNING( AffThrPlaceNonThreeLevel );
return; // don't support not-3-level topology
}
- if ( __kmp_place_num_threads_per_core == 0 ) {
- __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
+ if (__kmp_place_socket_offset + __kmp_place_num_sockets > nPackages) {
+ KMP_WARNING(AffThrPlaceManySockets);
+ return;
}
if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
KMP_WARNING( AffThrPlaceManyCores );
@@ -3078,23 +3085,31 @@ __kmp_apply_thread_places(AddrUnsPair **
}
AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
- nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
+ __kmp_place_num_sockets * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
+
int i, j, k, n_old = 0, n_new = 0;
- for ( i = 0; i < nPackages; ++i ) {
- for ( j = 0; j < nCoresPerPkg; ++j ) {
- if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
- n_old += __kmp_nThreadsPerCore; // skip not-requested core
- } else {
- for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
- if ( k < __kmp_place_num_threads_per_core ) {
- newAddr[n_new] = (*pAddr)[n_old]; // copy requested core' data to new location
- n_new++;
+ for (i = 0; i < nPackages; ++i)
+ if (i < __kmp_place_socket_offset ||
+ i >= __kmp_place_socket_offset + __kmp_place_num_sockets)
+ n_old += nCoresPerPkg * __kmp_nThreadsPerCore; // skip not-requested socket
+ else
+ for (j = 0; j < nCoresPerPkg; ++j) // walk through requested socket
+ if (j < __kmp_place_core_offset ||
+ j >= __kmp_place_core_offset + __kmp_place_num_cores)
+ n_old += __kmp_nThreadsPerCore; // skip not-requested core
+ else
+ for (k = 0; k < __kmp_nThreadsPerCore; ++k) { // walk through requested core
+ if (k < __kmp_place_num_threads_per_core) {
+ newAddr[n_new] = (*pAddr)[n_old]; // collect requested thread's data
+ n_new++;
+ }
+ n_old++;
}
- n_old++;
- }
- }
- }
- }
+ KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
+ KMP_DEBUG_ASSERT(n_new == __kmp_place_num_sockets * __kmp_place_num_cores *
+ __kmp_place_num_threads_per_core);
+
+ nPackages = __kmp_place_num_sockets; // correct nPackages
nCoresPerPkg = __kmp_place_num_cores; // correct nCoresPerPkg
__kmp_nThreadsPerCore = __kmp_place_num_threads_per_core; // correct __kmp_nThreadsPerCore
__kmp_avail_proc = n_new; // correct avail_proc
Modified: openmp/trunk/runtime/src/kmp_csupport.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_csupport.c?rev=249708&r1=249707&r2=249708&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_csupport.c (original)
+++ openmp/trunk/runtime/src/kmp_csupport.c Thu Oct 8 12:55:54 2015
@@ -2855,14 +2855,16 @@ __kmpc_get_parent_taskid() {
} // __kmpc_get_parent_taskid
-void __kmpc_place_threads(int nC, int nT, int nO)
+void __kmpc_place_threads(int nS, int sO, int nC, int cO, int nT)
{
if ( ! __kmp_init_serial ) {
__kmp_serial_initialize();
}
+ __kmp_place_num_sockets = nS;
+ __kmp_place_socket_offset = sO;
__kmp_place_num_cores = nC;
+ __kmp_place_core_offset = cO;
__kmp_place_num_threads_per_core = nT;
- __kmp_place_core_offset = nO;
}
// end of file //
Modified: openmp/trunk/runtime/src/kmp_global.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.c?rev=249708&r1=249707&r2=249708&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.c (original)
+++ openmp/trunk/runtime/src/kmp_global.c Thu Oct 8 12:55:54 2015
@@ -249,9 +249,11 @@ kmp_nested_proc_bind_t __kmp_nested_proc
int __kmp_affinity_num_places = 0;
#endif
+int __kmp_place_num_sockets = 0;
+int __kmp_place_socket_offset = 0;
int __kmp_place_num_cores = 0;
-int __kmp_place_num_threads_per_core = 0;
int __kmp_place_core_offset = 0;
+int __kmp_place_num_threads_per_core = 0;
kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams;
Modified: openmp/trunk/runtime/src/kmp_settings.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_settings.c?rev=249708&r1=249707&r2=249708&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_settings.c (original)
+++ openmp/trunk/runtime/src/kmp_settings.c Thu Oct 8 12:55:54 2015
@@ -4117,127 +4117,220 @@ static void
__kmp_stg_parse_place_threads( char const * name, char const * value, void * data ) {
// Value example: 5Cx2Tx15O
// Which means "use 5 cores with offset 15, 2 threads per core"
-
+ // AC: extended to sockets level:
+ // 2s,6o,2c,2o,2t or 2s,6o,2c,2t,2o
+ // (to not break legacy code core-offset can be last).
+ // Note: not all syntax errors are analyzed, some may be skipped.
+#define CHECK_DELIM(_x) (*(_x) == ',' || *(_x) == '@' || *(_x) == 'x')
int num;
- int prev_delim = 0;
+ int flagS = 0, flagC = 0, flagT = 0;
const char *next = value;
const char *prev;
- SKIP_WS( next );
- if ( *next == '\0' ) {
- return; // leave default values
- }
-
- // Get num_cores first
- if ( *next >= '0' && *next <= '9' ) {
+ SKIP_WS(next); // skip white spaces
+ if (*next == '\0')
+ return; // no data provided, retain default values
+ // Get num_sockets first (or whatever specified)
+ if (*next >= '0' && *next <= '9') {
prev = next;
- SKIP_DIGITS( next );
- num = __kmp_str_to_int( prev, *next );
- SKIP_WS( next );
- if ( *next == 'C' || *next == 'c' ) {
- __kmp_place_num_cores = num;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 's' || *next == 'S') {
+ __kmp_place_num_sockets = num;
+ flagS = 1; // got num sockets
next++;
- } else if ( *next == ',' || *next == 'x' ) {
+ } else if (*next == 'c' || *next == 'C') {
__kmp_place_num_cores = num;
- prev_delim = 1;
+ flagS = flagC = 1; // sockets were not specified - use default
+ next++;
+ } else if (CHECK_DELIM(next)) {
+ __kmp_place_num_cores = num; // no letter-designator - num cores
+ flagS = flagC = 1; // sockets were not specified - use default
next++;
- } else if ( *next == 'T' || *next == 't' ) {
+ } else if (*next == 't' || *next == 'T') {
__kmp_place_num_threads_per_core = num;
+ // sockets, cores were not specified - use default
return; // we ignore offset value in case all cores are used
- } else if ( *next == '\0' ) {
+ } else if (*next == '\0') {
__kmp_place_num_cores = num;
- return; // the only value provided
+ return; // the only value provided - set num cores
} else {
- KMP_WARNING( AffThrPlaceInvalid, name, value );
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
- } else if ( *next == ',' || *next == 'x' ) {
- // First character is delimiter, skip it, leave num_cores default value
- prev_delim = 2;
- next++;
} else {
- KMP_WARNING( AffThrPlaceInvalid, name, value );
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
- SKIP_WS( next );
- if ( *next == '\0' ) {
+ KMP_DEBUG_ASSERT(flagS); // num sockets should already be set here
+ SKIP_WS(next);
+ if (*next == '\0')
return; // " n " - something like this
- }
- if ( ( *next == ',' || *next == 'x' ) && !prev_delim ) {
- prev_delim = 1;
- next++; // skip delimiter after num_core value
- SKIP_WS( next );
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
}
- // Get threads_per_core next
- if ( *next >= '0' && *next <= '9' ) {
- prev_delim = 0;
+ // Get second value (could be offset, num_cores, num_threads)
+ if (*next >= '0' && *next <= '9') {
prev = next;
- SKIP_DIGITS( next );
- num = __kmp_str_to_int( prev, *next );
- SKIP_WS( next );
- if ( *next == 'T' || *next == 't' ) {
- __kmp_place_num_threads_per_core = num;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'o' || *next == 'O') { // offset specified
+ if (flagC) { // whether num_cores already specified (when sockets skipped)
+ __kmp_place_core_offset = num;
+ } else {
+ __kmp_place_socket_offset = num;
+ }
+ next++;
+ } else if (*next == 'c' || *next == 'C') {
+ KMP_DEBUG_ASSERT(flagC == 0);
+ __kmp_place_num_cores = num;
+ flagC = 1;
next++;
- } else if ( *next == ',' || *next == 'x' ) {
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
- prev_delim = 1;
+ flagC = 1; // num_cores could be skipped ?
+ flagT = 1;
+ next++; // can have core-offset specified after num threads
+ } else if (*next == '\0') {
+ KMP_DEBUG_ASSERT(flagC); // 4x2 means 4 cores 2 threads per core
+ __kmp_place_num_threads_per_core = num;
+ return; // two values provided without letter-designator
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ SKIP_WS(next);
+ if (*next == '\0')
+ return; // " Ns,Nc " - something like this
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
+ }
+
+ // Get third value (could be core-offset, num_cores, num_threads)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'c' || *next == 'C') {
+ KMP_DEBUG_ASSERT(flagC == 0);
+ __kmp_place_num_cores = num;
+ flagC = 1;
next++;
- } else if ( *next == 'O' || *next == 'o' ) {
+ } else if (*next == 'o' || *next == 'O') {
+ KMP_DEBUG_ASSERT(flagC);
__kmp_place_core_offset = num;
- return; // threads_per_core remains default
- } else if ( *next == '\0' ) {
+ next++;
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
__kmp_place_num_threads_per_core = num;
- return;
+ if (flagC == 0)
+ return; // num_cores could be skipped (e.g. 2s,4o,2t)
+ flagT = 1;
+ next++; // can have core-offset specified later (e.g. 2s,1c,2t,3o)
} else {
- KMP_WARNING( AffThrPlaceInvalid, name, value );
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
- } else if ( *next == ',' || *next == 'x' ) {
- if ( prev_delim == 2 ) {
- return; // no sense in the only offset value, thus skip the rest
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
+ }
+ KMP_DEBUG_ASSERT(flagC);
+ SKIP_WS(next);
+ if ( *next == '\0' )
+ return;
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
+ }
+
+ // Get 4-th value (could be core-offset, num_threads)
+ if (*next >= '0' && *next <= '9') {
+ prev = next;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'o' || *next == 'O') {
+ __kmp_place_core_offset = num;
+ next++;
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
+ __kmp_place_num_threads_per_core = num;
+ flagT = 1;
+ next++; // can have core-offset specified after num threads
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ return;
}
- KMP_DEBUG_ASSERT( prev_delim == 1 );
- next++; // no value for threads_per_core provided
} else {
- KMP_WARNING( AffThrPlaceInvalid, name, value );
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
return;
}
- SKIP_WS( next );
- if ( *next == '\0' ) {
- return; // " nC,mT " - something like this
- }
- if ( ( *next == ',' || *next == 'x' ) && !prev_delim ) {
- prev_delim = 1;
- next++; // skip delimiter after threads_per_core value
- SKIP_WS( next );
+ SKIP_WS(next);
+ if ( *next == '\0' )
+ return;
+ if (CHECK_DELIM(next)) {
+ next++; // skip delimiter
+ SKIP_WS(next);
}
- // Get core offset last if any,
- // don't bother checking syntax after all data obtained
- if ( *next >= '0' && *next <= '9' ) {
+ // Get 5-th value (could be core-offset, num_threads)
+ if (*next >= '0' && *next <= '9') {
prev = next;
- SKIP_DIGITS( next );
- num = __kmp_str_to_int( prev, *next );
- __kmp_place_core_offset = num;
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(prev, *next);
+ SKIP_WS(next);
+ if (*next == 'o' || *next == 'O') {
+ KMP_DEBUG_ASSERT(flagT);
+ __kmp_place_core_offset = num;
+ } else if (*next == 't' || *next == 'T') {
+ KMP_DEBUG_ASSERT(flagT == 0);
+ __kmp_place_num_threads_per_core = num;
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
+ }
+ } else {
+ KMP_WARNING(AffThrPlaceInvalid, name, value);
}
+ return;
+#undef CHECK_DELIM
}
static void
__kmp_stg_print_place_threads( kmp_str_buf_t * buffer, char const * name, void * data ) {
- if ( __kmp_place_num_cores + __kmp_place_num_threads_per_core ) {
+ if (__kmp_place_num_sockets + __kmp_place_num_cores + __kmp_place_num_threads_per_core) {
+ int comma = 0;
kmp_str_buf_t buf;
- __kmp_str_buf_init( &buf );
- if( __kmp_env_format ) {
+ __kmp_str_buf_init(&buf);
+ if(__kmp_env_format)
KMP_STR_BUF_PRINT_NAME_EX(name);
- } else {
- __kmp_str_buf_print( buffer, " %s='", name );
+ else
+ __kmp_str_buf_print(buffer, " %s='", name);
+ if (__kmp_place_num_sockets) {
+ __kmp_str_buf_print(&buf, "%ds", __kmp_place_num_sockets);
+ if (__kmp_place_socket_offset)
+ __kmp_str_buf_print(&buf, "@%do", __kmp_place_socket_offset);
+ comma = 1;
}
- __kmp_str_buf_print( &buf, "%dC", __kmp_place_num_cores );
- __kmp_str_buf_print( &buf, "x%dT", __kmp_place_num_threads_per_core );
- if ( __kmp_place_core_offset ) {
- __kmp_str_buf_print( &buf, ",%dO", __kmp_place_core_offset );
+ if (__kmp_place_num_cores) {
+ __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_place_num_cores);
+ if (__kmp_place_core_offset)
+ __kmp_str_buf_print(&buf, "@%do", __kmp_place_core_offset);
+ comma = 1;
}
+ if (__kmp_place_num_threads_per_core)
+ __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_place_num_threads_per_core);
__kmp_str_buf_print(buffer, "%s'\n", buf.str );
__kmp_str_buf_free(&buf);
/*
More information about the Openmp-commits
mailing list