[Openmp-commits] [openmp] r302929 - Clang-format and whitespace cleanup of source code
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Fri May 12 11:01:35 PDT 2017
Author: jlpeyton
Date: Fri May 12 13:01:32 2017
New Revision: 302929
URL: http://llvm.org/viewvc/llvm-project?rev=302929&view=rev
Log:
Clang-format and whitespace cleanup of source code
This patch contains the clang-format and whitespace cleanup of the entire code
base. Some of clang-format's changes made the code look worse in places. A best
effort was made to resolve the bulk of these problems, but many remain. Most of
the remaining problems involve mangled line breaks and comment indentation.
Patch by Terry Wilmarth
Differential Revision: https://reviews.llvm.org/D32659
Modified:
openmp/trunk/runtime/src/extractExternal.cpp
openmp/trunk/runtime/src/kmp.h
openmp/trunk/runtime/src/kmp_affinity.cpp
openmp/trunk/runtime/src/kmp_affinity.h
openmp/trunk/runtime/src/kmp_alloc.cpp
openmp/trunk/runtime/src/kmp_atomic.cpp
openmp/trunk/runtime/src/kmp_atomic.h
openmp/trunk/runtime/src/kmp_barrier.cpp
openmp/trunk/runtime/src/kmp_cancel.cpp
openmp/trunk/runtime/src/kmp_csupport.cpp
openmp/trunk/runtime/src/kmp_debug.cpp
openmp/trunk/runtime/src/kmp_debug.h
openmp/trunk/runtime/src/kmp_debugger.cpp
openmp/trunk/runtime/src/kmp_debugger.h
openmp/trunk/runtime/src/kmp_dispatch.cpp
openmp/trunk/runtime/src/kmp_environment.cpp
openmp/trunk/runtime/src/kmp_environment.h
openmp/trunk/runtime/src/kmp_error.cpp
openmp/trunk/runtime/src/kmp_error.h
openmp/trunk/runtime/src/kmp_ftn_cdecl.cpp
openmp/trunk/runtime/src/kmp_ftn_entry.h
openmp/trunk/runtime/src/kmp_ftn_extra.cpp
openmp/trunk/runtime/src/kmp_ftn_os.h
openmp/trunk/runtime/src/kmp_ftn_stdcall.cpp
openmp/trunk/runtime/src/kmp_global.cpp
openmp/trunk/runtime/src/kmp_gsupport.cpp
openmp/trunk/runtime/src/kmp_i18n.cpp
openmp/trunk/runtime/src/kmp_i18n.h
openmp/trunk/runtime/src/kmp_import.cpp
openmp/trunk/runtime/src/kmp_io.cpp
openmp/trunk/runtime/src/kmp_io.h
openmp/trunk/runtime/src/kmp_itt.cpp
openmp/trunk/runtime/src/kmp_itt.h
openmp/trunk/runtime/src/kmp_itt.inl
openmp/trunk/runtime/src/kmp_lock.cpp
openmp/trunk/runtime/src/kmp_lock.h
openmp/trunk/runtime/src/kmp_omp.h
openmp/trunk/runtime/src/kmp_os.h
openmp/trunk/runtime/src/kmp_platform.h
openmp/trunk/runtime/src/kmp_runtime.cpp
openmp/trunk/runtime/src/kmp_safe_c_api.h
openmp/trunk/runtime/src/kmp_sched.cpp
openmp/trunk/runtime/src/kmp_settings.cpp
openmp/trunk/runtime/src/kmp_settings.h
openmp/trunk/runtime/src/kmp_stats.cpp
openmp/trunk/runtime/src/kmp_stats.h
openmp/trunk/runtime/src/kmp_stats_timing.cpp
openmp/trunk/runtime/src/kmp_stats_timing.h
openmp/trunk/runtime/src/kmp_str.cpp
openmp/trunk/runtime/src/kmp_str.h
openmp/trunk/runtime/src/kmp_stub.cpp
openmp/trunk/runtime/src/kmp_stub.h
openmp/trunk/runtime/src/kmp_taskdeps.cpp
openmp/trunk/runtime/src/kmp_tasking.cpp
openmp/trunk/runtime/src/kmp_taskq.cpp
openmp/trunk/runtime/src/kmp_threadprivate.cpp
openmp/trunk/runtime/src/kmp_utility.cpp
openmp/trunk/runtime/src/kmp_version.cpp
openmp/trunk/runtime/src/kmp_version.h
openmp/trunk/runtime/src/kmp_wait_release.cpp
openmp/trunk/runtime/src/kmp_wait_release.h
openmp/trunk/runtime/src/kmp_wrapper_getpid.h
openmp/trunk/runtime/src/kmp_wrapper_malloc.h
openmp/trunk/runtime/src/ompt-event-specific.h
openmp/trunk/runtime/src/ompt-general.cpp
openmp/trunk/runtime/src/ompt-internal.h
openmp/trunk/runtime/src/ompt-specific.cpp
openmp/trunk/runtime/src/ompt-specific.h
openmp/trunk/runtime/src/tsan_annotations.cpp
openmp/trunk/runtime/src/tsan_annotations.h
openmp/trunk/runtime/src/z_Linux_asm.s
openmp/trunk/runtime/src/z_Linux_util.cpp
openmp/trunk/runtime/src/z_Windows_NT-586_asm.asm
openmp/trunk/runtime/src/z_Windows_NT-586_util.cpp
openmp/trunk/runtime/src/z_Windows_NT_util.cpp
Modified: openmp/trunk/runtime/src/extractExternal.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/extractExternal.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/extractExternal.cpp (original)
+++ openmp/trunk/runtime/src/extractExternal.cpp Fri May 12 13:01:32 2017
@@ -13,13 +13,13 @@
//===----------------------------------------------------------------------===//
-#include <stdlib.h>
-#include <iostream>
-#include <strstream>
#include <fstream>
-#include <string>
-#include <set>
+#include <iostream>
#include <map>
+#include <set>
+#include <stdlib.h>
+#include <string>
+#include <strstream>
/* Given a set of n object files h ('external' object files) and a set of m
object files o ('internal' object files),
@@ -30,468 +30,457 @@
Usage:
hide.exe <n> <filenames for h> <filenames for o>
- Thus, the prefixed symbols become hidden in the sense that they now have a special
- prefix.
+ Thus, the prefixed symbols become hidden in the sense that they now have a
+ special prefix.
*/
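For illustration, a hypothetical invocation (filenames made up, not part of
this patch) would be "hide.exe 2 extA.obj extB.obj intA.obj intB.obj", which
treats the first two object files as external and the remaining two as
internal.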
using namespace std;
-void stop(char* errorMsg) {
- printf("%s\n", errorMsg);
- exit(1);
+void stop(char *errorMsg) {
+ printf("%s\n", errorMsg);
+ exit(1);
}
// an entry in the symbol table of a .OBJ file
class Symbol {
public:
- __int64 name;
- unsigned value;
- unsigned short sectionNum, type;
- char storageClass, nAux;
+ __int64 name;
+ unsigned value;
+ unsigned short sectionNum, type;
+ char storageClass, nAux;
};
class _rstream : public istrstream {
private:
- const char *buf;
+ const char *buf;
+
protected:
- _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){}
- ~_rstream() {
- delete[]buf;
- }
+ _rstream(pair<const char *, streamsize> p)
+ : istrstream(p.first, p.second), buf(p.first) {}
+ ~_rstream() { delete[] buf; }
};
-/* A stream encapuslating the content of a file or the content of a string, overriding the
- >> operator to read various integer types in binary form, as well as a symbol table
- entry.
-*/
+// A stream encapuslating the content of a file or the content of a string,
+// overriding the >> operator to read various integer types in binary form,
+// as well as a symbol table entry.
class rstream : public _rstream {
private:
- template<class T>
- inline rstream& doRead(T &x) {
- read((char*)&x, sizeof(T));
- return *this;
- }
- static pair<const char*, streamsize> getBuf(const char *fileName) {
- ifstream raw(fileName,ios::binary | ios::in);
- if(!raw.is_open())
- stop("rstream.getBuf: Error opening file");
- raw.seekg(0,ios::end);
- streampos fileSize = raw.tellg();
- if(fileSize < 0)
- stop("rstream.getBuf: Error reading file");
- char *buf = new char[fileSize];
- raw.seekg(0,ios::beg);
- raw.read(buf, fileSize);
- return pair<const char*, streamsize>(buf,fileSize);
- }
+ template <class T> inline rstream &doRead(T &x) {
+ read((char *)&x, sizeof(T));
+ return *this;
+ }
+ static pair<const char *, streamsize> getBuf(const char *fileName) {
+ ifstream raw(fileName, ios::binary | ios::in);
+ if (!raw.is_open())
+ stop("rstream.getBuf: Error opening file");
+ raw.seekg(0, ios::end);
+ streampos fileSize = raw.tellg();
+ if (fileSize < 0)
+ stop("rstream.getBuf: Error reading file");
+ char *buf = new char[fileSize];
+ raw.seekg(0, ios::beg);
+ raw.read(buf, fileSize);
+ return pair<const char *, streamsize>(buf, fileSize);
+ }
+
public:
- // construct from a string
- rstream(const char *buf,streamsize size):_rstream(pair<const char*,streamsize>(buf, size)){}
- /* construct from a file whole content is fully read once to initialize the content of
- this stream
- */
- rstream(const char *fileName):_rstream(getBuf(fileName)){}
- rstream& operator>>(int &x) {
- return doRead(x);
- }
- rstream& operator>>(unsigned &x) {
- return doRead(x);
- }
- rstream& operator>>(short &x) {
- return doRead(x);
- }
- rstream& operator>>(unsigned short &x) {
- return doRead(x);
- }
- rstream& operator>>(Symbol &e) {
- read((char*)&e, 18);
- return *this;
- }
+ // construct from a string
+ rstream(const char *buf, streamsize size)
+ : _rstream(pair<const char *, streamsize>(buf, size)) {}
+ // construct from a file whole content is fully read once to initialize the
+ // content of this stream
+ rstream(const char *fileName) : _rstream(getBuf(fileName)) {}
+ rstream &operator>>(int &x) { return doRead(x); }
+ rstream &operator>>(unsigned &x) { return doRead(x); }
+ rstream &operator>>(short &x) { return doRead(x); }
+ rstream &operator>>(unsigned short &x) { return doRead(x); }
+ rstream &operator>>(Symbol &e) {
+ read((char *)&e, 18);
+ return *this;
+ }
};
// string table in a .OBJ file
class StringTable {
private:
- map<string, unsigned> directory;
- size_t length;
- char *data;
-
- // make <directory> from <length> bytes in <data>
- void makeDirectory(void) {
- unsigned i = 4;
- while(i < length) {
- string s = string(data + i);
- directory.insert(make_pair(s, i));
- i += s.size() + 1;
- }
- }
- // initialize <length> and <data> with contents specified by the arguments
- void init(const char *_data) {
- unsigned _length = *(unsigned*)_data;
-
- if(_length < sizeof(unsigned) || _length != *(unsigned*)_data)
- stop("StringTable.init: Invalid symbol table");
- if(_data[_length - 1]) {
- // to prevent runaway strings, make sure the data ends with a zero
- data = new char[length = _length + 1];
- data[_length] = 0;
- } else {
- data = new char[length = _length];
- }
- *(unsigned*)data = length;
- KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
- length - sizeof(unsigned));
- makeDirectory();
- }
+ map<string, unsigned> directory;
+ size_t length;
+ char *data;
+
+ // make <directory> from <length> bytes in <data>
+ void makeDirectory(void) {
+ unsigned i = 4;
+ while (i < length) {
+ string s = string(data + i);
+ directory.insert(make_pair(s, i));
+ i += s.size() + 1;
+ }
+ }
+ // initialize <length> and <data> with contents specified by the arguments
+ void init(const char *_data) {
+ unsigned _length = *(unsigned *)_data;
+
+ if (_length < sizeof(unsigned) || _length != *(unsigned *)_data)
+ stop("StringTable.init: Invalid symbol table");
+ if (_data[_length - 1]) {
+ // to prevent runaway strings, make sure the data ends with a zero
+ data = new char[length = _length + 1];
+ data[_length] = 0;
+ } else {
+ data = new char[length = _length];
+ }
+ *(unsigned *)data = length;
+ KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned),
+ length - sizeof(unsigned));
+ makeDirectory();
+ }
+
public:
- StringTable(rstream &f) {
- /* Construct string table by reading from f.
- */
- streampos s;
- unsigned strSize;
- char *strData;
-
- s = f.tellg();
- f>>strSize;
- if(strSize < sizeof(unsigned))
- stop("StringTable: Invalid string table");
- strData = new char[strSize];
- *(unsigned*)strData = strSize;
- // read the raw data into <strData>
- f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
- s = f.tellg() - s;
- if(s < strSize)
- stop("StringTable: Unexpected EOF");
- init(strData);
- delete[]strData;
- }
- StringTable(const set<string> &strings) {
- /* Construct string table from given strings.
- */
- char *p;
- set<string>::const_iterator it;
- size_t s;
-
- // count required size for data
- for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
- size_t l = (*it).size();
-
- if(l > (unsigned) 0xFFFFFFFF)
- stop("StringTable: String too long");
- if(l > 8) {
- length += l + 1;
- if(length > (unsigned) 0xFFFFFFFF)
- stop("StringTable: Symbol table too long");
- }
- }
- data = new char[length];
- *(unsigned*)data = length;
- // populate data and directory
- for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) {
- const string &str = *it;
- size_t l = str.size();
- if(l > 8) {
- directory.insert(make_pair(str, p - data));
- KMP_MEMCPY(p, str.c_str(), l);
- p[l] = 0;
- p += l + 1;
- }
- }
- }
- ~StringTable() {
- delete[] data;
- }
- /* Returns encoding for given string based on this string table.
- Error if string length is greater than 8 but string is not in
- the string table--returns 0.
- */
- __int64 encode(const string &str) {
- __int64 r;
-
- if(str.size() <= 8) {
- // encoded directly
- ((char*)&r)[7] = 0;
- KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8);
- return r;
- } else {
- // represented as index into table
- map<string,unsigned>::const_iterator it = directory.find(str);
- if(it == directory.end())
- stop("StringTable::encode: String now found in string table");
- ((unsigned*)&r)[0] = 0;
- ((unsigned*)&r)[1] = (*it).second;
- return r;
- }
- }
- /* Returns string represented by x based on this string table.
- Error if x references an invalid position in the table--returns
- the empty string.
- */
- string decode(__int64 x) const {
- if(*(unsigned*)&x == 0) {
- // represented as index into table
- unsigned &p = ((unsigned*)&x)[1];
- if(p >= length)
- stop("StringTable::decode: Invalid string table lookup");
- return string(data + p);
- } else {
- // encoded directly
- char *p = (char*)&x;
- int i;
-
- for(i = 0; i < 8 && p[i]; ++i);
- return string(p, i);
- }
- }
- void write(ostream &os) {
- os.write(data, length);
+ StringTable(rstream &f) {
+ // Construct string table by reading from f.
+ streampos s;
+ unsigned strSize;
+ char *strData;
+
+ s = f.tellg();
+ f >> strSize;
+ if (strSize < sizeof(unsigned))
+ stop("StringTable: Invalid string table");
+ strData = new char[strSize];
+ *(unsigned *)strData = strSize;
+ // read the raw data into <strData>
+ f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned));
+ s = f.tellg() - s;
+ if (s < strSize)
+ stop("StringTable: Unexpected EOF");
+ init(strData);
+ delete[] strData;
+ }
+ StringTable(const set<string> &strings) {
+ // Construct string table from given strings.
+ char *p;
+ set<string>::const_iterator it;
+ size_t s;
+
+ // count required size for data
+ for (length = sizeof(unsigned), it = strings.begin(); it != strings.end();
+ ++it) {
+ size_t l = (*it).size();
+
+ if (l > (unsigned)0xFFFFFFFF)
+ stop("StringTable: String too long");
+ if (l > 8) {
+ length += l + 1;
+ if (length > (unsigned)0xFFFFFFFF)
+ stop("StringTable: Symbol table too long");
+ }
+ }
+ data = new char[length];
+ *(unsigned *)data = length;
+ // populate data and directory
+ for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end();
+ ++it) {
+ const string &str = *it;
+ size_t l = str.size();
+ if (l > 8) {
+ directory.insert(make_pair(str, p - data));
+ KMP_MEMCPY(p, str.c_str(), l);
+ p[l] = 0;
+ p += l + 1;
+ }
+ }
+ }
+ ~StringTable() { delete[] data; }
+ // Returns encoding for given string based on this string table. Error if
+ // string length is greater than 8 but string is not in the string table
+ // -- returns 0.
+ __int64 encode(const string &str) {
+ __int64 r;
+
+ if (str.size() <= 8) {
+ // encoded directly
+ ((char *)&r)[7] = 0;
+ KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8);
+ return r;
+ } else {
+ // represented as index into table
+ map<string, unsigned>::const_iterator it = directory.find(str);
+ if (it == directory.end())
+ stop("StringTable::encode: String now found in string table");
+ ((unsigned *)&r)[0] = 0;
+ ((unsigned *)&r)[1] = (*it).second;
+ return r;
+ }
+ }
+ // Returns string represented by x based on this string table. Error if x
+ // references an invalid position in the table--returns the empty string.
+ string decode(__int64 x) const {
+ if (*(unsigned *)&x == 0) {
+ // represented as index into table
+ unsigned &p = ((unsigned *)&x)[1];
+ if (p >= length)
+ stop("StringTable::decode: Invalid string table lookup");
+ return string(data + p);
+ } else {
+ // encoded directly
+ char *p = (char *)&x;
+ int i;
+
+ for (i = 0; i < 8 && p[i]; ++i)
+ ;
+ return string(p, i);
}
+ }
+ void write(ostream &os) { os.write(data, length); }
};
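As a rough sketch of the encode/decode scheme above (illustrative only, not
part of this patch; assumes a set<string> named 'strings' that contains the
long symbol name used below):

    StringTable st(strings);
    __int64 a = st.encode("foo"); // <= 8 chars: characters stored in-place
    __int64 b = st.encode("__kmp_external_foo_bar"); // first 4 bytes zero,
                                                     // next 4 = table offset
    string sa = st.decode(a); // "foo"
    string sb = st.decode(b); // "__kmp_external_foo_bar"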
-/* for the named object file, determines the set of defined symbols and the set of undefined external symbols
- and writes them to <defined> and <undefined> respectively
-*/
-void computeExternalSymbols(const char *fileName, set<string> *defined, set<string> *undefined){
- streampos fileSize;
- size_t strTabStart;
- unsigned symTabStart, symNEntries;
- rstream f(fileName);
-
- f.seekg(0,ios::end);
- fileSize = f.tellg();
-
- f.seekg(8);
- f >> symTabStart >> symNEntries;
- // seek to the string table
- f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
- if(f.eof()) {
- printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n",
- fileName, (unsigned long) fileSize, symTabStart, symNEntries);
- stop("computeExternalSymbols: Unexpected EOF 1");
- }
- StringTable stringTable(f); // read the string table
- if(f.tellg() != fileSize)
- stop("computeExternalSymbols: Unexpected data after string table");
-
- f.clear();
- f.seekg(symTabStart); // seek to the symbol table
-
- defined->clear(); undefined->clear();
- for(int i = 0; i < symNEntries; ++i) {
- // process each entry
- Symbol e;
-
- if(f.eof())
- stop("computeExternalSymbols: Unexpected EOF 2");
- f>>e;
- if(f.fail())
- stop("computeExternalSymbols: File read error");
- if(e.nAux) { // auxiliary entry: skip
- f.seekg(e.nAux * 18, ios::cur);
- i += e.nAux;
- }
- // if symbol is extern and defined in the current file, insert it
- if(e.storageClass == 2)
- if(e.sectionNum)
- defined->insert(stringTable.decode(e.name));
- else
- undefined->insert(stringTable.decode(e.name));
- }
+// for the named object file, determines the set of defined symbols and the set
+// of undefined external symbols and writes them to <defined> and <undefined>
+// respectively
+void computeExternalSymbols(const char *fileName, set<string> *defined,
+ set<string> *undefined) {
+ streampos fileSize;
+ size_t strTabStart;
+ unsigned symTabStart, symNEntries;
+ rstream f(fileName);
+
+ f.seekg(0, ios::end);
+ fileSize = f.tellg();
+
+ f.seekg(8);
+ f >> symTabStart >> symNEntries;
+ // seek to the string table
+ f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
+ if (f.eof()) {
+ printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart "
+ "= %u, symNEntries = %u\n",
+ fileName, (unsigned long)fileSize, symTabStart, symNEntries);
+ stop("computeExternalSymbols: Unexpected EOF 1");
+ }
+ StringTable stringTable(f); // read the string table
+ if (f.tellg() != fileSize)
+ stop("computeExternalSymbols: Unexpected data after string table");
+
+ f.clear();
+ f.seekg(symTabStart); // seek to the symbol table
+
+ defined->clear();
+ undefined->clear();
+ for (int i = 0; i < symNEntries; ++i) {
+ // process each entry
+ Symbol e;
+
+ if (f.eof())
+ stop("computeExternalSymbols: Unexpected EOF 2");
+ f >> e;
+ if (f.fail())
+ stop("computeExternalSymbols: File read error");
+ if (e.nAux) { // auxiliary entry: skip
+ f.seekg(e.nAux * 18, ios::cur);
+ i += e.nAux;
+ }
+ // if symbol is extern and defined in the current file, insert it
+ if (e.storageClass == 2)
+ if (e.sectionNum)
+ defined->insert(stringTable.decode(e.name));
+ else
+ undefined->insert(stringTable.decode(e.name));
+ }
}
-/* For each occurrence of an external symbol in the object file named by
- by <fileName> that is a member of <hide>, renames it by prefixing
- with "__kmp_external_", writing back the file in-place
-*/
+// For each occurrence of an external symbol in the object file named by
+// by <fileName> that is a member of <hide>, renames it by prefixing
+// with "__kmp_external_", writing back the file in-place
void hideSymbols(char *fileName, const set<string> &hide) {
- static const string prefix("__kmp_external_");
- set<string> strings; // set of all occurring symbols, appropriately prefixed
- streampos fileSize;
- size_t strTabStart;
- unsigned symTabStart, symNEntries;
- int i;
- rstream in(fileName);
-
- in.seekg(0,ios::end);
- fileSize = in.tellg();
-
- in.seekg(8);
- in >> symTabStart >> symNEntries;
- in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- StringTable stringTableOld(in); // read original string table
-
- if(in.tellg() != fileSize)
- stop("hideSymbols: Unexpected data after string table");
-
- // compute set of occurring strings with prefix added
- for(i = 0; i < symNEntries; ++i) {
- Symbol e;
-
- in.seekg(symTabStart + i * 18);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- in >> e;
- if(in.fail())
- stop("hideSymbols: File read error");
- if(e.nAux)
- i += e.nAux;
- const string &s = stringTableOld.decode(e.name);
- // if symbol is extern and found in <hide>, prefix and insert into strings,
- // otherwise, just insert into strings without prefix
- strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
- prefix + s : s);
- }
-
- ofstream out(fileName, ios::trunc | ios::out | ios::binary);
- if(!out.is_open())
- stop("hideSymbols: Error opening output file");
-
- // make new string table from string set
- StringTable stringTableNew = StringTable(strings);
-
- // copy input file to output file up to just before the symbol table
- in.seekg(0);
- char *buf = new char[symTabStart];
- in.read(buf, symTabStart);
- out.write(buf, symTabStart);
- delete []buf;
-
- // copy input symbol table to output symbol table with name translation
- for(i = 0; i < symNEntries; ++i) {
- Symbol e;
-
- in.seekg(symTabStart + i*18);
- if(in.eof())
- stop("hideSymbols: Unexpected EOF");
- in >> e;
- if(in.fail())
- stop("hideSymbols: File read error");
- const string &s = stringTableOld.decode(e.name);
- out.seekp(symTabStart + i*18);
- e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ?
- prefix + s : s);
- out.write((char*)&e, 18);
- if(out.fail())
- stop("hideSymbols: File write error");
- if(e.nAux) {
- // copy auxiliary symbol table entries
- int nAux = e.nAux;
- for(int j = 1; j <= nAux; ++j) {
- in >> e;
- out.seekp(symTabStart + (i + j) * 18);
- out.write((char*)&e, 18);
- }
- i += nAux;
- }
- }
- // output string table
- stringTableNew.write(out);
+ static const string prefix("__kmp_external_");
+ set<string> strings; // set of all occurring symbols, appropriately prefixed
+ streampos fileSize;
+ size_t strTabStart;
+ unsigned symTabStart, symNEntries;
+ int i;
+ rstream in(fileName);
+
+ in.seekg(0, ios::end);
+ fileSize = in.tellg();
+
+ in.seekg(8);
+ in >> symTabStart >> symNEntries;
+ in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries);
+ if (in.eof())
+ stop("hideSymbols: Unexpected EOF");
+ StringTable stringTableOld(in); // read original string table
+
+ if (in.tellg() != fileSize)
+ stop("hideSymbols: Unexpected data after string table");
+
+ // compute set of occurring strings with prefix added
+ for (i = 0; i < symNEntries; ++i) {
+ Symbol e;
+
+ in.seekg(symTabStart + i * 18);
+ if (in.eof())
+ stop("hideSymbols: Unexpected EOF");
+ in >> e;
+ if (in.fail())
+ stop("hideSymbols: File read error");
+ if (e.nAux)
+ i += e.nAux;
+ const string &s = stringTableOld.decode(e.name);
+ // if symbol is extern and found in <hide>, prefix and insert into strings,
+ // otherwise, just insert into strings without prefix
+ strings.insert(
+ (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
+ }
+
+ ofstream out(fileName, ios::trunc | ios::out | ios::binary);
+ if (!out.is_open())
+ stop("hideSymbols: Error opening output file");
+
+ // make new string table from string set
+ StringTable stringTableNew = StringTable(strings);
+
+ // copy input file to output file up to just before the symbol table
+ in.seekg(0);
+ char *buf = new char[symTabStart];
+ in.read(buf, symTabStart);
+ out.write(buf, symTabStart);
+ delete[] buf;
+
+ // copy input symbol table to output symbol table with name translation
+ for (i = 0; i < symNEntries; ++i) {
+ Symbol e;
+
+ in.seekg(symTabStart + i * 18);
+ if (in.eof())
+ stop("hideSymbols: Unexpected EOF");
+ in >> e;
+ if (in.fail())
+ stop("hideSymbols: File read error");
+ const string &s = stringTableOld.decode(e.name);
+ out.seekp(symTabStart + i * 18);
+ e.name = stringTableNew.encode(
+ (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s);
+ out.write((char *)&e, 18);
+ if (out.fail())
+ stop("hideSymbols: File write error");
+ if (e.nAux) {
+ // copy auxiliary symbol table entries
+ int nAux = e.nAux;
+ for (int j = 1; j <= nAux; ++j) {
+ in >> e;
+ out.seekp(symTabStart + (i + j) * 18);
+ out.write((char *)&e, 18);
+ }
+ i += nAux;
+ }
+ }
+ // output string table
+ stringTableNew.write(out);
}
// returns true iff <a> and <b> have no common element
-template <class T>
-bool isDisjoint(const set<T> &a, const set<T> &b) {
- set<T>::const_iterator ita, itb;
-
- for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
- const T &ta = *ita, &tb = *itb;
- if(ta < tb)
- ++ita;
- else if (tb < ta)
- ++itb;
- else
- return false;
- }
- return true;
+template <class T> bool isDisjoint(const set<T> &a, const set<T> &b) {
+ set<T>::const_iterator ita, itb;
+
+ for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) {
+ const T &ta = *ita, &tb = *itb;
+ if (ta < tb)
+ ++ita;
+ else if (tb < ta)
+ ++itb;
+ else
+ return false;
+ }
+ return true;
}
-/* precondition: <defined> and <undefined> are arrays with <nTotal> elements where
- <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the external object
- files and the rest correspond to the internal object files.
- postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not
- disjoint. Returns the transitive closure of the set of internal object files, as a set of
- file indexes, under the 'depends on' relation, minus the set of internal object files.
-*/
-set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined, set<string> *undefined) {
- set<int> *required = new set<int>;
- set<int> fresh[2];
- int i, cur = 0;
- bool changed;
-
- for(i = nTotal - 1; i >= nExternal; --i)
- fresh[cur].insert(i);
- do {
- changed = false;
- for(set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) {
- set<string> &s = undefined[*it];
-
- for(i = 0; i < nExternal; ++i) {
- if(required->find(i) == required->end()) {
- if(!isDisjoint(defined[i], s)) {
- // found a new qualifying element
- required->insert(i);
- fresh[1 - cur].insert(i);
- changed = true;
- }
- }
- }
- }
- fresh[cur].clear();
- cur = 1 - cur;
- } while(changed);
- return required;
+// PRE: <defined> and <undefined> are arrays with <nTotal> elements where
+// <nTotal> >= <nExternal>. The first <nExternal> elements correspond to the
+// external object files and the rest correspond to the internal object files.
+// POST: file x is said to depend on file y if undefined[x] and defined[y] are
+// not disjoint. Returns the transitive closure of the set of internal object
+// files, as a set of file indexes, under the 'depends on' relation, minus the
+// set of internal object files.
+set<int> *findRequiredExternal(int nExternal, int nTotal, set<string> *defined,
+ set<string> *undefined) {
+ set<int> *required = new set<int>;
+ set<int> fresh[2];
+ int i, cur = 0;
+ bool changed;
+
+ for (i = nTotal - 1; i >= nExternal; --i)
+ fresh[cur].insert(i);
+ do {
+ changed = false;
+ for (set<int>::iterator it = fresh[cur].begin(); it != fresh[cur].end();
+ ++it) {
+ set<string> &s = undefined[*it];
+
+ for (i = 0; i < nExternal; ++i) {
+ if (required->find(i) == required->end()) {
+ if (!isDisjoint(defined[i], s)) {
+ // found a new qualifying element
+ required->insert(i);
+ fresh[1 - cur].insert(i);
+ changed = true;
+ }
+ }
+ }
+ }
+ fresh[cur].clear();
+ cur = 1 - cur;
+ } while (changed);
+ return required;
}
int main(int argc, char **argv) {
- int nExternal, nInternal, i;
- set<string> *defined, *undefined;
- set<int>::iterator it;
-
- if(argc < 3)
- stop("Please specify a positive integer followed by a list of object filenames");
- nExternal = atoi(argv[1]);
- if(nExternal <= 0)
- stop("Please specify a positive integer followed by a list of object filenames");
- if(nExternal + 2 > argc)
- stop("Too few external objects");
- nInternal = argc - nExternal - 2;
- defined = new set<string>[argc - 2];
- undefined = new set<string>[argc - 2];
-
- // determine the set of defined and undefined external symbols
- for(i = 2; i < argc; ++i)
- computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
-
- // determine the set of required external files
- set<int> *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined);
- set<string> hide;
-
- /* determine the set of symbols to hide--namely defined external symbols of the
- required external files
- */
- for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
- int idx = *it;
- set<string>::iterator it2;
- /* We have to insert one element at a time instead of inserting a range because
- the insert member function taking a range doesn't exist on Windows* OS, at least
- at the time of this writing.
- */
- for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
- hide.insert(*it2);
- }
-
- /* process the external files--removing those that are not required and hiding
- the appropriate symbols in the others
- */
- for(i = 0; i < nExternal; ++i)
- if(requiredExternal->find(i) != requiredExternal->end())
- hideSymbols(argv[2 + i], hide);
- else
- remove(argv[2 + i]);
- // hide the appropriate symbols in the internal files
- for(i = nExternal + 2; i < argc; ++i)
- hideSymbols(argv[i], hide);
- return 0;
+ int nExternal, nInternal, i;
+ set<string> *defined, *undefined;
+ set<int>::iterator it;
+
+ if (argc < 3)
+ stop("Please specify a positive integer followed by a list of object "
+ "filenames");
+ nExternal = atoi(argv[1]);
+ if (nExternal <= 0)
+ stop("Please specify a positive integer followed by a list of object "
+ "filenames");
+ if (nExternal + 2 > argc)
+ stop("Too few external objects");
+ nInternal = argc - nExternal - 2;
+ defined = new set<string>[argc - 2];
+ undefined = new set<string>[argc - 2];
+
+ // determine the set of defined and undefined external symbols
+ for (i = 2; i < argc; ++i)
+ computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2);
+
+ // determine the set of required external files
+ set<int> *requiredExternal =
+ findRequiredExternal(nExternal, argc - 2, defined, undefined);
+ set<string> hide;
+
+ // determine the set of symbols to hide--namely defined external symbols of
+ // the required external files
+ for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) {
+ int idx = *it;
+ set<string>::iterator it2;
+ // We have to insert one element at a time instead of inserting a range
+ // because the insert member function taking a range doesn't exist on
+ // Windows* OS, at least at the time of this writing.
+ for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2)
+ hide.insert(*it2);
+ }
+
+ // process the external files--removing those that are not required and hiding
+ // the appropriate symbols in the others
+ for (i = 0; i < nExternal; ++i)
+ if (requiredExternal->find(i) != requiredExternal->end())
+ hideSymbols(argv[2 + i], hide);
+ else
+ remove(argv[2 + i]);
+ // hide the appropriate symbols in the internal files
+ for (i = nExternal + 2; i < argc; ++i)
+ hideSymbols(argv[i], hide);
+ return 0;
}
Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Fri May 12 13:01:32 2017
@@ -22,36 +22,35 @@
/* #define BUILD_PARALLEL_ORDERED 1 */
/* This fix replaces gettimeofday with clock_gettime for better scalability on
- the Altix. Requires user code to be linked with -lrt.
-*/
+ the Altix. Requires user code to be linked with -lrt. */
//#define FIX_SGI_CLOCK
/* Defines for OpenMP 3.0 tasking and auto scheduling */
-# ifndef KMP_STATIC_STEAL_ENABLED
-# define KMP_STATIC_STEAL_ENABLED 1
-# endif
+#ifndef KMP_STATIC_STEAL_ENABLED
+#define KMP_STATIC_STEAL_ENABLED 1
+#endif
-#define TASK_CURRENT_NOT_QUEUED 0
-#define TASK_CURRENT_QUEUED 1
+#define TASK_CURRENT_NOT_QUEUED 0
+#define TASK_CURRENT_QUEUED 1
#ifdef BUILD_TIED_TASK_STACK
-#define TASK_STACK_EMPTY 0 // entries when the stack is empty
-
-// Used to define TASK_STACK_SIZE and TASK_STACK_MASK
-#define TASK_STACK_BLOCK_BITS 5
-#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array
-#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block
+#define TASK_STACK_EMPTY 0 // entries when the stack is empty
+#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK
+// Number of entries in each task stack array
+#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)
+// Mask for determining index into stack block
+#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)
#endif // BUILD_TIED_TASK_STACK
-#define TASK_NOT_PUSHED 1
+#define TASK_NOT_PUSHED 1
#define TASK_SUCCESSFULLY_PUSHED 0
-#define TASK_TIED 1
-#define TASK_UNTIED 0
-#define TASK_EXPLICIT 1
-#define TASK_IMPLICIT 0
-#define TASK_PROXY 1
-#define TASK_FULL 0
+#define TASK_TIED 1
+#define TASK_UNTIED 0
+#define TASK_EXPLICIT 1
+#define TASK_IMPLICIT 0
+#define TASK_PROXY 1
+#define TASK_FULL 0
#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR
@@ -62,14 +61,14 @@
#undef KMP_CANCEL_THREADS
#endif
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stddef.h>
-#include <stdarg.h>
#include <string.h>
-#include <signal.h>
-/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad Microsoft library */
-/* some macros provided below to replace some of these functions */
+/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad
+ Microsoft library. Some macros provided below to replace these functions */
#ifndef __ABSOFT_WIN
#include <sys/types.h>
#endif
@@ -100,9 +99,9 @@ class kmp_stats_list;
#include <xmmintrin.h>
#endif
-#include "kmp_version.h"
#include "kmp_debug.h"
#include "kmp_lock.h"
+#include "kmp_version.h"
#if USE_DEBUGGER
#include "kmp_debugger.h"
#endif
@@ -112,14 +111,14 @@ class kmp_stats_list;
#include "kmp_wrapper_malloc.h"
#if KMP_OS_UNIX
-# include <unistd.h>
-# if !defined NSIG && defined _NSIG
-# define NSIG _NSIG
-# endif
+#include <unistd.h>
+#if !defined NSIG && defined _NSIG
+#define NSIG _NSIG
+#endif
#endif
#if KMP_OS_LINUX
-# pragma weak clock_gettime
+#pragma weak clock_gettime
#endif
#if OMPT_SUPPORT
@@ -128,7 +127,7 @@ class kmp_stats_list;
/*Select data placement in NUMA memory */
#define NO_FIRST_TOUCH 0
-#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */
+#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */
/* If not specified on compile command line, assume no first touch */
#ifndef BUILD_MEMORY
@@ -136,27 +135,28 @@ class kmp_stats_list;
#endif
// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
-// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size.
+// 3 - fast allocation using sync, non-sync free lists of any size, non-self
+// free lists of limited size.
#ifndef USE_FAST_MEMORY
#define USE_FAST_MEMORY 3
#endif
#ifndef KMP_NESTED_HOT_TEAMS
-# define KMP_NESTED_HOT_TEAMS 0
-# define USE_NESTED_HOT_ARG(x)
+#define KMP_NESTED_HOT_TEAMS 0
+#define USE_NESTED_HOT_ARG(x)
+#else
+#if KMP_NESTED_HOT_TEAMS
+#if OMP_40_ENABLED
+#define USE_NESTED_HOT_ARG(x) , x
#else
-# if KMP_NESTED_HOT_TEAMS
-# if OMP_40_ENABLED
-# define USE_NESTED_HOT_ARG(x) ,x
-# else
// Nested hot teams feature depends on omp 4.0, disable it for earlier versions
-# undef KMP_NESTED_HOT_TEAMS
-# define KMP_NESTED_HOT_TEAMS 0
-# define USE_NESTED_HOT_ARG(x)
-# endif
-# else
-# define USE_NESTED_HOT_ARG(x)
-# endif
+#undef KMP_NESTED_HOT_TEAMS
+#define KMP_NESTED_HOT_TEAMS 0
+#define USE_NESTED_HOT_ARG(x)
+#endif
+#else
+#define USE_NESTED_HOT_ARG(x)
+#endif
#endif
// Assume using BGET compare_exchange instruction instead of lock by default.
@@ -177,129 +177,134 @@ class kmp_stats_list;
@{
*/
-// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...)
+// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous
+// enum would do it...)
/*!
Values for bit flags used in the ident_t to describe the fields.
*/
/*! Use trampoline for internal microtasks */
-#define KMP_IDENT_IMB 0x01
+#define KMP_IDENT_IMB 0x01
/*! Use c-style ident structure */
-#define KMP_IDENT_KMPC 0x02
+#define KMP_IDENT_KMPC 0x02
/* 0x04 is no longer used */
/*! Entry point generated by auto-parallelization */
-#define KMP_IDENT_AUTOPAR 0x08
+#define KMP_IDENT_AUTOPAR 0x08
/*! Compiler generates atomic reduction option for kmpc_reduce* */
-#define KMP_IDENT_ATOMIC_REDUCE 0x10
+#define KMP_IDENT_ATOMIC_REDUCE 0x10
/*! To mark a 'barrier' directive in user code */
-#define KMP_IDENT_BARRIER_EXPL 0x20
+#define KMP_IDENT_BARRIER_EXPL 0x20
/*! To Mark implicit barriers. */
-#define KMP_IDENT_BARRIER_IMPL 0x0040
-#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0
-#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040
-#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0
+#define KMP_IDENT_BARRIER_IMPL 0x0040
+#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0
+#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040
+#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0
-#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
+#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140
#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0
/*!
* The ident structure that describes a source location.
*/
typedef struct ident {
- kmp_int32 reserved_1; /**< might be used in Fortran; see above */
- kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */
- kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
+ identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
#if USE_ITT_BUILD
- /* but currently used for storing region-specific ITT */
- /* contextual information. */
+/* but currently used for storing region-specific ITT */
+/* contextual information. */
#endif /* USE_ITT_BUILD */
- kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
- char const *psource; /**< String describing the source location.
- The string is composed of semi-colon separated fields which describe the source file,
- the function and a pair of line numbers that delimit the construct.
- */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields
+ which describe the source file, the function and a pair
+ of line numbers that delimit the construct. */
} ident_t;
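For example, a compiler might emit a psource string such as
";foo.c;main;12;16;;": a leading empty field, then the source file, the
function name, and the pair of line numbers delimiting the construct
(illustrative layout only, inferred from the comment above).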
/*!
@}
*/
// Some forward declarations.
-
-typedef union kmp_team kmp_team_t;
-typedef struct kmp_taskdata kmp_taskdata_t;
-typedef union kmp_task_team kmp_task_team_t;
-typedef union kmp_team kmp_team_p;
-typedef union kmp_info kmp_info_p;
-typedef union kmp_root kmp_root_p;
+typedef union kmp_team kmp_team_t;
+typedef struct kmp_taskdata kmp_taskdata_t;
+typedef union kmp_task_team kmp_task_team_t;
+typedef union kmp_team kmp_team_p;
+typedef union kmp_info kmp_info_p;
+typedef union kmp_root kmp_root_p;
#ifdef __cplusplus
extern "C" {
#endif
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
/* Pack two 32-bit signed integers into a 64-bit signed integer */
/* ToDo: Fix word ordering for big-endian machines. */
-#define KMP_PACK_64(HIGH_32,LOW_32) \
- ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) )
-
+#define KMP_PACK_64(HIGH_32, LOW_32) \
+ ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))
-/*
- * Generic string manipulation macros.
- * Assume that _x is of type char *
- */
-#define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; }
-#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; }
-#define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; }
+// Generic string manipulation macros. Assume that _x is of type char *
+#define SKIP_WS(_x) \
+ { \
+ while (*(_x) == ' ' || *(_x) == '\t') \
+ (_x)++; \
+ }
+#define SKIP_DIGITS(_x) \
+ { \
+ while (*(_x) >= '0' && *(_x) <= '9') \
+ (_x)++; \
+ }
+#define SKIP_TO(_x, _c) \
+ { \
+ while (*(_x) != '\0' && *(_x) != (_c)) \
+ (_x)++; \
+ }
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) )
-#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) )
+#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))
+#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
/* Enumeration types */
enum kmp_state_timer {
- ts_stop,
- ts_start,
- ts_pause,
+ ts_stop,
+ ts_start,
+ ts_pause,
- ts_last_state
+ ts_last_state
};
enum dynamic_mode {
- dynamic_default,
+ dynamic_default,
#ifdef USE_LOAD_BALANCE
- dynamic_load_balance,
+ dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
- dynamic_random,
- dynamic_thread_limit,
- dynamic_max
+ dynamic_random,
+ dynamic_thread_limit,
+ dynamic_max
};
-/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */
+/* external schedule constants, duplicate enum omp_sched in omp.h in order to
+ * not include it here */
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
- kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
- // Note: need to adjust __kmp_sch_map global array in case this enum is changed
- kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
- kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
- kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
- kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
- kmp_sched_upper_std = 5, // upper bound for standard schedules
- kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
- kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
+ kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check
+ // Note: need to adjust __kmp_sch_map global array in case enum is changed
+ kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
+ kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
+ kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
+ kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
+ kmp_sched_upper_std = 5, // upper bound for standard schedules
+ kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules
+ kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)
#if KMP_STATIC_STEAL_ENABLED
- kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
+ kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
- kmp_sched_upper,
- kmp_sched_default = kmp_sched_static // default scheduling
+ kmp_sched_upper,
+ kmp_sched_default = kmp_sched_static // default scheduling
} kmp_sched_t;
#endif
@@ -308,149 +313,148 @@ typedef enum kmp_sched {
* Describes the loop schedule to be used for a parallel for loop.
*/
enum sched_type {
- kmp_sch_lower = 32, /**< lower bound for unordered values */
- kmp_sch_static_chunked = 33,
- kmp_sch_static = 34, /**< static unspecialized */
- kmp_sch_dynamic_chunked = 35,
- kmp_sch_guided_chunked = 36, /**< guided unspecialized */
- kmp_sch_runtime = 37,
- kmp_sch_auto = 38, /**< auto */
- kmp_sch_trapezoidal = 39,
-
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_static_greedy = 40,
- kmp_sch_static_balanced = 41,
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_guided_iterative_chunked = 42,
- kmp_sch_guided_analytical_chunked = 43,
-
- kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_lower = 32, /**< lower bound for unordered values */
+ kmp_sch_static_chunked = 33,
+ kmp_sch_static = 34, /**< static unspecialized */
+ kmp_sch_dynamic_chunked = 35,
+ kmp_sch_guided_chunked = 36, /**< guided unspecialized */
+ kmp_sch_runtime = 37,
+ kmp_sch_auto = 38, /**< auto */
+ kmp_sch_trapezoidal = 39,
+
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_static_greedy = 40,
+ kmp_sch_static_balanced = 41,
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_guided_iterative_chunked = 42,
+ kmp_sch_guided_analytical_chunked = 43,
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_static_steal = 44,
#if OMP_45_ENABLED
- kmp_sch_static_balanced_chunked = 45, /**< static with chunk adjustment (e.g., simd) */
+ /* static with chunk adjustment (e.g., simd) */
+ kmp_sch_static_balanced_chunked = 45,
#endif
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_sch_upper = 46, /**< upper bound for unordered values */
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_sch_upper = 46, /**< upper bound for unordered values */
- kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
- kmp_ord_static_chunked = 65,
- kmp_ord_static = 66, /**< ordered static unspecialized */
- kmp_ord_dynamic_chunked = 67,
- kmp_ord_guided_chunked = 68,
- kmp_ord_runtime = 69,
- kmp_ord_auto = 70, /**< ordered auto */
- kmp_ord_trapezoidal = 71,
- kmp_ord_upper = 72, /**< upper bound for ordered values */
-
-#if OMP_40_ENABLED
- /* Schedules for Distribute construct */
- kmp_distribute_static_chunked = 91, /**< distribute static chunked */
- kmp_distribute_static = 92, /**< distribute static unspecialized */
-#endif
-
- /*
- * For the "nomerge" versions, kmp_dispatch_next*() will always return
- * a single iteration/chunk, even if the loop is serialized. For the
- * schedule types listed above, the entire iteration vector is returned
- * if the loop is serialized. This doesn't work for gcc/gcomp sections.
- */
- kmp_nm_lower = 160, /**< lower bound for nomerge values */
-
- kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
- kmp_nm_static = 162, /**< static unspecialized */
- kmp_nm_dynamic_chunked = 163,
- kmp_nm_guided_chunked = 164, /**< guided unspecialized */
- kmp_nm_runtime = 165,
- kmp_nm_auto = 166, /**< auto */
- kmp_nm_trapezoidal = 167,
-
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_nm_static_greedy = 168,
- kmp_nm_static_balanced = 169,
- /* accessible only through KMP_SCHEDULE environment variable */
- kmp_nm_guided_iterative_chunked = 170,
- kmp_nm_guided_analytical_chunked = 171,
- kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */
-
- kmp_nm_ord_static_chunked = 193,
- kmp_nm_ord_static = 194, /**< ordered static unspecialized */
- kmp_nm_ord_dynamic_chunked = 195,
- kmp_nm_ord_guided_chunked = 196,
- kmp_nm_ord_runtime = 197,
- kmp_nm_ord_auto = 198, /**< auto */
- kmp_nm_ord_trapezoidal = 199,
- kmp_nm_upper = 200, /**< upper bound for nomerge values */
+ kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */
+ kmp_ord_static_chunked = 65,
+ kmp_ord_static = 66, /**< ordered static unspecialized */
+ kmp_ord_dynamic_chunked = 67,
+ kmp_ord_guided_chunked = 68,
+ kmp_ord_runtime = 69,
+ kmp_ord_auto = 70, /**< ordered auto */
+ kmp_ord_trapezoidal = 71,
+ kmp_ord_upper = 72, /**< upper bound for ordered values */
+
+#if OMP_40_ENABLED
+ /* Schedules for Distribute construct */
+ kmp_distribute_static_chunked = 91, /**< distribute static chunked */
+ kmp_distribute_static = 92, /**< distribute static unspecialized */
+#endif
+
+ /* For the "nomerge" versions, kmp_dispatch_next*() will always return a
+ single iteration/chunk, even if the loop is serialized. For the schedule
+ types listed above, the entire iteration vector is returned if the loop is
+ serialized. This doesn't work for gcc/gcomp sections. */
+ kmp_nm_lower = 160, /**< lower bound for nomerge values */
+
+ kmp_nm_static_chunked =
+ (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),
+ kmp_nm_static = 162, /**< static unspecialized */
+ kmp_nm_dynamic_chunked = 163,
+ kmp_nm_guided_chunked = 164, /**< guided unspecialized */
+ kmp_nm_runtime = 165,
+ kmp_nm_auto = 166, /**< auto */
+ kmp_nm_trapezoidal = 167,
+
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_nm_static_greedy = 168,
+ kmp_nm_static_balanced = 169,
+ /* accessible only through KMP_SCHEDULE environment variable */
+ kmp_nm_guided_iterative_chunked = 170,
+ kmp_nm_guided_analytical_chunked = 171,
+ kmp_nm_static_steal =
+ 172, /* accessible only through OMP_SCHEDULE environment variable */
+
+ kmp_nm_ord_static_chunked = 193,
+ kmp_nm_ord_static = 194, /**< ordered static unspecialized */
+ kmp_nm_ord_dynamic_chunked = 195,
+ kmp_nm_ord_guided_chunked = 196,
+ kmp_nm_ord_runtime = 197,
+ kmp_nm_ord_auto = 198, /**< auto */
+ kmp_nm_ord_trapezoidal = 199,
+ kmp_nm_upper = 200, /**< upper bound for nomerge values */
#if OMP_45_ENABLED
- /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
- * Since we need to distinguish the three possible cases (no modifier, monotonic modifier,
- * nonmonotonic modifier), we need separate bits for each modifier.
- * The absence of monotonic does not imply nonmonotonic, especially since 4.5 says
- * that the behaviour of the "no modifier" case is implementation defined in 4.5,
- * but will become "nonmonotonic" in 5.0.
- *
- * Since we're passing a full 32 bit value, we can use a couple of high bits for these
- * flags; out of paranoia we avoid the sign bit.
- *
- * These modifiers can be or-ed into non-static schedules by the compiler to pass
- * the additional information.
- * They will be stripped early in the processing in __kmp_dispatch_init when setting up schedules, so
- * most of the code won't ever see schedules with these bits set.
- */
- kmp_sch_modifier_monotonic = (1<<29), /**< Set if the monotonic schedule modifier was present */
- kmp_sch_modifier_nonmonotonic = (1<<30), /**< Set if the nonmonotonic schedule modifier was present */
-
-# define SCHEDULE_WITHOUT_MODIFIERS(s) (enum sched_type)((s) & ~ (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
-# define SCHEDULE_HAS_MONOTONIC(s) (((s) & kmp_sch_modifier_monotonic) != 0)
-# define SCHEDULE_HAS_NONMONOTONIC(s) (((s) & kmp_sch_modifier_nonmonotonic) != 0)
-# define SCHEDULE_HAS_NO_MODIFIERS(s) (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
-#else
- /* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers can now eliminate tests on compile time
- * constants and dead code that results from them, so we can leave code guarded by such an if in place.
- */
-# define SCHEDULE_WITHOUT_MODIFIERS(s) (s)
-# define SCHEDULE_HAS_MONOTONIC(s) false
-# define SCHEDULE_HAS_NONMONOTONIC(s) false
-# define SCHEDULE_HAS_NO_MODIFIERS(s) true
+ /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since
+ we need to distinguish the three possible cases (no modifier, monotonic
+ modifier, nonmonotonic modifier), we need separate bits for each modifier.
+ The absence of monotonic does not imply nonmonotonic, especially since 4.5
+ says that the behaviour of the "no modifier" case is implementation defined
+ in 4.5, but will become "nonmonotonic" in 5.0.
+
+ Since we're passing a full 32 bit value, we can use a couple of high bits
+ for these flags; out of paranoia we avoid the sign bit.
+
+ These modifiers can be or-ed into non-static schedules by the compiler to
+ pass the additional information. They will be stripped early in the
+ processing in __kmp_dispatch_init when setting up schedules, so most of the
+ code won't ever see schedules with these bits set. */
+ kmp_sch_modifier_monotonic =
+ (1 << 29), /**< Set if the monotonic schedule modifier was present */
+ kmp_sch_modifier_nonmonotonic =
+ (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */
+
+#define SCHEDULE_WITHOUT_MODIFIERS(s) \
+ (enum sched_type)( \
+ (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))
+#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)
+#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
+#define SCHEDULE_HAS_NO_MODIFIERS(s) \
+ (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
+#else
+/* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers
+ can now eliminate tests on compile time constants and dead code that results
+ from them, so we can leave code guarded by such an if in place. */
+#define SCHEDULE_WITHOUT_MODIFIERS(s) (s)
+#define SCHEDULE_HAS_MONOTONIC(s) false
+#define SCHEDULE_HAS_NONMONOTONIC(s) false
+#define SCHEDULE_HAS_NO_MODIFIERS(s) true
#endif
- kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
+ kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
};
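A minimal sketch of how the modifier bits are meant to be stripped and tested
(illustrative only, not part of this patch; assumes OMP_45_ENABLED):

    enum sched_type s = (enum sched_type)(kmp_sch_dynamic_chunked |
                                          kmp_sch_modifier_nonmonotonic);
    enum sched_type base = SCHEDULE_WITHOUT_MODIFIERS(s); // kmp_sch_dynamic_chunked
    if (SCHEDULE_HAS_NONMONOTONIC(s)) {
      // the nonmonotonic modifier was present; 'base' is used for dispatch
    }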
/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef struct kmp_r_sched {
- enum sched_type r_sched_type;
- int chunk;
+ enum sched_type r_sched_type;
+ int chunk;
} kmp_r_sched_t;
-extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types
+extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our
+// internal schedule types
enum library_type {
- library_none,
- library_serial,
- library_turnaround,
- library_throughput
+ library_none,
+ library_serial,
+ library_turnaround,
+ library_throughput
};
#if KMP_OS_LINUX
enum clock_function_type {
- clock_function_gettimeofday,
- clock_function_clock_gettime
+ clock_function_gettimeofday,
+ clock_function_clock_gettime
};
#endif /* KMP_OS_LINUX */
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
-enum mic_type {
- non_mic,
- mic1,
- mic2,
- mic3,
- dummy
-};
+enum mic_type { non_mic, mic1, mic2, mic3, dummy };
#endif
-/* ------------------------------------------------------------------------ */
/* -- fast reduction stuff ------------------------------------------------ */
#undef KMP_FAST_REDUCTION_BARRIER
@@ -458,97 +462,94 @@ enum mic_type {
#undef KMP_FAST_REDUCTION_CORE_DUO
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- #define KMP_FAST_REDUCTION_CORE_DUO 1
+#define KMP_FAST_REDUCTION_CORE_DUO 1
#endif
enum _reduction_method {
- reduction_method_not_defined = 0,
- critical_reduce_block = ( 1 << 8 ),
- atomic_reduce_block = ( 2 << 8 ),
- tree_reduce_block = ( 3 << 8 ),
- empty_reduce_block = ( 4 << 8 )
-};
-
-// description of the packed_reduction_method variable
-// the packed_reduction_method variable consists of two enum types variables that are packed together into 0-th byte and 1-st byte:
-// 0: ( packed_reduction_method & 0x000000FF ) is a 'enum barrier_type' value of barrier that will be used in fast reduction: bs_plain_barrier or bs_reduction_barrier
-// 1: ( packed_reduction_method & 0x0000FF00 ) is a reduction method that will be used in fast reduction;
-// reduction method is of 'enum _reduction_method' type and it's defined the way so that the bits of 0-th byte are empty,
-// so no need to execute a shift instruction while packing/unpacking
+ reduction_method_not_defined = 0,
+ critical_reduce_block = (1 << 8),
+ atomic_reduce_block = (2 << 8),
+ tree_reduce_block = (3 << 8),
+ empty_reduce_block = (4 << 8)
+};
+
+// Description of the packed_reduction_method variable:
+// The packed_reduction_method variable consists of two enum types variables
+// that are packed together into 0-th byte and 1-st byte:
+// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of
+// barrier that will be used in fast reduction: bs_plain_barrier or
+// bs_reduction_barrier
+// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will
+// be used in fast reduction;
+// Reduction method is of 'enum _reduction_method' type and it's defined the way
+// so that the bits of 0-th byte are empty, so no need to execute a shift
+// instruction while packing/unpacking
#if KMP_FAST_REDUCTION_BARRIER
- #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
- ( ( reduction_method ) | ( barrier_type ) )
+#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
+ ((reduction_method) | (barrier_type))
- #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
- ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) )
+#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
+ ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))
- #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
- ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) )
+#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
+ ((enum barrier_type)((packed_reduction_method) & (0x000000FF)))
#else
- #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
- ( reduction_method )
+#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \
+ (reduction_method)
- #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
- ( packed_reduction_method )
+#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
+ (packed_reduction_method)
- #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
- ( bs_plain_barrier )
+#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)
#endif
-#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \
- ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) )
+#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \
+ ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \
+ (which_reduction_block))
#if KMP_FAST_REDUCTION_BARRIER
- #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
- ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) )
+#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
+ (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))
- #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
- ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) )
+#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
+ (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))
#endif
typedef int PACKED_REDUCTION_METHOD_T;
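To make the packing scheme above concrete, here is a minimal sketch
(illustration only, not part of the patch; it assumes KMP_FAST_REDUCTION_BARRIER
is enabled so that bs_reduction_barrier, declared with the barrier types
elsewhere in this header, is available):

    // tree_reduce_block == (3 << 8) == 0x0300, so its low byte is free and
    // the barrier type can be OR'ed in without any shift.
    PACKED_REDUCTION_METHOD_T packed =
        PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block,
                                          bs_reduction_barrier);
    // UNPACK_REDUCTION_METHOD(packed)  == tree_reduce_block    (mask 0x0000FF00)
    // UNPACK_REDUCTION_BARRIER(packed) == bs_reduction_barrier (mask 0x000000FF)
    if (TEST_REDUCTION_METHOD(packed, tree_reduce_block)) {
      /* take the tree-reduction path */
    }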
/* -- end of fast reduction stuff ----------------------------------------- */
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
#if KMP_OS_WINDOWS
-# define USE_CBLKDATA
-# pragma warning( push )
-# pragma warning( disable: 271 310 )
-# include <windows.h>
-# pragma warning( pop )
+#define USE_CBLKDATA
+#pragma warning(push)
+#pragma warning(disable : 271 310)
+#include <windows.h>
+#pragma warning(pop)
#endif
#if KMP_OS_UNIX
-# include <pthread.h>
-# include <dlfcn.h>
+#include <dlfcn.h>
+#include <pthread.h>
#endif
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Only Linux* OS and Windows* OS support thread affinity.
- */
+/* Only Linux* OS and Windows* OS support thread affinity. */
#if KMP_AFFINITY_SUPPORTED
// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
-# if KMP_OS_WINDOWS
-# if _MSC_VER < 1600
+#if KMP_OS_WINDOWS
+#if _MSC_VER < 1600
typedef struct GROUP_AFFINITY {
- KAFFINITY Mask;
- WORD Group;
- WORD Reserved[3];
+ KAFFINITY Mask;
+ WORD Group;
+ WORD Reserved[3];
} GROUP_AFFINITY;
-# endif /* _MSC_VER < 1600 */
-# if KMP_GROUP_AFFINITY
+#endif /* _MSC_VER < 1600 */
+#if KMP_GROUP_AFFINITY
extern int __kmp_num_proc_groups;
-# else
+#else
static const int __kmp_num_proc_groups = 1;
-# endif /* KMP_GROUP_AFFINITY */
+#endif /* KMP_GROUP_AFFINITY */
typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;
@@ -558,164 +559,170 @@ extern kmp_GetActiveProcessorGroupCount_
typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;
-typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
+typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,
+ GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
-# endif /* KMP_OS_WINDOWS */
+#endif /* KMP_OS_WINDOWS */
-# if KMP_USE_HWLOC
+#if KMP_USE_HWLOC
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
-# endif
+#endif
extern size_t __kmp_affin_mask_size;
-# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
-# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
-# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
-# define KMP_CPU_SET_ITERATE(i,mask) \
- for (i = (mask)->begin(); i != (mask)->end() ; i = (mask)->next(i))
-# define KMP_CPU_SET(i,mask) (mask)->set(i)
-# define KMP_CPU_ISSET(i,mask) (mask)->is_set(i)
-# define KMP_CPU_CLR(i,mask) (mask)->clear(i)
-# define KMP_CPU_ZERO(mask) (mask)->zero()
-# define KMP_CPU_COPY(dest, src) (dest)->copy(src)
-# define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
-# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
-# define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
-# define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
-# define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
-# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
-# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
-# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
-# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
-# define KMP_CPU_INDEX(arr,i) __kmp_affinity_dispatch->index_mask_array(arr, i)
-# define KMP_CPU_ALLOC_ARRAY(arr, n) (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
-# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_affinity_dispatch->deallocate_mask_array(arr)
-# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
-# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
-# define __kmp_get_system_affinity(mask, abort_bool) (mask)->get_system_affinity(abort_bool)
-# define __kmp_set_system_affinity(mask, abort_bool) (mask)->set_system_affinity(abort_bool)
-# define __kmp_get_proc_group(mask) (mask)->get_proc_group()
+#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
+#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)
+#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
+#define KMP_CPU_SET_ITERATE(i, mask) \
+ for (i = (mask)->begin(); i != (mask)->end(); i = (mask)->next(i))
+#define KMP_CPU_SET(i, mask) (mask)->set(i)
+#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
+#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
+#define KMP_CPU_ZERO(mask) (mask)->zero()
+#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
+#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
+#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
+#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
+#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
+#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
+#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
+#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)
+#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)
+#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)
+#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)
+#define KMP_CPU_ALLOC_ARRAY(arr, n) \
+ (arr = __kmp_affinity_dispatch->allocate_mask_array(n))
+#define KMP_CPU_FREE_ARRAY(arr, n) \
+ __kmp_affinity_dispatch->deallocate_mask_array(arr)
+#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)
+#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)
+#define __kmp_get_system_affinity(mask, abort_bool) \
+ (mask)->get_system_affinity(abort_bool)
+#define __kmp_set_system_affinity(mask, abort_bool) \
+ (mask)->set_system_affinity(abort_bool)
+#define __kmp_get_proc_group(mask) (mask)->get_proc_group()
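As a rough usage sketch of the mask wrappers above (illustrative only; it
assumes affinity has been enabled, __kmp_affinity_dispatch has been picked, and
OS procs 0 and 2 exist on the machine):

    kmp_affin_mask_t *mask;
    KMP_CPU_ALLOC(mask);   // mask = __kmp_affinity_dispatch->allocate_mask()
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(0, mask);
    KMP_CPU_SET(2, mask);
    int proc;
    KMP_CPU_SET_ITERATE(proc, mask) {
      // body runs for proc == 0, then proc == 2
    }
    KMP_CPU_FREE(mask);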
class KMPAffinity {
public:
- class Mask {
- public:
- void* operator new(size_t n);
- void operator delete(void* p);
- void* operator new[](size_t n);
- void operator delete[](void* p);
- virtual ~Mask() {}
- // Set bit i to 1
- virtual void set(int i) {}
- // Return bit i
- virtual bool is_set(int i) const { return false; }
- // Set bit i to 0
- virtual void clear(int i) {}
- // Zero out entire mask
- virtual void zero() {}
- // Copy src into this mask
- virtual void copy(const Mask* src) {}
- // this &= rhs
- virtual void bitwise_and(const Mask* rhs) {}
- // this |= rhs
- virtual void bitwise_or(const Mask* rhs) {}
- // this = ~this
- virtual void bitwise_not() {}
- // API for iterating over an affinity mask
- // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
- virtual int begin() const { return 0; }
- virtual int end() const { return 0; }
- virtual int next(int previous) const { return 0; }
- // Set the system's affinity to this affinity mask's value
- virtual int set_system_affinity(bool abort_on_error) const { return -1; }
- // Set this affinity mask to the current system affinity
- virtual int get_system_affinity(bool abort_on_error) { return -1; }
- // Only 1 DWORD in the mask should have any procs set.
- // Return the appropriate index, or -1 for an invalid mask.
- virtual int get_proc_group() const { return -1; }
- };
- void* operator new(size_t n);
- void operator delete(void* p);
- // Need virtual destructor
- virtual ~KMPAffinity() = default;
- // Determine if affinity is capable
- virtual void determine_capable(const char* env_var) {}
- // Bind the current thread to os proc
- virtual void bind_thread(int proc) {}
- // Factory functions to allocate/deallocate a mask
- virtual Mask* allocate_mask() { return nullptr; }
- virtual void deallocate_mask(Mask* m) { }
- virtual Mask* allocate_mask_array(int num) { return nullptr; }
- virtual void deallocate_mask_array(Mask* m) { }
- virtual Mask* index_mask_array(Mask* m, int index) { return nullptr; }
- static void pick_api();
- static void destroy_api();
- enum api_type {
- NATIVE_OS
+ class Mask {
+ public:
+ void *operator new(size_t n);
+ void operator delete(void *p);
+ void *operator new[](size_t n);
+ void operator delete[](void *p);
+ virtual ~Mask() {}
+ // Set bit i to 1
+ virtual void set(int i) {}
+ // Return bit i
+ virtual bool is_set(int i) const { return false; }
+ // Set bit i to 0
+ virtual void clear(int i) {}
+ // Zero out entire mask
+ virtual void zero() {}
+ // Copy src into this mask
+ virtual void copy(const Mask *src) {}
+ // this &= rhs
+ virtual void bitwise_and(const Mask *rhs) {}
+ // this |= rhs
+ virtual void bitwise_or(const Mask *rhs) {}
+ // this = ~this
+ virtual void bitwise_not() {}
+ // API for iterating over an affinity mask
+ // for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
+ virtual int begin() const { return 0; }
+ virtual int end() const { return 0; }
+ virtual int next(int previous) const { return 0; }
+ // Set the system's affinity to this affinity mask's value
+ virtual int set_system_affinity(bool abort_on_error) const { return -1; }
+ // Set this affinity mask to the current system affinity
+ virtual int get_system_affinity(bool abort_on_error) { return -1; }
+ // Only 1 DWORD in the mask should have any procs set.
+ // Return the appropriate index, or -1 for an invalid mask.
+ virtual int get_proc_group() const { return -1; }
+ };
+ void *operator new(size_t n);
+ void operator delete(void *p);
+ // Need virtual destructor
+ virtual ~KMPAffinity() = default;
+ // Determine if affinity is capable
+ virtual void determine_capable(const char *env_var) {}
+ // Bind the current thread to os proc
+ virtual void bind_thread(int proc) {}
+ // Factory functions to allocate/deallocate a mask
+ virtual Mask *allocate_mask() { return nullptr; }
+ virtual void deallocate_mask(Mask *m) {}
+ virtual Mask *allocate_mask_array(int num) { return nullptr; }
+ virtual void deallocate_mask_array(Mask *m) {}
+ virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }
+ static void pick_api();
+ static void destroy_api();
+ enum api_type {
+ NATIVE_OS
#if KMP_USE_HWLOC
- , HWLOC
+ ,
+ HWLOC
#endif
- };
- virtual api_type get_api_type() const { KMP_ASSERT(0); return NATIVE_OS; };
+ };
+ virtual api_type get_api_type() const {
+ KMP_ASSERT(0);
+ return NATIVE_OS;
+ };
+
private:
- static bool picked_api;
+ static bool picked_api;
};
typedef KMPAffinity::Mask kmp_affin_mask_t;
-extern KMPAffinity* __kmp_affinity_dispatch;
+extern KMPAffinity *__kmp_affinity_dispatch;
-//
// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
-//
-#define KMP_AFFIN_MASK_PRINT_LEN 1024
+#define KMP_AFFIN_MASK_PRINT_LEN 1024
enum affinity_type {
- affinity_none = 0,
- affinity_physical,
- affinity_logical,
- affinity_compact,
- affinity_scatter,
- affinity_explicit,
- affinity_balanced,
- affinity_disabled, // not used outsize the env var parser
- affinity_default
+ affinity_none = 0,
+ affinity_physical,
+ affinity_logical,
+ affinity_compact,
+ affinity_scatter,
+ affinity_explicit,
+ affinity_balanced,
+  affinity_disabled, // not used outside the env var parser
+ affinity_default
};
enum affinity_gran {
- affinity_gran_fine = 0,
- affinity_gran_thread,
- affinity_gran_core,
- affinity_gran_package,
- affinity_gran_node,
+ affinity_gran_fine = 0,
+ affinity_gran_thread,
+ affinity_gran_core,
+ affinity_gran_package,
+ affinity_gran_node,
#if KMP_GROUP_AFFINITY
- //
- // The "group" granularity isn't necesssarily coarser than all of the
- // other levels, but we put it last in the enum.
- //
- affinity_gran_group,
+ // The "group" granularity isn't necesssarily coarser than all of the
+ // other levels, but we put it last in the enum.
+ affinity_gran_group,
#endif /* KMP_GROUP_AFFINITY */
- affinity_gran_default
+ affinity_gran_default
};
enum affinity_top_method {
- affinity_top_method_all = 0, // try all (supported) methods, in order
+ affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- affinity_top_method_apicid,
- affinity_top_method_x2apicid,
+ affinity_top_method_apicid,
+ affinity_top_method_x2apicid,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
+ affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
- affinity_top_method_group,
+ affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
- affinity_top_method_flat,
+ affinity_top_method_flat,
#if KMP_USE_HWLOC
- affinity_top_method_hwloc,
+ affinity_top_method_hwloc,
#endif
- affinity_top_method_default
+ affinity_top_method_default
};
-#define affinity_respect_mask_default (-1)
+#define affinity_respect_mask_default (-1)
extern enum affinity_type __kmp_affinity_type; /* Affinity type */
extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
@@ -726,57 +733,54 @@ extern int __kmp_affinity_compact; /* Af
extern int __kmp_affinity_offset; /* Affinity offset value */
extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
-extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */
-extern char * __kmp_affinity_proclist; /* proc ID list */
+extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
+extern char *__kmp_affinity_proclist; /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks;
extern unsigned __kmp_affinity_num_masks;
extern void __kmp_affinity_bind_thread(int which);
extern kmp_affin_mask_t *__kmp_affin_fullMask;
-extern char const * __kmp_cpuinfo_file;
+extern char const *__kmp_cpuinfo_file;
#endif /* KMP_AFFINITY_SUPPORTED */
#if OMP_40_ENABLED
-//
// This needs to be kept in sync with the values in omp.h !!!
-//
typedef enum kmp_proc_bind_t {
- proc_bind_false = 0,
- proc_bind_true,
- proc_bind_master,
- proc_bind_close,
- proc_bind_spread,
- proc_bind_intel, // use KMP_AFFINITY interface
- proc_bind_default
+ proc_bind_false = 0,
+ proc_bind_true,
+ proc_bind_master,
+ proc_bind_close,
+ proc_bind_spread,
+ proc_bind_intel, // use KMP_AFFINITY interface
+ proc_bind_default
} kmp_proc_bind_t;
typedef struct kmp_nested_proc_bind_t {
- kmp_proc_bind_t *bind_types;
- int size;
- int used;
+ kmp_proc_bind_t *bind_types;
+ int size;
+ int used;
} kmp_nested_proc_bind_t;
extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
#endif /* OMP_40_ENABLED */
-# if KMP_AFFINITY_SUPPORTED
-# define KMP_PLACE_ALL (-1)
-# define KMP_PLACE_UNDEFINED (-2)
-# endif /* KMP_AFFINITY_SUPPORTED */
+#if KMP_AFFINITY_SUPPORTED
+#define KMP_PLACE_ALL (-1)
+#define KMP_PLACE_UNDEFINED (-2)
+#endif /* KMP_AFFINITY_SUPPORTED */
extern int __kmp_affinity_num_places;
-
#if OMP_40_ENABLED
typedef enum kmp_cancel_kind_t {
- cancel_noreq = 0,
- cancel_parallel = 1,
- cancel_loop = 2,
- cancel_sections = 3,
- cancel_taskgroup = 4
+ cancel_noreq = 0,
+ cancel_parallel = 1,
+ cancel_loop = 2,
+ cancel_sections = 3,
+ cancel_taskgroup = 4
} kmp_cancel_kind_t;
#endif // OMP_40_ENABLED
@@ -795,167 +799,176 @@ extern int __kmp_hws_requested;
extern int __kmp_hws_abs_flag; // absolute or per-item number requested
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
+#define KMP_PAD(type, sz) \
+ (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
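Worked through with hypothetical sizes, KMP_PAD simply rounds sizeof(type) up
to the next multiple of sz:

    // sizeof(type) == 20, sz == 64:  20 + (64 - (19 % 64) - 1) == 64
    // sizeof(type) == 64, sz == 64:  64 + (64 - (63 % 64) - 1) == 64
    // sizeof(type) == 65, sz == 64:  65 + (64 - (64 % 64) - 1) == 128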
-//
// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
-//
-#define KMP_GTID_DNE (-2) /* Does not exist */
-#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
-#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
-#define KMP_GTID_UNKNOWN (-5) /* Is not known */
-#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
-
-#define __kmp_get_gtid() __kmp_get_global_thread_id()
-#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
-
-#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
- __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )
-
-#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
-#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \
- team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid )
-
-#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team )
-#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
- __kmp_threads[ (gtid) ]-> th.th_team )
-
-#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] )
-#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) )
-
- // Returns current thread (pointer to kmp_info_t). In contrast to __kmp_get_thread(), it works
- // with registered and not-yet-registered threads.
-#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \
- (thr)->th.th_info.ds.ds_gtid )
+#define KMP_GTID_DNE (-2) /* Does not exist */
+#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */
+#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */
+#define KMP_GTID_UNKNOWN (-5) /* Is not known */
+#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */
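(As the comment above notes, these special ids must stay below -1 because a
gtid is stored in a lock field as gtid + 1, with 0 reserved; a sketch of that
encoding, not the actual lock code:)

    // store:  lock_owner = gtid + 1;    // 0 means "no owner"
    // load:   gtid       = lock_owner - 1;
    // gtid == -1 would encode to the reserved 0, hence the special values
    // start at KMP_GTID_DNE (-2).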
+
+#define __kmp_get_gtid() __kmp_get_global_thread_id()
+#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()
+
+#define __kmp_tid_from_gtid(gtid) \
+ (KMP_DEBUG_ASSERT((gtid) >= 0), __kmp_threads[(gtid)]->th.th_info.ds.ds_tid)
+
+#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))
+#define __kmp_gtid_from_tid(tid, team) \
+ (KMP_DEBUG_ASSERT((tid) >= 0 && (team) != NULL), \
+ team->t.t_threads[(tid)]->th.th_info.ds.ds_gtid)
+
+#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)
+#define __kmp_team_from_gtid(gtid) \
+ (KMP_DEBUG_ASSERT((gtid) >= 0), __kmp_threads[(gtid)]->th.th_team)
+
+#define __kmp_thread_from_gtid(gtid) \
+ (KMP_DEBUG_ASSERT((gtid) >= 0), __kmp_threads[(gtid)])
+#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))
+
+// Returns current thread (pointer to kmp_info_t). In contrast to
+// __kmp_get_thread(), it works with registered and not-yet-registered threads.
+#define __kmp_gtid_from_thread(thr) \
+ (KMP_DEBUG_ASSERT((thr) != NULL), (thr)->th.th_info.ds.ds_gtid)
// AT: Which way is correct?
// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
-#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc )
+#define __kmp_get_team_num_threads(gtid) \
+ (__kmp_threads[(gtid)]->th.th_team->t.t_nproc)
-
-/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
-#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1)))
+#define KMP_UINT64_MAX \
+ (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))
-#define KMP_MIN_NTH 1
+#define KMP_MIN_NTH 1
#ifndef KMP_MAX_NTH
-# if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
-# define KMP_MAX_NTH PTHREAD_THREADS_MAX
-# else
-# define KMP_MAX_NTH INT_MAX
-# endif
+#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX
+#define KMP_MAX_NTH PTHREAD_THREADS_MAX
+#else
+#define KMP_MAX_NTH INT_MAX
+#endif
#endif /* KMP_MAX_NTH */
#ifdef PTHREAD_STACK_MIN
-# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
+#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN
#else
-# define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
+#define KMP_MIN_STKSIZE ((size_t)(32 * 1024))
#endif
-#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
+#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
#if KMP_ARCH_X86
-# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
+#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))
#elif KMP_ARCH_X86_64
-# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
-# define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
+#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))
+#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))
#else
-# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
+#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))
#endif
-#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024))
-#define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024))
-#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))
+#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))
+#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))
+#define KMP_MAX_MALLOC_POOL_INCR \
+ (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))
-#define KMP_MIN_STKOFFSET (0)
-#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
+#define KMP_MIN_STKOFFSET (0)
+#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE
#if KMP_OS_DARWIN
-# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
+#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET
#else
-# define KMP_DEFAULT_STKOFFSET CACHE_LINE
+#define KMP_DEFAULT_STKOFFSET CACHE_LINE
#endif
-#define KMP_MIN_STKPADDING (0)
-#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
+#define KMP_MIN_STKPADDING (0)
+#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
-#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */
-#define KMP_MIN_BLOCKTIME (0)
-#define KMP_MAX_BLOCKTIME (INT_MAX) /* Must be this for "infinite" setting the work */
-#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
+#define KMP_BLOCKTIME_MULTIPLIER \
+ (1000) /* number of blocktime units per second */
+#define KMP_MIN_BLOCKTIME (0)
+#define KMP_MAX_BLOCKTIME \
+  (INT_MAX) /* Must be this for the "infinite" setting to work */
+#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */
#if KMP_USE_MONITOR
-#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
-#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */
-#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum number of times monitor can wake up per second */
-
-/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */
-/* Only allow increasing number of wakeups */
-#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
- ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \
- ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \
- ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \
- (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) )
-
-/* Calculate number of intervals for a specific block time based on monitor_wakeups */
-#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
- ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \
- (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
-#else
-# if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
- // HW TSC is used to reduce overhead (clock tick instead of nanosecond).
- extern kmp_uint64 __kmp_ticks_per_msec;
-# if KMP_COMPILER_ICC
-# define KMP_NOW() _rdtsc()
-# else
-# define KMP_NOW() __kmp_hardware_timestamp()
-# endif
-# define KMP_NOW_MSEC() (KMP_NOW()/__kmp_ticks_per_msec)
-# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * __kmp_ticks_per_msec)
-# define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
-# else
- // System time is retrieved sporadically while blocking.
- extern kmp_uint64 __kmp_now_nsec();
-# define KMP_NOW() __kmp_now_nsec()
-# define KMP_NOW_MSEC() (KMP_NOW()/KMP_USEC_PER_SEC)
-# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC)
-# define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
-# endif
-# define KMP_YIELD_NOW() (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) \
- % (__kmp_yield_on_count + __kmp_yield_off_count) < (kmp_uint32)__kmp_yield_on_count)
+#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
+#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second
+#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec
+
+/* Calculate new number of monitor wakeups for a specific block time based on
+ previous monitor_wakeups. Only allow increasing number of wakeups */
+#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
+ (((blocktime) == KMP_MAX_BLOCKTIME) \
+ ? (monitor_wakeups) \
+ : ((blocktime) == KMP_MIN_BLOCKTIME) \
+ ? KMP_MAX_MONITOR_WAKEUPS \
+ : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
+ ? (monitor_wakeups) \
+ : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
+
+/* Calculate number of intervals for a specific block time based on
+ monitor_wakeups */
+#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
+ (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \
+ (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))
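Worked through with the defaults defined above (KMP_DEFAULT_BLOCKTIME == 200 ms,
KMP_BLOCKTIME_MULTIPLIER == 1000) and an initial monitor_wakeups of 1, purely
as an illustration:

    // KMP_WAKEUPS_FROM_BLOCKTIME(200, 1)
    //   -> 200 is neither MAX nor MIN, and 1 <= 1000/200, so the result is
    //      1000 / 200 == 5 wakeups per second
    // KMP_INTERVALS_FROM_BLOCKTIME(200, 5)
    //   -> (200 + 1000/5 - 1) / (1000/5) == 399 / 200 == 1 interval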
+#else
+#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
+// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
+extern kmp_uint64 __kmp_ticks_per_msec;
+#if KMP_COMPILER_ICC
+#define KMP_NOW() _rdtsc()
+#else
+#define KMP_NOW() __kmp_hardware_timestamp()
+#endif
+#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
+#define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * __kmp_ticks_per_msec)
+#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
+#else
+// System time is retrieved sporadically while blocking.
+extern kmp_uint64 __kmp_now_nsec();
+#define KMP_NOW() __kmp_now_nsec()
+#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
+#define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC)
+#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
+#endif
+#define KMP_YIELD_NOW() \
+ (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) % \
+ (__kmp_yield_on_count + __kmp_yield_off_count) < \
+ (kmp_uint32)__kmp_yield_on_count)
#endif // KMP_USE_MONITOR
-#define KMP_MIN_STATSCOLS 40
-#define KMP_MAX_STATSCOLS 4096
-#define KMP_DEFAULT_STATSCOLS 80
+#define KMP_MIN_STATSCOLS 40
+#define KMP_MAX_STATSCOLS 4096
+#define KMP_DEFAULT_STATSCOLS 80
-#define KMP_MIN_INTERVAL 0
-#define KMP_MAX_INTERVAL (INT_MAX-1)
-#define KMP_DEFAULT_INTERVAL 0
+#define KMP_MIN_INTERVAL 0
+#define KMP_MAX_INTERVAL (INT_MAX - 1)
+#define KMP_DEFAULT_INTERVAL 0
-#define KMP_MIN_CHUNK 1
-#define KMP_MAX_CHUNK (INT_MAX-1)
-#define KMP_DEFAULT_CHUNK 1
+#define KMP_MIN_CHUNK 1
+#define KMP_MAX_CHUNK (INT_MAX - 1)
+#define KMP_DEFAULT_CHUNK 1
-#define KMP_MIN_INIT_WAIT 1
-#define KMP_MAX_INIT_WAIT (INT_MAX/2)
-#define KMP_DEFAULT_INIT_WAIT 2048U
+#define KMP_MIN_INIT_WAIT 1
+#define KMP_MAX_INIT_WAIT (INT_MAX / 2)
+#define KMP_DEFAULT_INIT_WAIT 2048U
-#define KMP_MIN_NEXT_WAIT 1
-#define KMP_MAX_NEXT_WAIT (INT_MAX/2)
-#define KMP_DEFAULT_NEXT_WAIT 1024U
+#define KMP_MIN_NEXT_WAIT 1
+#define KMP_MAX_NEXT_WAIT (INT_MAX / 2)
+#define KMP_DEFAULT_NEXT_WAIT 1024U
-#define KMP_DFLT_DISP_NUM_BUFF 7
-#define KMP_MAX_ORDERED 8
+#define KMP_DFLT_DISP_NUM_BUFF 7
+#define KMP_MAX_ORDERED 8
-#define KMP_MAX_FIELDS 32
+#define KMP_MAX_FIELDS 32
-#define KMP_MAX_BRANCH_BITS 31
+#define KMP_MAX_BRANCH_BITS 31
#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX
@@ -963,204 +976,231 @@ extern int __kmp_hws_abs_flag; // absolu
#define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX
-/* Minimum number of threads before switch to TLS gtid (experimentally determined) */
+/* Minimum number of threads before switch to TLS gtid (experimentally
+ determined) */
/* josh TODO: what about OS X* tuning? */
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-# define KMP_TLS_GTID_MIN 5
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#define KMP_TLS_GTID_MIN 5
#else
-# define KMP_TLS_GTID_MIN INT_MAX
+#define KMP_TLS_GTID_MIN INT_MAX
#endif
-#define KMP_MASTER_TID(tid) ( (tid) == 0 )
-#define KMP_WORKER_TID(tid) ( (tid) != 0 )
+#define KMP_MASTER_TID(tid) ((tid) == 0)
+#define KMP_WORKER_TID(tid) ((tid) != 0)
-#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 )
-#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 )
-#define KMP_UBER_GTID(gtid) \
- ( \
- KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \
- KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \
- (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
- (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\
- )
-#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 )
+#define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0)
+#define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0)
+#define KMP_UBER_GTID(gtid) \
+ (KMP_DEBUG_ASSERT((gtid) >= KMP_GTID_MIN), \
+ KMP_DEBUG_ASSERT((gtid) < __kmp_threads_capacity), \
+ (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
+ (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread))
+#define KMP_INITIAL_GTID(gtid) ((gtid) == 0)
#ifndef TRUE
-#define FALSE 0
-#define TRUE (! FALSE)
+#define FALSE 0
+#define TRUE (!FALSE)
#endif
/* NOTE: all of the following constants must be even */
#if KMP_OS_WINDOWS
-# define KMP_INIT_WAIT 64U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */
+#define KMP_INIT_WAIT 64U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 32U /* subsequent number of spin-tests */
#elif KMP_OS_CNK
-# define KMP_INIT_WAIT 16U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */
+#define KMP_INIT_WAIT 16U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 8U /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_DARWIN
/* TODO: tune for KMP_OS_DARWIN */
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_FREEBSD
/* TODO: tune for KMP_OS_FREEBSD */
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_NETBSD
/* TODO: tune for KMP_OS_NETBSD */
-# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
-# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
typedef struct kmp_cpuid {
- kmp_uint32 eax;
- kmp_uint32 ebx;
- kmp_uint32 ecx;
- kmp_uint32 edx;
+ kmp_uint32 eax;
+ kmp_uint32 ebx;
+ kmp_uint32 ecx;
+ kmp_uint32 edx;
} kmp_cpuid_t;
-extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
-# if KMP_ARCH_X86
- extern void __kmp_x86_pause( void );
-# elif KMP_MIC
- static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); }
-# else
- static void __kmp_x86_pause( void ) { _mm_pause(); }
-# endif
-# define KMP_CPU_PAUSE() __kmp_x86_pause()
+extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);
+#if KMP_ARCH_X86
+extern void __kmp_x86_pause(void);
+#elif KMP_MIC
+static void __kmp_x86_pause(void) { _mm_delay_32(100); }
+#else
+static void __kmp_x86_pause(void) { _mm_pause(); }
+#endif
+#define KMP_CPU_PAUSE() __kmp_x86_pause()
#elif KMP_ARCH_PPC64
-# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")
-# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")
-# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")
-# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)
+#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")
+#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")
+#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")
+#define KMP_CPU_PAUSE() \
+ do { \
+ KMP_PPC64_PRI_LOW(); \
+ KMP_PPC64_PRI_MED(); \
+ KMP_PPC64_PRI_LOC_MB(); \
+ } while (0)
#else
-# define KMP_CPU_PAUSE() /* nothing to do */
+#define KMP_CPU_PAUSE() /* nothing to do */
#endif
-#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; }
+#define KMP_INIT_YIELD(count) \
+ { (count) = __kmp_yield_init; }
-#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); }
-
-// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,
-// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.
-
-#define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \
- if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
-#define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \
- if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }
+#define KMP_YIELD(cond) \
+ { \
+ KMP_CPU_PAUSE(); \
+ __kmp_yield((cond)); \
+ }
+
+// Note the decrement of 2 in the following macros. With KMP_LIBRARY=turnaround,
+// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.
+
+#define KMP_YIELD_WHEN(cond, count) \
+ { \
+ KMP_CPU_PAUSE(); \
+ (count) -= 2; \
+ if (!(count)) { \
+ KMP_YIELD(cond); \
+ (count) = __kmp_yield_next; \
+ } \
+ }
+#define KMP_YIELD_SPIN(count) \
+ { \
+ KMP_CPU_PAUSE(); \
+ (count) -= 2; \
+ if (!(count)) { \
+ KMP_YIELD(1); \
+ (count) = __kmp_yield_next; \
+ } \
+ }
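A typical spin-wait built from these macros looks roughly like the sketch below
(illustration only; flag_is_set() stands in for whatever condition the caller
is waiting on):

    kmp_uint32 spins;
    KMP_INIT_YIELD(spins);          // spins = __kmp_yield_init
    while (!flag_is_set()) {
      // pauses the CPU, decrements spins by 2, and yields via __kmp_yield(1)
      // whenever the counter reaches zero (then reloads __kmp_yield_next)
      KMP_YIELD_SPIN(spins);
    }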
/* ------------------------------------------------------------------------ */
/* Support datatypes for the orphaned construct nesting checks. */
/* ------------------------------------------------------------------------ */
enum cons_type {
- ct_none,
- ct_parallel,
- ct_pdo,
- ct_pdo_ordered,
- ct_psections,
- ct_psingle,
-
- /* the following must be left in order and not split up */
- ct_taskq,
- ct_task, /* really task inside non-ordered taskq, considered a worksharing type */
- ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */
- /* the preceding must be left in order and not split up */
-
- ct_critical,
- ct_ordered_in_parallel,
- ct_ordered_in_pdo,
- ct_ordered_in_taskq,
- ct_master,
- ct_reduce,
- ct_barrier
+ ct_none,
+ ct_parallel,
+ ct_pdo,
+ ct_pdo_ordered,
+ ct_psections,
+ ct_psingle,
+
+ /* the following must be left in order and not split up */
+ ct_taskq,
+ ct_task, // really task inside non-ordered taskq, considered worksharing type
+ ct_task_ordered, /* really task inside ordered taskq, considered a worksharing
+ type */
+ /* the preceding must be left in order and not split up */
+
+ ct_critical,
+ ct_ordered_in_parallel,
+ ct_ordered_in_pdo,
+ ct_ordered_in_taskq,
+ ct_master,
+ ct_reduce,
+ ct_barrier
};
/* test to see if we are in a taskq construct */
-# define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) )
-# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)
+#define IS_CONS_TYPE_TASKQ(ct) \
+ (((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered))
+#define IS_CONS_TYPE_ORDERED(ct) \
+ ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)
struct cons_data {
- ident_t const *ident;
- enum cons_type type;
- int prev;
- kmp_user_lock_p name; /* address exclusively for critical section name comparison */
+ ident_t const *ident;
+ enum cons_type type;
+ int prev;
+ kmp_user_lock_p
+ name; /* address exclusively for critical section name comparison */
};
struct cons_header {
- int p_top, w_top, s_top;
- int stack_size, stack_top;
- struct cons_data *stack_data;
+ int p_top, w_top, s_top;
+ int stack_size, stack_top;
+ struct cons_data *stack_data;
};
struct kmp_region_info {
- char *text;
- int offset[KMP_MAX_FIELDS];
- int length[KMP_MAX_FIELDS];
+ char *text;
+ int offset[KMP_MAX_FIELDS];
+ int length[KMP_MAX_FIELDS];
};
-
/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
#if KMP_OS_WINDOWS
- typedef HANDLE kmp_thread_t;
- typedef DWORD kmp_key_t;
+typedef HANDLE kmp_thread_t;
+typedef DWORD kmp_key_t;
#endif /* KMP_OS_WINDOWS */
#if KMP_OS_UNIX
- typedef pthread_t kmp_thread_t;
- typedef pthread_key_t kmp_key_t;
+typedef pthread_t kmp_thread_t;
+typedef pthread_key_t kmp_key_t;
#endif
-extern kmp_key_t __kmp_gtid_threadprivate_key;
+extern kmp_key_t __kmp_gtid_threadprivate_key;
typedef struct kmp_sys_info {
- long maxrss; /* the maximum resident set size utilized (in kilobytes) */
- long minflt; /* the number of page faults serviced without any I/O */
- long majflt; /* the number of page faults serviced that required I/O */
- long nswap; /* the number of times a process was "swapped" out of memory */
- long inblock; /* the number of times the file system had to perform input */
- long oublock; /* the number of times the file system had to perform output */
- long nvcsw; /* the number of times a context switch was voluntarily */
- long nivcsw; /* the number of times a context switch was forced */
+ long maxrss; /* the maximum resident set size utilized (in kilobytes) */
+ long minflt; /* the number of page faults serviced without any I/O */
+ long majflt; /* the number of page faults serviced that required I/O */
+ long nswap; /* the number of times a process was "swapped" out of memory */
+ long inblock; /* the number of times the file system had to perform input */
+ long oublock; /* the number of times the file system had to perform output */
+  long nvcsw; /* the number of times a context switch was made voluntarily */
+ long nivcsw; /* the number of times a context switch was forced */
} kmp_sys_info_t;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
typedef struct kmp_cpuinfo {
- int initialized; // If 0, other fields are not initialized.
- int signature; // CPUID(1).EAX
- int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family )
- int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model)
- int stepping; // CPUID(1).EAX[3:0] ( Stepping )
- int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
- int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
- int cpu_stackoffset;
- int apic_id;
- int physical_id;
- int logical_id;
- kmp_uint64 frequency; // Nominal CPU frequency in Hz.
- char name [3*sizeof (kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
+ int initialized; // If 0, other fields are not initialized.
+ int signature; // CPUID(1).EAX
+ int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)
+ int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended
+ // Model << 4 ) + Model)
+ int stepping; // CPUID(1).EAX[3:0] ( Stepping )
+ int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
+ int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
+ int cpu_stackoffset;
+ int apic_id;
+ int physical_id;
+ int logical_id;
+ kmp_uint64 frequency; // Nominal CPU frequency in Hz.
+ char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)
} kmp_cpuinfo_t;
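The bit-field comments above decode CPUID(1).EAX as follows; a sketch of that
extraction (illustration only, not necessarily how the runtime's own code does
it):

    kmp_cpuid_t buf;
    __kmp_x86_cpuid(1, 0, &buf);
    int family   = ((buf.eax >> 20) & 0xff) + ((buf.eax >> 8) & 0xf);
    int model    = (((buf.eax >> 16) & 0xf) << 4) + ((buf.eax >> 4) & 0xf);
    int stepping = buf.eax & 0xf;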
#endif
#ifdef BUILD_TV
struct tv_threadprivate {
- /* Record type #1 */
- void *global_addr;
- void *thread_addr;
+ /* Record type #1 */
+ void *global_addr;
+ void *thread_addr;
};
struct tv_data {
- struct tv_data *next;
- void *type;
- union tv_union {
- struct tv_threadprivate tp;
- } u;
+ struct tv_data *next;
+ void *type;
+ union tv_union {
+ struct tv_threadprivate tp;
+ } u;
};
extern kmp_key_t __kmp_tv_key;
@@ -1170,137 +1210,168 @@ extern kmp_key_t __kmp_tv_key;
/* ------------------------------------------------------------------------ */
#if USE_ITT_BUILD
-// We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here.
-// Later we will check the type meets requirements.
+// We cannot include "kmp_itt.h" due to circular dependency. Declare the only
+// required type here. Later we will check the type meets requirements.
typedef int kmp_itt_mark_t;
#define KMP_ITT_DEBUG 0
#endif /* USE_ITT_BUILD */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Taskq data structures
- */
+/* Taskq data structures */
-#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4)
-#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */
+#define HIGH_WATER_MARK(nslots) (((nslots)*3) / 4)
+// num thunks that each thread can simultaneously execute from a task queue
+#define __KMP_TASKQ_THUNKS_PER_TH 1
+
+/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t
+ th_flags */
+
+#define TQF_IS_ORDERED 0x0001 // __kmpc_taskq interface, taskq ordered
+// __kmpc_taskq interface, taskq with lastprivate list
+#define TQF_IS_LASTPRIVATE 0x0002
+#define TQF_IS_NOWAIT 0x0004 // __kmpc_taskq interface, end taskq nowait
+// __kmpc_taskq interface, use heuristics to decide task queue size
+#define TQF_HEURISTICS 0x0008
+
+// __kmpc_taskq interface, reserved for future use
+#define TQF_INTERFACE_RESERVED1 0x0010
+// __kmpc_taskq interface, reserved for future use
+#define TQF_INTERFACE_RESERVED2 0x0020
+// __kmpc_taskq interface, reserved for future use
+#define TQF_INTERFACE_RESERVED3 0x0040
+// __kmpc_taskq interface, reserved for future use
+#define TQF_INTERFACE_RESERVED4 0x0080
+
+#define TQF_INTERFACE_FLAGS 0x00ff // all the __kmpc_taskq interface flags
+// internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE
+#define TQF_IS_LAST_TASK 0x0100
+// internal use only; this thunk->th_task is the taskq_task
+#define TQF_TASKQ_TASK 0x0200
+// internal use only; must release worker threads once ANY queued task
+// exists (global)
+#define TQF_RELEASE_WORKERS 0x0400
+// internal use only; notify workers that master has finished enqueuing tasks
+#define TQF_ALL_TASKS_QUEUED 0x0800
+// internal use only: this queue encountered in parallel context: not serialized
+#define TQF_PARALLEL_CONTEXT 0x1000
+// internal use only; this queue is on the freelist and not in use
+#define TQF_DEALLOCATED 0x2000
-/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */
-
-#define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */
-#define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */
-#define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */
-#define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */
-#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */
-#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */
-#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */
-#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */
-
-#define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */
-
-#define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */
-#define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */
-#define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */
-#define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */
-#define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not serialized */
-#define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */
-
-#define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use only flags */
+#define TQF_INTERNAL_FLAGS 0x3f00 // all the internal use only flags
typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t {
- kmp_int32 ai_data;
+ kmp_int32 ai_data;
} kmpc_aligned_int32_t;
typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t {
- struct kmpc_thunk_t *qs_thunk;
+ struct kmpc_thunk_t *qs_thunk;
} kmpc_aligned_queue_slot_t;
typedef struct kmpc_task_queue_t {
- /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */
- kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */
- union {
- struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */
- struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */
- } tq;
- volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */
- struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */
- struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */
- volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */
- /* (other than the thread executing the kmpc_end_taskq call) */
- /* locked by parent tq's tq_link_lck */
-
- /* shared data for task queue */
- struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */
- /* only one array element exists for all but outermost taskq */
-
- /* bookkeeping for ordered task queue */
- kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */
- volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */
-
- /* thunk storage management for task queue */
- kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */
- struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */
- struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */
-
- /* data fields for queue itself */
- kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */
- kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */
- volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task thunk, occupied if not NULL */
- kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */
- kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */
- kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */
- volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */
- kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */
- volatile kmp_int32 tq_flags; /* TQF_xxx */
-
- /* bookkeeping for outstanding thunks */
- struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */
- kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */
+ /* task queue linkage fields for n-ary tree of queues (locked with global
+ taskq_tree_lck) */
+ kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and
+ child ref counts */
+ union {
+ struct kmpc_task_queue_t *tq_parent; // pointer to parent taskq, not locked
+ // for taskq internal freelists, locked with global taskq_freelist_lck
+ struct kmpc_task_queue_t *tq_next_free;
+ } tq;
+ // pointer to linked-list of children, locked by tq's tq_link_lck
+ volatile struct kmpc_task_queue_t *tq_first_child;
+ // next child in linked-list, locked by parent tq's tq_link_lck
+ struct kmpc_task_queue_t *tq_next_child;
+ // previous child in linked-list, locked by parent tq's tq_link_lck
+ struct kmpc_task_queue_t *tq_prev_child;
+ // reference count of threads with access to this task queue
+ volatile kmp_int32 tq_ref_count;
+ /* (other than the thread executing the kmpc_end_taskq call) */
+ /* locked by parent tq's tq_link_lck */
+
+ /* shared data for task queue */
+ /* per-thread array of pointers to shared variable structures */
+ struct kmpc_aligned_shared_vars_t *tq_shareds;
+ /* only one array element exists for all but outermost taskq */
+
+ /* bookkeeping for ordered task queue */
+ kmp_uint32 tq_tasknum_queuing; // ordered task # assigned while queuing tasks
+ // ordered number of next task to be served (executed)
+ volatile kmp_uint32 tq_tasknum_serving;
+
+ /* thunk storage management for task queue */
+ kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */
+ // thunk freelist, chained via th.th_next_free
+ struct kmpc_thunk_t *tq_free_thunks;
+ // space allocated for thunks for this task queue
+ struct kmpc_thunk_t *tq_thunk_space;
+
+ /* data fields for queue itself */
+ kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue,
+ tq_head, tq_tail, tq_nfull */
+ /* array of queue slots to hold thunks for tasks */
+ kmpc_aligned_queue_slot_t *tq_queue;
+ volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task
+ thunk, occupied if not NULL */
+ kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl.
+ tq_taskq_slot space) */
+ kmp_int32 tq_head; // enqueue puts item here (index into tq_queue array)
+ kmp_int32 tq_tail; // dequeue takes item from here (index into tq_queue array)
+ volatile kmp_int32 tq_nfull; // # of occupied entries in task queue right now
+ kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */
+ volatile kmp_int32 tq_flags; /* TQF_xxx */
+
+ /* bookkeeping for outstanding thunks */
+
+ /* per-thread array for # of regular thunks currently being executed */
+ struct kmpc_aligned_int32_t *tq_th_thunks;
+ kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */
- /* statistics library bookkeeping */
- ident_t *tq_loc; /* source location information for taskq directive */
+ /* statistics library bookkeeping */
+ ident_t *tq_loc; /* source location information for taskq directive */
} kmpc_task_queue_t;
-typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk);
+typedef void (*kmpc_task_t)(kmp_int32 global_tid, struct kmpc_thunk_t *thunk);
/* sizeof_shareds passed as arg to __kmpc_taskq call */
-typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */
- kmpc_task_queue_t *sv_queue;
- /* (pointers to) shared vars */
+typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */
+ kmpc_task_queue_t *sv_queue; /* (pointers to) shared vars */
} kmpc_shared_vars_t;
typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t {
- volatile struct kmpc_shared_vars_t *ai_data;
+ volatile struct kmpc_shared_vars_t *ai_data;
} kmpc_aligned_shared_vars_t;
-/* sizeof_thunk passed as arg to kmpc_taskq call */
-typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */
- union { /* field used for internal freelists too */
- kmpc_shared_vars_t *th_shareds;
- struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */
- } th;
- kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */
- struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */
- kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */
- kmp_int32 th_status;
- kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */
- /* private vars */
+/* sizeof_thunk passed as arg to kmpc_taskq call */
+typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */
+ union { /* field used for internal freelists too */
+ kmpc_shared_vars_t *th_shareds;
+ struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within
+ queue, head at tq_free_thunks */
+ } th;
+ kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */
+ struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk
+ on this thread's call stack */
+  // TQF_xxx (tq_flags interface plus possible internal flags)
+ kmp_int32 th_flags;
+
+ kmp_int32 th_status;
+ kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for
+ ordered sections */
+ /* private vars */
} kmpc_thunk_t;
typedef struct KMP_ALIGN_CACHE kmp_taskq {
- int tq_curr_thunk_capacity;
+ int tq_curr_thunk_capacity;
- kmpc_task_queue_t *tq_root;
- kmp_int32 tq_global_flags;
+ kmpc_task_queue_t *tq_root;
+ kmp_int32 tq_global_flags;
- kmp_lock_t tq_freelist_lck;
- kmpc_task_queue_t *tq_freelist;
+ kmp_lock_t tq_freelist_lck;
+ kmpc_task_queue_t *tq_freelist;
- kmpc_thunk_t **tq_curr_thunk;
+ kmpc_thunk_t **tq_curr_thunk;
} kmp_taskq_t;
/* END Taskq data structures */
-/* --------------------------------------------------------------------------- */
typedef kmp_int32 kmp_critical_name[8];
@@ -1308,18 +1379,21 @@ typedef kmp_int32 kmp_critical_name[8];
@ingroup PARALLEL
The type for a microtask which gets passed to @ref __kmpc_fork_call().
The arguments to the outlined function are
-@param global_tid the global thread identity of the thread executing the function.
+@param global_tid the global thread identity of the thread executing the
+function.
@param bound_tid the local identity of the thread executing the function
@param ... pointers to shared variables accessed by the function.
*/
-typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... );
-typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... );
+typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
+typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,
+ ...);
/*!
@ingroup THREADPRIVATE
@{
*/
-/* --------------------------------------------------------------------------- */
+/* ---------------------------------------------------------------------------
+ */
/* Threadprivate initialization/finalization function declarations */
/* for non-array objects: __kmpc_threadprivate_register() */
@@ -1328,487 +1402,505 @@ typedef void (*kmpc_micro_bound)
Pointer to the constructor function.
The first argument is the <tt>this</tt> pointer
*/
-typedef void *(*kmpc_ctor) (void *);
+typedef void *(*kmpc_ctor)(void *);
/*!
Pointer to the destructor function.
The first argument is the <tt>this</tt> pointer
*/
-typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */
+typedef void (*kmpc_dtor)(
+ void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel
+ compiler */
/*!
Pointer to an alternate constructor.
The first argument is the <tt>this</tt> pointer.
*/
-typedef void *(*kmpc_cctor) (void *, void *);
+typedef void *(*kmpc_cctor)(void *, void *);
-/* for array objects: __kmpc_threadprivate_register_vec() */
- /* First arg: "this" pointer */
- /* Last arg: number of array elements */
+/* for array objects: __kmpc_threadprivate_register_vec() */
+/* First arg: "this" pointer */
+/* Last arg: number of array elements */
/*!
Array constructor.
First argument is the <tt>this</tt> pointer
Second argument is the number of array elements.
*/
-typedef void *(*kmpc_ctor_vec) (void *, size_t);
+typedef void *(*kmpc_ctor_vec)(void *, size_t);
/*!
Pointer to the array destructor function.
The first argument is the <tt>this</tt> pointer
Second argument is the number of array elements.
*/
-typedef void (*kmpc_dtor_vec) (void *, size_t);
+typedef void (*kmpc_dtor_vec)(void *, size_t);
/*!
Array constructor.
First argument is the <tt>this</tt> pointer
Third argument is the number of array elements.
*/
-typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */
+typedef void *(*kmpc_cctor_vec)(void *, void *,
+ size_t); /* function unused by compiler */
/*!
@}
*/
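A minimal sketch of functions matching the constructor/destructor pointer shapes declared above (kmpc_ctor, kmpc_dtor, kmpc_ctor_vec); my_ctor, my_dtor and my_ctor_vec are hypothetical names, and zero-filling an int is only a stand-in for real per-thread construction.

  #include <cstddef>
  #include <cstring>

  // Matches the kmpc_ctor shape: receives the "this" pointer and returns it.
  static void *my_ctor(void *self) {
    std::memset(self, 0, sizeof(int));
    return self;
  }

  // Matches the kmpc_dtor shape: receives the "this" pointer of the copy.
  static void my_dtor(void *self) { (void)self; }

  // Array variant matching kmpc_ctor_vec: the extra argument is the element count.
  static void *my_ctor_vec(void *self, std::size_t n) {
    std::memset(self, 0, n * sizeof(int));
    return self;
  }

  int main() {
    int x = 42;
    my_dtor(my_ctor(&x)); // x is now 0
    int arr[4];
    my_ctor_vec(arr, 4);  // all four elements are now 0
    return (x == 0 && arr[0] == 0) ? 0 : 1;
  }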
-
-/* ------------------------------------------------------------------------ */
-
/* keeps tracked of threadprivate cache allocations for cleanup later */
typedef struct kmp_cached_addr {
- void **addr; /* address of allocated cache */
- struct kmp_cached_addr *next; /* pointer to next cached address */
+ void **addr; /* address of allocated cache */
+ struct kmp_cached_addr *next; /* pointer to next cached address */
} kmp_cached_addr_t;
struct private_data {
- struct private_data *next; /* The next descriptor in the list */
- void *data; /* The data buffer for this descriptor */
- int more; /* The repeat count for this descriptor */
- size_t size; /* The data size for this descriptor */
+ struct private_data *next; /* The next descriptor in the list */
+ void *data; /* The data buffer for this descriptor */
+ int more; /* The repeat count for this descriptor */
+ size_t size; /* The data size for this descriptor */
};
struct private_common {
- struct private_common *next;
- struct private_common *link;
- void *gbl_addr;
- void *par_addr; /* par_addr == gbl_addr for MASTER thread */
- size_t cmn_size;
-};
-
-struct shared_common
-{
- struct shared_common *next;
- struct private_data *pod_init;
- void *obj_init;
- void *gbl_addr;
- union {
- kmpc_ctor ctor;
- kmpc_ctor_vec ctorv;
- } ct;
- union {
- kmpc_cctor cctor;
- kmpc_cctor_vec cctorv;
- } cct;
- union {
- kmpc_dtor dtor;
- kmpc_dtor_vec dtorv;
- } dt;
- size_t vec_len;
- int is_vec;
- size_t cmn_size;
-};
-
-#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
-#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
-#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
-#define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
+ struct private_common *next;
+ struct private_common *link;
+ void *gbl_addr;
+ void *par_addr; /* par_addr == gbl_addr for MASTER thread */
+ size_t cmn_size;
+};
+
+struct shared_common {
+ struct shared_common *next;
+ struct private_data *pod_init;
+ void *obj_init;
+ void *gbl_addr;
+ union {
+ kmpc_ctor ctor;
+ kmpc_ctor_vec ctorv;
+ } ct;
+ union {
+ kmpc_cctor cctor;
+ kmpc_cctor_vec cctorv;
+ } cct;
+ union {
+ kmpc_dtor dtor;
+ kmpc_dtor_vec dtorv;
+ } dt;
+ size_t vec_len;
+ int is_vec;
+ size_t cmn_size;
+};
+
+#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */
+#define KMP_HASH_TABLE_SIZE \
+ (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */
+#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */
+#define KMP_HASH(x) \
+ ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))
struct common_table {
- struct private_common *data[ KMP_HASH_TABLE_SIZE ];
+ struct private_common *data[KMP_HASH_TABLE_SIZE];
};
struct shared_table {
- struct shared_common *data[ KMP_HASH_TABLE_SIZE ];
+ struct shared_common *data[KMP_HASH_TABLE_SIZE];
};
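A small self-contained sketch of how the hash above buckets an address: drop the low alignment bits, then mask down to one of the 512 buckets. The macros are local mirrors of KMP_HASH_* with uintptr_t standing in for kmp_uintptr_t.

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  #define HASH_TABLE_LOG2 9
  #define HASH_TABLE_SIZE (1 << HASH_TABLE_LOG2) /* 512 buckets */
  #define HASH_SHIFT 3 /* throw away the low (alignment) bits */
  #define HASH(x) ((((std::uintptr_t)(x)) >> HASH_SHIFT) & (HASH_TABLE_SIZE - 1))

  int main() {
    static int a, b;
    std::printf("bucket(&a)=%u bucket(&b)=%u\n", (unsigned)HASH(&a),
                (unsigned)HASH(&b));
    return 0;
  }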
-/* ------------------------------------------------------------------------ */
+
/* ------------------------------------------------------------------------ */
#if KMP_STATIC_STEAL_ENABLED
typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
- kmp_int32 count;
- kmp_int32 ub;
- /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
- kmp_int32 lb;
- kmp_int32 st;
- kmp_int32 tc;
- kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */
-
- // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
- // a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
- // Because of parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
-
- struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
- kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
- kmp_int32 parm2; // make no real change at least while padding is off.
- kmp_int32 parm3;
- kmp_int32 parm4;
- };
+ kmp_int32 count;
+ kmp_int32 ub;
+ /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
+ kmp_int32 lb;
+ kmp_int32 st;
+ kmp_int32 tc;
+ kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
+ after ub */
+
+ // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
+ // a) parm3 is properly aligned and
+ // b) all parm1-4 are in the same cache line.
+  // Because parm1-4 are used together, performance seems to be better
+ // if they are in the same line (not measured though).
+
+ struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
+ kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
+ kmp_int32 parm2; // make no real change at least while padding is off.
+ kmp_int32 parm3;
+ kmp_int32 parm4;
+ };
- kmp_uint32 ordered_lower;
- kmp_uint32 ordered_upper;
+ kmp_uint32 ordered_lower;
+ kmp_uint32 ordered_upper;
#if KMP_OS_WINDOWS
- // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
- // It would be nice to measure execution times.
- // Conditional if/endif can be removed at all.
- kmp_int32 last_upper;
+// This var can be placed in the hole between 'tc' and 'parm1', instead of
+// 'static_steal_counter'. It would be nice to measure execution times.
+// The conditional if/endif could be removed entirely.
+ kmp_int32 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info32_t;
typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
- kmp_int64 count; /* current chunk number for static and static-steal scheduling*/
- kmp_int64 ub; /* upper-bound */
- /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
- kmp_int64 lb; /* lower-bound */
- kmp_int64 st; /* stride */
- kmp_int64 tc; /* trip count (number of iterations) */
- kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */
-
- /* parm[1-4] are used in different ways by different scheduling algorithms */
-
- // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
- // a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
- // Because of parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
-
- struct KMP_ALIGN( 32 ) {
- kmp_int64 parm1;
- kmp_int64 parm2;
- kmp_int64 parm3;
- kmp_int64 parm4;
- };
+ kmp_int64 count; // current chunk number for static & static-steal scheduling
+ kmp_int64 ub; /* upper-bound */
+ /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
+ kmp_int64 lb; /* lower-bound */
+ kmp_int64 st; /* stride */
+ kmp_int64 tc; /* trip count (number of iterations) */
+ kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
+ after ub */
+
+ /* parm[1-4] are used in different ways by different scheduling algorithms */
+
+ // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+ // a) parm3 is properly aligned and
+ // b) all parm1-4 are in the same cache line.
+  // Because parm1-4 are used together, performance seems to be better
+ // if they are in the same line (not measured though).
+
+ struct KMP_ALIGN(32) {
+ kmp_int64 parm1;
+ kmp_int64 parm2;
+ kmp_int64 parm3;
+ kmp_int64 parm4;
+ };
- kmp_uint64 ordered_lower;
- kmp_uint64 ordered_upper;
+ kmp_uint64 ordered_lower;
+ kmp_uint64 ordered_upper;
#if KMP_OS_WINDOWS
- // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
- // It would be nice to measure execution times.
- // Conditional if/endif can be removed at all.
- kmp_int64 last_upper;
+// This var can be placed in the hole between 'tc' and 'parm1', instead of
+// 'static_steal_counter'. It would be nice to measure execution times.
+// The conditional if/endif could be removed entirely.
+ kmp_int64 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info64_t;
#else /* KMP_STATIC_STEAL_ENABLED */
typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
- kmp_int32 lb;
- kmp_int32 ub;
- kmp_int32 st;
- kmp_int32 tc;
+ kmp_int32 lb;
+ kmp_int32 ub;
+ kmp_int32 st;
+ kmp_int32 tc;
+
+ kmp_int32 parm1;
+ kmp_int32 parm2;
+ kmp_int32 parm3;
+ kmp_int32 parm4;
- kmp_int32 parm1;
- kmp_int32 parm2;
- kmp_int32 parm3;
- kmp_int32 parm4;
-
- kmp_int32 count;
+ kmp_int32 count;
- kmp_uint32 ordered_lower;
- kmp_uint32 ordered_upper;
+ kmp_uint32 ordered_lower;
+ kmp_uint32 ordered_upper;
#if KMP_OS_WINDOWS
- kmp_int32 last_upper;
+ kmp_int32 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info32_t;
typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
- kmp_int64 lb; /* lower-bound */
- kmp_int64 ub; /* upper-bound */
- kmp_int64 st; /* stride */
- kmp_int64 tc; /* trip count (number of iterations) */
-
- /* parm[1-4] are used in different ways by different scheduling algorithms */
- kmp_int64 parm1;
- kmp_int64 parm2;
- kmp_int64 parm3;
- kmp_int64 parm4;
+ kmp_int64 lb; /* lower-bound */
+ kmp_int64 ub; /* upper-bound */
+ kmp_int64 st; /* stride */
+ kmp_int64 tc; /* trip count (number of iterations) */
+
+ /* parm[1-4] are used in different ways by different scheduling algorithms */
+ kmp_int64 parm1;
+ kmp_int64 parm2;
+ kmp_int64 parm3;
+ kmp_int64 parm4;
- kmp_int64 count; /* current chunk number for static scheduling */
+ kmp_int64 count; /* current chunk number for static scheduling */
- kmp_uint64 ordered_lower;
- kmp_uint64 ordered_upper;
+ kmp_uint64 ordered_lower;
+ kmp_uint64 ordered_upper;
#if KMP_OS_WINDOWS
- kmp_int64 last_upper;
+ kmp_int64 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info64_t;
#endif /* KMP_STATIC_STEAL_ENABLED */
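A minimal sketch of the alignment idea described in the comments above: grouping the four parm fields into one 32-byte-aligned block keeps them inside a single 64-byte cache line, assuming the enclosing structure is itself cache-aligned. This is only an illustration of the layout, not the runtime's KMP_ALIGN machinery.

  #include <cstdint>

  struct alignas(32) parm_block {
    std::int64_t parm1, parm2, parm3, parm4;
  };
  static_assert(sizeof(parm_block) == 32, "the four parms span exactly 32 bytes");
  static_assert(alignof(parm_block) == 32, "32-byte alignment keeps them together");

  int main() { return 0; }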
typedef struct KMP_ALIGN_CACHE dispatch_private_info {
- union private_info {
- dispatch_private_info32_t p32;
- dispatch_private_info64_t p64;
- } u;
- enum sched_type schedule; /* scheduling algorithm */
- kmp_int32 ordered; /* ordered clause specified */
- kmp_int32 ordered_bumped;
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
- struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */
- kmp_int32 nomerge; /* don't merge iters if serialized */
- kmp_int32 type_size; /* the size of types in private_info */
- enum cons_type pushed_ws;
+ union private_info {
+ dispatch_private_info32_t p32;
+ dispatch_private_info64_t p64;
+ } u;
+ enum sched_type schedule; /* scheduling algorithm */
+ kmp_int32 ordered; /* ordered clause specified */
+ kmp_int32 ordered_bumped;
+ // To retain the structure size after making ordered_iteration scalar
+ kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
+ // Stack of buffers for nest of serial regions
+ struct dispatch_private_info *next;
+ kmp_int32 nomerge; /* don't merge iters if serialized */
+ kmp_int32 type_size; /* the size of types in private_info */
+ enum cons_type pushed_ws;
} dispatch_private_info_t;
typedef struct dispatch_shared_info32 {
- /* chunk index under dynamic, number of idle threads under static-steal;
- iteration index otherwise */
- volatile kmp_uint32 iteration;
- volatile kmp_uint32 num_done;
- volatile kmp_uint32 ordered_iteration;
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
+ /* chunk index under dynamic, number of idle threads under static-steal;
+ iteration index otherwise */
+ volatile kmp_uint32 iteration;
+ volatile kmp_uint32 num_done;
+ volatile kmp_uint32 ordered_iteration;
+ // Dummy to retain the structure size after making ordered_iteration scalar
+ kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
} dispatch_shared_info32_t;
typedef struct dispatch_shared_info64 {
- /* chunk index under dynamic, number of idle threads under static-steal;
- iteration index otherwise */
- volatile kmp_uint64 iteration;
- volatile kmp_uint64 num_done;
- volatile kmp_uint64 ordered_iteration;
- kmp_int64 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
+ /* chunk index under dynamic, number of idle threads under static-steal;
+ iteration index otherwise */
+ volatile kmp_uint64 iteration;
+ volatile kmp_uint64 num_done;
+ volatile kmp_uint64 ordered_iteration;
+ // Dummy to retain the structure size after making ordered_iteration scalar
+ kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
} dispatch_shared_info64_t;
typedef struct dispatch_shared_info {
- union shared_info {
- dispatch_shared_info32_t s32;
- dispatch_shared_info64_t s64;
- } u;
- volatile kmp_uint32 buffer_index;
+ union shared_info {
+ dispatch_shared_info32_t s32;
+ dispatch_shared_info64_t s64;
+ } u;
+ volatile kmp_uint32 buffer_index;
#if OMP_45_ENABLED
- volatile kmp_int32 doacross_buf_idx; // teamwise index
- volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
- kmp_int32 doacross_num_done; // count finished threads
+ volatile kmp_int32 doacross_buf_idx; // teamwise index
+ volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)
+ kmp_int32 doacross_num_done; // count finished threads
#endif
#if KMP_USE_HWLOC
- // When linking with libhwloc, the ORDERED EPCC test slows down on big
- // machines (> 48 cores). Performance analysis showed that a cache thrash
- // was occurring and this padding helps alleviate the problem.
- char padding[64];
+ // When linking with libhwloc, the ORDERED EPCC test slows down on big
+ // machines (> 48 cores). Performance analysis showed that a cache thrash
+ // was occurring and this padding helps alleviate the problem.
+ char padding[64];
#endif
} dispatch_shared_info_t;
typedef struct kmp_disp {
- /* Vector for ORDERED SECTION */
- void (*th_deo_fcn)( int * gtid, int * cid, ident_t *);
- /* Vector for END ORDERED SECTION */
- void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *);
+ /* Vector for ORDERED SECTION */
+ void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);
+ /* Vector for END ORDERED SECTION */
+ void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);
- dispatch_shared_info_t *th_dispatch_sh_current;
- dispatch_private_info_t *th_dispatch_pr_current;
+ dispatch_shared_info_t *th_dispatch_sh_current;
+ dispatch_private_info_t *th_dispatch_pr_current;
- dispatch_private_info_t *th_disp_buffer;
- kmp_int32 th_disp_index;
+ dispatch_private_info_t *th_disp_buffer;
+ kmp_int32 th_disp_index;
#if OMP_45_ENABLED
- kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
- volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
- union { // we can use union here because doacross cannot be used in nonmonotonic loops
- kmp_int64 *th_doacross_info; // info on loop bounds
- kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
- };
+ kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
+ volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
+ union { // we can use union here because doacross cannot be used in
+ // nonmonotonic loops
+ kmp_int64 *th_doacross_info; // info on loop bounds
+ kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
+ };
#else
#if KMP_STATIC_STEAL_ENABLED
- kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
- void* dummy_padding[1]; // make it 64 bytes on Intel(R) 64
+ kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable)
+ void *dummy_padding[1]; // make it 64 bytes on Intel(R) 64
#else
- void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
+ void *dummy_padding[2]; // make it 64 bytes on Intel(R) 64
#endif
#endif
#if KMP_USE_INTERNODE_ALIGNMENT
- char more_padding[INTERNODE_CACHE_LINE];
+ char more_padding[INTERNODE_CACHE_LINE];
#endif
} kmp_disp_t;
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
/* Barrier stuff */
/* constants for barrier state update */
-#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
-#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
-#define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */
-#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
-
-#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
-#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
-#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
+#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */
+#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */
+#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state
+#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */
+
+#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)
+#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)
+#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)
#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
-# error "Barrier sleep bit must be smaller than barrier bump bit"
+#error "Barrier sleep bit must be smaller than barrier bump bit"
#endif
#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
-# error "Barrier unused bit must be smaller than barrier bump bit"
+#error "Barrier unused bit must be smaller than barrier bump bit"
#endif
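An illustration of the bit layout enforced above (not the actual release path): the go/arrived counter advances in units of the bump bit while the low sleep bit is set and cleared independently, which is why the static checks require the sleep and unused bits to sit below the bump bit.

  #include <cstdint>
  #include <cstdio>

  enum : std::uint64_t {
    SLEEP_STATE = 1u << 0, // mirrors KMP_BARRIER_SLEEP_STATE
    STATE_BUMP = 1u << 2   // mirrors KMP_BARRIER_STATE_BUMP
  };

  int main() {
    std::uint64_t b_go = 0;   // KMP_INIT_BARRIER_STATE
    b_go |= SLEEP_STATE;      // a waiting worker went to sleep on this flag
    b_go &= ~SLEEP_STATE;     // the releasing thread clears the sleep bit ...
    b_go += STATE_BUMP;       // ... and bumps the state to the next generation
    std::printf("b_go = %llu\n", (unsigned long long)b_go);
    return 0;
  }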
// Constants for release barrier wait state: currently, hierarchical only
-#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
-#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release
-#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release
-#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go
-#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up
-
-#define KMP_NOT_SAFE_TO_REAP 0 // Thread th_reap_state: not safe to reap (tasking)
-#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
+#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep
+#define KMP_BARRIER_OWN_FLAG \
+ 1 // Normal state; worker waiting on own b_go flag in release
+#define KMP_BARRIER_PARENT_FLAG \
+ 2 // Special state; worker waiting on parent's b_go flag in release
+#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \
+ 3 // Special state; tells worker to shift from parent to own b_go
+#define KMP_BARRIER_SWITCHING \
+ 4 // Special state; worker resets appropriate flag on wake-up
+
+#define KMP_NOT_SAFE_TO_REAP \
+ 0 // Thread th_reap_state: not safe to reap (tasking)
+#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)
enum barrier_type {
- bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
- bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
- #if KMP_FAST_REDUCTION_BARRIER
- bs_reduction_barrier, /* 2, All barriers that are used in reduction */
- #endif // KMP_FAST_REDUCTION_BARRIER
- bs_last_barrier /* Just a placeholder to mark the end */
+ bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction
+ barriers if enabled) */
+ bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */
+#if KMP_FAST_REDUCTION_BARRIER
+ bs_reduction_barrier, /* 2, All barriers that are used in reduction */
+#endif // KMP_FAST_REDUCTION_BARRIER
+ bs_last_barrier /* Just a placeholder to mark the end */
};
// to work with reduction barriers just like with plain barriers
#if !KMP_FAST_REDUCTION_BARRIER
- #define bs_reduction_barrier bs_plain_barrier
+#define bs_reduction_barrier bs_plain_barrier
#endif // KMP_FAST_REDUCTION_BARRIER
-typedef enum kmp_bar_pat { /* Barrier communication patterns */
- bp_linear_bar = 0, /* Single level (degenerate) tree */
- bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */
- bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */
- bp_hierarchical_bar = 3, /* Machine hierarchy tree */
- bp_last_bar = 4 /* Placeholder to mark the end */
+typedef enum kmp_bar_pat { /* Barrier communication patterns */
+ bp_linear_bar =
+ 0, /* Single level (degenerate) tree */
+ bp_tree_bar =
+ 1, /* Balanced tree with branching factor 2^n */
+ bp_hyper_bar =
+ 2, /* Hypercube-embedded tree with min branching
+ factor 2^n */
+ bp_hierarchical_bar = 3, /* Machine hierarchy tree */
+ bp_last_bar = 4 /* Placeholder to mark the end */
} kmp_bar_pat_e;
-# define KMP_BARRIER_ICV_PUSH 1
+#define KMP_BARRIER_ICV_PUSH 1
/* Record for holding the values of the internal controls stack records */
typedef struct kmp_internal_control {
- int serial_nesting_level; /* corresponds to the value of the th_team_serialized field */
- kmp_int8 nested; /* internal control for nested parallelism (per thread) */
- kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */
- kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */
- int blocktime; /* internal control for blocktime */
+ int serial_nesting_level; /* corresponds to the value of the
+ th_team_serialized field */
+ kmp_int8 nested; /* internal control for nested parallelism (per thread) */
+ kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per
+ thread) */
+ kmp_int8
+ bt_set; /* internal control for whether blocktime is explicitly set */
+ int blocktime; /* internal control for blocktime */
#if KMP_USE_MONITOR
- int bt_intervals; /* internal control for blocktime intervals */
+ int bt_intervals; /* internal control for blocktime intervals */
#endif
- int nproc; /* internal control for #threads for next parallel region (per thread) */
- int max_active_levels; /* internal control for max_active_levels */
- kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */
+ int nproc; /* internal control for #threads for next parallel region (per
+ thread) */
+ int max_active_levels; /* internal control for max_active_levels */
+ kmp_r_sched_t
+ sched; /* internal control for runtime schedule {sched,chunk} pair */
#if OMP_40_ENABLED
- kmp_proc_bind_t proc_bind; /* internal control for affinity */
- kmp_int32 default_device; /* internal control for default device */
+ kmp_proc_bind_t proc_bind; /* internal control for affinity */
+ kmp_int32 default_device; /* internal control for default device */
#endif // OMP_40_ENABLED
- struct kmp_internal_control *next;
+ struct kmp_internal_control *next;
} kmp_internal_control_t;
-static inline void
-copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
- *dst = *src;
+static inline void copy_icvs(kmp_internal_control_t *dst,
+ kmp_internal_control_t *src) {
+ *dst = *src;
}
/* Thread barrier needs volatile barrier fields */
typedef struct KMP_ALIGN_CACHE kmp_bstate {
- // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it).
- // It is not explicitly aligned below, because we *don't* want it to be padded -- instead,
- // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache lines
- // stores in the hierarchical barrier.
- kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
- // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store
- volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
- KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point.
- kmp_uint32 *skip_per_level;
- kmp_uint32 my_level;
- kmp_int32 parent_tid;
- kmp_int32 old_tid;
- kmp_uint32 depth;
- struct kmp_bstate *parent_bar;
- kmp_team_t *team;
- kmp_uint64 leaf_state;
- kmp_uint32 nproc;
- kmp_uint8 base_leaf_kids;
- kmp_uint8 leaf_kids;
- kmp_uint8 offset;
- kmp_uint8 wait_flag;
- kmp_uint8 use_oncore_barrier;
+ // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all
+ // uses of it). It is not explicitly aligned below, because we *don't* want
+ // it to be padded -- instead, we fit b_go into the same cache line with
+  // th_fixed_icvs, enabling NGO cache line stores in the hierarchical barrier.
+ kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread
+ // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with
+ // same NGO store
+ volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)
+ KMP_ALIGN_CACHE volatile kmp_uint64
+ b_arrived; // STATE => task reached synch point.
+ kmp_uint32 *skip_per_level;
+ kmp_uint32 my_level;
+ kmp_int32 parent_tid;
+ kmp_int32 old_tid;
+ kmp_uint32 depth;
+ struct kmp_bstate *parent_bar;
+ kmp_team_t *team;
+ kmp_uint64 leaf_state;
+ kmp_uint32 nproc;
+ kmp_uint8 base_leaf_kids;
+ kmp_uint8 leaf_kids;
+ kmp_uint8 offset;
+ kmp_uint8 wait_flag;
+ kmp_uint8 use_oncore_barrier;
#if USE_DEBUGGER
- // The following field is intended for the debugger solely. Only the worker thread itself accesses this
- // field: the worker increases it by 1 when it arrives to a barrier.
- KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
+ // The following field is intended for the debugger solely. Only the worker
+ // thread itself accesses this field: the worker increases it by 1 when it
+  // arrives at a barrier.
+ KMP_ALIGN_CACHE kmp_uint b_worker_arrived;
#endif /* USE_DEBUGGER */
} kmp_bstate_t;
union KMP_ALIGN_CACHE kmp_barrier_union {
- double b_align; /* use worst case alignment */
- char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
- kmp_bstate_t bb;
+ double b_align; /* use worst case alignment */
+ char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];
+ kmp_bstate_t bb;
};
typedef union kmp_barrier_union kmp_balign_t;
/* Team barrier needs only non-volatile arrived counter */
union KMP_ALIGN_CACHE kmp_barrier_team_union {
- double b_align; /* use worst case alignment */
- char b_pad[ CACHE_LINE ];
- struct {
- kmp_uint64 b_arrived; /* STATE => task reached synch point. */
+ double b_align; /* use worst case alignment */
+ char b_pad[CACHE_LINE];
+ struct {
+ kmp_uint64 b_arrived; /* STATE => task reached synch point. */
#if USE_DEBUGGER
- // The following two fields are indended for the debugger solely. Only master of the team accesses
- // these fields: the first one is increased by 1 when master arrives to a barrier, the
- // second one is increased by one when all the threads arrived.
- kmp_uint b_master_arrived;
- kmp_uint b_team_arrived;
+  // The following two fields are intended for the debugger solely. Only the
+  // master of the team accesses these fields: the first one is increased by 1
+  // when the master arrives at a barrier, the second one is increased by one
+  // when all the threads have arrived.
+ kmp_uint b_master_arrived;
+ kmp_uint b_team_arrived;
#endif
- };
+ };
};
typedef union kmp_barrier_team_union kmp_balign_team_t;
-/*
- * Padding for Linux* OS pthreads condition variables and mutexes used to signal
- * threads when a condition changes. This is to workaround an NPTL bug
- * where padding was added to pthread_cond_t which caused the initialization
- * routine to write outside of the structure if compiled on pre-NPTL threads.
- */
-
+/* Padding for Linux* OS pthreads condition variables and mutexes used to signal
+   threads when a condition changes. This works around an NPTL bug where
+ padding was added to pthread_cond_t which caused the initialization routine
+ to write outside of the structure if compiled on pre-NPTL threads. */
#if KMP_OS_WINDOWS
-typedef struct kmp_win32_mutex
-{
- /* The Lock */
- CRITICAL_SECTION cs;
+typedef struct kmp_win32_mutex {
+ /* The Lock */
+ CRITICAL_SECTION cs;
} kmp_win32_mutex_t;
-typedef struct kmp_win32_cond
-{
- /* Count of the number of waiters. */
- int waiters_count_;
-
- /* Serialize access to <waiters_count_> */
- kmp_win32_mutex_t waiters_count_lock_;
-
- /* Number of threads to release via a <cond_broadcast> or a */
- /* <cond_signal> */
- int release_count_;
-
- /* Keeps track of the current "generation" so that we don't allow */
- /* one thread to steal all the "releases" from the broadcast. */
- int wait_generation_count_;
-
- /* A manual-reset event that's used to block and release waiting */
- /* threads. */
- HANDLE event_;
+typedef struct kmp_win32_cond {
+ /* Count of the number of waiters. */
+ int waiters_count_;
+
+ /* Serialize access to <waiters_count_> */
+ kmp_win32_mutex_t waiters_count_lock_;
+
+ /* Number of threads to release via a <cond_broadcast> or a <cond_signal> */
+ int release_count_;
+
+ /* Keeps track of the current "generation" so that we don't allow */
+ /* one thread to steal all the "releases" from the broadcast. */
+ int wait_generation_count_;
+
+ /* A manual-reset event that's used to block and release waiting threads. */
+ HANDLE event_;
} kmp_win32_cond_t;
#endif
#if KMP_OS_UNIX
union KMP_ALIGN_CACHE kmp_cond_union {
- double c_align;
- char c_pad[ CACHE_LINE ];
- pthread_cond_t c_cond;
+ double c_align;
+ char c_pad[CACHE_LINE];
+ pthread_cond_t c_cond;
};
typedef union kmp_cond_union kmp_cond_align_t;
union KMP_ALIGN_CACHE kmp_mutex_union {
- double m_align;
- char m_pad[ CACHE_LINE ];
- pthread_mutex_t m_mutex;
+ double m_align;
+ char m_pad[CACHE_LINE];
+ pthread_mutex_t m_mutex;
};
typedef union kmp_mutex_union kmp_mutex_align_t;
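A sketch of the pad-to-a-cache-line union pattern used by kmp_cond_align_t / kmp_mutex_align_t above, with a plain counter standing in for the pthread object and 64 bytes assumed as the line size (the real code uses CACHE_LINE).

  #include <cstdio>

  #define CACHE_LINE_SZ 64
  union padded_counter {
    double align;            // force worst-case scalar alignment
    char pad[CACHE_LINE_SZ]; // round the union up to a full cache line
    long counter;            // the actual payload
  };
  static_assert(sizeof(padded_counter) == CACHE_LINE_SZ,
                "exactly one object per cache line");

  int main() {
    padded_counter c;
    c.counter = 1; // writing a member makes it the active one
    std::printf("%ld\n", c.counter);
    return 0;
  }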
@@ -1816,145 +1908,159 @@ typedef union kmp_mutex_union kmp_mutex_
#endif /* KMP_OS_UNIX */
typedef struct kmp_desc_base {
- void *ds_stackbase;
- size_t ds_stacksize;
- int ds_stackgrow;
- kmp_thread_t ds_thread;
- volatile int ds_tid;
- int ds_gtid;
+ void *ds_stackbase;
+ size_t ds_stacksize;
+ int ds_stackgrow;
+ kmp_thread_t ds_thread;
+ volatile int ds_tid;
+ int ds_gtid;
#if KMP_OS_WINDOWS
- volatile int ds_alive;
- DWORD ds_thread_id;
- /*
- ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. However,
- debugger support (libomp_db) cannot work with handles, because they uncomparable. For
- example, debugger requests info about thread with handle h. h is valid within debugger
- process, and meaningless within debugee process. Even if h is duped by call to
- DuplicateHandle(), so the result h' is valid within debugee process, but it is a *new*
- handle which does *not* equal to any other handle in debugee... The only way to
- compare handles is convert them to system-wide ids. GetThreadId() function is
- available only in Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is
- available on all Windows* OS flavours (including Windows* 95). Thus, we have to get thread id by
- call to GetCurrentThreadId() from within the thread and save it to let libomp_db
- identify threads.
- */
+ volatile int ds_alive;
+ DWORD ds_thread_id;
+/* ds_thread keeps the thread handle on Windows* OS. It is enough for RTL
+   purposes. However, debugger support (libomp_db) cannot work with handles,
+   because they are not comparable. For example, the debugger requests info
+   about a thread with handle h. h is valid within the debugger process, and
+   meaningless within the debuggee process. Even if h is duplicated by a call
+   to DuplicateHandle() so that the result h' is valid within the debuggee
+   process, it is a *new* handle which does *not* equal any other handle in
+   the debuggee... The only way to compare handles is to convert them to
+   system-wide ids. The GetThreadId() function is available only in Longhorn
+   and Server 2003. :-( In contrast, GetCurrentThreadId() is available on all
+   Windows* OS flavours (including Windows* 95). Thus, we have to get the
+   thread id by a call to GetCurrentThreadId() from within the thread and save
+   it to let libomp_db identify threads. */
#endif /* KMP_OS_WINDOWS */
} kmp_desc_base_t;
typedef union KMP_ALIGN_CACHE kmp_desc {
- double ds_align; /* use worst case alignment */
- char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ];
- kmp_desc_base_t ds;
+ double ds_align; /* use worst case alignment */
+ char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];
+ kmp_desc_base_t ds;
} kmp_desc_t;
-
typedef struct kmp_local {
- volatile int this_construct; /* count of single's encountered by thread */
- void *reduce_data;
+ volatile int this_construct; /* count of single's encountered by thread */
+ void *reduce_data;
#if KMP_USE_BGET
- void *bget_data;
- void *bget_list;
-#if ! USE_CMP_XCHG_FOR_BGET
+ void *bget_data;
+ void *bget_list;
+#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
- kmp_lock_t bget_lock; /* Lock for accessing bget free list */
+ kmp_lock_t bget_lock; /* Lock for accessing bget free list */
#else
- kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */
- /* Must be bootstrap lock so we can use it at library shutdown */
+ kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be
+// bootstrap lock so we can use it at library
+// shutdown.
#endif /* USE_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
#endif /* KMP_USE_BGET */
#ifdef BUILD_TV
- struct tv_data *tv_data;
+ struct tv_data *tv_data;
#endif
- PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */
+ PACKED_REDUCTION_METHOD_T
+ packed_reduction_method; /* stored by __kmpc_reduce*(), used by
+ __kmpc_end_reduce*() */
} kmp_local_t;
-#define KMP_CHECK_UPDATE(a, b) if ((a) != (b)) (a) = (b)
-#define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b))
-
-#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
-#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
+#define KMP_CHECK_UPDATE(a, b) \
+ if ((a) != (b)) \
+ (a) = (b)
+#define KMP_CHECK_UPDATE_SYNC(a, b) \
+ if ((a) != (b)) \
+ TCW_SYNC_PTR((a), (b))
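A plain-variable illustration of the check-before-write idiom above: skipping the store when the value is unchanged avoids dirtying a cache line that other threads may be reading (the local macro below mirrors KMP_CHECK_UPDATE without the TCW_SYNC_PTR variant).

  #include <cstdio>

  #define CHECK_UPDATE(a, b)                                                   \
    if ((a) != (b))                                                            \
    (a) = (b)

  int main() {
    int shared = 5;
    CHECK_UPDATE(shared, 5); // equal: no store is performed
    CHECK_UPDATE(shared, 7); // different: the store happens
    std::printf("shared = %d\n", shared);
    return 0;
  }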
+
+#define get__blocktime(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
+#define get__bt_set(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
#if KMP_USE_MONITOR
-#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
+#define get__bt_intervals(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
#endif
-#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
-#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
-#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
-#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
-
-#define set__blocktime_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) )
+#define get__nested_2(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
+#define get__dynamic_2(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
+#define get__nproc_2(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
+#define get__sched_2(xteam, xtid) \
+ ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
+
+#define set__blocktime_team(xteam, xtid, xval) \
+ (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \
+ (xval))
#if KMP_USE_MONITOR
-#define set__bt_intervals_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
+#define set__bt_intervals_team(xteam, xtid, xval) \
+ (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \
+ (xval))
#endif
-#define set__bt_set_team( xteam, xtid, xval ) \
- ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) )
-
+#define set__bt_set_team(xteam, xtid, xval) \
+ (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))
-#define set__nested( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) )
-#define get__nested( xthread ) \
- ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) )
+#define set__nested(xthread, xval) \
+ (((xthread)->th.th_current_task->td_icvs.nested) = (xval))
+#define get__nested(xthread) \
+ (((xthread)->th.th_current_task->td_icvs.nested) ? (FTN_TRUE) : (FTN_FALSE))
-#define set__dynamic( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) )
-#define get__dynamic( xthread ) \
- ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? (FTN_TRUE) : (FTN_FALSE) )
+#define set__dynamic(xthread, xval) \
+ (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))
+#define get__dynamic(xthread) \
+ (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))
-#define set__nproc( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) )
+#define set__nproc(xthread, xval) \
+ (((xthread)->th.th_current_task->td_icvs.nproc) = (xval))
-#define set__max_active_levels( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) )
+#define set__max_active_levels(xthread, xval) \
+ (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))
-#define set__sched( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) )
+#define set__sched(xthread, xval) \
+ (((xthread)->th.th_current_task->td_icvs.sched) = (xval))
#if OMP_40_ENABLED
-#define set__proc_bind( xthread, xval ) \
- ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) )
-#define get__proc_bind( xthread ) \
- ( (xthread)->th.th_current_task->td_icvs.proc_bind )
+#define set__proc_bind(xthread, xval) \
+ (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))
+#define get__proc_bind(xthread) \
+ ((xthread)->th.th_current_task->td_icvs.proc_bind)
#endif /* OMP_40_ENABLED */
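A much-simplified mirror of the accessor chain behind macros like get__nproc_2, with hypothetical types (icvs_t, task_t, thread_t, team_t) standing in for the runtime's kmp_internal_control_t / kmp_taskdata_t / kmp_info_t / kmp_team_t; the point is only that every accessor walks team -> thread -> current task -> ICVs.

  #include <cstdio>

  struct icvs_t { int nproc; };
  struct task_t { icvs_t td_icvs; };
  struct thread_t { task_t *th_current_task; };
  struct team_t { thread_t **t_threads; };

  #define GET_NPROC_2(xteam, xtid) \
    ((xteam)->t_threads[(xtid)]->th_current_task->td_icvs.nproc)

  int main() {
    task_t task{{4}};
    thread_t thr{&task};
    thread_t *threads[1] = {&thr};
    team_t team{threads};
    std::printf("nproc = %d\n", GET_NPROC_2(&team, 0));
    return 0;
  }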
-
-/* ------------------------------------------------------------------------ */
// OpenMP tasking data structures
-//
typedef enum kmp_tasking_mode {
- tskm_immediate_exec = 0,
- tskm_extra_barrier = 1,
- tskm_task_teams = 2,
- tskm_max = 2
+ tskm_immediate_exec = 0,
+ tskm_extra_barrier = 1,
+ tskm_task_teams = 2,
+ tskm_max = 2
} kmp_tasking_mode_t;
-extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */
+extern kmp_tasking_mode_t
+ __kmp_tasking_mode; /* determines how/when to execute tasks */
extern kmp_int32 __kmp_task_stealing_constraint;
#if OMP_40_ENABLED
- extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if specified, defaults to 0 otherwise
+extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
+// specified, defaults to 0 otherwise
#endif
#if OMP_45_ENABLED
- extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise
+extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if
+// specified, defaults to 0 otherwise
#endif
-/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */
-#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1)
-#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1)
-
-// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
-// queued since the previous barrier release.
-#define KMP_TASKING_ENABLED(task_team) \
- (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
+/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with
+ taskdata first */
+#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)
+#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)
+
+// The tt_found_tasks flag is a signal to all threads in the team that tasks
+// were spawned and queued since the previous barrier release.
+#define KMP_TASKING_ENABLED(task_team) \
+ (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE)
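A minimal sketch of the single-block layout described in the NOTE above, with hypothetical stand-ins for kmp_taskdata_t and kmp_task_t: the taskdata record comes first in the allocation, so converting between the two is pure pointer arithmetic. Alignment padding that the real runtime guarantees (see the KMP_BUILD_ASSERT on kmp_taskdata_t further down) is glossed over here by keeping the stand-in pointer-sized.

  #include <cstdio>
  #include <cstdlib>

  struct taskdata_t { long long td_task_id; }; // pointer-sized, keeps task_t aligned
  struct task_t { void *shareds; };

  #define TASK_TO_TASKDATA(task) (((taskdata_t *)(task)) - 1)
  #define TASKDATA_TO_TASK(td) ((task_t *)((td) + 1))

  int main() {
    // One allocation: taskdata first, then the task itself.
    void *block = std::malloc(sizeof(taskdata_t) + sizeof(task_t));
    taskdata_t *td = (taskdata_t *)block;
    task_t *task = TASKDATA_TO_TASK(td);
    std::printf("round trip ok: %d\n", TASK_TO_TASKDATA(task) == td);
    std::free(block);
    return 0;
  }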
/*!
@ingroup BASIC_TYPES
@{
@@ -1962,33 +2068,37 @@ extern kmp_int32 __kmp_task_stealing_con
/*!
*/
-typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );
+typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);
#if OMP_40_ENABLED || OMP_45_ENABLED
typedef union kmp_cmplrdata {
#if OMP_45_ENABLED
- kmp_int32 priority; /**< priority specified by user for the task */
+ kmp_int32 priority; /**< priority specified by user for the task */
#endif // OMP_45_ENABLED
#if OMP_40_ENABLED
- kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */
+ kmp_routine_entry_t
+      destructors; /* pointer to function to invoke destructors of
+ firstprivate C++ objects */
#endif // OMP_40_ENABLED
- /* future data */
+ /* future data */
} kmp_cmplrdata_t;
#endif
/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
/*!
*/
-typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
- void * shareds; /**< pointer to block of pointers to shared vars */
- kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */
- kmp_int32 part_id; /**< part id for the task */
+typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */
+ void *shareds; /**< pointer to block of pointers to shared vars */
+ kmp_routine_entry_t
+ routine; /**< pointer to routine to call for executing task */
+ kmp_int32 part_id; /**< part id for the task */
#if OMP_40_ENABLED || OMP_45_ENABLED
- kmp_cmplrdata_t data1; /* Two known optional additions: destructors and priority */
- kmp_cmplrdata_t data2; /* Process destructors first, priority second */
- /* future data */
+ kmp_cmplrdata_t
+ data1; /* Two known optional additions: destructors and priority */
+ kmp_cmplrdata_t data2; /* Process destructors first, priority second */
+/* future data */
#endif
- /* private vars */
+ /* private vars */
} kmp_task_t;
/*!
@@ -1997,69 +2107,69 @@ typedef struct kmp_task {
#if OMP_40_ENABLED
typedef struct kmp_taskgroup {
- kmp_uint32 count; // number of allocated and not yet complete tasks
- kmp_int32 cancel_request; // request for cancellation of this taskgroup
- struct kmp_taskgroup *parent; // parent taskgroup
+ kmp_uint32 count; // number of allocated and not yet complete tasks
+ kmp_int32 cancel_request; // request for cancellation of this taskgroup
+ struct kmp_taskgroup *parent; // parent taskgroup
// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
- // Block of data to perform task reduction
- void *reduce_data; // reduction related info
- kmp_int32 reduce_num_data; // number of data items to reduce
+ // Block of data to perform task reduction
+ void *reduce_data; // reduction related info
+ kmp_int32 reduce_num_data; // number of data items to reduce
#endif
} kmp_taskgroup_t;
// forward declarations
-typedef union kmp_depnode kmp_depnode_t;
-typedef struct kmp_depnode_list kmp_depnode_list_t;
+typedef union kmp_depnode kmp_depnode_t;
+typedef struct kmp_depnode_list kmp_depnode_list_t;
typedef struct kmp_dephash_entry kmp_dephash_entry_t;
typedef struct kmp_depend_info {
- kmp_intptr_t base_addr;
- size_t len;
- struct {
- bool in:1;
- bool out:1;
- } flags;
+ kmp_intptr_t base_addr;
+ size_t len;
+ struct {
+ bool in : 1;
+ bool out : 1;
+ } flags;
} kmp_depend_info_t;
struct kmp_depnode_list {
- kmp_depnode_t * node;
- kmp_depnode_list_t * next;
+ kmp_depnode_t *node;
+ kmp_depnode_list_t *next;
};
typedef struct kmp_base_depnode {
- kmp_depnode_list_t * successors;
- kmp_task_t * task;
+ kmp_depnode_list_t *successors;
+ kmp_task_t *task;
- kmp_lock_t lock;
+ kmp_lock_t lock;
#if KMP_SUPPORT_GRAPH_OUTPUT
- kmp_uint32 id;
+ kmp_uint32 id;
#endif
- volatile kmp_int32 npredecessors;
- volatile kmp_int32 nrefs;
+ volatile kmp_int32 npredecessors;
+ volatile kmp_int32 nrefs;
} kmp_base_depnode_t;
union KMP_ALIGN_CACHE kmp_depnode {
- double dn_align; /* use worst case alignment */
- char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
- kmp_base_depnode_t dn;
+ double dn_align; /* use worst case alignment */
+ char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];
+ kmp_base_depnode_t dn;
};
struct kmp_dephash_entry {
- kmp_intptr_t addr;
- kmp_depnode_t * last_out;
- kmp_depnode_list_t * last_ins;
- kmp_dephash_entry_t * next_in_bucket;
+ kmp_intptr_t addr;
+ kmp_depnode_t *last_out;
+ kmp_depnode_list_t *last_ins;
+ kmp_dephash_entry_t *next_in_bucket;
};
typedef struct kmp_dephash {
- kmp_dephash_entry_t ** buckets;
- size_t size;
+ kmp_dephash_entry_t **buckets;
+ size_t size;
#ifdef KMP_DEBUG
- kmp_uint32 nelements;
- kmp_uint32 nconflicts;
+ kmp_uint32 nelements;
+ kmp_uint32 nconflicts;
#endif
} kmp_dephash_t;
@@ -2069,556 +2179,583 @@ typedef struct kmp_dephash {
/* Tied Task stack definitions */
typedef struct kmp_stack_block {
- kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ];
- struct kmp_stack_block * sb_next;
- struct kmp_stack_block * sb_prev;
+ kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];
+ struct kmp_stack_block *sb_next;
+ struct kmp_stack_block *sb_prev;
} kmp_stack_block_t;
typedef struct kmp_task_stack {
- kmp_stack_block_t ts_first_block; // first block of stack entries
- kmp_taskdata_t ** ts_top; // pointer to the top of stack
- kmp_int32 ts_entries; // number of entries on the stack
+ kmp_stack_block_t ts_first_block; // first block of stack entries
+ kmp_taskdata_t **ts_top; // pointer to the top of stack
+ kmp_int32 ts_entries; // number of entries on the stack
} kmp_task_stack_t;
#endif // BUILD_TIED_TASK_STACK
-typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
- /* Compiler flags */ /* Total compiler flags must be 16 bits */
- unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
- unsigned final : 1; /* task is final(1) so execute immediately */
- unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
+typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
+ /* Compiler flags */ /* Total compiler flags must be 16 bits */
+ unsigned tiedness : 1; /* task is either tied (1) or untied (0) */
+ unsigned final : 1; /* task is final(1) so execute immediately */
+ unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
+ code path */
#if OMP_40_ENABLED
- unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */
+ unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
+ invoke destructors from the runtime */
#if OMP_45_ENABLED
- unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */
- unsigned priority_specified :1; /* set if the compiler provides priority setting for the task */
- unsigned reserved : 10; /* reserved for compiler use */
+ unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
+ context of the RTL) */
+ unsigned priority_specified : 1; /* set if the compiler provides priority
+ setting for the task */
+ unsigned reserved : 10; /* reserved for compiler use */
#else
- unsigned reserved : 12; /* reserved for compiler use */
+ unsigned reserved : 12; /* reserved for compiler use */
#endif
#else // OMP_40_ENABLED
- unsigned reserved : 13; /* reserved for compiler use */
+ unsigned reserved : 13; /* reserved for compiler use */
#endif // OMP_40_ENABLED
- /* Library flags */ /* Total library flags must be 16 bits */
- unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
- unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */
- unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */
- unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */
- /* If either team_serial or tasking_ser is set, task team may be NULL */
- /* Task State Flags: */
- unsigned started : 1; /* 1==started, 0==not started */
- unsigned executing : 1; /* 1==executing, 0==not executing */
- unsigned complete : 1; /* 1==complete, 0==not complete */
- unsigned freed : 1; /* 1==freed, 0==allocateed */
- unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
- unsigned reserved31 : 7; /* reserved for library use */
+ /* Library flags */ /* Total library flags must be 16 bits */
+ unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */
+ unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
+ unsigned tasking_ser : 1; // all tasks in team are either executed immediately
+ // (1) or may be deferred (0)
+ unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
+ // (0) [>= 2 threads]
+ /* If either team_serial or tasking_ser is set, task team may be NULL */
+ /* Task State Flags: */
+ unsigned started : 1; /* 1==started, 0==not started */
+ unsigned executing : 1; /* 1==executing, 0==not executing */
+ unsigned complete : 1; /* 1==complete, 0==not complete */
+  unsigned freed : 1; /* 1==freed, 0==allocated */
+ unsigned native : 1; /* 1==gcc-compiled task, 0==intel */
+ unsigned reserved31 : 7; /* reserved for library use */
} kmp_tasking_flags_t;
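A sketch of the "exactly 32 bits" invariant stated above, with two 16-bit fields standing in for the individual compiler and library flag bits; the static_assert holds on common ABIs that pack adjacent unsigned bit-fields into a single 32-bit unit.

  #include <cstdint>

  struct flags32_t {
    unsigned compiler_flags : 16; // stands in for the compiler-side bits
    unsigned library_flags : 16;  // stands in for the library-side bits
  };
  static_assert(sizeof(flags32_t) == sizeof(std::uint32_t),
                "tasking flags must pack into exactly 32 bits");

  int main() { return 0; }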
-
-struct kmp_taskdata { /* aligned during dynamic allocation */
- kmp_int32 td_task_id; /* id, assigned by debugger */
- kmp_tasking_flags_t td_flags; /* task flags */
- kmp_team_t * td_team; /* team for this task */
- kmp_info_p * td_alloc_thread; /* thread that allocated data structures */
- /* Currently not used except for perhaps IDB */
- kmp_taskdata_t * td_parent; /* parent task */
- kmp_int32 td_level; /* task nesting level */
- kmp_int32 td_untied_count; /* untied task active parts counter */
- ident_t * td_ident; /* task identifier */
- // Taskwait data.
- ident_t * td_taskwait_ident;
- kmp_uint32 td_taskwait_counter;
- kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
- KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */
- KMP_ALIGN_CACHE volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */
- volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */
-#if OMP_40_ENABLED
- kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup
- kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here
- kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies
+struct kmp_taskdata { /* aligned during dynamic allocation */
+ kmp_int32 td_task_id; /* id, assigned by debugger */
+ kmp_tasking_flags_t td_flags; /* task flags */
+ kmp_team_t *td_team; /* team for this task */
+ kmp_info_p *td_alloc_thread; /* thread that allocated data structures */
+ /* Currently not used except for perhaps IDB */
+ kmp_taskdata_t *td_parent; /* parent task */
+ kmp_int32 td_level; /* task nesting level */
+ kmp_int32 td_untied_count; /* untied task active parts counter */
+ ident_t *td_ident; /* task identifier */
+ // Taskwait data.
+ ident_t *td_taskwait_ident;
+ kmp_uint32 td_taskwait_counter;
+ kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */
+ KMP_ALIGN_CACHE kmp_internal_control_t
+ td_icvs; /* Internal control variables for the task */
+ KMP_ALIGN_CACHE volatile kmp_uint32
+ td_allocated_child_tasks; /* Child tasks (+ current task) not yet
+ deallocated */
+ volatile kmp_uint32
+ td_incomplete_child_tasks; /* Child tasks not yet complete */
+#if OMP_40_ENABLED
+ kmp_taskgroup_t
+ *td_taskgroup; // Each task keeps pointer to its current taskgroup
+ kmp_dephash_t
+ *td_dephash; // Dependencies for children tasks are tracked from here
+ kmp_depnode_t
+ *td_depnode; // Pointer to graph node if this task has dependencies
#endif
#if OMPT_SUPPORT
- ompt_task_info_t ompt_task_info;
+ ompt_task_info_t ompt_task_info;
#endif
#if OMP_45_ENABLED
- kmp_task_team_t * td_task_team;
- kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
+ kmp_task_team_t *td_task_team;
+ kmp_int32 td_size_alloc; // The size of task structure, including shareds etc.
#endif
}; // struct kmp_taskdata
// Make sure padding above worked
-KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
+KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);
// Data for task team but per thread
typedef struct kmp_base_thread_data {
- kmp_info_p * td_thr; // Pointer back to thread info
- // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued?
- kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
- kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
- kmp_int32 td_deque_size; // Size of deck
- kmp_uint32 td_deque_head; // Head of deque (will wrap)
- kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
- kmp_int32 td_deque_ntasks; // Number of tasks in deque
- // GEH: shouldn't this be volatile since used in while-spin?
- kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
+ kmp_info_p *td_thr; // Pointer back to thread info
+ // Used only in __kmp_execute_tasks_template, maybe not avail until task is
+ // queued?
+ kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque
+ kmp_taskdata_t *
+ *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated
+  kmp_int32 td_deque_size; // Size of deque
+ kmp_uint32 td_deque_head; // Head of deque (will wrap)
+ kmp_uint32 td_deque_tail; // Tail of deque (will wrap)
+ kmp_int32 td_deque_ntasks; // Number of tasks in deque
+ // GEH: shouldn't this be volatile since used in while-spin?
+ kmp_int32 td_deque_last_stolen; // Thread number of last successful steal
#ifdef BUILD_TIED_TASK_STACK
- kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint
+ kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task
+// scheduling constraint
#endif // BUILD_TIED_TASK_STACK
} kmp_base_thread_data_t;
-#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
-#define INITIAL_TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS )
+#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE
+#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)
-#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
-#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
+#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)
+#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)
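Because the deque size is a power of two, size - 1 works as a wrap-around mask for the head and tail indices; the sketch below mirrors INITIAL_TASK_DEQUE_SIZE / TASK_DEQUE_MASK with fixed constants instead of the per-thread td_deque_size.

  #include <cstdio>

  #define DEQUE_BITS 8
  #define DEQUE_SIZE (1 << DEQUE_BITS) /* 256 */
  #define DEQUE_MASK (DEQUE_SIZE - 1)  /* 0xFF */

  int main() {
    unsigned head = 255;
    head = (head + 1) & DEQUE_MASK; // wraps to 0 rather than reaching 256
    std::printf("head = %u\n", head);
    return 0;
  }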
typedef union KMP_ALIGN_CACHE kmp_thread_data {
- kmp_base_thread_data_t td;
- double td_align; /* use worst case alignment */
- char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ];
+ kmp_base_thread_data_t td;
+ double td_align; /* use worst case alignment */
+ char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];
} kmp_thread_data_t;
-
// Data for task teams which are used when tasking is enabled for the team
typedef struct kmp_base_task_team {
- kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */
- /* must be bootstrap lock since used at library shutdown*/
- kmp_task_team_t * tt_next; /* For linking the task team free list */
- kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */
- /* Data survives task team deallocation */
- kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */
- /* TRUE means tt_threads_data is set up and initialized */
- kmp_int32 tt_nproc; /* #threads in team */
- kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */
+ kmp_bootstrap_lock_t
+ tt_threads_lock; /* Lock used to allocate per-thread part of task team */
+ /* must be bootstrap lock since used at library shutdown*/
+ kmp_task_team_t *tt_next; /* For linking the task team free list */
+ kmp_thread_data_t
+ *tt_threads_data; /* Array of per-thread structures for task team */
+ /* Data survives task team deallocation */
+ kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while
+ executing this team? */
+ /* TRUE means tt_threads_data is set up and initialized */
+ kmp_int32 tt_nproc; /* #threads in team */
+ kmp_int32
+ tt_max_threads; /* number of entries allocated for threads_data array */
#if OMP_45_ENABLED
- kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
+ kmp_int32
+ tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */
#endif
- KMP_ALIGN_CACHE
- volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */
+ KMP_ALIGN_CACHE
+ volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */
- KMP_ALIGN_CACHE
- volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */
+ KMP_ALIGN_CACHE
+ volatile kmp_uint32
+ tt_active; /* is the team still actively executing tasks */
} kmp_base_task_team_t;
union KMP_ALIGN_CACHE kmp_task_team {
- kmp_base_task_team_t tt;
- double tt_align; /* use worst case alignment */
- char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
+ kmp_base_task_team_t tt;
+ double tt_align; /* use worst case alignment */
+ char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];
};
-#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
-// Free lists keep same-size free memory slots for fast memory allocation routines
+#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
+// Free lists keep same-size free memory slots for fast memory allocation
+// routines
typedef struct kmp_free_list {
- void *th_free_list_self; // Self-allocated tasks free list
- void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads
- void *th_free_list_other; // Non-self free list (to be returned to owner's sync list)
+ void *th_free_list_self; // Self-allocated tasks free list
+ void *th_free_list_sync; // Self-allocated tasks stolen/returned by other
+ // threads
+ void *th_free_list_other; // Non-self free list (to be returned to owner's
+ // sync list)
} kmp_free_list_t;
#endif
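
The three th_free_list_* pointers are heads of intrusive free lists: a freed
block's own storage is reused as the link to the next free block. A
hypothetical stand-alone sketch of that general idea (not the runtime's
fast-memory allocator in kmp_alloc.cpp):

  #include <stdio.h>

  typedef struct free_block { struct free_block *next; } free_block_t;

  static void free_list_push(free_block_t **list, void *block) {
    free_block_t *b = (free_block_t *)block;
    b->next = *list;          /* reuse the freed block's storage as the link */
    *list = b;
  }

  static void *free_list_pop(free_block_t **list) {
    free_block_t *b = *list;
    if (b != NULL)
      *list = b->next;
    return b;                 /* NULL when the list is empty */
  }

  int main(void) {
    _Alignas(void *) char slot_a[64], slot_b[64];
    free_block_t *self_list = NULL;  /* plays the role of th_free_list_self */
    free_list_push(&self_list, slot_a);
    free_list_push(&self_list, slot_b);
    printf("%p %p\n", free_list_pop(&self_list),   /* slot_b, then slot_a */
           free_list_pop(&self_list));
    return 0;
  }
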
#if KMP_NESTED_HOT_TEAMS
-// Hot teams array keeps hot teams and their sizes for given thread.
-// Hot teams are not put in teams pool, and they don't put threads in threads pool.
+// Hot teams array keeps hot teams and their sizes for a given thread. Hot
+// teams are not put in the teams pool, and they do not put threads in the
+// threads pool.
typedef struct kmp_hot_team_ptr {
- kmp_team_p *hot_team; // pointer to hot_team of given nesting level
- kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
+ kmp_team_p *hot_team; // pointer to hot_team of given nesting level
+ kmp_int32 hot_team_nth; // number of threads allocated for the hot_team
} kmp_hot_team_ptr_t;
#endif
#if OMP_40_ENABLED
typedef struct kmp_teams_size {
- kmp_int32 nteams; // number of teams in a league
- kmp_int32 nth; // number of threads in each team of the league
+ kmp_int32 nteams; // number of teams in a league
+ kmp_int32 nth; // number of threads in each team of the league
} kmp_teams_size_t;
#endif
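
For reference, nteams and nth are the two values a teams construct requests.
The usage-level illustration below is an assumption about client code, not part
of this patch; host-level teams like this need an OpenMP 5.0 compiler (or an
enclosing target region):

  #include <omp.h>
  #include <stdio.h>

  int main(void) {
    /* num_teams(4) -> nteams = 4; thread_limit(3) -> nth capped at 3 */
    #pragma omp teams num_teams(4) thread_limit(3)
    {
      if (omp_get_team_num() == 0)
        printf("league of %d teams\n", omp_get_num_teams());
    }
    return 0;
  }
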
-/* ------------------------------------------------------------------------ */
// OpenMP thread data structures
-//
typedef struct KMP_ALIGN_CACHE kmp_base_info {
-/*
- * Start with the readonly data which is cache aligned and padded.
- * this is written before the thread starts working by the master.
- * (uber masters may update themselves later)
- * (usage does not consider serialized regions)
- */
- kmp_desc_t th_info;
- kmp_team_p *th_team; /* team we belong to */
- kmp_root_p *th_root; /* pointer to root of task hierarchy */
- kmp_info_p *th_next_pool; /* next available thread in the pool */
- kmp_disp_t *th_dispatch; /* thread's dispatch data */
- int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
-
- /* The following are cached from the team info structure */
- /* TODO use these in more places as determined to be needed via profiling */
- int th_team_nproc; /* number of threads in a team */
- kmp_info_p *th_team_master; /* the team's master thread */
- int th_team_serialized; /* team is serialized */
-#if OMP_40_ENABLED
- microtask_t th_teams_microtask; /* save entry address for teams construct */
- int th_teams_level; /* save initial level of teams construct */
- /* it is 0 on device but may be any on host */
-#endif
-
- /* The blocktime info is copied from the team struct to the thread sruct */
- /* at the start of a barrier, and the values stored in the team are used */
- /* at points in the code where the team struct is no longer guaranteed */
- /* to exist (from the POV of worker threads). */
+ /* Start with the readonly data which is cache aligned and padded. This is
+ written by the master before the thread starts working. Uber masters may
+ update themselves later. Usage does not consider serialized regions. */
+ kmp_desc_t th_info;
+ kmp_team_p *th_team; /* team we belong to */
+ kmp_root_p *th_root; /* pointer to root of task hierarchy */
+ kmp_info_p *th_next_pool; /* next available thread in the pool */
+ kmp_disp_t *th_dispatch; /* thread's dispatch data */
+ int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */
+
+ /* The following are cached from the team info structure */
+ /* TODO use these in more places as determined to be needed via profiling */
+ int th_team_nproc; /* number of threads in a team */
+ kmp_info_p *th_team_master; /* the team's master thread */
+ int th_team_serialized; /* team is serialized */
+#if OMP_40_ENABLED
+ microtask_t th_teams_microtask; /* save entry address for teams construct */
+ int th_teams_level; /* save initial level of teams construct */
+/* it is 0 on device but may be any value on host */
+#endif
+
+/* The blocktime info is copied from the team struct to the thread struct */
+/* at the start of a barrier, and the values stored in the team are used */
+/* at points in the code where the team struct is no longer guaranteed */
+/* to exist (from the POV of worker threads). */
#if KMP_USE_MONITOR
- int th_team_bt_intervals;
- int th_team_bt_set;
+ int th_team_bt_intervals;
+ int th_team_bt_set;
#else
- kmp_uint64 th_team_bt_intervals;
+ kmp_uint64 th_team_bt_intervals;
#endif
#if KMP_AFFINITY_SUPPORTED
- kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
+ kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
#endif
-/*
- * The data set by the master at reinit, then R/W by the worker
- */
- KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */
+ /* The data set by the master at reinit, then R/W by the worker */
+ KMP_ALIGN_CACHE int
+ th_set_nproc; /* if > 0, then only use this request for the next fork */
#if KMP_NESTED_HOT_TEAMS
- kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
+ kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
#endif
#if OMP_40_ENABLED
- kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
- kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */
-# if KMP_AFFINITY_SUPPORTED
- int th_current_place; /* place currently bound to */
- int th_new_place; /* place to bind to in par reg */
- int th_first_place; /* first place in partition */
- int th_last_place; /* last place in partition */
-# endif
+ kmp_proc_bind_t
+ th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
+ kmp_teams_size_t
+ th_teams_size; /* number of teams/threads in teams construct */
+#if KMP_AFFINITY_SUPPORTED
+ int th_current_place; /* place currently bound to */
+ int th_new_place; /* place to bind to in par reg */
+ int th_first_place; /* first place in partition */
+ int th_last_place; /* last place in partition */
+#endif
#endif
#if USE_ITT_BUILD
- kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
- kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
- kmp_uint64 th_frame_time; /* frame timestamp */
+ kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
+ kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
+ kmp_uint64 th_frame_time; /* frame timestamp */
#endif /* USE_ITT_BUILD */
- kmp_local_t th_local;
- struct private_common *th_pri_head;
+ kmp_local_t th_local;
+ struct private_common *th_pri_head;
-/*
- * Now the data only used by the worker (after initial allocation)
- */
- /* TODO the first serial team should actually be stored in the info_t
- * structure. this will help reduce initial allocation overhead */
- KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/
+ /* Now the data only used by the worker (after initial allocation) */
+ /* TODO the first serial team should actually be stored in the info_t
+ structure. this will help reduce initial allocation overhead */
+ KMP_ALIGN_CACHE kmp_team_p
+ *th_serial_team; /*serialized team held in reserve*/
#if OMPT_SUPPORT
- ompt_thread_info_t ompt_thread_info;
+ ompt_thread_info_t ompt_thread_info;
#endif
-/* The following are also read by the master during reinit */
- struct common_table *th_pri_common;
-
- volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
- /* while awaiting queuing lock acquire */
-
- volatile void *th_sleep_loc; // this points at a kmp_flag<T>
-
- ident_t *th_ident;
- unsigned th_x; // Random number generator data
- unsigned th_a; // Random number generator data
-
-/*
- * Tasking-related data for the thread
- */
- kmp_task_team_t * th_task_team; // Task team struct
- kmp_taskdata_t * th_current_task; // Innermost Task being executed
- kmp_uint8 th_task_state; // alternating 0/1 for task team identification
- kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels
- kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
- kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
- kmp_uint32 th_reap_state; // Non-zero indicates thread is not
- // tasking, thus safe to reap
-
- /*
- * More stuff for keeping track of active/sleeping threads
- * (this part is written by the worker thread)
- */
- kmp_uint8 th_active_in_pool; // included in count of
- // #active threads in pool
- int th_active; // ! sleeping
- // 32 bits for TCR/TCW
-
- struct cons_header * th_cons; // used for consistency check
-
-/*
- * Add the syncronizing data which is cache aligned and padded.
- */
- KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ];
+ /* The following are also read by the master during reinit */
+ struct common_table *th_pri_common;
- KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
+ volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
+ /* while awaiting queuing lock acquire */
-#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
- #define NUM_LISTS 4
- kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines
+ volatile void *th_sleep_loc; // this points at a kmp_flag<T>
+
+ ident_t *th_ident;
+ unsigned th_x; // Random number generator data
+ unsigned th_a; // Random number generator data
+
+ /* Tasking-related data for the thread */
+ kmp_task_team_t *th_task_team; // Task team struct
+ kmp_taskdata_t *th_current_task; // Innermost Task being executed
+ kmp_uint8 th_task_state; // alternating 0/1 for task team identification
+ kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state
+ // at nested levels
+ kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack
+ kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack
+ kmp_uint32 th_reap_state; // Non-zero indicates thread is not
+ // tasking, thus safe to reap
+
+ /* More stuff for keeping track of active/sleeping threads (this part is
+ written by the worker thread) */
+ kmp_uint8 th_active_in_pool; // included in count of #active threads in pool
+ int th_active; // ! sleeping; 32 bits for TCR/TCW
+ struct cons_header *th_cons; // used for consistency check
+
+ /* Add the synchronizing data which is cache aligned and padded. */
+ KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];
+
+ KMP_ALIGN_CACHE volatile kmp_int32
+ th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */
+
+#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)
+#define NUM_LISTS 4
+ kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory
+// allocation routines
#endif
#if KMP_OS_WINDOWS
- kmp_win32_cond_t th_suspend_cv;
- kmp_win32_mutex_t th_suspend_mx;
- int th_suspend_init;
+ kmp_win32_cond_t th_suspend_cv;
+ kmp_win32_mutex_t th_suspend_mx;
+ int th_suspend_init;
#endif
#if KMP_OS_UNIX
- kmp_cond_align_t th_suspend_cv;
- kmp_mutex_align_t th_suspend_mx;
- int th_suspend_init_count;
+ kmp_cond_align_t th_suspend_cv;
+ kmp_mutex_align_t th_suspend_mx;
+ int th_suspend_init_count;
#endif
#if USE_ITT_BUILD
- kmp_itt_mark_t th_itt_mark_single;
- // alignment ???
+ kmp_itt_mark_t th_itt_mark_single;
+// alignment ???
#endif /* USE_ITT_BUILD */
#if KMP_STATS_ENABLED
- kmp_stats_list* th_stats;
+ kmp_stats_list *th_stats;
#endif
} kmp_base_info_t;
typedef union KMP_ALIGN_CACHE kmp_info {
- double th_align; /* use worst case alignment */
- char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ];
- kmp_base_info_t th;
+ double th_align; /* use worst case alignment */
+ char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];
+ kmp_base_info_t th;
} kmp_info_t;
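
kmp_info follows the same pattern as the other per-object unions in this
header: the payload struct is overlaid with a double (worst-case scalar
alignment) and a char array whose size is rounded up to a cache-line multiple,
so consecutive array elements never share a line. A stand-alone sketch of that
idiom with a hypothetical PAD macro, not the runtime's KMP_PAD:

  #include <stdio.h>

  #define LINE 64
  #define PAD(type, sz) (((sizeof(type) + (sz) - 1) / (sz)) * (sz)) /* round up */

  typedef struct payload { int a; char b[70]; } payload_t;  /* 76 bytes here */

  typedef union padded_payload {
    double align;                    /* force worst-case scalar alignment */
    char pad[PAD(payload_t, LINE)];  /* rounds the union up to 128 bytes */
    payload_t p;
  } padded_payload_t;

  int main(void) {
    printf("payload %zu bytes, padded %zu bytes\n",
           sizeof(payload_t), sizeof(padded_payload_t)); /* 76, 128 */
    return 0;
  }
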
-/* ------------------------------------------------------------------------ */
// OpenMP thread team data structures
-//
-typedef struct kmp_base_data {
- volatile kmp_uint32 t_value;
-} kmp_base_data_t;
+
+typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
typedef union KMP_ALIGN_CACHE kmp_sleep_team {
- double dt_align; /* use worst case alignment */
- char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
- kmp_base_data_t dt;
+ double dt_align; /* use worst case alignment */
+ char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
+ kmp_base_data_t dt;
} kmp_sleep_team_t;
typedef union KMP_ALIGN_CACHE kmp_ordered_team {
- double dt_align; /* use worst case alignment */
- char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
- kmp_base_data_t dt;
+ double dt_align; /* use worst case alignment */
+ char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
+ kmp_base_data_t dt;
} kmp_ordered_team_t;
-typedef int (*launch_t)( int gtid );
+typedef int (*launch_t)(int gtid);
/* Minimum number of ARGV entries to malloc if necessary */
-#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
+#define KMP_MIN_MALLOC_ARGV_ENTRIES 100
-// Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we
-// have supported at least 96 bytes. Using a larger value for more space between the master write/worker
-// read section and read/write by all section seems to buy more performance on EPCC PARALLEL.
+// Set up how many argv pointers will fit in cache lines containing
+// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
+// larger value for more space between the master write/worker read section and
+// read/write by all section seems to buy more performance on EPCC PARALLEL.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
+#define KMP_INLINE_ARGV_BYTES \
+ (4 * CACHE_LINE - \
+ ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \
+ sizeof(kmp_int16) + sizeof(kmp_uint32)) % \
+ CACHE_LINE))
#else
-# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
+#define KMP_INLINE_ARGV_BYTES \
+ (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))
#endif
-#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
+#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)
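
Plugging in representative sizes makes the x86/x86_64 branch concrete. Assuming
a 64-byte CACHE_LINE and KMP_PTR_SKIP equal to an 8-byte pointer (both
assumptions, since neither is defined in this hunk), the fixed fields occupy 40
bytes, leaving 216 inline bytes and 27 argv entries:

  #include <stdio.h>

  int main(void) {
    /* Assumed sizes: LP64 pointers, 64-byte cache line. */
    const int cache_line = 64, ptr = 8, i = 4, i8 = 1, i16 = 2, u32 = 4;
    int fixed = 3 * ptr + 2 * i + 2 * i8 + i16 + u32;      /* 40 bytes  */
    int bytes = 4 * cache_line - fixed % cache_line;       /* 216 bytes */
    printf("inline argv: %d bytes, %d entries\n", bytes, bytes / ptr); /* 216, 27 */
    return 0;
  }
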
typedef struct KMP_ALIGN_CACHE kmp_base_team {
- // Synchronization Data ---------------------------------------------------------------------------------
- KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
- kmp_balign_team_t t_bar[ bs_last_barrier ];
- volatile int t_construct; // count of single directive encountered by team
- kmp_lock_t t_single_lock; // team specific lock
-
- // Master only -----------------------------------------------------------------------------------------
- KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
- int t_master_this_cons; // "this_construct" single counter of master in parent team
- ident_t *t_ident; // if volatile, have to change too much other crud to volatile too
- kmp_team_p *t_parent; // parent team
- kmp_team_p *t_next_pool; // next free team in the team pool
- kmp_disp_t *t_dispatch; // thread's dispatch data
- kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
+ // Synchronization Data
+ // ---------------------------------------------------------------------------
+ KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;
+ kmp_balign_team_t t_bar[bs_last_barrier];
+ volatile int t_construct; // count of single directive encountered by team
+ kmp_lock_t t_single_lock; // team specific lock
+
+ // Master only
+ // ---------------------------------------------------------------------------
+ KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
+ int t_master_this_cons; // "this_construct" single counter of master in parent
+ // team
+ ident_t *t_ident; // if volatile, have to change too much other crud to
+ // volatile too
+ kmp_team_p *t_parent; // parent team
+ kmp_team_p *t_next_pool; // next free team in the team pool
+ kmp_disp_t *t_dispatch; // thread's dispatch data
+ kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2
#if OMP_40_ENABLED
- kmp_proc_bind_t t_proc_bind; // bind type for par region
+ kmp_proc_bind_t t_proc_bind; // bind type for par region
#endif // OMP_40_ENABLED
#if USE_ITT_BUILD
- kmp_uint64 t_region_time; // region begin timestamp
+ kmp_uint64 t_region_time; // region begin timestamp
#endif /* USE_ITT_BUILD */
- // Master write, workers read --------------------------------------------------------------------------
- KMP_ALIGN_CACHE void **t_argv;
- int t_argc;
- int t_nproc; // number of threads in team
- microtask_t t_pkfn;
- launch_t t_invoke; // procedure to launch the microtask
+ // Master write, workers read
+ // --------------------------------------------------------------------------
+ KMP_ALIGN_CACHE void **t_argv;
+ int t_argc;
+ int t_nproc; // number of threads in team
+ microtask_t t_pkfn;
+ launch_t t_invoke; // procedure to launch the microtask
#if OMPT_SUPPORT
- ompt_team_info_t ompt_team_info;
- ompt_lw_taskteam_t *ompt_serialized_team_info;
+ ompt_team_info_t ompt_team_info;
+ ompt_lw_taskteam_t *ompt_serialized_team_info;
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- kmp_int8 t_fp_control_saved;
- kmp_int8 t_pad2b;
- kmp_int16 t_x87_fpu_control_word; // FP control regs
- kmp_uint32 t_mxcsr;
+ kmp_int8 t_fp_control_saved;
+ kmp_int8 t_pad2b;
+ kmp_int16 t_x87_fpu_control_word; // FP control regs
+ kmp_uint32 t_mxcsr;
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
+ void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];
- KMP_ALIGN_CACHE kmp_info_t **t_threads;
- kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
- int t_level; // nested parallel level
-
- KMP_ALIGN_CACHE int t_max_argc;
- int t_max_nproc; // maximum threads this team can handle (dynamicly expandable)
- int t_serialized; // levels deep of serialized teams
- dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
- int t_id; // team's id, assigned by debugger.
- int t_active_level; // nested active parallel level
- kmp_r_sched_t t_sched; // run-time schedule for the team
+ KMP_ALIGN_CACHE kmp_info_t **t_threads;
+ kmp_taskdata_t
+ *t_implicit_task_taskdata; // Taskdata for the thread's implicit task
+ int t_level; // nested parallel level
+
+ KMP_ALIGN_CACHE int t_max_argc;
+ int t_max_nproc; // max threads this team can handle (dynamically expandable)
+ int t_serialized; // levels deep of serialized teams
+ dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system
+ int t_id; // team's id, assigned by debugger.
+ int t_active_level; // nested active parallel level
+ kmp_r_sched_t t_sched; // run-time schedule for the team
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- int t_first_place; // first & last place in parent thread's partition.
- int t_last_place; // Restore these values to master after par region.
+ int t_first_place; // first & last place in parent thread's partition.
+ int t_last_place; // Restore these values to master after par region.
#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
- int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call
+ int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
+// omp_set_num_threads() call
- // Read/write by workers as well -----------------------------------------------------------------------
+// Read/write by workers as well
#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
- // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel'
- // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel'
- // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding.
- char dummy_padding[1024];
-#endif
- KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams.
- // for SERIALIZED teams nested 2 or more levels deep
-#if OMP_40_ENABLED
- kmp_int32 t_cancel_request; // typed flag to store request state of cancellation
-#endif
- int t_master_active; // save on fork, restore on join
- kmp_taskq_t t_taskq; // this team's task queue
- void *t_copypriv_data; // team specific pointer to copyprivate data array
- kmp_uint32 t_copyin_counter;
+ // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf
+ // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra
+ // padding serves to fix the performance of epcc 'parallel' and 'barrier' when
+ // CACHE_LINE=64. TODO: investigate more and get rid of this padding.
+ char dummy_padding[1024];
+#endif
+ // Internal control stack for additional nested teams.
+ KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;
+// for SERIALIZED teams nested 2 or more levels deep
+#if OMP_40_ENABLED
+ // typed flag to store request state of cancellation
+ kmp_int32 t_cancel_request;
+#endif
+ int t_master_active; // save on fork, restore on join
+ kmp_taskq_t t_taskq; // this team's task queue
+ void *t_copypriv_data; // team specific pointer to copyprivate data array
+ kmp_uint32 t_copyin_counter;
#if USE_ITT_BUILD
- void *t_stack_id; // team specific stack stitching id (for ittnotify)
+ void *t_stack_id; // team specific stack stitching id (for ittnotify)
#endif /* USE_ITT_BUILD */
} kmp_base_team_t;
union KMP_ALIGN_CACHE kmp_team {
- kmp_base_team_t t;
- double t_align; /* use worst case alignment */
- char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ];
+ kmp_base_team_t t;
+ double t_align; /* use worst case alignment */
+ char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];
};
-
typedef union KMP_ALIGN_CACHE kmp_time_global {
- double dt_align; /* use worst case alignment */
- char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
- kmp_base_data_t dt;
+ double dt_align; /* use worst case alignment */
+ char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];
+ kmp_base_data_t dt;
} kmp_time_global_t;
typedef struct kmp_base_global {
- /* cache-aligned */
- kmp_time_global_t g_time;
+ /* cache-aligned */
+ kmp_time_global_t g_time;
- /* non cache-aligned */
- volatile int g_abort;
- volatile int g_done;
+ /* non cache-aligned */
+ volatile int g_abort;
+ volatile int g_done;
- int g_dynamic;
- enum dynamic_mode g_dynamic_mode;
+ int g_dynamic;
+ enum dynamic_mode g_dynamic_mode;
} kmp_base_global_t;
typedef union KMP_ALIGN_CACHE kmp_global {
- kmp_base_global_t g;
- double g_align; /* use worst case alignment */
- char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ];
+ kmp_base_global_t g;
+ double g_align; /* use worst case alignment */
+ char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];
} kmp_global_t;
-
typedef struct kmp_base_root {
- // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel>= 0)
- // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch
- // overhead or keeping r_active
-
- volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
- // GEH: This is misnamed, should be r_in_parallel
- volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
- int r_in_parallel; /* keeps a count of active parallel regions per root */
- // GEH: This is misnamed, should be r_active_levels
- kmp_team_t *r_root_team;
- kmp_team_t *r_hot_team;
- kmp_info_t *r_uber_thread;
- kmp_lock_t r_begin_lock;
- volatile int r_begin;
- int r_blocktime; /* blocktime for this root and descendants */
+ // TODO: GEH - combine r_active with r_in_parallel then r_active ==
+ // (r_in_parallel >= 0)
+ // TODO: GEH - then replace r_active with t_active_levels if we can to reduce
+ // the synch overhead of keeping r_active
+ volatile int r_active; /* TRUE if some region in a nest has > 1 thread */
+ // GEH: This is misnamed, should be r_in_parallel
+ volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely.
+ int r_in_parallel; /* keeps a count of active parallel regions per root */
+ // GEH: This is misnamed, should be r_active_levels
+ kmp_team_t *r_root_team;
+ kmp_team_t *r_hot_team;
+ kmp_info_t *r_uber_thread;
+ kmp_lock_t r_begin_lock;
+ volatile int r_begin;
+ int r_blocktime; /* blocktime for this root and descendants */
} kmp_base_root_t;
typedef union KMP_ALIGN_CACHE kmp_root {
- kmp_base_root_t r;
- double r_align; /* use worst case alignment */
- char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ];
+ kmp_base_root_t r;
+ double r_align; /* use worst case alignment */
+ char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];
} kmp_root_t;
struct fortran_inx_info {
- kmp_int32 data;
+ kmp_int32 data;
};
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-extern int __kmp_settings;
-extern int __kmp_duplicate_library_ok;
+extern int __kmp_settings;
+extern int __kmp_duplicate_library_ok;
#if USE_ITT_BUILD
-extern int __kmp_forkjoin_frames;
-extern int __kmp_forkjoin_frames_mode;
+extern int __kmp_forkjoin_frames;
+extern int __kmp_forkjoin_frames_mode;
#endif
extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
-extern int __kmp_determ_red;
+extern int __kmp_determ_red;
#ifdef KMP_DEBUG
-extern int kmp_a_debug;
-extern int kmp_b_debug;
-extern int kmp_c_debug;
-extern int kmp_d_debug;
-extern int kmp_e_debug;
-extern int kmp_f_debug;
+extern int kmp_a_debug;
+extern int kmp_b_debug;
+extern int kmp_c_debug;
+extern int kmp_d_debug;
+extern int kmp_e_debug;
+extern int kmp_f_debug;
#endif /* KMP_DEBUG */
/* For debug information logging using rotating buffer */
-#define KMP_DEBUG_BUF_LINES_INIT 512
-#define KMP_DEBUG_BUF_LINES_MIN 1
+#define KMP_DEBUG_BUF_LINES_INIT 512
+#define KMP_DEBUG_BUF_LINES_MIN 1
-#define KMP_DEBUG_BUF_CHARS_INIT 128
-#define KMP_DEBUG_BUF_CHARS_MIN 2
+#define KMP_DEBUG_BUF_CHARS_INIT 128
+#define KMP_DEBUG_BUF_CHARS_MIN 2
-extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
-extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
-extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
-extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */
-
-extern char *__kmp_debug_buffer; /* Debug buffer itself */
-extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */
-extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
+extern int
+ __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */
+extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */
+extern int
+ __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */
+extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer
+ entry pointer */
+
+extern char *__kmp_debug_buffer; /* Debug buffer itself */
+extern int __kmp_debug_count; /* Counter for number of lines printed in buffer
+ so far */
+extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase
+ recommended in warnings */
/* end rotating debug buffer */
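
The rotating buffer these knobs control is, in essence, a fixed array of
fixed-width lines indexed by an ever-growing counter taken modulo the line
count. A minimal sketch under those assumptions, not the runtime's kmp_debug
implementation:

  #include <stdatomic.h>
  #include <stdio.h>

  #define BUF_LINES 512   /* mirrors KMP_DEBUG_BUF_LINES_INIT */
  #define BUF_CHARS 128   /* mirrors KMP_DEBUG_BUF_CHARS_INIT */

  static char debug_buffer[BUF_LINES][BUF_CHARS];
  static atomic_int debug_count;                  /* lines written so far */

  static void debug_log(const char *msg) {
    int slot = atomic_fetch_add(&debug_count, 1) % BUF_LINES; /* wrap around */
    snprintf(debug_buffer[slot], BUF_CHARS, "%s", msg);       /* truncate line */
  }

  int main(void) {
    for (int i = 0; i < 600; ++i)
      debug_log("spin");    /* the oldest 88 messages have been overwritten */
    printf("%d lines logged\n", atomic_load(&debug_count));
    return 0;
  }
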
#ifdef KMP_DEBUG
-extern int __kmp_par_range; /* +1 => only go par for constructs in range */
+extern int __kmp_par_range; /* +1 => only go par for constructs in range */
-#define KMP_PAR_RANGE_ROUTINE_LEN 1024
-extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
-#define KMP_PAR_RANGE_FILENAME_LEN 1024
-extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
-extern int __kmp_par_range_lb;
-extern int __kmp_par_range_ub;
+#define KMP_PAR_RANGE_ROUTINE_LEN 1024
+extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
+#define KMP_PAR_RANGE_FILENAME_LEN 1024
+extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
+extern int __kmp_par_range_lb;
+extern int __kmp_par_range_ub;
#endif
/* For printing out dynamic storage map for threads and teams */
-extern int __kmp_storage_map; /* True means print storage map for threads and teams */
-extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */
-extern int __kmp_storage_map_verbose_specified;
+extern int
+ __kmp_storage_map; /* True means print storage map for threads and teams */
+extern int __kmp_storage_map_verbose; /* True means storage map includes
+ placement info */
+extern int __kmp_storage_map_verbose_specified;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern kmp_cpuinfo_t __kmp_cpuinfo;
+extern kmp_cpuinfo_t __kmp_cpuinfo;
#endif
extern volatile int __kmp_init_serial;
@@ -2638,65 +2775,72 @@ extern int __kmp_version;
extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
/* Barrier algorithm types and options */
-extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
-extern kmp_uint32 __kmp_barrier_release_bb_dflt;
+extern kmp_uint32 __kmp_barrier_gather_bb_dflt;
+extern kmp_uint32 __kmp_barrier_release_bb_dflt;
extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
-extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ];
-extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ];
-extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ];
-extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ];
-extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ];
-extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ];
-extern char const *__kmp_barrier_type_name [ bs_last_barrier ];
-extern char const *__kmp_barrier_pattern_name [ bp_last_bar ];
+extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];
+extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];
+extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];
+extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];
+extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];
+extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];
+extern char const *__kmp_barrier_type_name[bs_last_barrier];
+extern char const *__kmp_barrier_pattern_name[bp_last_bar];
/* Global Locks */
-extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
-extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
-extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */
+extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */
+extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */
+extern kmp_bootstrap_lock_t
+ __kmp_exit_lock; /* exit() is not always thread-safe */
#if KMP_USE_MONITOR
-extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */
+extern kmp_bootstrap_lock_t
+ __kmp_monitor_lock; /* control monitor thread creation */
#endif
-extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
-
-extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
-extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
-extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
+extern kmp_bootstrap_lock_t
+ __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and
+ __kmp_threads expansion to co-exist */
+
+extern kmp_lock_t __kmp_global_lock; /* control OS/global access */
+extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */
+extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */
/* used for yielding spin-waits */
-extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
-extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */
+extern unsigned int __kmp_init_wait; /* initial number of spin-tests */
+extern unsigned int __kmp_next_wait; /* subsequent number of spin-tests */
extern enum library_type __kmp_library;
-extern enum sched_type __kmp_sched; /* default runtime scheduling */
-extern enum sched_type __kmp_static; /* default static scheduling method */
-extern enum sched_type __kmp_guided; /* default guided scheduling method */
-extern enum sched_type __kmp_auto; /* default auto scheduling method */
-extern int __kmp_chunk; /* default runtime chunk size */
+extern enum sched_type __kmp_sched; /* default runtime scheduling */
+extern enum sched_type __kmp_static; /* default static scheduling method */
+extern enum sched_type __kmp_guided; /* default guided scheduling method */
+extern enum sched_type __kmp_auto; /* default auto scheduling method */
+extern int __kmp_chunk; /* default runtime chunk size */
-extern size_t __kmp_stksize; /* stack size per thread */
+extern size_t __kmp_stksize; /* stack size per thread */
#if KMP_USE_MONITOR
-extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */
+extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */
#endif
-extern size_t __kmp_stkoffset; /* stack offset per thread */
-extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
+extern size_t __kmp_stkoffset; /* stack offset per thread */
+extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */
-extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
-extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
-extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
-extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */
-extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
-extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */
-extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
-extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
-extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */
-extern int __kmp_generate_warnings; /* should we issue warnings? */
-extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
+extern size_t
+ __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
+extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */
+extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */
+extern int __kmp_env_omp_stksize; /* was OMP_STACKSIZE specified? */
+extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS
+ specified? */
+extern int __kmp_env_omp_all_threads; /* was OMP_THREAD_LIMIT specified? */
+extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */
+extern int __kmp_env_checks; /* was KMP_CHECKS specified? */
+extern int
+ __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */
+extern int __kmp_generate_warnings; /* should we issue warnings? */
+extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */
#ifdef DEBUG_SUSPEND
-extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
+extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */
#endif
extern kmp_uint32 __kmp_yield_init;
@@ -2706,87 +2850,107 @@ extern kmp_uint32 __kmp_yield_next;
extern kmp_uint32 __kmp_yielding_on;
#endif
extern kmp_uint32 __kmp_yield_cycle;
-extern kmp_int32 __kmp_yield_on_count;
-extern kmp_int32 __kmp_yield_off_count;
+extern kmp_int32 __kmp_yield_on_count;
+extern kmp_int32 __kmp_yield_off_count;
/* ------------------------------------------------------------------------- */
-extern int __kmp_allThreadsSpecified;
+extern int __kmp_allThreadsSpecified;
-extern size_t __kmp_align_alloc;
+extern size_t __kmp_align_alloc;
/* following data protected by initialization routines */
-extern int __kmp_xproc; /* number of processors in the system */
-extern int __kmp_avail_proc; /* number of processors available to the process */
-extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
-extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
-extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */
-extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */
-extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */
-extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */
-extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */
-extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */
-extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */
-extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */
+extern int __kmp_xproc; /* number of processors in the system */
+extern int __kmp_avail_proc; /* number of processors available to the process */
+extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
+extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
+extern int
+ __kmp_max_nth; /* maximum total number of concurrently-existing threads */
+extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
+ __kmp_root */
+extern int __kmp_dflt_team_nth; /* default number of threads in a parallel
+ region a la OMP_NUM_THREADS */
+extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial
+ initialization */
+extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is
+ used (fixed) */
+extern int __kmp_tp_cached; /* whether threadprivate cache has been created
+ (__kmpc_threadprivate_cached()) */
+extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la
+ OMP_NESTED */
+extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
+ blocking (env setting) */
#if KMP_USE_MONITOR
-extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */
-extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */
+extern int
+ __kmp_monitor_wakeups; /* number of times monitor wakes up per second */
+extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before
+ blocking */
#endif
#ifdef KMP_ADJUST_BLOCKTIME
-extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
+extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */
#endif /* KMP_ADJUST_BLOCKTIME */
#ifdef KMP_DFLT_NTH_CORES
-extern int __kmp_ncores; /* Total number of cores for threads placement */
+extern int __kmp_ncores; /* Total number of cores for threads placement */
#endif
-extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */
+extern int
+ __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */
-extern int __kmp_need_register_atfork_specified;
-extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */
-extern int __kmp_gtid_mode; /* Method of getting gtid, values:
- 0 - not set, will be set at runtime
- 1 - using stack search
- 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS))
- 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only.
- */
-extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
+extern int __kmp_need_register_atfork_specified;
+extern int
+ __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
+ install fork handler */
+extern int __kmp_gtid_mode; /* Method of getting gtid, values:
+ 0 - not set, will be set at runtime
+ 1 - using stack search
+ 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS
+ X*) or TlsGetValue(Windows* OS))
+ 3 - static TLS (__declspec(thread) __kmp_gtid),
+ Linux* OS .so only. */
+extern int
+ __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
#ifdef KMP_TDATA_GTID
#if KMP_OS_WINDOWS
-extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
+extern __declspec(
+ thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
#else
extern __thread int __kmp_gtid;
-#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
+#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core \
+ compiler 20110316 doesn't accept __declspec */
#endif
-extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
-extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */
+extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */
+extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */
-extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */
-extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */
+extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork
+extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg
+extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxcsr */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
-extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in concurrent execution per team */
+extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested
+ parallelism enabled by default via
+ OMP_MAX_ACTIVE_LEVELS */
+extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in
+ concurrent execution per team */
#if KMP_NESTED_HOT_TEAMS
-extern int __kmp_hot_teams_mode;
-extern int __kmp_hot_teams_max_level;
+extern int __kmp_hot_teams_mode;
+extern int __kmp_hot_teams_max_level;
#endif
-# if KMP_OS_LINUX
+#if KMP_OS_LINUX
extern enum clock_function_type __kmp_clock_function;
extern int __kmp_clock_function_param;
-# endif /* KMP_OS_LINUX */
+#endif /* KMP_OS_LINUX */
#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS)
extern enum mic_type __kmp_mic_type;
#endif
-# ifdef USE_LOAD_BALANCE
-extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */
-# endif /* USE_LOAD_BALANCE */
+#ifdef USE_LOAD_BALANCE
+extern double __kmp_load_balance_interval; // load balance algorithm interval
+#endif /* USE_LOAD_BALANCE */
// OpenMP 3.1 - Nested num threads array
typedef struct kmp_nested_nthreads_t {
- int * nth;
- int size;
- int used;
+ int *nth;
+ int size;
+ int used;
} kmp_nested_nthreads_t;
extern kmp_nested_nthreads_t __kmp_nested_nth;
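
The nth/size/used trio holds the per-nesting-level thread counts parsed from a
comma-separated OMP_NUM_THREADS list. The hypothetical parsing sketch below
only illustrates that shape; the real parsing lives in kmp_settings.cpp and is
not shown in this hunk:

  #include <stdio.h>
  #include <stdlib.h>

  typedef struct { int *nth; int size; int used; } nested_nthreads_t;

  static void parse_nested_nth(nested_nthreads_t *nn, const char *value) {
    nn->size = 8;
    nn->used = 0;
    nn->nth = (int *)malloc(nn->size * sizeof(int));
    for (const char *p = value; *p != '\0'; /* advanced below */) {
      char *end;
      long n = strtol(p, &end, 10);       /* threads at this nesting level */
      if (end == p)
        break;                            /* not a number: stop parsing */
      if (nn->used == nn->size)
        nn->nth = (int *)realloc(nn->nth, (nn->size *= 2) * sizeof(int));
      nn->nth[nn->used++] = (int)n;
      p = (*end == ',') ? end + 1 : end;
    }
  }

  int main(void) {
    nested_nthreads_t nn;
    parse_nested_nth(&nn, "4,3,2");       /* e.g. OMP_NUM_THREADS=4,3,2 */
    printf("levels=%d, outermost=%d\n", nn.used, nn.nth[0]); /* 3, 4 */
    free(nn.nth);
    return 0;
  }
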
@@ -2795,290 +2959,313 @@ extern kmp_nested_nthreads_t __kmp_neste
// Parameters for the speculative lock backoff system.
struct kmp_adaptive_backoff_params_t {
- // Number of soft retries before it counts as a hard retry.
- kmp_uint32 max_soft_retries;
- // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to the right
- kmp_uint32 max_badness;
+ // Number of soft retries before it counts as a hard retry.
+ kmp_uint32 max_soft_retries;
+ // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to
+ // the right
+ kmp_uint32 max_badness;
};
extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;
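
The badness mask described above grows by shifting in a one on each hard
failure, so speculation backs off more aggressively the more often it fails. A
tiny illustration of that progression with assumed values, not kmp_lock.cpp:

  #include <stdint.h>
  #include <stdio.h>

  int main(void) {
    uint32_t badness = 0, max_badness = 15; /* cap after four hard failures */
    for (int fail = 0; fail < 6; ++fail) {
      printf("badness after %d hard failures: %u\n", fail, badness);
      if (badness < max_badness)
        badness = (badness << 1) | 1;       /* 0 -> 1 -> 3 -> 7 -> 15 */
    }
    return 0;
  }
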
#if KMP_DEBUG_ADAPTIVE_LOCKS
-extern char * __kmp_speculative_statsfile;
+extern char *__kmp_speculative_statsfile;
#endif
#endif // KMP_USE_ADAPTIVE_LOCKS
#if OMP_40_ENABLED
-extern int __kmp_display_env; /* TRUE or FALSE */
-extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
-extern int __kmp_omp_cancellation; /* TRUE or FALSE */
+extern int __kmp_display_env; /* TRUE or FALSE */
+extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
+extern int __kmp_omp_cancellation; /* TRUE or FALSE */
#endif
/* ------------------------------------------------------------------------- */
-/* --------------------------------------------------------------------------- */
/* the following are protected by the fork/join lock */
/* write: lock read: anytime */
-extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
+extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */
/* read/write: lock */
-extern volatile kmp_team_t * __kmp_team_pool;
-extern volatile kmp_info_t * __kmp_thread_pool;
+extern volatile kmp_team_t *__kmp_team_pool;
+extern volatile kmp_info_t *__kmp_thread_pool;
-/* total number of threads reachable from some root thread including all root threads*/
+// total num threads reachable from some root thread including all root threads
extern volatile int __kmp_nth;
-/* total number of threads reachable from some root thread including all root threads,
- and those in the thread pool */
+/* total number of threads reachable from some root thread including all root
+ threads, and those in the thread pool */
extern volatile int __kmp_all_nth;
extern int __kmp_thread_pool_nth;
extern volatile int __kmp_thread_pool_active_nth;
-extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
+extern kmp_root_t **__kmp_root; /* root of thread hierarchy */
/* end data protected by fork/join lock */
-/* --------------------------------------------------------------------------- */
+/* ------------------------------------------------------------------------- */
-extern kmp_global_t __kmp_global; /* global status */
+extern kmp_global_t __kmp_global; /* global status */
extern kmp_info_t __kmp_monitor;
-extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library.
-extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library.
+extern volatile kmp_uint32 __kmp_team_counter; // For Debugging Support Library
+extern volatile kmp_uint32 __kmp_task_counter; // For Debugging Support Library
#if USE_DEBUGGER
-#define _KMP_GEN_ID( counter ) \
- ( \
- __kmp_debugging \
- ? \
- KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \
- : \
- ~ 0 \
- )
-#else
-#define _KMP_GEN_ID( counter ) \
- ( \
- ~ 0 \
- )
+#define _KMP_GEN_ID(counter) \
+ (__kmp_debugging ? KMP_TEST_THEN_INC32((volatile kmp_int32 *)&counter) + 1 \
+ : ~0)
+#else
+#define _KMP_GEN_ID(counter) (~0)
#endif /* USE_DEBUGGER */
-#define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter )
-#define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter )
+#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)
+#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)
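
Team and task IDs are therefore only paid for when a debugger is attached: with
__kmp_debugging set, each call atomically bumps the counter, otherwise every
object gets the ~0 sentinel. A stand-alone sketch of the same pattern using C11
atomics in place of KMP_TEST_THEN_INC32 (an assumed equivalent, not the
runtime's primitive):

  #include <stdatomic.h>
  #include <stdio.h>

  static int debugging = 1;            /* stands in for __kmp_debugging */
  static atomic_int team_counter;      /* stands in for __kmp_team_counter */

  static int gen_team_id(void) {
    /* Real IDs 1,2,3,... only while debugging; otherwise a sentinel. */
    return debugging ? atomic_fetch_add(&team_counter, 1) + 1 : ~0;
  }

  int main(void) {
    printf("%d %d\n", gen_team_id(), gen_team_id()); /* 1 2 */
    debugging = 0;
    printf("%d\n", gen_team_id());                   /* -1, i.e. ~0 */
    return 0;
  }
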
/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... );
+extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,
+ size_t size, char const *format, ...);
+
+extern void __kmp_serial_initialize(void);
+extern void __kmp_middle_initialize(void);
+extern void __kmp_parallel_initialize(void);
+
+extern void __kmp_internal_begin(void);
+extern void __kmp_internal_end_library(int gtid);
+extern void __kmp_internal_end_thread(int gtid);
+extern void __kmp_internal_end_atexit(void);
+extern void __kmp_internal_end_fini(void);
+extern void __kmp_internal_end_dtor(void);
+extern void __kmp_internal_end_dest(void *);
-extern void __kmp_serial_initialize( void );
-extern void __kmp_middle_initialize( void );
-extern void __kmp_parallel_initialize( void );
-
-extern void __kmp_internal_begin( void );
-extern void __kmp_internal_end_library( int gtid );
-extern void __kmp_internal_end_thread( int gtid );
-extern void __kmp_internal_end_atexit( void );
-extern void __kmp_internal_end_fini( void );
-extern void __kmp_internal_end_dtor( void );
-extern void __kmp_internal_end_dest( void* );
-
-extern int __kmp_register_root( int initial_thread );
-extern void __kmp_unregister_root( int gtid );
+extern int __kmp_register_root(int initial_thread);
+extern void __kmp_unregister_root(int gtid);
-extern int __kmp_ignore_mppbeg( void );
-extern int __kmp_ignore_mppend( void );
+extern int __kmp_ignore_mppbeg(void);
+extern int __kmp_ignore_mppend(void);
-extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws );
-extern void __kmp_exit_single( int gtid );
+extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);
+extern void __kmp_exit_single(int gtid);
-extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
-extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
+extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
+extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
#ifdef USE_LOAD_BALANCE
-extern int __kmp_get_load_balance( int );
+extern int __kmp_get_load_balance(int);
#endif
#ifdef BUILD_TV
-extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr );
+extern void __kmp_tv_threadprivate_store(kmp_info_t *th, void *global_addr,
+ void *thread_addr);
#endif
-extern int __kmp_get_global_thread_id( void );
-extern int __kmp_get_global_thread_id_reg( void );
-extern void __kmp_exit_thread( int exit_status );
-extern void __kmp_abort( char const * format, ... );
-extern void __kmp_abort_thread( void );
-extern void __kmp_abort_process( void );
-extern void __kmp_warn( char const * format, ... );
+extern int __kmp_get_global_thread_id(void);
+extern int __kmp_get_global_thread_id_reg(void);
+extern void __kmp_exit_thread(int exit_status);
+extern void __kmp_abort(char const *format, ...);
+extern void __kmp_abort_thread(void);
+extern void __kmp_abort_process(void);
+extern void __kmp_warn(char const *format, ...);
+
+extern void __kmp_set_num_threads(int new_nth, int gtid);
+
+// Returns current thread (pointer to kmp_info_t). Current thread *must* be
+// registered.
+static inline kmp_info_t *__kmp_entry_thread() {
+ int gtid = __kmp_entry_gtid();
-extern void __kmp_set_num_threads( int new_nth, int gtid );
-
-// Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
-static inline kmp_info_t * __kmp_entry_thread()
-{
- int gtid = __kmp_entry_gtid();
-
- return __kmp_threads[gtid];
+ return __kmp_threads[gtid];
}
-extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
-extern int __kmp_get_max_active_levels( int gtid );
-extern int __kmp_get_ancestor_thread_num( int gtid, int level );
-extern int __kmp_get_team_size( int gtid, int level );
-extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
-extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
-
-extern unsigned short __kmp_get_random( kmp_info_t * thread );
-extern void __kmp_init_random( kmp_info_t * thread );
-
-extern kmp_r_sched_t __kmp_get_schedule_global( void );
-extern void __kmp_adjust_num_threads( int new_nproc );
-
-extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL );
-extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL );
-extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL );
-#define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR )
-#define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR )
-#define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR )
+extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);
+extern int __kmp_get_max_active_levels(int gtid);
+extern int __kmp_get_ancestor_thread_num(int gtid, int level);
+extern int __kmp_get_team_size(int gtid, int level);
+extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);
+extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);
+
+extern unsigned short __kmp_get_random(kmp_info_t *thread);
+extern void __kmp_init_random(kmp_info_t *thread);
+
+extern kmp_r_sched_t __kmp_get_schedule_global(void);
+extern void __kmp_adjust_num_threads(int new_nproc);
+
+extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);
+extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);
+extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);
+#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)
+#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)
+#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)
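
___kmp_allocate and friends receive the call site through KMP_SRC_LOC_DECL /
KMP_SRC_LOC_CURR, which are defined in kmp_wrapper_malloc.h and not shown here.
The sketch below illustrates that file/line-threading pattern with hypothetical
macros of its own; the runtime's macros may expand differently depending on
build flags:

  #include <stdio.h>
  #include <stdlib.h>

  #define SRC_LOC_DECL , char const *file, int line   /* hypothetical */
  #define SRC_LOC_CURR , __FILE__, __LINE__           /* hypothetical */

  static void *tracked_allocate(size_t size SRC_LOC_DECL) {
    void *p = malloc(size);
    fprintf(stderr, "alloc %zu bytes at %s:%d -> %p\n", size, file, line, p);
    return p;
  }
  #define my_allocate(size) tracked_allocate((size)SRC_LOC_CURR)

  int main(void) {
    void *p = my_allocate(64);   /* logs this file and line */
    free(p);
    return 0;
  }
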
#if USE_FAST_MEMORY
-extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL );
-extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL );
-extern void __kmp_free_fast_memory( kmp_info_t *this_thr );
-extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr );
-#define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
-#define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )
-#endif
-
-extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL );
-extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL );
-extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL );
-extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL );
-#define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR )
-#define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR )
-#define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR )
-#define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR )
-
-#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
-#define KMP_INTERNAL_FREE(p) free(p)
-#define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz))
-#define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz))
-
-extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads );
-
-#if OMP_40_ENABLED
-extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind );
-extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads );
-#endif
-
-extern void __kmp_yield( int cond );
-
-extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
- kmp_int32 chunk );
-extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
- kmp_int32 chunk );
-extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
- kmp_int64 chunk );
-extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
- kmp_int64 chunk );
-
-extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st );
-extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st );
-extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st );
-extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid,
- kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st );
-
-extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid );
-extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid );
-extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid );
-extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid );
-
+extern void *___kmp_fast_allocate(kmp_info_t *this_thr,
+ size_t size KMP_SRC_LOC_DECL);
+extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);
+extern void __kmp_free_fast_memory(kmp_info_t *this_thr);
+extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);
+#define __kmp_fast_allocate(this_thr, size) \
+ ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)
+#define __kmp_fast_free(this_thr, ptr) \
+ ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)
+#endif
+
+extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);
+extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
+ size_t elsize KMP_SRC_LOC_DECL);
+extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
+ size_t size KMP_SRC_LOC_DECL);
+extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
+#define __kmp_thread_malloc(th, size) \
+ ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)
+#define __kmp_thread_calloc(th, nelem, elsize) \
+ ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)
+#define __kmp_thread_realloc(th, ptr, size) \
+ ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)
+#define __kmp_thread_free(th, ptr) \
+ ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
+
+#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
+#define KMP_INTERNAL_FREE(p) free(p)
+#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))
+#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))
+
+extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
+
+#if OMP_40_ENABLED
+extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
+ kmp_proc_bind_t proc_bind);
+extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
+ int num_threads);
+#endif
+
+extern void __kmp_yield(int cond);
+
+extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int32 lb,
+ kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);
+extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint32 lb,
+ kmp_uint32 ub, kmp_int32 st,
+ kmp_int32 chunk);
+extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int64 lb,
+ kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);
+extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint64 lb,
+ kmp_uint64 ub, kmp_int64 st,
+ kmp_int64 chunk);
+
+extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_int32 *p_lb,
+ kmp_int32 *p_ub, kmp_int32 *p_st);
+extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_uint32 *p_lb,
+ kmp_uint32 *p_ub, kmp_int32 *p_st);
+extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_int64 *p_lb,
+ kmp_int64 *p_ub, kmp_int64 *p_st);
+extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 *p_last, kmp_uint64 *p_lb,
+ kmp_uint64 *p_ub, kmp_int64 *p_st);
+
+extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);
+extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);
+extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);
+extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);
#ifdef KMP_GOMP_COMPAT
-extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
- kmp_int32 chunk, int push_ws );
-extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
- kmp_int32 chunk, int push_ws );
-extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
- kmp_int64 chunk, int push_ws );
-extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
- enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
- kmp_int64 chunk, int push_ws );
-extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid );
-extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid );
-extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid );
-extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid );
+extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int32 lb,
+ kmp_int32 ub, kmp_int32 st,
+ kmp_int32 chunk, int push_ws);
+extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint32 lb,
+ kmp_uint32 ub, kmp_int32 st,
+ kmp_int32 chunk, int push_ws);
+extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_int64 lb,
+ kmp_int64 ub, kmp_int64 st,
+ kmp_int64 chunk, int push_ws);
+extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,
+ enum sched_type schedule, kmp_uint64 lb,
+ kmp_uint64 ub, kmp_int64 st,
+ kmp_int64 chunk, int push_ws);
+extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);
+extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);
+extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);
+extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);
#endif /* KMP_GOMP_COMPAT */
-
-extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker );
-extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
-extern void __kmp_wait_yield_4_ptr( void * spinner, kmp_uint32 checker, kmp_uint32 (* pred)( void *, kmp_uint32 ), void * obj );
+extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);
+extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);
+extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);
+extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);
+extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);
+extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner,
+ kmp_uint32 checker,
+ kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),
+ void *obj);
+extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker,
+ kmp_uint32 (*pred)(void *, kmp_uint32),
+ void *obj);
class kmp_flag_32;
class kmp_flag_64;
class kmp_flag_oncore;
-extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
+extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
+ int final_spin
#if USE_ITT_BUILD
- , void * itt_sync_obj
+ ,
+ void *itt_sync_obj
#endif
- );
+ );
extern void __kmp_release_64(kmp_flag_64 *flag);
-extern void __kmp_infinite_loop( void );
+extern void __kmp_infinite_loop(void);
-extern void __kmp_cleanup( void );
+extern void __kmp_cleanup(void);
#if KMP_HANDLE_SIGNALS
- extern int __kmp_handle_signals;
- extern void __kmp_install_signals( int parallel_init );
- extern void __kmp_remove_signals( void );
+extern int __kmp_handle_signals;
+extern void __kmp_install_signals(int parallel_init);
+extern void __kmp_remove_signals(void);
#endif
-extern void __kmp_clear_system_time( void );
-extern void __kmp_read_system_time( double *delta );
+extern void __kmp_clear_system_time(void);
+extern void __kmp_read_system_time(double *delta);
-extern void __kmp_check_stack_overlap( kmp_info_t *thr );
+extern void __kmp_check_stack_overlap(kmp_info_t *thr);
-extern void __kmp_expand_host_name( char *buffer, size_t size );
-extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern );
+extern void __kmp_expand_host_name(char *buffer, size_t size);
+extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */
+extern void
+__kmp_initialize_system_tick(void); /* Initialize timer tick value */
#endif
-extern void __kmp_runtime_initialize( void ); /* machine specific initialization */
-extern void __kmp_runtime_destroy( void );
+extern void
+__kmp_runtime_initialize(void); /* machine specific initialization */
+extern void __kmp_runtime_destroy(void);
#if KMP_AFFINITY_SUPPORTED
-extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask);
+extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
+ kmp_affin_mask_t *mask);
extern void __kmp_affinity_initialize(void);
extern void __kmp_affinity_uninitialize(void);
-extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
+extern void __kmp_affinity_set_init_mask(
+ int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
#if OMP_40_ENABLED
extern void __kmp_affinity_set_place(int gtid);
#endif
-extern void __kmp_affinity_determine_capable( const char *env_var );
+extern void __kmp_affinity_determine_capable(const char *env_var);
extern int __kmp_aux_set_affinity(void **mask);
extern int __kmp_aux_get_affinity(void **mask);
extern int __kmp_aux_get_affinity_max_proc();
extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
-extern void __kmp_balanced_affinity( int tid, int team_size );
+extern void __kmp_balanced_affinity(int tid, int team_size);
#endif /* KMP_AFFINITY_SUPPORTED */
extern void __kmp_cleanup_hierarchy();
@@ -3086,208 +3273,226 @@ extern void __kmp_get_hierarchy(kmp_uint
#if KMP_USE_FUTEX
-extern int __kmp_futex_determine_capable( void );
+extern int __kmp_futex_determine_capable(void);
#endif // KMP_USE_FUTEX
-extern void __kmp_gtid_set_specific( int gtid );
-extern int __kmp_gtid_get_specific( void );
+extern void __kmp_gtid_set_specific(int gtid);
+extern int __kmp_gtid_get_specific(void);
-extern double __kmp_read_cpu_time( void );
+extern double __kmp_read_cpu_time(void);
-extern int __kmp_read_system_info( struct kmp_sys_info *info );
+extern int __kmp_read_system_info(struct kmp_sys_info *info);
#if KMP_USE_MONITOR
-extern void __kmp_create_monitor( kmp_info_t *th );
+extern void __kmp_create_monitor(kmp_info_t *th);
#endif
-extern void *__kmp_launch_thread( kmp_info_t *thr );
+extern void *__kmp_launch_thread(kmp_info_t *thr);
-extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size );
+extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);
#if KMP_OS_WINDOWS
-extern int __kmp_still_running(kmp_info_t *th);
-extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val );
-extern void __kmp_free_handle( kmp_thread_t tHandle );
+extern int __kmp_still_running(kmp_info_t *th);
+extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);
+extern void __kmp_free_handle(kmp_thread_t tHandle);
#endif
#if KMP_USE_MONITOR
-extern void __kmp_reap_monitor( kmp_info_t *th );
+extern void __kmp_reap_monitor(kmp_info_t *th);
#endif
-extern void __kmp_reap_worker( kmp_info_t *th );
-extern void __kmp_terminate_thread( int gtid );
+extern void __kmp_reap_worker(kmp_info_t *th);
+extern void __kmp_terminate_thread(int gtid);
-extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag );
-extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag );
-extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag );
-extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag );
-extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag );
-extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag );
+extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag);
+extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag);
+extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);
+extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag);
+extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag);
+extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);
-extern void __kmp_elapsed( double * );
-extern void __kmp_elapsed_tick( double * );
+extern void __kmp_elapsed(double *);
+extern void __kmp_elapsed_tick(double *);
-extern void __kmp_enable( int old_state );
-extern void __kmp_disable( int *old_state );
+extern void __kmp_enable(int old_state);
+extern void __kmp_disable(int *old_state);
-extern void __kmp_thread_sleep( int millis );
+extern void __kmp_thread_sleep(int millis);
-extern void __kmp_common_initialize( void );
-extern void __kmp_common_destroy( void );
-extern void __kmp_common_destroy_gtid( int gtid );
+extern void __kmp_common_initialize(void);
+extern void __kmp_common_destroy(void);
+extern void __kmp_common_destroy_gtid(int gtid);
#if KMP_OS_UNIX
-extern void __kmp_register_atfork( void );
+extern void __kmp_register_atfork(void);
#endif
-extern void __kmp_suspend_initialize( void );
-extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th );
+extern void __kmp_suspend_initialize(void);
+extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);
-extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
- kmp_team_t *team, int tid);
+extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
+ int tid);
#if OMP_40_ENABLED
-extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
+extern kmp_team_t *
+__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
+ ompt_parallel_id_t ompt_parallel_id,
#endif
- kmp_proc_bind_t proc_bind,
- kmp_internal_control_t *new_icvs,
- int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
+ kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
#else
-extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
+extern kmp_team_t *
+__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
- ompt_parallel_id_t ompt_parallel_id,
+ ompt_parallel_id_t ompt_parallel_id,
#endif
- kmp_internal_control_t *new_icvs,
- int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) );
+ kmp_internal_control_t *new_icvs,
+ int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));
#endif // OMP_40_ENABLED
-extern void __kmp_free_thread( kmp_info_t * );
-extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) );
-extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
+extern void __kmp_free_thread(kmp_info_t *);
+extern void __kmp_free_team(kmp_root_t *,
+ kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));
+extern kmp_team_t *__kmp_reap_team(kmp_team_t *);
/* ------------------------------------------------------------------------ */
-extern void __kmp_initialize_bget( kmp_info_t *th );
-extern void __kmp_finalize_bget( kmp_info_t *th );
+extern void __kmp_initialize_bget(kmp_info_t *th);
+extern void __kmp_finalize_bget(kmp_info_t *th);
-KMP_EXPORT void *kmpc_malloc( size_t size );
-KMP_EXPORT void *kmpc_aligned_malloc( size_t size, size_t alignment );
-KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize );
-KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size );
-KMP_EXPORT void kmpc_free( void *ptr );
+KMP_EXPORT void *kmpc_malloc(size_t size);
+KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);
+KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);
+KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);
+KMP_EXPORT void kmpc_free(void *ptr);
-/* ------------------------------------------------------------------------ */
/* declarations for internal use */
-extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
- size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
-extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
+extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
+ size_t reduce_size, void *reduce_data,
+ void (*reduce)(void *, void *));
+extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);
/*!
- * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call.
+ * Tell the fork call which compiler generated the fork call, and therefore how
+ * to deal with the call.
*/
-enum fork_context_e
-{
- fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */
- fork_context_intel, /**< Called from Intel generated code. */
- fork_context_last
+enum fork_context_e {
+ fork_context_gnu, /**< Called from GNU generated code, so must not invoke the
+ microtask internally. */
+ fork_context_intel, /**< Called from Intel generated code. */
+ fork_context_last
};
-extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context,
- kmp_int32 argc,
+extern int __kmp_fork_call(ident_t *loc, int gtid,
+ enum fork_context_e fork_context, kmp_int32 argc,
#if OMPT_SUPPORT
- void *unwrapped_task,
+ void *unwrapped_task,
#endif
- microtask_t microtask, launch_t invoker,
+ microtask_t microtask, launch_t invoker,
/* TODO: revert workaround for Intel(R) 64 tracker #96 */
#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX
- va_list *ap
+ va_list *ap
#else
- va_list ap
+ va_list ap
#endif
- );
+ );
-extern void __kmp_join_call( ident_t *loc, int gtid
+extern void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
- , enum fork_context_e fork_context
+ ,
+ enum fork_context_e fork_context
#endif
#if OMP_40_ENABLED
- , int exit_teams = 0
+ ,
+ int exit_teams = 0
#endif
- );
+ );
extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);
-extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
-extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
-extern int __kmp_invoke_task_func( int gtid );
-extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
-extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
+extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);
+extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);
+extern int __kmp_invoke_task_func(int gtid);
+extern void __kmp_run_before_invoked_task(int gtid, int tid,
+ kmp_info_t *this_thr,
+ kmp_team_t *team);
+extern void __kmp_run_after_invoked_task(int gtid, int tid,
+ kmp_info_t *this_thr,
+ kmp_team_t *team);
// should never have been exported
-KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
+KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
#if OMP_40_ENABLED
-extern int __kmp_invoke_teams_master( int gtid );
-extern void __kmp_teams_master( int gtid );
+extern int __kmp_invoke_teams_master(int gtid);
+extern void __kmp_teams_master(int gtid);
#endif
-extern void __kmp_save_internal_controls( kmp_info_t * thread );
-extern void __kmp_user_set_library (enum library_type arg);
-extern void __kmp_aux_set_library (enum library_type arg);
-extern void __kmp_aux_set_stacksize( size_t arg);
-extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid);
-extern void __kmp_aux_set_defaults( char const * str, int len );
+extern void __kmp_save_internal_controls(kmp_info_t *thread);
+extern void __kmp_user_set_library(enum library_type arg);
+extern void __kmp_aux_set_library(enum library_type arg);
+extern void __kmp_aux_set_stacksize(size_t arg);
+extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);
+extern void __kmp_aux_set_defaults(char const *str, int len);
/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */
-void kmpc_set_blocktime (int arg);
-void ompc_set_nested( int flag );
-void ompc_set_dynamic( int flag );
-void ompc_set_num_threads( int arg );
-
-extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
- kmp_team_t *team, int tid );
-extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
-extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
- kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry );
-extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
- kmp_team_t *team, int tid, int set_curr_task );
+void kmpc_set_blocktime(int arg);
+void ompc_set_nested(int flag);
+void ompc_set_dynamic(int flag);
+void ompc_set_num_threads(int arg);
+
+extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,
+ kmp_team_t *team, int tid);
+extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);
+extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_tasking_flags_t *flags,
+ size_t sizeof_kmp_task_t,
+ size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry);
+extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
+ kmp_team_t *team, int tid,
+ int set_curr_task);
extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);
extern void __kmp_free_implicit_task(kmp_info_t *this_thr);
-
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
+int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,
+ kmp_flag_32 *flag, int final_spin,
int *thread_finished,
#if USE_ITT_BUILD
- void * itt_sync_obj,
+ void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
+int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,
+ kmp_flag_64 *flag, int final_spin,
int *thread_finished,
#if USE_ITT_BUILD
- void * itt_sync_obj,
+ void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
+int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
+ kmp_flag_oncore *flag, int final_spin,
int *thread_finished,
#if USE_ITT_BUILD
- void * itt_sync_obj,
+ void *itt_sync_obj,
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
-extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team );
-extern void __kmp_reap_task_teams( void );
-extern void __kmp_wait_to_unref_task_teams( void );
-extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always );
-extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team );
-extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team
+extern void __kmp_free_task_team(kmp_info_t *thread,
+ kmp_task_team_t *task_team);
+extern void __kmp_reap_task_teams(void);
+extern void __kmp_wait_to_unref_task_teams(void);
+extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
+ int always);
+extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);
+extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team
#if USE_ITT_BUILD
- , void * itt_sync_obj
+ ,
+ void *itt_sync_obj
#endif /* USE_ITT_BUILD */
- , int wait=1
-);
-extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
+ ,
+ int wait = 1);
+extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,
+ int gtid);
-extern int __kmp_is_address_mapped( void *addr );
+extern int __kmp_is_address_mapped(void *addr);
extern kmp_uint64 __kmp_hardware_timestamp(void);
#if KMP_OS_UNIX
-extern int __kmp_read_from_file( char const *path, char const *format, ... );
+extern int __kmp_read_from_file(char const *path, char const *format, ...);
#endif
/* ------------------------------------------------------------------------ */
@@ -3297,127 +3502,145 @@ extern int __kmp_read_from_file( char c
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-extern void __kmp_query_cpuid( kmp_cpuinfo_t *p );
+extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);
#define __kmp_load_mxcsr(p) _mm_setcsr(*(p))
-static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
+static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }
-extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
-extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p );
+extern void __kmp_load_x87_fpu_control_word(kmp_int16 *p);
+extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);
extern void __kmp_clear_x87_fpu_status_word();
-# define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
+#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[]
+extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
+ void *argv[]
#if OMPT_SUPPORT
- , void **exit_frame_ptr
+ ,
+ void **exit_frame_ptr
#endif
-);
-
+ );
/* ------------------------------------------------------------------------ */
-KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags );
-KMP_EXPORT void __kmpc_end ( ident_t * );
-
-KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor,
- kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length );
-KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor );
-KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size );
-
-KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * );
-KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * );
-KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * );
-KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * );
-
-KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * );
-KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... );
-
-KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT void __kmpc_flush ( ident_t *);
-KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
-KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
+KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);
+KMP_EXPORT void __kmpc_end(ident_t *);
+
+KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,
+ kmpc_ctor_vec ctor,
+ kmpc_cctor_vec cctor,
+ kmpc_dtor_vec dtor,
+ size_t vector_length);
+KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,
+ kmpc_ctor ctor, kmpc_cctor cctor,
+ kmpc_dtor dtor);
+KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,
+ void *data, size_t size);
+
+KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);
+KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);
+KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);
+KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);
+
+KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);
+KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,
+ kmpc_micro microtask, ...);
+
+KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);
+
+KMP_EXPORT void __kmpc_flush(ident_t *);
+KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
+ kmp_critical_name *);
+KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,
+ kmp_critical_name *);
#if OMP_45_ENABLED
-KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint );
+KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,
+ kmp_critical_name *, uintptr_t hint);
#endif
-KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid );
-
-KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid );
+KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);
-KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter,
- kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk );
-
-KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid );
-
-KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit );
-
-extern void KMPC_SET_NUM_THREADS ( int arg );
-extern void KMPC_SET_DYNAMIC ( int flag );
-extern void KMPC_SET_NESTED ( int flag );
-
-/* --------------------------------------------------------------------------- */
-
-/*
- * Taskq interface routines
- */
+KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,
+ kmp_int32 global_tid);
+
+KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
+
+KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 schedtype, kmp_int32 *plastiter,
+ kmp_int *plower, kmp_int *pupper,
+ kmp_int *pstride, kmp_int incr,
+ kmp_int chunk);
+
+KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
+
+KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
+ size_t cpy_size, void *cpy_data,
+ void (*cpy_func)(void *, void *),
+ kmp_int32 didit);
+
+extern void KMPC_SET_NUM_THREADS(int arg);
+extern void KMPC_SET_DYNAMIC(int flag);
+extern void KMPC_SET_NESTED(int flag);
+
+/* Taskq interface routines */
+KMP_EXPORT kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
+ kmpc_task_t taskq_task,
+ size_t sizeof_thunk,
+ size_t sizeof_shareds, kmp_int32 flags,
+ kmpc_shared_vars_t **shareds);
+KMP_EXPORT void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *thunk);
+KMP_EXPORT kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *thunk);
+KMP_EXPORT void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *thunk, kmp_int32 status);
+KMP_EXPORT void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *thunk);
+KMP_EXPORT kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *taskq_thunk,
+ kmpc_task_t task);
+
+/* OMP 3.0 tasking interface routines */
+KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task);
+KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 flags,
+ size_t sizeof_kmp_task_t,
+ size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry);
+KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *task);
+KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *task);
+KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task);
+KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);
-KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
- size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds);
-KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
-KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
-KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status);
-KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
-KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task);
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * OMP 3.0 tasking interface routines
- */
-
-KMP_EXPORT kmp_int32
-__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
-KMP_EXPORT kmp_task_t*
-__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry );
-KMP_EXPORT void
-__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
-KMP_EXPORT void
-__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
-KMP_EXPORT kmp_int32
-__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
-KMP_EXPORT kmp_int32
-__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid );
-
-KMP_EXPORT kmp_int32
-__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part );
+KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,
+ int end_part);
#if TASK_UNUSED
-void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
-void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
+void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);
+void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *task);
#endif // TASK_UNUSED
/* ------------------------------------------------------------------------ */
#if OMP_40_ENABLED
-KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
-KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
+KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);
+KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);
KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,
@@ -3432,154 +3655,169 @@ extern void __kmp_release_deps(kmp_int32
extern void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h);
extern void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h);
-extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate );
+extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
+ bool serialize_immediate);
-KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
-KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind);
-KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid);
+KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 cncl_kind);
+KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);
KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);
#if OMP_45_ENABLED
-KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask );
-KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask );
-KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, kmp_int32 if_val,
- kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
- kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize, void * task_dup );
+KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);
+KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);
+KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
+ kmp_int32 if_val, kmp_uint64 *lb,
+ kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,
+ kmp_int32 sched, kmp_uint64 grainsize,
+ void *task_dup);
#endif
// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
-KMP_EXPORT void* __kmpc_task_reduction_init(int gtid, int num_data, void *data);
-KMP_EXPORT void* __kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
+KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
+KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
#endif
#endif
-
-/*
- * Lock interface routines (fast versions with gtid passed in)
- */
-KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
-KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
+/* Lock interface routines (fast versions with gtid passed in) */
+KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
+KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
+KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);
+KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,
+ void **user_lock);
#if OMP_45_ENABLED
-KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
-KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint );
-#endif
-
-/* ------------------------------------------------------------------------ */
-
-/*
- * Interface to fast scalable reduce methods routines
- */
-
-KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size,
- void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck );
-KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
-KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size,
- void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck );
-KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
-
-/*
- * internal fast reduction routines
- */
-
-extern PACKED_REDUCTION_METHOD_T
-__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
- kmp_int32 num_vars, size_t reduce_size,
- void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
- kmp_critical_name *lck );
+KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,
+ void **user_lock, uintptr_t hint);
+KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
+ void **user_lock,
+ uintptr_t hint);
+#endif
+
+/* Interface to fast scalable reduce methods routines */
+
+KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(
+ ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
+ void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck);
+KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
+ kmp_critical_name *lck);
+KMP_EXPORT kmp_int32 __kmpc_reduce(
+ ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
+ void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck);
+KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
+ kmp_critical_name *lck);
+
+/* Internal fast reduction routines */
+
+extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(
+ ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
+ void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
+ kmp_critical_name *lck);
// this function is for testing set/get/determine reduce method
-KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
+KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);
KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
// C++ port
// missing 'extern "C"' declarations
-KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc );
-KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid );
-KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
+KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
+KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
+KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_threads);
#if OMP_40_ENABLED
-KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
-KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads );
-KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
+KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
+ int proc_bind);
+KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_teams,
+ kmp_int32 num_threads);
+KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
+ kmpc_micro microtask, ...);
#endif
#if OMP_45_ENABLED
-struct kmp_dim { // loop bounds info casted to kmp_int64
- kmp_int64 lo; // lower
- kmp_int64 up; // upper
- kmp_int64 st; // stride
-};
-KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims, struct kmp_dim * dims);
-KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec);
-KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec);
+struct kmp_dim { // loop bounds info casted to kmp_int64
+ kmp_int64 lo; // lower
+ kmp_int64 up; // upper
+ kmp_int64 st; // stride
+};
+KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
+ kmp_int32 num_dims, struct kmp_dim *dims);
+KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,
+ kmp_int64 *vec);
+KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,
+ kmp_int64 *vec);
KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
#endif
-KMP_EXPORT void*
-__kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid,
- void * data, size_t size, void *** cache );
+KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,
+ void *data, size_t size,
+ void ***cache);
// Symbols for MS mutual detection.
extern int _You_must_link_with_exactly_one_OpenMP_library;
extern int _You_must_link_with_Intel_OpenMP_library;
-#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
- extern int _You_must_link_with_Microsoft_OpenMP_library;
+#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4)
+extern int _You_must_link_with_Microsoft_OpenMP_library;
#endif
// The routines below are not exported.
// Consider making them 'static' in corresponding source files.
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
+ void *data_addr, size_t pc_size);
+struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
+ void *data_addr,
+ size_t pc_size);
-//
// ompc_, kmpc_ entries moved from omp.h.
-//
#if KMP_OS_WINDOWS
-# define KMPC_CONVENTION __cdecl
+#define KMPC_CONVENTION __cdecl
#else
-# define KMPC_CONVENTION
+#define KMPC_CONVENTION
#endif
#ifndef __OMP_H
typedef enum omp_sched_t {
- omp_sched_static = 1,
- omp_sched_dynamic = 2,
- omp_sched_guided = 3,
- omp_sched_auto = 4
+ omp_sched_static = 1,
+ omp_sched_dynamic = 2,
+ omp_sched_guided = 3,
+ omp_sched_auto = 4
} omp_sched_t;
-typedef void * kmp_affinity_mask_t;
+typedef void *kmp_affinity_mask_t;
#endif
KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);
KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);
-KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
-KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
-KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
-KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
-KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);
+KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);
+KMP_EXPORT int KMPC_CONVENTION
+kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION
+kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);
+KMP_EXPORT int KMPC_CONVENTION
+kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
@@ -3592,4 +3830,3 @@ KMP_EXPORT void KMPC_CONVENTION kmpc_set
#endif
#endif /* KMP_H */
-
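
The __kmp_allocate / ___kmp_allocate pairs near the top of this header show the runtime's source-location-forwarding allocator pattern: the user-facing macro appends KMP_SRC_LOC_CURR (the caller's file and line) to the argument list, and the underlying function declares the matching trailing parameters with KMP_SRC_LOC_DECL, so a debug build can attribute each allocation to its call site while a release build compiles the extra arguments away. The fragment below is only a minimal, self-contained sketch of that technique using hypothetical names (my_alloc, MY_SRC_LOC_*); it is not part of the patch and not the runtime's actual implementation, whose real macros live in kmp_wrapper_malloc.h.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for KMP_SRC_LOC_DECL / KMP_SRC_LOC_CURR: in a
   debug build they smuggle the caller's file/line into the allocator;
   otherwise they expand to nothing.  Each macro begins with a comma,
   which is why the call sites above read (size)KMP_SRC_LOC_CURR with
   no separating comma. */
#ifdef MY_DEBUG
#define MY_SRC_LOC_DECL , const char *file_, int line_
#define MY_SRC_LOC_CURR , __FILE__, __LINE__
#else
#define MY_SRC_LOC_DECL
#define MY_SRC_LOC_CURR
#endif

static void *my_alloc_impl(size_t size MY_SRC_LOC_DECL) {
#ifdef MY_DEBUG
  /* Debug builds record where the request came from. */
  fprintf(stderr, "alloc %zu bytes at %s:%d\n", size, file_, line_);
#endif
  return malloc(size);
}

/* User-facing macro, analogous in shape to __kmp_allocate(size). */
#define my_alloc(size) my_alloc_impl((size)MY_SRC_LOC_CURR)

int main(void) {
  void *p = my_alloc(128); /* with -DMY_DEBUG this logs the call site */
  free(p);
  return 0;
}

Compiled with -DMY_DEBUG the wrapper reports the requesting file and line; without it the call collapses to a plain malloc, which is essentially the trade-off the KMP_SRC_LOC machinery provides for the debug allocator.
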
Modified: openmp/trunk/runtime/src/kmp_affinity.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.cpp (original)
+++ openmp/trunk/runtime/src/kmp_affinity.cpp Fri May 12 13:01:32 2017
@@ -14,156 +14,149 @@
#include "kmp.h"
+#include "kmp_affinity.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
#include "kmp_wrapper_getpid.h"
-#include "kmp_affinity.h"
// Store the real or imagined machine hierarchy here
static hierarchy_info machine_hierarchy;
-void __kmp_cleanup_hierarchy() {
- machine_hierarchy.fini();
-}
+void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
+
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
- kmp_uint32 depth;
- // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier.
- if (TCR_1(machine_hierarchy.uninitialized))
- machine_hierarchy.init(NULL, nproc);
-
- // Adjust the hierarchy in case num threads exceeds original
- if (nproc > machine_hierarchy.base_num_threads)
- machine_hierarchy.resize(nproc);
-
- depth = machine_hierarchy.depth;
- KMP_DEBUG_ASSERT(depth > 0);
-
- thr_bar->depth = depth;
- thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
- thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
+ kmp_uint32 depth;
+ // The test below is true if affinity is available, but set to "none". Need to
+ // init on first use of hierarchical barrier.
+ if (TCR_1(machine_hierarchy.uninitialized))
+ machine_hierarchy.init(NULL, nproc);
+
+ // Adjust the hierarchy in case num threads exceeds original
+ if (nproc > machine_hierarchy.base_num_threads)
+ machine_hierarchy.resize(nproc);
+
+ depth = machine_hierarchy.depth;
+ KMP_DEBUG_ASSERT(depth > 0);
+
+ thr_bar->depth = depth;
+ thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
+ thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
#if KMP_AFFINITY_SUPPORTED
bool KMPAffinity::picked_api = false;
-void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
-void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
-void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); }
-void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); }
-void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
-void KMPAffinity::operator delete(void* p) { __kmp_free(p); }
+void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
+void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
+void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
+void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
+void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
+void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::pick_api() {
- KMPAffinity* affinity_dispatch;
- if (picked_api)
- return;
+ KMPAffinity *affinity_dispatch;
+ if (picked_api)
+ return;
#if KMP_USE_HWLOC
- if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
- affinity_dispatch = new KMPHwlocAffinity();
- } else
+ if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+ affinity_dispatch = new KMPHwlocAffinity();
+ } else
#endif
- {
- affinity_dispatch = new KMPNativeAffinity();
- }
- __kmp_affinity_dispatch = affinity_dispatch;
- picked_api = true;
+ {
+ affinity_dispatch = new KMPNativeAffinity();
+ }
+ __kmp_affinity_dispatch = affinity_dispatch;
+ picked_api = true;
}
void KMPAffinity::destroy_api() {
- if (__kmp_affinity_dispatch != NULL) {
- delete __kmp_affinity_dispatch;
- __kmp_affinity_dispatch = NULL;
- picked_api = false;
- }
+ if (__kmp_affinity_dispatch != NULL) {
+ delete __kmp_affinity_dispatch;
+ __kmp_affinity_dispatch = NULL;
+ picked_api = false;
+ }
}
-//
// Print the affinity mask to the character array in a pretty format.
-//
-char *
-__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
-{
- KMP_ASSERT(buf_len >= 40);
- char *scan = buf;
- char *end = buf + buf_len - 1;
-
- //
- // Find first element / check for empty set.
- //
- size_t i;
- i = mask->begin();
- if (i == mask->end()) {
- KMP_SNPRINTF(scan, end-scan+1, "{<empty>}");
- while (*scan != '\0') scan++;
- KMP_ASSERT(scan <= end);
- return buf;
- }
-
- KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i);
- while (*scan != '\0') scan++;
- i++;
- for (; i != mask->end(); i = mask->next(i)) {
- if (! KMP_CPU_ISSET(i, mask)) {
- continue;
- }
-
- //
- // Check for buffer overflow. A string of the form ",<n>" will have
- // at most 10 characters, plus we want to leave room to print ",...}"
- // if the set is too large to print for a total of 15 characters.
- // We already left room for '\0' in setting end.
- //
- if (end - scan < 15) {
- break;
- }
- KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i);
- while (*scan != '\0') scan++;
- }
- if (i != mask->end()) {
- KMP_SNPRINTF(scan, end-scan+1, ",...");
- while (*scan != '\0') scan++;
- }
- KMP_SNPRINTF(scan, end-scan+1, "}");
- while (*scan != '\0') scan++;
+char *__kmp_affinity_print_mask(char *buf, int buf_len,
+ kmp_affin_mask_t *mask) {
+ KMP_ASSERT(buf_len >= 40);
+ char *scan = buf;
+ char *end = buf + buf_len - 1;
+
+ // Find first element / check for empty set.
+ size_t i;
+ i = mask->begin();
+ if (i == mask->end()) {
+ KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
+ while (*scan != '\0')
+ scan++;
KMP_ASSERT(scan <= end);
return buf;
-}
+ }
+ KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
+ while (*scan != '\0')
+ scan++;
+ i++;
+ for (; i != mask->end(); i = mask->next(i)) {
+ if (!KMP_CPU_ISSET(i, mask)) {
+ continue;
+ }
+
+ // Check for buffer overflow. A string of the form ",<n>" will have at most
+ // 10 characters, plus we want to leave room to print ",...}" if the set is
+ // too large to print for a total of 15 characters. We already left room for
+ // '\0' in setting end.
+ if (end - scan < 15) {
+ break;
+ }
+ KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
+ while (*scan != '\0')
+ scan++;
+ }
+ if (i != mask->end()) {
+ KMP_SNPRINTF(scan, end - scan + 1, ",...");
+ while (*scan != '\0')
+ scan++;
+ }
+ KMP_SNPRINTF(scan, end - scan + 1, "}");
+ while (*scan != '\0')
+ scan++;
+ KMP_ASSERT(scan <= end);
+ return buf;
+}
-void
-__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
-{
- KMP_CPU_ZERO(mask);
+void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
+ KMP_CPU_ZERO(mask);
-# if KMP_GROUP_AFFINITY
+#if KMP_GROUP_AFFINITY
- if (__kmp_num_proc_groups > 1) {
- int group;
- KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
- for (group = 0; group < __kmp_num_proc_groups; group++) {
- int i;
- int num = __kmp_GetActiveProcessorCount(group);
- for (i = 0; i < num; i++) {
- KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
- }
- }
+ if (__kmp_num_proc_groups > 1) {
+ int group;
+ KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
+ for (group = 0; group < __kmp_num_proc_groups; group++) {
+ int i;
+ int num = __kmp_GetActiveProcessorCount(group);
+ for (i = 0; i < num; i++) {
+ KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
+ }
}
- else
+ } else
-# endif /* KMP_GROUP_AFFINITY */
+#endif /* KMP_GROUP_AFFINITY */
- {
- int proc;
- for (proc = 0; proc < __kmp_xproc; proc++) {
- KMP_CPU_SET(proc, mask);
- }
+ {
+ int proc;
+ for (proc = 0; proc < __kmp_xproc; proc++) {
+ KMP_CPU_SET(proc, mask);
}
+ }
}
-//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the child_num
// vector of the address object. This is done in case the labels used for
@@ -175,59 +168,53 @@ __kmp_affinity_entire_machine_mask(kmp_a
// because we are paying attention to the labels themselves, not the ordinal
// child numbers. By using the child numbers in the sort, the result is
// {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604.
-//
-static void
-__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
- int numAddrs)
-{
- KMP_DEBUG_ASSERT(numAddrs > 0);
- int depth = address2os->first.depth;
- unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *lastLabel = (unsigned *)__kmp_allocate(depth
- * sizeof(unsigned));
- int labCt;
+static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
+ int numAddrs) {
+ KMP_DEBUG_ASSERT(numAddrs > 0);
+ int depth = address2os->first.depth;
+ unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ int labCt;
+ for (labCt = 0; labCt < depth; labCt++) {
+ address2os[0].first.childNums[labCt] = counts[labCt] = 0;
+ lastLabel[labCt] = address2os[0].first.labels[labCt];
+ }
+ int i;
+ for (i = 1; i < numAddrs; i++) {
for (labCt = 0; labCt < depth; labCt++) {
- address2os[0].first.childNums[labCt] = counts[labCt] = 0;
- lastLabel[labCt] = address2os[0].first.labels[labCt];
- }
- int i;
- for (i = 1; i < numAddrs; i++) {
- for (labCt = 0; labCt < depth; labCt++) {
- if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
- int labCt2;
- for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
- counts[labCt2] = 0;
- lastLabel[labCt2] = address2os[i].first.labels[labCt2];
- }
- counts[labCt]++;
- lastLabel[labCt] = address2os[i].first.labels[labCt];
- break;
- }
- }
- for (labCt = 0; labCt < depth; labCt++) {
- address2os[i].first.childNums[labCt] = counts[labCt];
- }
- for (; labCt < (int)Address::maxDepth; labCt++) {
- address2os[i].first.childNums[labCt] = 0;
+ if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
+ int labCt2;
+ for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
+ counts[labCt2] = 0;
+ lastLabel[labCt2] = address2os[i].first.labels[labCt2];
}
+ counts[labCt]++;
+ lastLabel[labCt] = address2os[i].first.labels[labCt];
+ break;
+ }
}
- __kmp_free(lastLabel);
- __kmp_free(counts);
+ for (labCt = 0; labCt < depth; labCt++) {
+ address2os[i].first.childNums[labCt] = counts[labCt];
+ }
+ for (; labCt < (int)Address::maxDepth; labCt++) {
+ address2os[i].first.childNums[labCt] = 0;
+ }
+ }
+ __kmp_free(lastLabel);
+ __kmp_free(counts);
}
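To make the renumbering above concrete, here is a standalone C++ sketch (illustrative only; it is not the runtime's code, and plain vectors stand in for the Address/AddrUnsPair machinery) that applies the same per-level counting to the sorted labels from the comment's example and reproduces {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604:

// Minimal sketch of the child-number assignment described above.
#include <cstdio>
#include <vector>

int main() {
  // Two-level labels {pkg, core-id} for four OS procs, already sorted by
  // label, matching the 601/602/603/604 example in the comment.
  std::vector<std::vector<unsigned>> labels = {
      {6, 601}, {6, 603}, {7, 602}, {7, 604}};
  const int depth = 2;
  std::vector<unsigned> counts(depth, 0);
  std::vector<unsigned> last = labels[0];
  std::vector<std::vector<unsigned>> childNums(
      labels.size(), std::vector<unsigned>(depth, 0));
  for (size_t i = 1; i < labels.size(); ++i) {
    for (int l = 0; l < depth; ++l) {
      if (labels[i][l] != last[l]) {
        // A label changed at level l: reset deeper counters, bump this one.
        for (int l2 = l + 1; l2 < depth; ++l2) {
          counts[l2] = 0;
          last[l2] = labels[i][l2];
        }
        counts[l]++;
        last[l] = labels[i][l];
        break;
      }
    }
    childNums[i] = counts;
  }
  for (size_t i = 0; i < labels.size(); ++i)
    std::printf("labels {%u,%u} -> childNums {%u,%u}\n", labels[i][0],
                labels[i][1], childNums[i][0], childNums[i][1]);
  return 0;
}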
-
-//
// All of the __kmp_affinity_create_*_map() routines should set
// __kmp_affinity_masks to a vector of affinity mask objects of length
-// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and
-// return the number of levels in the machine topology tree (zero if
+// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return
+// the number of levels in the machine topology tree (zero if
// __kmp_affinity_type == affinity_none).
//
-// All of the __kmp_affinity_create_*_map() routines should set *__kmp_affin_fullMask
-// to the affinity mask for the initialization thread. They need to save and
-// restore the mask, and it could be needed later, so saving it is just an
-// optimization to avoid calling kmp_get_system_affinity() again.
-//
+// All of the __kmp_affinity_create_*_map() routines should set
+// *__kmp_affin_fullMask to the affinity mask for the initialization thread.
+// They need to save and restore the mask, and it could be needed later, so
+// saving it is just an optimization to avoid calling kmp_get_system_affinity()
+// again.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
static int nCoresPerPkg, nPackages;
@@ -237,58 +224,45 @@ static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;
-//
// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map()
// or __kmp_affinity_create_x2apicid_map().
-//
-inline static bool
-__kmp_affinity_uniform_topology()
-{
- return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
+inline static bool __kmp_affinity_uniform_topology() {
+ return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
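For example (assumed numbers, for illustration only): with nPackages = 2, nCoresPerPkg = 8 and __kmp_nThreadsPerCore = 2, the check above reports a uniform topology only when __kmp_avail_proc == 2 * 8 * 2 = 32; if the process mask exposes, say, only 30 of those hardware threads, the product no longer matches and the topology is treated as non-uniform.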
-
-//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
-//
-static void
-__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
- int pkgLevel, int coreLevel, int threadLevel)
-{
- int proc;
+static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
+ int depth, int pkgLevel,
+ int coreLevel, int threadLevel) {
+ int proc;
- KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
- for (proc = 0; proc < len; proc++) {
- int level;
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
- for (level = 0; level < depth; level++) {
- if (level == threadLevel) {
- __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
- }
- else if (level == coreLevel) {
- __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
- }
- else if (level == pkgLevel) {
- __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
- }
- else if (level > pkgLevel) {
- __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
- level - pkgLevel - 1);
- }
- else {
- __kmp_str_buf_print(&buf, "L%d ", level);
- }
- __kmp_str_buf_print(&buf, "%d ",
- address2os[proc].first.labels[level]);
- }
- KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
- buf.str);
- __kmp_str_buf_free(&buf);
+ KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
+ for (proc = 0; proc < len; proc++) {
+ int level;
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ for (level = 0; level < depth; level++) {
+ if (level == threadLevel) {
+ __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
+ } else if (level == coreLevel) {
+ __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
+ } else if (level == pkgLevel) {
+ __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
+ } else if (level > pkgLevel) {
+ __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
+ level - pkgLevel - 1);
+ } else {
+ __kmp_str_buf_print(&buf, "L%d ", level);
+ }
+ __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
}
+ KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
+ buf.str);
+ __kmp_str_buf_free(&buf);
+ }
}
#if KMP_USE_HWLOC
@@ -298,2734 +272,2423 @@ __kmp_affinity_print_topology(AddrUnsPai
// have one thread context per core, we don't want the extra thread context
// level if it offers no unique labels. So they are removed.
// return value: the new depth of address2os
-static int
-__kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) {
- int level;
- int i;
- int radix1_detected;
-
- for (level = depth-1; level >= 0; --level) {
- // Always keep the package level
- if (level == *pkgLevel)
- continue;
- // Detect if this level is radix 1
- radix1_detected = 1;
- for (i = 1; i < nActiveThreads; ++i) {
- if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) {
- // There are differing label values for this level so it stays
- radix1_detected = 0;
- break;
- }
- }
- if (!radix1_detected)
- continue;
- // Radix 1 was detected
- if (level == *threadLevel) {
- // If only one thread per core, then just decrement
- // the depth which removes the threadlevel from address2os
- for (i = 0; i < nActiveThreads; ++i) {
- address2os[i].first.depth--;
- }
- *threadLevel = -1;
- } else if (level == *coreLevel) {
- // For core level, we move the thread labels over if they are still
- // valid (*threadLevel != -1), and also reduce the depth another level
- for (i = 0; i < nActiveThreads; ++i) {
- if (*threadLevel != -1) {
- address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel];
- }
- address2os[i].first.depth--;
- }
- *coreLevel = -1;
+static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
+ int nActiveThreads, int depth,
+ int *pkgLevel, int *coreLevel,
+ int *threadLevel) {
+ int level;
+ int i;
+ int radix1_detected;
+
+ for (level = depth - 1; level >= 0; --level) {
+ // Always keep the package level
+ if (level == *pkgLevel)
+ continue;
+ // Detect if this level is radix 1
+ radix1_detected = 1;
+ for (i = 1; i < nActiveThreads; ++i) {
+ if (address2os[0].first.labels[level] !=
+ address2os[i].first.labels[level]) {
+ // There are differing label values for this level so it stays
+ radix1_detected = 0;
+ break;
+ }
+ }
+ if (!radix1_detected)
+ continue;
+ // Radix 1 was detected
+ if (level == *threadLevel) {
+ // If only one thread per core, then just decrement
+ // the depth which removes the threadlevel from address2os
+ for (i = 0; i < nActiveThreads; ++i) {
+ address2os[i].first.depth--;
+ }
+ *threadLevel = -1;
+ } else if (level == *coreLevel) {
+ // For core level, we move the thread labels over if they are still
+ // valid (*threadLevel != -1), and also reduce the depth another level
+ for (i = 0; i < nActiveThreads; ++i) {
+ if (*threadLevel != -1) {
+ address2os[i].first.labels[*coreLevel] =
+ address2os[i].first.labels[*threadLevel];
}
+ address2os[i].first.depth--;
+ }
+ *coreLevel = -1;
}
- return address2os[0].first.depth;
+ }
+ return address2os[0].first.depth;
}
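The effect of the radix-1 pruning can be seen on a small assumed example: a depth-3 map {package, core, thread} on a machine with one thread per core has an identical thread label everywhere, so that level is dropped and the depth becomes 2. A standalone C++ sketch of that idea (not the runtime code, which shifts labels in place rather than erasing them):

// Drop any level (other than the package level, index 0) whose label is the
// same for every active thread, mirroring the "radix 1" removal above.
#include <cstdio>
#include <vector>

int main() {
  // {package, core, thread}; one thread per core, so the thread label is 0.
  std::vector<std::vector<int>> addr = {
      {0, 0, 0}, {0, 1, 0}, {1, 0, 0}, {1, 1, 0}};
  int depth = 3;
  for (int level = depth - 1; level >= 1; --level) {
    bool radix1 = true;
    for (size_t i = 1; i < addr.size(); ++i)
      if (addr[i][level] != addr[0][level]) {
        radix1 = false;
        break;
      }
    if (radix1) {
      for (auto &a : addr)
        a.erase(a.begin() + level);
      --depth;
    }
  }
  std::printf("new depth = %d\n", depth); // prints 2: {package, core} remain
  return 0;
}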
-// Returns the number of objects of type 'type' below 'obj' within the topology tree structure.
-// e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is HWLOC_OBJ_PU, then
-// this will return the number of PU's under the SOCKET object.
-static int
-__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) {
- int retval = 0;
- hwloc_obj_t first;
- for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0);
- first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj;
- first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first))
- {
- ++retval;
+// Returns the number of objects of type 'type' below 'obj' within the topology
+// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is
+// HWLOC_OBJ_PU, then this will return the number of PU's under the package
+// object.
+static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
+ hwloc_obj_type_t type) {
+ int retval = 0;
+ hwloc_obj_t first;
+ for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
+ obj->logical_index, type, 0);
+ first != NULL &&
+ hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) ==
+ obj;
+ first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
+ first)) {
+ ++retval;
+ }
+ return retval;
+}
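A hedged usage sketch of the same counting idea with stock hwloc calls (the helper above is file-static, so this sketch uses hwloc's own cpuset-based counter instead; HWLOC_OBJ_PACKAGE assumes hwloc >= 1.11, and error handling is omitted for brevity):

// Count the cores contained in the first package's cpuset.
#include <hwloc.h>
#include <cstdio>

int main() {
  hwloc_topology_t topo;
  hwloc_topology_init(&topo);
  hwloc_topology_load(topo);
  hwloc_obj_t pkg = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PACKAGE, 0);
  if (pkg != NULL) {
    int ncores = hwloc_get_nbobjs_inside_cpuset_by_type(topo, pkg->cpuset,
                                                        HWLOC_OBJ_CORE);
    std::printf("cores under package 0: %d\n", ncores);
  }
  hwloc_topology_destroy(topo);
  return 0;
}

Link against hwloc (e.g. -lhwloc) to build it.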
+
+static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id) {
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ // Save the affinity mask for the current thread.
+ kmp_affin_mask_t *oldMask;
+ KMP_CPU_ALLOC(oldMask);
+ __kmp_get_system_affinity(oldMask, TRUE);
+
+ int depth = 3;
+ int pkgLevel = 0;
+ int coreLevel = 1;
+ int threadLevel = 2;
+
+ if (!KMP_AFFINITY_CAPABLE()) {
+ // Hack to try and infer the machine topology using only the data
+ // available from cpuid on the current thread, and __kmp_xproc.
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+ nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
+ hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
+ HWLOC_OBJ_CORE);
+ __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
+ hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
+ HWLOC_OBJ_PU);
+ __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
+ nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
- return retval;
-}
-
-static int
-__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // Save the affinity mask for the current thread.
- //
- kmp_affin_mask_t *oldMask;
- KMP_CPU_ALLOC(oldMask);
- __kmp_get_system_affinity(oldMask, TRUE);
-
- int depth = 3;
- int pkgLevel = 0;
- int coreLevel = 1;
- int threadLevel = 2;
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
- if (! KMP_AFFINITY_CAPABLE())
- {
- //
- // Hack to try and infer the machine topology using only the data
- // available from cpuid on the current thread, and __kmp_xproc.
- //
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
- nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE);
- __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU);
- __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
- nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- KMP_CPU_FREE(oldMask);
- return 0;
+ // Allocate the data structure to be returned.
+ AddrUnsPair *retval =
+ (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+ __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore,
+ // nCoresPerPkg, & nPackages. Make sure all these vars are set
+ // correctly, and return if affinity is not enabled.
+
+ hwloc_obj_t pu;
+ hwloc_obj_t core;
+ hwloc_obj_t socket;
+ int nActiveThreads = 0;
+ int socket_identifier = 0;
+ // re-calculate globals to count only accessible resources
+ __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
+ for (socket =
+ hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
+ socket != NULL;
+ socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology,
+ HWLOC_OBJ_PACKAGE, socket),
+ socket_identifier++) {
+ int core_identifier = 0;
+ int num_active_cores = 0;
+ for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
+ socket->logical_index,
+ HWLOC_OBJ_CORE, 0);
+ core != NULL &&
+ hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
+ core) == socket;
+ core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
+ core),
+ core_identifier++) {
+ int pu_identifier = 0;
+ int num_active_threads = 0;
+ for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
+ core->logical_index, HWLOC_OBJ_PU,
+ 0);
+ pu != NULL &&
+ hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
+ pu) == core;
+ pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
+ pu),
+ pu_identifier++) {
+ Address addr(3);
+ if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
+ continue; // skip inactive (inaccessible) unit
+ KA_TRACE(20,
+ ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
+ socket->os_index, socket->logical_index, core->os_index,
+ core->logical_index, pu->os_index, pu->logical_index));
+ addr.labels[0] = socket_identifier; // package
+ addr.labels[1] = core_identifier; // core
+ addr.labels[2] = pu_identifier; // pu
+ retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
+ __kmp_pu_os_idx[nActiveThreads] =
+ pu->os_index; // keep os index for each active pu
+ nActiveThreads++;
+ ++num_active_threads; // count active threads per core
+ }
+ if (num_active_threads) { // were there any active threads on the core?
+ ++__kmp_ncores; // count total active cores
+ ++num_active_cores; // count active cores per socket
+ if (num_active_threads > __kmp_nThreadsPerCore)
+ __kmp_nThreadsPerCore = num_active_threads; // calc maximum
+ }
}
+ if (num_active_cores) { // were there any active cores on the socket?
+ ++nPackages; // count total active packages
+ if (num_active_cores > nCoresPerPkg)
+ nCoresPerPkg = num_active_cores; // calc maximum
+ }
+ }
+
+ // If there's only one thread context to bind to, return now.
+ KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
+ KMP_ASSERT(nActiveThreads > 0);
+ if (nActiveThreads == 1) {
+ __kmp_ncores = nPackages = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
- //
- // Allocate the data structure to be returned.
- //
- AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
- __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return if affinity is not enabled.
- //
-
- hwloc_obj_t pu;
- hwloc_obj_t core;
- hwloc_obj_t socket;
- int nActiveThreads = 0;
- int socket_identifier = 0;
- // re-calculate globals to count only accessible resources
- __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
- for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
- socket != NULL;
- socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, socket),
- socket_identifier++)
- {
- int core_identifier = 0;
- int num_active_cores = 0;
- for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0);
- core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket;
- core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core),
- core_identifier++)
- {
- int pu_identifier = 0;
- int num_active_threads = 0;
- for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0);
- pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core;
- pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu),
- pu_identifier++)
- {
- Address addr(3);
- if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
- continue; // skip inactive (inaccessible) unit
- KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
- socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index));
- addr.labels[0] = socket_identifier; // package
- addr.labels[1] = core_identifier; // core
- addr.labels[2] = pu_identifier; // pu
- retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
- __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu
- nActiveThreads++;
- ++num_active_threads; // count active threads per core
- }
- if (num_active_threads) { // were there any active threads on the core?
- ++__kmp_ncores; // count total active cores
- ++num_active_cores; // count active cores per socket
- if (num_active_threads > __kmp_nThreadsPerCore)
- __kmp_nThreadsPerCore = num_active_threads; // calc maximum
- }
- }
- if (num_active_cores) { // were there any active cores on the socket?
- ++nPackages; // count total active packages
- if (num_active_cores > nCoresPerPkg)
- nCoresPerPkg = num_active_cores; // calc maximum
- }
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
- //
- // If there's only one thread context to bind to, return now.
- //
- KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
- KMP_ASSERT(nActiveThreads > 0);
- if (nActiveThreads == 1) {
- __kmp_ncores = nPackages = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
- // Form an Address object which only includes the package level.
- //
- Address addr(1);
- addr.labels[0] = retval[0].first.labels[pkgLevel];
- retval[0].first = addr;
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
- }
-
- *address2os = retval;
- KMP_CPU_FREE(oldMask);
- return 1;
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(retval);
+ KMP_CPU_FREE(oldMask);
+ return 0;
}
- //
- // Sort the table by physical Id.
- //
- qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
+ // Form an Address object which only includes the package level.
+ Address addr(1);
+ addr.labels[0] = retval[0].first.labels[pkgLevel];
+ retval[0].first = addr;
- //
- // Check to see if the machine topology is uniform
- //
- unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
+ }
- //
- // Print the machine topology summary.
- //
if (__kmp_affinity_verbose) {
- char mask[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
-
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
+ __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
+ }
- __kmp_str_buf_print(&buf, "%d", nPackages);
- //for (level = 1; level <= pkgLevel; level++) {
- // __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
- // }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
+ *address2os = retval;
+ KMP_CPU_FREE(oldMask);
+ return 1;
+ }
- __kmp_str_buf_free(&buf);
+ // Sort the table by physical Id.
+ qsort(retval, nActiveThreads, sizeof(*retval),
+ __kmp_affinity_cmp_Address_labels);
+
+ // Check to see if the machine topology is uniform
+ unsigned uniform =
+ (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);
+
+ // Print the machine topology summary.
+ if (__kmp_affinity_verbose) {
+ char mask[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
}
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- return 0;
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
}
- //
- // Find any levels with radiix 1, and remove them from the map
- // (except for the package level).
- //
- depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- __kmp_affinity_gran_levels = 0;
- if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
- __kmp_affinity_gran_levels++;
- }
- if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
- __kmp_affinity_gran_levels++;
- }
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels++;
- }
- }
+ __kmp_str_buf_print(&buf, "%d", nPackages);
+ // for (level = 1; level <= pkgLevel; level++) {
+ // __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
+ // }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
- coreLevel, threadLevel);
- }
+ __kmp_str_buf_free(&buf);
+ }
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(retval);
KMP_CPU_FREE(oldMask);
- *address2os = retval;
- return depth;
+ return 0;
+ }
+
+ // Find any levels with radix 1, and remove them from the map
+ // (except for the package level).
+ depth = __kmp_affinity_remove_radix_one_levels(
+ retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);
+
+ if (__kmp_affinity_gran_levels < 0) {
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ __kmp_affinity_gran_levels = 0;
+ if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels++;
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
+ coreLevel, threadLevel);
+ }
+
+ KMP_CPU_FREE(oldMask);
+ *address2os = retval;
+ return depth;
}
#endif // KMP_USE_HWLOC
-//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
-//
-static int
-__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // Even if __kmp_affinity_type == affinity_none, this routine might still
- // called to set __kmp_ncores, as well as
- // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
- //
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- __kmp_ncores = nPackages = __kmp_xproc;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
- }
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return now if affinity is not enabled.
- //
- __kmp_ncores = nPackages = __kmp_avail_proc;
+static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id) {
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ // Even if __kmp_affinity_type == affinity_none, this routine might still be
+ // called to set __kmp_ncores, as well as
+ // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+ if (!KMP_AFFINITY_CAPABLE()) {
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ __kmp_ncores = nPackages = __kmp_xproc;
__kmp_nThreadsPerCore = nCoresPerPkg = 1;
if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
-
- KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
- __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
- if (__kmp_affinity_type == affinity_none) {
- int avail_ct = 0;
- int i;
- KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
- if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask))
- continue;
- __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
- }
- return 0;
+ KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
+ return 0;
+ }
- //
- // Contruct the data structure to be returned.
- //
- *address2os = (AddrUnsPair*)
- __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+ // Make sure all these vars are set correctly, and return now if affinity is
+ // not enabled.
+ __kmp_ncores = nPackages = __kmp_avail_proc;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ __kmp_affin_fullMask);
+
+ KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ if (__kmp_affinity_type == affinity_none) {
int avail_ct = 0;
- unsigned int i;
+ int i;
KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
- continue;
- }
- __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
- Address addr(1);
- addr.labels[0] = i;
- (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
- }
- if (__kmp_affinity_verbose) {
- KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
+ if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
+ continue;
+ __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
}
+ return 0;
+ }
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Only the package level is modeled in the machine topology map,
- // so the #levels of granularity is either 0 or 1.
- //
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels = 1;
- }
- else {
- __kmp_affinity_gran_levels = 0;
- }
+ // Construct the data structure to be returned.
+ *address2os =
+ (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ int avail_ct = 0;
+ unsigned int i;
+ KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+ // Skip this proc if it is not included in the machine model.
+ if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
+ continue;
+ }
+ __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
+ Address addr(1);
+ addr.labels[0] = i;
+ (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
+ }
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ // Only the package level is modeled in the machine topology map,
+ // so the #levels of granularity is either 0 or 1.
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels = 1;
+ } else {
+ __kmp_affinity_gran_levels = 0;
}
- return 1;
+ }
+ return 1;
}
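In other words, the flat model is just the identity mapping: each available OS proc i becomes a depth-1 address whose only label is i. A minimal standalone illustration (a pair of a label vector and an OS id stands in for AddrUnsPair, and the proc list is assumed):

// Flat mapping: proc i -> ({i}, i).
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  std::vector<int> avail = {0, 1, 2, 3}; // assumed available OS procs
  std::vector<std::pair<std::vector<int>, int>> address2os;
  for (int os : avail)
    address2os.push_back({{os}, os}); // labels[0] == OS proc id
  for (const auto &p : address2os)
    std::printf("labels[0]=%d -> osId=%d\n", p.first[0], p.second);
  return 0;
}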
+#if KMP_GROUP_AFFINITY
-# if KMP_GROUP_AFFINITY
-
-//
// If multiple Windows* OS processor groups exist, we can create a 2-level
-// topology map with the groups at level 0 and the individual procs at
-// level 1.
-//
+// topology map with the groups at level 0 and the individual procs at level 1.
// This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
-//
-static int
-__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // If we don't have multiple processor groups, return now.
- // The flat mapping will be used.
- //
- if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
- // FIXME set *msg_id
- return -1;
- }
-
- //
- // Contruct the data structure to be returned.
- //
- *address2os = (AddrUnsPair*)
- __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
- KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
- __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
- int avail_ct = 0;
- int i;
- KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
- continue;
- }
- __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
- Address addr(2);
- addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
- addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
- (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
+static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id) {
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ // If we don't have multiple processor groups, return now.
+ // The flat mapping will be used.
+ if ((!KMP_AFFINITY_CAPABLE()) ||
+ (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) {
+ // FIXME set *msg_id
+ return -1;
+ }
+
+ // Construct the data structure to be returned.
+ *address2os =
+ (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ int avail_ct = 0;
+ int i;
+ KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+ // Skip this proc if it is not included in the machine model.
+ if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
+ continue;
+ }
+ __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
+ Address addr(2);
+ addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
+ addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
+ (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
- addr.labels[1]);
- }
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
+ addr.labels[1]);
}
+ }
- if (__kmp_affinity_gran_levels < 0) {
- if (__kmp_affinity_gran == affinity_gran_group) {
- __kmp_affinity_gran_levels = 1;
- }
- else if ((__kmp_affinity_gran == affinity_gran_fine)
- || (__kmp_affinity_gran == affinity_gran_thread)) {
- __kmp_affinity_gran_levels = 0;
- }
- else {
- const char *gran_str = NULL;
- if (__kmp_affinity_gran == affinity_gran_core) {
- gran_str = "core";
- }
- else if (__kmp_affinity_gran == affinity_gran_package) {
- gran_str = "package";
- }
- else if (__kmp_affinity_gran == affinity_gran_node) {
- gran_str = "node";
- }
- else {
- KMP_ASSERT(0);
- }
+ if (__kmp_affinity_gran_levels < 0) {
+ if (__kmp_affinity_gran == affinity_gran_group) {
+ __kmp_affinity_gran_levels = 1;
+ } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
+ (__kmp_affinity_gran == affinity_gran_thread)) {
+ __kmp_affinity_gran_levels = 0;
+ } else {
+ const char *gran_str = NULL;
+ if (__kmp_affinity_gran == affinity_gran_core) {
+ gran_str = "core";
+ } else if (__kmp_affinity_gran == affinity_gran_package) {
+ gran_str = "package";
+ } else if (__kmp_affinity_gran == affinity_gran_node) {
+ gran_str = "node";
+ } else {
+ KMP_ASSERT(0);
+ }
- // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread"
- __kmp_affinity_gran_levels = 0;
- }
+ // Warning: can't use affinity granularity \"gran\" with group topology
+ // method, using "thread"
+ __kmp_affinity_gran_levels = 0;
}
- return 2;
+ }
+ return 2;
}
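As a worked example of the two labels computed above (assuming 64-bit Windows, where a processor group holds CHAR_BIT * sizeof(DWORD_PTR) = 64 logical processors): OS proc 0 maps to {group 0, index 0}, proc 63 to {group 0, index 63}, proc 64 to {group 1, index 0}, and proc 100 to {group 1, index 36}. A tiny standalone check of that arithmetic:

// Group / within-group decomposition used by the proc-group map above.
#include <cstdio>

int main() {
  const int kGroupSize = 64; // CHAR_BIT * sizeof(DWORD_PTR) on 64-bit Windows
  const int procs[] = {0, 63, 64, 100};
  for (int i : procs)
    std::printf("proc %3d -> group %d, index %d\n", i, i / kGroupSize,
                i % kGroupSize);
  return 0;
}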
-# endif /* KMP_GROUP_AFFINITY */
-
+#endif /* KMP_GROUP_AFFINITY */
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-static int
-__kmp_cpuid_mask_width(int count) {
- int r = 0;
+static int __kmp_cpuid_mask_width(int count) {
+ int r = 0;
- while((1<<r) < count)
- ++r;
- return r;
+ while ((1 << r) < count)
+ ++r;
+ return r;
}
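The helper returns the smallest r with (1 << r) >= count, i.e. how many APIC-ID bits a field needs to hold count distinct values; a count of 6, for instance, needs 3 bits. A quick standalone check of that behaviour (the free function here just mirrors the loop above):

// mask_width(count): number of bits needed to encode `count` values.
#include <cstdio>

static int mask_width(int count) {
  int r = 0;
  while ((1 << r) < count)
    ++r;
  return r;
}

int main() {
  std::printf("%d %d %d\n", mask_width(1), mask_width(6), mask_width(16));
  // prints: 0 3 4
  return 0;
}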
-
class apicThreadInfo {
public:
- unsigned osId; // param to __kmp_affinity_bind_thread
- unsigned apicId; // from cpuid after binding
- unsigned maxCoresPerPkg; // ""
- unsigned maxThreadsPerPkg; // ""
- unsigned pkgId; // inferred from above values
- unsigned coreId; // ""
- unsigned threadId; // ""
+ unsigned osId; // param to __kmp_affinity_bind_thread
+ unsigned apicId; // from cpuid after binding
+ unsigned maxCoresPerPkg; // ""
+ unsigned maxThreadsPerPkg; // ""
+ unsigned pkgId; // inferred from above values
+ unsigned coreId; // ""
+ unsigned threadId; // ""
};
-
-static int
-__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
-{
- const apicThreadInfo *aa = (const apicThreadInfo *)a;
- const apicThreadInfo *bb = (const apicThreadInfo *)b;
- if (aa->osId < bb->osId) return -1;
- if (aa->osId > bb->osId) return 1;
- return 0;
+static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
+ const void *b) {
+ const apicThreadInfo *aa = (const apicThreadInfo *)a;
+ const apicThreadInfo *bb = (const apicThreadInfo *)b;
+ if (aa->osId < bb->osId)
+ return -1;
+ if (aa->osId > bb->osId)
+ return 1;
+ return 0;
}
-
-static int
-__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
-{
- const apicThreadInfo *aa = (const apicThreadInfo *)a;
- const apicThreadInfo *bb = (const apicThreadInfo *)b;
- if (aa->pkgId < bb->pkgId) return -1;
- if (aa->pkgId > bb->pkgId) return 1;
- if (aa->coreId < bb->coreId) return -1;
- if (aa->coreId > bb->coreId) return 1;
- if (aa->threadId < bb->threadId) return -1;
- if (aa->threadId > bb->threadId) return 1;
- return 0;
+static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
+ const void *b) {
+ const apicThreadInfo *aa = (const apicThreadInfo *)a;
+ const apicThreadInfo *bb = (const apicThreadInfo *)b;
+ if (aa->pkgId < bb->pkgId)
+ return -1;
+ if (aa->pkgId > bb->pkgId)
+ return 1;
+ if (aa->coreId < bb->coreId)
+ return -1;
+ if (aa->coreId > bb->coreId)
+ return 1;
+ if (aa->threadId < bb->threadId)
+ return -1;
+ if (aa->threadId > bb->threadId)
+ return 1;
+ return 0;
}
-
-//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use
// an algorithm which cycles through the available os threads, setting
// the current thread's affinity mask to that thread, and then retrieves
// the Apic Id for each thread context using the cpuid instruction.
-//
-static int
-__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- kmp_cpuid buf;
- int rc;
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
+static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id) {
+ kmp_cpuid buf;
+ int rc;
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ // Check if cpuid leaf 4 is supported.
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax < 4) {
+ *msg_id = kmp_i18n_str_NoLeaf4Support;
+ return -1;
+ }
+
+ // The algorithm used starts by setting the affinity to each available thread
+ // and retrieving info from the cpuid instruction, so if we are not capable of
+ // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we
+ // need to do something else - use the defaults that we calculated from
+ // issuing cpuid without binding to each proc.
+ if (!KMP_AFFINITY_CAPABLE()) {
+ // Hack to try and infer the machine topology using only the data
+ // available from cpuid on the current thread, and __kmp_xproc.
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
+
+ // Get an upper bound on the number of threads per package using cpuid(1).
+ // On some OS/chip combinations where HT is supported by the chip but is
+ // disabled, this value will be 2 on a single core chip. Usually, it will be
+ // 2 if HT is enabled and 1 if HT is disabled.
+ __kmp_x86_cpuid(1, 0, &buf);
+ int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
+ if (maxThreadsPerPkg == 0) {
+ maxThreadsPerPkg = 1;
+ }
+
+ // The num cores per pkg comes from cpuid(4). 1 must be added to the encoded
+ // value.
+ //
+ // The author of cpu_count.cpp treated this as only an upper bound on the
+ // number of cores, but I haven't seen any cases where it was greater than
+ // the actual number of cores, so we will treat it as exact in this block of
+ // code.
+ //
+ // First, we need to check if cpuid(4) is supported on this chip. To see if
+ // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or
+ // greater.
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax >= 4) {
+ __kmp_x86_cpuid(4, 0, &buf);
+ nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
+ } else {
+ nCoresPerPkg = 1;
+ }
- //
- // Check if cpuid leaf 4 is supported.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax < 4) {
- *msg_id = kmp_i18n_str_NoLeaf4Support;
- return -1;
- }
-
- //
- // The algorithm used starts by setting the affinity to each available
- // thread and retrieving info from the cpuid instruction, so if we are
- // not capable of calling __kmp_get_system_affinity() and
- // _kmp_get_system_affinity(), then we need to do something else - use
- // the defaults that we calculated from issuing cpuid without binding
- // to each proc.
- //
- if (! KMP_AFFINITY_CAPABLE()) {
- //
- // Hack to try and infer the machine topology using only the data
- // available from cpuid on the current thread, and __kmp_xproc.
- //
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
- //
- // Get an upper bound on the number of threads per package using
- // cpuid(1).
- //
- // On some OS/chps combinations where HT is supported by the chip
- // but is disabled, this value will be 2 on a single core chip.
- // Usually, it will be 2 if HT is enabled and 1 if HT is disabled.
- //
- __kmp_x86_cpuid(1, 0, &buf);
- int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
- if (maxThreadsPerPkg == 0) {
- maxThreadsPerPkg = 1;
- }
-
- //
- // The num cores per pkg comes from cpuid(4).
- // 1 must be added to the encoded value.
- //
- // The author of cpu_count.cpp treated this only an upper bound
- // on the number of cores, but I haven't seen any cases where it
- // was greater than the actual number of cores, so we will treat
- // it as exact in this block of code.
- //
- // First, we need to check if cpuid(4) is supported on this chip.
- // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
- // has the value n or greater.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax >= 4) {
- __kmp_x86_cpuid(4, 0, &buf);
- nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
- }
- else {
- nCoresPerPkg = 1;
- }
-
- //
- // There is no way to reliably tell if HT is enabled without issuing
- // the cpuid instruction from every thread, can correlating the cpuid
- // info, so if the machine is not affinity capable, we assume that HT
- // is off. We have seen quite a few machines where maxThreadsPerPkg
- // is 2, yet the machine does not support HT.
- //
- // - Older OSes are usually found on machines with older chips, which
- // do not support HT.
- //
- // - The performance penalty for mistakenly identifying a machine as
- // HT when it isn't (which results in blocktime being incorrecly set
- // to 0) is greater than the penalty when for mistakenly identifying
- // a machine as being 1 thread/core when it is really HT enabled
- // (which results in blocktime being incorrectly set to a positive
- // value).
- //
- __kmp_ncores = __kmp_xproc;
- nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
- __kmp_nThreadsPerCore = 1;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
+ // There is no way to reliably tell if HT is enabled without issuing the
+ // cpuid instruction from every thread and correlating the cpuid info, so
+ // if the machine is not affinity capable, we assume that HT is off. We have
+ // seen quite a few machines where maxThreadsPerPkg is 2, yet the machine
+ // does not support HT.
+ //
+ // - Older OSes are usually found on machines with older chips, which do not
+ // support HT.
+ // - The performance penalty for mistakenly identifying a machine as HT when
+ // it isn't (which results in blocktime being incorrectly set to 0) is
+ // greater than the penalty for mistakenly identifying a machine as
+ // being 1 thread/core when it is really HT enabled (which results in
+ // blocktime being incorrectly set to a positive value).
+ __kmp_ncores = __kmp_xproc;
+ nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+ __kmp_nThreadsPerCore = 1;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
+ return 0;
+ }
- //
- //
- // From here on, we can assume that it is safe to call
- // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
- // even if __kmp_affinity_type = affinity_none.
- //
-
- //
- // Save the affinity mask for the current thread.
- //
- kmp_affin_mask_t *oldMask;
- KMP_CPU_ALLOC(oldMask);
- KMP_ASSERT(oldMask != NULL);
- __kmp_get_system_affinity(oldMask, TRUE);
-
- //
- // Run through each of the available contexts, binding the current thread
- // to it, and obtaining the pertinent information using the cpuid instr.
- //
- // The relevant information is:
- //
- // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
- // has a uniqie Apic Id, which is of the form pkg# : core# : thread#.
- //
- // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The
- // value of this field determines the width of the core# + thread#
- // fields in the Apic Id. It is also an upper bound on the number
- // of threads per package, but it has been verified that situations
- // happen were it is not exact. In particular, on certain OS/chip
- // combinations where Intel(R) Hyper-Threading Technology is supported
- // by the chip but has
- // been disabled, the value of this field will be 2 (for a single core
- // chip). On other OS/chip combinations supporting
- // Intel(R) Hyper-Threading Technology, the value of
- // this field will be 1 when Intel(R) Hyper-Threading Technology is
- // disabled and 2 when it is enabled.
- //
- // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The
- // value of this field (+1) determines the width of the core# field in
- // the Apic Id. The comments in "cpucount.cpp" say that this value is
- // an upper bound, but the IA-32 architecture manual says that it is
- // exactly the number of cores per package, and I haven't seen any
- // case where it wasn't.
- //
- // From this information, deduce the package Id, core Id, and thread Id,
- // and set the corresponding fields in the apicThreadInfo struct.
- //
- unsigned i;
- apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
+ // From here on, we can assume that it is safe to call
+ // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
+ // __kmp_affinity_type = affinity_none.
+
+ // Save the affinity mask for the current thread.
+ kmp_affin_mask_t *oldMask;
+ KMP_CPU_ALLOC(oldMask);
+ KMP_ASSERT(oldMask != NULL);
+ __kmp_get_system_affinity(oldMask, TRUE);
+
+ // Run through each of the available contexts, binding the current thread
+ // to it, and obtaining the pertinent information using the cpuid instr.
+ //
+ // The relevant information is:
+ // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context
+ // has a unique Apic Id, which is of the form pkg# : core# : thread#.
+ // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value
+ // of this field determines the width of the core# + thread# fields in the
+ // Apic Id. It is also an upper bound on the number of threads per
+ // package, but it has been verified that situations happen where it is not
+ // exact. In particular, on certain OS/chip combinations where Intel(R)
+ // Hyper-Threading Technology is supported by the chip but has been
+ // disabled, the value of this field will be 2 (for a single core chip).
+ // On other OS/chip combinations supporting Intel(R) Hyper-Threading
+ // Technology, the value of this field will be 1 when Intel(R)
+ // Hyper-Threading Technology is disabled and 2 when it is enabled.
+ // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value
+ // of this field (+1) determines the width of the core# field in the Apic
+ // Id. The comments in "cpucount.cpp" say that this value is an upper
+ // bound, but the IA-32 architecture manual says that it is exactly the
+ // number of cores per package, and I haven't seen any case where it
+ // wasn't.
+ //
+ // From this information, deduce the package Id, core Id, and thread Id,
+ // and set the corresponding fields in the apicThreadInfo struct.
+ unsigned i;
+ apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
__kmp_avail_proc * sizeof(apicThreadInfo));
- unsigned nApics = 0;
- KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
- continue;
- }
- KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
-
- __kmp_affinity_dispatch->bind_thread(i);
- threadInfo[nApics].osId = i;
-
- //
- // The apic id and max threads per pkg come from cpuid(1).
- //
- __kmp_x86_cpuid(1, 0, &buf);
- if (((buf.edx >> 9) & 1) == 0) {
- __kmp_set_system_affinity(oldMask, TRUE);
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_ApicNotPresent;
- return -1;
- }
- threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
- threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
- if (threadInfo[nApics].maxThreadsPerPkg == 0) {
- threadInfo[nApics].maxThreadsPerPkg = 1;
- }
-
- //
- // Max cores per pkg comes from cpuid(4).
- // 1 must be added to the encoded value.
- //
- // First, we need to check if cpuid(4) is supported on this chip.
- // To see if cpuid(n) is supported, issue cpuid(0) and check if eax
- // has the value n or greater.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax >= 4) {
- __kmp_x86_cpuid(4, 0, &buf);
- threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
- }
- else {
- threadInfo[nApics].maxCoresPerPkg = 1;
- }
-
- //
- // Infer the pkgId / coreId / threadId using only the info
- // obtained locally.
- //
- int widthCT = __kmp_cpuid_mask_width(
- threadInfo[nApics].maxThreadsPerPkg);
- threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
-
- int widthC = __kmp_cpuid_mask_width(
- threadInfo[nApics].maxCoresPerPkg);
- int widthT = widthCT - widthC;
- if (widthT < 0) {
- //
- // I've never seen this one happen, but I suppose it could, if
- // the cpuid instruction on a chip was really screwed up.
- // Make sure to restore the affinity mask before the tail call.
- //
- __kmp_set_system_affinity(oldMask, TRUE);
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
-
- int maskC = (1 << widthC) - 1;
- threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
- &maskC;
-
- int maskT = (1 << widthT) - 1;
- threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT;
-
- nApics++;
- }
-
- //
- // We've collected all the info we need.
- // Restore the old affinity mask for this thread.
- //
- __kmp_set_system_affinity(oldMask, TRUE);
-
- //
- // If there's only one thread context to bind to, form an Address object
- // with depth 1 and return immediately (or, if affinity is off, set
- // address2os to NULL and return).
- //
- // If it is configured to omit the package level when there is only a
- // single package, the logic at the end of this routine won't work if
- // there is only a single thread - it would try to form an Address
- // object with depth 0.
- //
- KMP_ASSERT(nApics > 0);
- if (nApics == 1) {
- __kmp_ncores = nPackages = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
- Address addr(1);
- addr.labels[0] = threadInfo[0].pkgId;
- (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
- }
-
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return 1;
+ unsigned nApics = 0;
+ KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+ // Skip this proc if it is not included in the machine model.
+ if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
+ continue;
+ }
+ KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);
+
+ __kmp_affinity_dispatch->bind_thread(i);
+ threadInfo[nApics].osId = i;
+
+ // The apic id and max threads per pkg come from cpuid(1).
+ __kmp_x86_cpuid(1, 0, &buf);
+ if (((buf.edx >> 9) & 1) == 0) {
+ __kmp_set_system_affinity(oldMask, TRUE);
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_ApicNotPresent;
+ return -1;
+ }
+ threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
+ threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
+ if (threadInfo[nApics].maxThreadsPerPkg == 0) {
+ threadInfo[nApics].maxThreadsPerPkg = 1;
+ }
+
+ // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded
+ // value.
+ //
+ // First, we need to check if cpuid(4) is supported on this chip. To see if
+ // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n
+ // or greater.
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax >= 4) {
+ __kmp_x86_cpuid(4, 0, &buf);
+ threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
+ } else {
+ threadInfo[nApics].maxCoresPerPkg = 1;
}
- //
- // Sort the threadInfo table by physical Id.
- //
- qsort(threadInfo, nApics, sizeof(*threadInfo),
- __kmp_affinity_cmp_apicThreadInfo_phys_id);
-
- //
- // The table is now sorted by pkgId / coreId / threadId, but we really
- // don't know the radix of any of the fields. pkgId's may be sparsely
- // assigned among the chips on a system. Although coreId's are usually
- // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
- // [0..threadsPerCore-1], we don't want to make any such assumptions.
- //
- // For that matter, we don't know what coresPerPkg and threadsPerCore
- // (or the total # packages) are at this point - we want to determine
- // that now. We only have an upper bound on the first two figures.
- //
- // We also perform a consistency check at this point: the values returned
- // by the cpuid instruction for any thread bound to a given package had
- // better return the same info for maxThreadsPerPkg and maxCoresPerPkg.
- //
- nPackages = 1;
- nCoresPerPkg = 1;
- __kmp_nThreadsPerCore = 1;
- unsigned nCores = 1;
-
- unsigned pkgCt = 1; // to determine radii
- unsigned lastPkgId = threadInfo[0].pkgId;
- unsigned coreCt = 1;
- unsigned lastCoreId = threadInfo[0].coreId;
- unsigned threadCt = 1;
- unsigned lastThreadId = threadInfo[0].threadId;
-
- // intra-pkg consist checks
- unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
- unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
-
- for (i = 1; i < nApics; i++) {
- if (threadInfo[i].pkgId != lastPkgId) {
- nCores++;
- pkgCt++;
- lastPkgId = threadInfo[i].pkgId;
- if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
- coreCt = 1;
- lastCoreId = threadInfo[i].coreId;
- if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
- threadCt = 1;
- lastThreadId = threadInfo[i].threadId;
-
- //
- // This is a different package, so go on to the next iteration
- // without doing any consistency checks. Reset the consistency
- // check vars, though.
- //
- prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
- prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
- continue;
- }
-
- if (threadInfo[i].coreId != lastCoreId) {
- nCores++;
- coreCt++;
- lastCoreId = threadInfo[i].coreId;
- if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
- threadCt = 1;
- lastThreadId = threadInfo[i].threadId;
- }
- else if (threadInfo[i].threadId != lastThreadId) {
- threadCt++;
- lastThreadId = threadInfo[i].threadId;
- }
- else {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
- return -1;
- }
-
- //
- // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
- // fields agree between all the threads bounds to a given package.
- //
- if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
- || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
- return -1;
- }
- }
- nPackages = pkgCt;
- if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
- if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
-
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return now if affinity is not enabled.
- //
- __kmp_ncores = nCores;
+ // Infer the pkgId / coreId / threadId using only the info obtained locally.
+ int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
+ threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;
+
+ int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
+ int widthT = widthCT - widthC;
+ if (widthT < 0) {
+ // I've never seen this one happen, but I suppose it could, if the cpuid
+ // instruction on a chip was really screwed up. Make sure to restore the
+ // affinity mask before the tail call.
+ __kmp_set_system_affinity(oldMask, TRUE);
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+
+ int maskC = (1 << widthC) - 1;
+ threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;
+
+ int maskT = (1 << widthT) - 1;
+ threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;
+
+ nApics++;
+ }
+
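For readers unfamiliar with the legacy APIC ID layout, here is a minimal standalone sketch (not part of the patch) of the decomposition performed above. mask_width() is a stand-in for __kmp_cpuid_mask_width, and the sample values are hypothetical rather than read from any particular chip:

    #include <cstdio>

    // Number of bits needed to encode `count` distinct IDs (assumed to match
    // what __kmp_cpuid_mask_width computes).
    static int mask_width(unsigned count) {
      int w = 0;
      while ((1u << w) < count)
        w++;
      return w;
    }

    int main() {
      // Hypothetical values as reported by cpuid leaves 1 and 4.
      unsigned apicId = 0x5;         // EBX[31:24] of cpuid(1)
      unsigned maxThreadsPerPkg = 8; // EBX[23:16] of cpuid(1)
      unsigned maxCoresPerPkg = 4;   // (EAX[31:26] of cpuid(4)) + 1

      int widthCT = mask_width(maxThreadsPerPkg); // bits for all contexts in a pkg
      int widthC = mask_width(maxCoresPerPkg);    // bits for the core part
      int widthT = widthCT - widthC;              // remaining bits: SMT part

      unsigned pkgId = apicId >> widthCT;
      unsigned coreId = (apicId >> widthT) & ((1u << widthC) - 1);
      unsigned threadId = apicId & ((1u << widthT) - 1);
      printf("pkg %u core %u thread %u\n", pkgId, coreId, threadId);
      return 0;
    }

With apicId = 5, widthCT = 3 and widthC = 2, this prints "pkg 0 core 2 thread 1".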
+ // We've collected all the info we need.
+ // Restore the old affinity mask for this thread.
+ __kmp_set_system_affinity(oldMask, TRUE);
+
+ // If there's only one thread context to bind to, form an Address object
+ // with depth 1 and return immediately (or, if affinity is off, set
+ // address2os to NULL and return).
+ //
+ // If it is configured to omit the package level when there is only a single
+ // package, the logic at the end of this routine won't work if there is only
+ // a single thread - it would try to form an Address object with depth 0.
+ KMP_ASSERT(nApics > 0);
+ if (nApics == 1) {
+ __kmp_ncores = nPackages = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+ KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
- KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
- KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
- __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
- for (i = 0; i < nApics; ++i) {
- __kmp_pu_os_idx[i] = threadInfo[i].osId;
- }
+
if (__kmp_affinity_type == affinity_none) {
- __kmp_free(threadInfo);
- KMP_CPU_FREE(oldMask);
- return 0;
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return 0;
}
- //
- // Now that we've determined the number of packages, the number of cores
- // per package, and the number of threads per core, we can construct the
- // data structure that is to be returned.
- //
- int pkgLevel = 0;
- int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
- int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
- unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
-
- KMP_ASSERT(depth > 0);
- *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
-
- for (i = 0; i < nApics; ++i) {
- Address addr(depth);
- unsigned os = threadInfo[i].osId;
- int d = 0;
-
- if (pkgLevel >= 0) {
- addr.labels[d++] = threadInfo[i].pkgId;
- }
- if (coreLevel >= 0) {
- addr.labels[d++] = threadInfo[i].coreId;
- }
- if (threadLevel >= 0) {
- addr.labels[d++] = threadInfo[i].threadId;
- }
- (*address2os)[i] = AddrUnsPair(addr, os);
- }
+ *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
+ Address addr(1);
+ addr.labels[0] = threadInfo[0].pkgId;
+ (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);
if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- __kmp_affinity_gran_levels = 0;
- if ((threadLevel >= 0)
- && (__kmp_affinity_gran > affinity_gran_thread)) {
- __kmp_affinity_gran_levels++;
- }
- if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
- __kmp_affinity_gran_levels++;
- }
- if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
- __kmp_affinity_gran_levels++;
- }
+ __kmp_affinity_gran_levels = 0;
}
if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
- coreLevel, threadLevel);
+ __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
}
__kmp_free(threadInfo);
KMP_CPU_FREE(oldMask);
- return depth;
-}
-
-
-//
-// Intel(R) microarchitecture code name Nehalem, Dunnington and later
-// architectures support a newer interface for specifying the x2APIC Ids,
-// based on cpuid leaf 11.
-//
-static int
-__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
- kmp_i18n_id_t *const msg_id)
-{
- kmp_cpuid buf;
-
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
+ return 1;
+ }
- //
- // Check to see if cpuid leaf 11 is supported.
- //
- __kmp_x86_cpuid(0, 0, &buf);
- if (buf.eax < 11) {
- *msg_id = kmp_i18n_str_NoLeaf11Support;
- return -1;
+ // Sort the threadInfo table by physical Id.
+ qsort(threadInfo, nApics, sizeof(*threadInfo),
+ __kmp_affinity_cmp_apicThreadInfo_phys_id);
+
+ // The table is now sorted by pkgId / coreId / threadId, but we really don't
+ // know the radix of any of the fields. pkgId's may be sparsely assigned among
+ // the chips on a system. Although coreId's are usually assigned
+ // [0 .. coresPerPkg-1] and threadId's are usually assigned
+ // [0..threadsPerCore-1], we don't want to make any such assumptions.
+ //
+ // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
+ // total # packages) are at this point - we want to determine that now. We
+ // only have an upper bound on the first two figures.
+ //
+ // We also perform a consistency check at this point: the values returned by
+ // the cpuid instruction for any thread bound to a given package had better
+ // return the same info for maxThreadsPerPkg and maxCoresPerPkg.
+ nPackages = 1;
+ nCoresPerPkg = 1;
+ __kmp_nThreadsPerCore = 1;
+ unsigned nCores = 1;
+
+ unsigned pkgCt = 1; // to determine radii
+ unsigned lastPkgId = threadInfo[0].pkgId;
+ unsigned coreCt = 1;
+ unsigned lastCoreId = threadInfo[0].coreId;
+ unsigned threadCt = 1;
+ unsigned lastThreadId = threadInfo[0].threadId;
+
+ // intra-pkg consistency checks
+ unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
+ unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;
+
+ for (i = 1; i < nApics; i++) {
+ if (threadInfo[i].pkgId != lastPkgId) {
+ nCores++;
+ pkgCt++;
+ lastPkgId = threadInfo[i].pkgId;
+ if ((int)coreCt > nCoresPerPkg)
+ nCoresPerPkg = coreCt;
+ coreCt = 1;
+ lastCoreId = threadInfo[i].coreId;
+ if ((int)threadCt > __kmp_nThreadsPerCore)
+ __kmp_nThreadsPerCore = threadCt;
+ threadCt = 1;
+ lastThreadId = threadInfo[i].threadId;
+
+ // This is a different package, so go on to the next iteration without
+ // doing any consistency checks. Reset the consistency check vars, though.
+ prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
+ prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
+ continue;
+ }
+
+ if (threadInfo[i].coreId != lastCoreId) {
+ nCores++;
+ coreCt++;
+ lastCoreId = threadInfo[i].coreId;
+ if ((int)threadCt > __kmp_nThreadsPerCore)
+ __kmp_nThreadsPerCore = threadCt;
+ threadCt = 1;
+ lastThreadId = threadInfo[i].threadId;
+ } else if (threadInfo[i].threadId != lastThreadId) {
+ threadCt++;
+ lastThreadId = threadInfo[i].threadId;
+ } else {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
+ return -1;
+ }
+
+ // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg
+ // fields agree between all the threads bound to a given package.
+ if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
+ (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
+ return -1;
+ }
+ }
+ nPackages = pkgCt;
+ if ((int)coreCt > nCoresPerPkg)
+ nCoresPerPkg = coreCt;
+ if ((int)threadCt > __kmp_nThreadsPerCore)
+ __kmp_nThreadsPerCore = threadCt;
+
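As a rough illustration of the radix-counting pass above (a sketch with made-up IDs, not runtime code): once the table is sorted by pkgId / coreId / threadId, the per-package and per-core counts are reset whenever an outer ID changes, and only the maxima are kept.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Id { unsigned pkg, core, thread; };

    int main() {
      // Already sorted by pkg / core / thread, as after the qsort above.
      std::vector<Id> t = {{0,0,0},{0,0,1},{0,1,0},{0,1,1},{1,0,0},{1,0,1}};
      unsigned nPackages = 1, nCoresPerPkg = 1, nThreadsPerCore = 1;
      unsigned coreCt = 1, threadCt = 1;
      for (size_t i = 1; i < t.size(); i++) {
        if (t[i].pkg != t[i-1].pkg) {          // new package: close out counts
          nPackages++;
          nCoresPerPkg = std::max(nCoresPerPkg, coreCt);
          nThreadsPerCore = std::max(nThreadsPerCore, threadCt);
          coreCt = threadCt = 1;
        } else if (t[i].core != t[i-1].core) { // new core within the package
          coreCt++;
          nThreadsPerCore = std::max(nThreadsPerCore, threadCt);
          threadCt = 1;
        } else {                               // same core: another SMT context
          threadCt++;
        }
      }
      nCoresPerPkg = std::max(nCoresPerPkg, coreCt);
      nThreadsPerCore = std::max(nThreadsPerCore, threadCt);
      printf("%u pkgs x %u cores x %u threads\n", nPackages, nCoresPerPkg,
             nThreadsPerCore); // prints "2 pkgs x 2 cores x 2 threads"
      return 0;
    }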
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+ // Make sure all these vars are set correctly, and return now if affinity is
+ // not enabled.
+ __kmp_ncores = nCores;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
}
- __kmp_x86_cpuid(11, 0, &buf);
- if (buf.ebx == 0) {
- *msg_id = kmp_i18n_str_NoLeaf11Support;
- return -1;
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
}
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ }
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (i = 0; i < nApics; ++i) {
+ __kmp_pu_os_idx[i] = threadInfo[i].osId;
+ }
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return 0;
+ }
- //
- // Find the number of levels in the machine topology. While we're at it,
- // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will
- // try to get more accurate values later by explicitly counting them,
- // but get reasonable defaults now, in case we return early.
- //
- int level;
- int threadLevel = -1;
- int coreLevel = -1;
- int pkgLevel = -1;
- __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
+ // Now that we've determined the number of packages, the number of cores per
+ // package, and the number of threads per core, we can construct the data
+ // structure that is to be returned.
+ int pkgLevel = 0;
+ int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
+ int threadLevel =
+ (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
+ unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
+
+ KMP_ASSERT(depth > 0);
+ *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
+
+ for (i = 0; i < nApics; ++i) {
+ Address addr(depth);
+ unsigned os = threadInfo[i].osId;
+ int d = 0;
- for (level = 0;; level++) {
- if (level > 31) {
- //
- // FIXME: Hack for DPD200163180
- //
- // If level is big then something went wrong -> exiting
- //
- // There could actually be 32 valid levels in the machine topology,
- // but so far, the only machine we have seen which does not exit
- // this loop before iteration 32 has fubar x2APIC settings.
- //
- // For now, just reject this case based upon loop trip count.
- //
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- __kmp_x86_cpuid(11, level, &buf);
- if (buf.ebx == 0) {
- if (pkgLevel < 0) {
- //
- // Will infer nPackages from __kmp_xproc
- //
- pkgLevel = level;
- level++;
- }
- break;
- }
- int kind = (buf.ecx >> 8) & 0xff;
- if (kind == 1) {
- //
- // SMT level
- //
- threadLevel = level;
- coreLevel = -1;
- pkgLevel = -1;
- __kmp_nThreadsPerCore = buf.ebx & 0xffff;
- if (__kmp_nThreadsPerCore == 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- }
- else if (kind == 2) {
- //
- // core level
- //
- coreLevel = level;
- pkgLevel = -1;
- nCoresPerPkg = buf.ebx & 0xffff;
- if (nCoresPerPkg == 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- }
- else {
- if (level <= 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- if (pkgLevel >= 0) {
- continue;
- }
- pkgLevel = level;
- nPackages = buf.ebx & 0xffff;
- if (nPackages == 0) {
- *msg_id = kmp_i18n_str_InvalidCpuidInfo;
- return -1;
- }
- }
- }
- int depth = level;
-
- //
- // In the above loop, "level" was counted from the finest level (usually
- // thread) to the coarsest. The caller expects that we will place the
- // labels in (*address2os)[].first.labels[] in the inverse order, so
- // we need to invert the vars saying which level means what.
- //
- if (threadLevel >= 0) {
- threadLevel = depth - threadLevel - 1;
+ if (pkgLevel >= 0) {
+ addr.labels[d++] = threadInfo[i].pkgId;
}
if (coreLevel >= 0) {
- coreLevel = depth - coreLevel - 1;
+ addr.labels[d++] = threadInfo[i].coreId;
}
- KMP_DEBUG_ASSERT(pkgLevel >= 0);
- pkgLevel = depth - pkgLevel - 1;
-
- //
- // The algorithm used starts by setting the affinity to each available
- // thread and retrieving info from the cpuid instruction, so if we are
- // not capable of calling __kmp_get_system_affinity() and
- // _kmp_get_system_affinity(), then we need to do something else - use
- // the defaults that we calculated from issuing cpuid without binding
- // to each proc.
- //
- if (! KMP_AFFINITY_CAPABLE())
- {
- //
- // Hack to try and infer the machine topology using only the data
- // available from cpuid on the current thread, and __kmp_xproc.
- //
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
-
- __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
- nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (__kmp_affinity_uniform_topology()) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
- return 0;
+ if (threadLevel >= 0) {
+ addr.labels[d++] = threadInfo[i].threadId;
}
+ (*address2os)[i] = AddrUnsPair(addr, os);
+ }
- //
- //
- // From here on, we can assume that it is safe to call
- // __kmp_get_system_affinity() and __kmp_set_system_affinity(),
- // even if __kmp_affinity_type = affinity_none.
- //
-
- //
- // Save the affinity mask for the current thread.
- //
- kmp_affin_mask_t *oldMask;
- KMP_CPU_ALLOC(oldMask);
- __kmp_get_system_affinity(oldMask, TRUE);
-
- //
- // Allocate the data structure to be returned.
- //
- AddrUnsPair *retval = (AddrUnsPair *)
- __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
-
- //
- // Run through each of the available contexts, binding the current thread
- // to it, and obtaining the pertinent information using the cpuid instr.
- //
- unsigned int proc;
- int nApics = 0;
- KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
- continue;
- }
- KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
-
- __kmp_affinity_dispatch->bind_thread(proc);
-
- //
- // Extrach the labels for each level in the machine topology map
- // from the Apic ID.
- //
- Address addr(depth);
- int prev_shift = 0;
-
- for (level = 0; level < depth; level++) {
- __kmp_x86_cpuid(11, level, &buf);
- unsigned apicId = buf.edx;
- if (buf.ebx == 0) {
- if (level != depth - 1) {
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
- return -1;
- }
- addr.labels[depth - level - 1] = apicId >> prev_shift;
- level++;
- break;
- }
- int shift = buf.eax & 0x1f;
- int mask = (1 << shift) - 1;
- addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
- prev_shift = shift;
- }
- if (level != depth) {
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
- return -1;
- }
+ if (__kmp_affinity_gran_levels < 0) {
+ // Set the granularity level based on what levels are modeled in the machine
+ // topology map.
+ __kmp_affinity_gran_levels = 0;
+ if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
+ __kmp_affinity_gran_levels++;
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
+ coreLevel, threadLevel);
+ }
+
+ __kmp_free(threadInfo);
+ KMP_CPU_FREE(oldMask);
+ return depth;
+}
- retval[nApics] = AddrUnsPair(addr, proc);
- nApics++;
+// Intel(R) microarchitecture code name Nehalem, Dunnington and later
+// architectures support a newer interface for specifying the x2APIC Ids,
+// based on cpuid leaf 11.
+static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
+ kmp_i18n_id_t *const msg_id) {
+ kmp_cpuid buf;
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ // Check to see if cpuid leaf 11 is supported.
+ __kmp_x86_cpuid(0, 0, &buf);
+ if (buf.eax < 11) {
+ *msg_id = kmp_i18n_str_NoLeaf11Support;
+ return -1;
+ }
+ __kmp_x86_cpuid(11, 0, &buf);
+ if (buf.ebx == 0) {
+ *msg_id = kmp_i18n_str_NoLeaf11Support;
+ return -1;
+ }
+
+ // Find the number of levels in the machine topology. While we're at it, get
+ // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try to
+ // get more accurate values later by explicitly counting them, but get
+ // reasonable defaults now, in case we return early.
+ int level;
+ int threadLevel = -1;
+ int coreLevel = -1;
+ int pkgLevel = -1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
+
+ for (level = 0;; level++) {
+ if (level > 31) {
+ // FIXME: Hack for DPD200163180
+ //
+ // If level is big then something went wrong -> exiting
+ //
+ // There could actually be 32 valid levels in the machine topology, but so
+ // far, the only machine we have seen which does not exit this loop before
+ // iteration 32 has fubar x2APIC settings.
+ //
+ // For now, just reject this case based upon loop trip count.
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
}
+ __kmp_x86_cpuid(11, level, &buf);
+ if (buf.ebx == 0) {
+ if (pkgLevel < 0) {
+ // Will infer nPackages from __kmp_xproc
+ pkgLevel = level;
+ level++;
+ }
+ break;
+ }
+ int kind = (buf.ecx >> 8) & 0xff;
+ if (kind == 1) {
+ // SMT level
+ threadLevel = level;
+ coreLevel = -1;
+ pkgLevel = -1;
+ __kmp_nThreadsPerCore = buf.ebx & 0xffff;
+ if (__kmp_nThreadsPerCore == 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ } else if (kind == 2) {
+ // core level
+ coreLevel = level;
+ pkgLevel = -1;
+ nCoresPerPkg = buf.ebx & 0xffff;
+ if (nCoresPerPkg == 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ } else {
+ if (level <= 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ if (pkgLevel >= 0) {
+ continue;
+ }
+ pkgLevel = level;
+ nPackages = buf.ebx & 0xffff;
+ if (nPackages == 0) {
+ *msg_id = kmp_i18n_str_InvalidCpuidInfo;
+ return -1;
+ }
+ }
+ }
+ int depth = level;
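For reference, the leaf-11 walk above can be reproduced outside the runtime with a sketch like the following, assuming GCC/Clang's __get_cpuid_count from <cpuid.h> in place of __kmp_x86_cpuid. ECX[15:8] of each subleaf encodes the level type (1 = SMT, 2 = core) and EBX[15:0] the count of logical processors reported at that level, which the runtime uses only as initial estimates:

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned eax, ebx, ecx, edx;
      for (unsigned level = 0; level < 32; level++) {
        if (!__get_cpuid_count(11, level, &eax, &ebx, &ecx, &edx))
          break;                        // cpuid leaf 11 not supported
        if ((ebx & 0xffff) == 0)
          break;                        // no more levels reported
        unsigned kind = (ecx >> 8) & 0xff;
        unsigned count = ebx & 0xffff;  // logical procs reported at this level
        if (kind == 1)
          printf("level %u: SMT, %u logical proc(s) per core\n", level, count);
        else if (kind == 2)
          printf("level %u: core, %u logical proc(s) reported\n", level, count);
        else
          printf("level %u: type %u, %u logical proc(s)\n", level, kind, count);
      }
      return 0;
    }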
- //
- // We've collected all the info we need.
- // Restore the old affinity mask for this thread.
- //
- __kmp_set_system_affinity(oldMask, TRUE);
-
- //
- // If there's only one thread context to bind to, return now.
- //
- KMP_ASSERT(nApics > 0);
- if (nApics == 1) {
- __kmp_ncores = nPackages = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = 1;
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
-
- KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- return 0;
- }
-
- //
- // Form an Address object which only includes the package level.
- //
- Address addr(1);
- addr.labels[0] = retval[0].first.labels[pkgLevel];
- retval[0].first = addr;
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
- }
+ // In the above loop, "level" was counted from the finest level (usually
+ // thread) to the coarsest. The caller expects that we will place the labels
+ // in (*address2os)[].first.labels[] in the inverse order, so we need to
+ // invert the vars saying which level means what.
+ if (threadLevel >= 0) {
+ threadLevel = depth - threadLevel - 1;
+ }
+ if (coreLevel >= 0) {
+ coreLevel = depth - coreLevel - 1;
+ }
+ KMP_DEBUG_ASSERT(pkgLevel >= 0);
+ pkgLevel = depth - pkgLevel - 1;
+
+ // The algorithm used starts by setting the affinity to each available thread
+ // and retrieving info from the cpuid instruction, so if we are not capable of
+ // calling __kmp_get_system_affinity() and __kmp_set_system_affinity(), then we
+ // need to do something else - use the defaults that we calculated from
+ // issuing cpuid without binding to each proc.
+ if (!KMP_AFFINITY_CAPABLE()) {
+ // Hack to try and infer the machine topology using only the data
+ // available from cpuid on the current thread, and __kmp_xproc.
+ KMP_ASSERT(__kmp_affinity_type == affinity_none);
- *address2os = retval;
- KMP_CPU_FREE(oldMask);
- return 1;
+ __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
+ nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (__kmp_affinity_uniform_topology()) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
+ return 0;
+ }
- //
- // Sort the table by physical Id.
- //
- qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
+ // From here on, we can assume that it is safe to call
+ // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
+ // __kmp_affinity_type = affinity_none.
+
+ // Save the affinity mask for the current thread.
+ kmp_affin_mask_t *oldMask;
+ KMP_CPU_ALLOC(oldMask);
+ __kmp_get_system_affinity(oldMask, TRUE);
+
+ // Allocate the data structure to be returned.
+ AddrUnsPair *retval =
+ (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
+
+ // Run through each of the available contexts, binding the current thread
+ // to it, and obtaining the pertinent information using the cpuid instr.
+ unsigned int proc;
+ int nApics = 0;
+ KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
+ // Skip this proc if it is not included in the machine model.
+ if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
+ continue;
+ }
+ KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);
+
+ __kmp_affinity_dispatch->bind_thread(proc);
+
+ // Extract labels for each level in the machine topology map from the Apic ID.
+ Address addr(depth);
+ int prev_shift = 0;
- //
- // Find the radix at each of the levels.
- //
- unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
- unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
for (level = 0; level < depth; level++) {
- totals[level] = 1;
- maxCt[level] = 1;
- counts[level] = 1;
- last[level] = retval[0].first.labels[level];
- }
-
- //
- // From here on, the iteration variable "level" runs from the finest
- // level to the coarsest, i.e. we iterate forward through
- // (*address2os)[].first.labels[] - in the previous loops, we iterated
- // backwards.
- //
- for (proc = 1; (int)proc < nApics; proc++) {
- int level;
- for (level = 0; level < depth; level++) {
- if (retval[proc].first.labels[level] != last[level]) {
- int j;
- for (j = level + 1; j < depth; j++) {
- totals[j]++;
- counts[j] = 1;
- // The line below causes printing incorrect topology information
- // in case the max value for some level (maxCt[level]) is encountered earlier than
- // some less value while going through the array.
- // For example, let pkg0 has 4 cores and pkg1 has 2 cores. Then maxCt[1] == 2
- // whereas it must be 4.
- // TODO!!! Check if it can be commented safely
- //maxCt[j] = 1;
- last[j] = retval[proc].first.labels[j];
- }
- totals[level]++;
- counts[level]++;
- if (counts[level] > maxCt[level]) {
- maxCt[level] = counts[level];
- }
- last[level] = retval[proc].first.labels[level];
- break;
- }
- else if (level == depth - 1) {
- __kmp_free(last);
- __kmp_free(maxCt);
- __kmp_free(counts);
- __kmp_free(totals);
- __kmp_free(retval);
- KMP_CPU_FREE(oldMask);
- *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
- return -1;
- }
+ __kmp_x86_cpuid(11, level, &buf);
+ unsigned apicId = buf.edx;
+ if (buf.ebx == 0) {
+ if (level != depth - 1) {
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
+ return -1;
}
- }
+ addr.labels[depth - level - 1] = apicId >> prev_shift;
+ level++;
+ break;
+ }
+ int shift = buf.eax & 0x1f;
+ int mask = (1 << shift) - 1;
+ addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
+ prev_shift = shift;
+ }
+ if (level != depth) {
+ KMP_CPU_FREE(oldMask);
+ *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
+ return -1;
+ }
+
+ retval[nApics] = AddrUnsPair(addr, proc);
+ nApics++;
+ }
+
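A standalone sketch of the per-level label extraction in the loop above (again using <cpuid.h> rather than the runtime wrappers; the final package-level label taken as apicId >> prev_shift once EBX reads 0 is omitted for brevity): each subleaf's EAX[4:0] is the shift width up to and including that level, so the label is the x2APIC ID masked to that width and shifted down by the previous width.

    #include <cpuid.h>
    #include <cstdio>

    int main() {
      unsigned eax, ebx, ecx, edx;
      unsigned prev_shift = 0;
      for (unsigned level = 0; level < 32; level++) {
        if (!__get_cpuid_count(11, level, &eax, &ebx, &ecx, &edx) ||
            (ebx & 0xffff) == 0)
          break;
        unsigned apicId = edx;       // full x2APIC ID of this logical proc
        unsigned shift = eax & 0x1f; // bits consumed up through this level
        unsigned mask = (1u << shift) - 1;
        unsigned label = (apicId & mask) >> prev_shift;
        printf("level %u label = %u\n", level, label);
        prev_shift = shift;
      }
      return 0;
    }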
+ // We've collected all the info we need.
+ // Restore the old affinity mask for this thread.
+ __kmp_set_system_affinity(oldMask, TRUE);
+
+ // If there's only one thread context to bind to, return now.
+ KMP_ASSERT(nApics > 0);
+ if (nApics == 1) {
+ __kmp_ncores = nPackages = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = 1;
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return if affinity is not enabled.
- //
- if (threadLevel >= 0) {
- __kmp_nThreadsPerCore = maxCt[threadLevel];
- }
- else {
- __kmp_nThreadsPerCore = 1;
+ KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
}
- nPackages = totals[pkgLevel];
- if (coreLevel >= 0) {
- __kmp_ncores = totals[coreLevel];
- nCoresPerPkg = maxCt[coreLevel];
- }
- else {
- __kmp_ncores = nPackages;
- nCoresPerPkg = 1;
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_free(retval);
+ KMP_CPU_FREE(oldMask);
+ return 0;
}
- //
- // Check to see if the machine topology is uniform
- //
- unsigned prod = maxCt[0];
- for (level = 1; level < depth; level++) {
- prod *= maxCt[level];
+ // Form an Address object which only includes the package level.
+ Address addr(1);
+ addr.labels[0] = retval[0].first.labels[pkgLevel];
+ retval[0].first = addr;
+
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
}
- bool uniform = (prod == totals[level - 1]);
- //
- // Print the machine topology summary.
- //
if (__kmp_affinity_verbose) {
- char mask[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+ __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
+ }
- KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
+ *address2os = retval;
+ KMP_CPU_FREE(oldMask);
+ return 1;
+ }
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
+ // Sort the table by physical Id.
+ qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);
- __kmp_str_buf_print(&buf, "%d", totals[0]);
- for (level = 1; level <= pkgLevel; level++) {
- __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
+ // Find the radix at each of the levels.
+ unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
+ for (level = 0; level < depth; level++) {
+ totals[level] = 1;
+ maxCt[level] = 1;
+ counts[level] = 1;
+ last[level] = retval[0].first.labels[level];
+ }
+
+ // From here on, the iteration variable "level" runs from the finest level to
+ // the coarsest, i.e. we iterate forward through
+ // (*address2os)[].first.labels[] - in the previous loops, we iterated
+ // backwards.
+ for (proc = 1; (int)proc < nApics; proc++) {
+ int level;
+ for (level = 0; level < depth; level++) {
+ if (retval[proc].first.labels[level] != last[level]) {
+ int j;
+ for (j = level + 1; j < depth; j++) {
+ totals[j]++;
+ counts[j] = 1;
+ // The line below causes incorrect topology information to be printed when
+ // the max value for some level (maxCt[level]) is encountered earlier than a
+ // smaller value while walking through the array. For example, if pkg0 has 4
+ // cores and pkg1 has 2 cores, then maxCt[1] == 2 whereas it must be 4.
+ // TODO!!! Check if it can be commented out safely
+ // maxCt[j] = 1;
+ last[j] = retval[proc].first.labels[j];
+ }
+ totals[level]++;
+ counts[level]++;
+ if (counts[level] > maxCt[level]) {
+ maxCt[level] = counts[level];
}
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
-
- __kmp_str_buf_free(&buf);
- }
- KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
- KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
- __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
- for (proc = 0; (int)proc < nApics; ++proc) {
- __kmp_pu_os_idx[proc] = retval[proc].second;
- }
- if (__kmp_affinity_type == affinity_none) {
+ last[level] = retval[proc].first.labels[level];
+ break;
+ } else if (level == depth - 1) {
__kmp_free(last);
__kmp_free(maxCt);
__kmp_free(counts);
__kmp_free(totals);
__kmp_free(retval);
KMP_CPU_FREE(oldMask);
- return 0;
+ *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
+ return -1;
+ }
}
+ }
- //
- // Find any levels with radiix 1, and remove them from the map
- // (except for the package level).
- //
- int new_depth = 0;
- for (level = 0; level < depth; level++) {
- if ((maxCt[level] == 1) && (level != pkgLevel)) {
- continue;
- }
- new_depth++;
- }
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+ // Make sure all these vars are set correctly, and return if affinity is not
+ // enabled.
+ if (threadLevel >= 0) {
+ __kmp_nThreadsPerCore = maxCt[threadLevel];
+ } else {
+ __kmp_nThreadsPerCore = 1;
+ }
+ nPackages = totals[pkgLevel];
- //
- // If we are removing any levels, allocate a new vector to return,
- // and copy the relevant information to it.
- //
- if (new_depth != depth) {
- AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
- sizeof(AddrUnsPair) * nApics);
- for (proc = 0; (int)proc < nApics; proc++) {
- Address addr(new_depth);
- new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
- }
- int new_level = 0;
- int newPkgLevel = -1;
- int newCoreLevel = -1;
- int newThreadLevel = -1;
- int i;
- for (level = 0; level < depth; level++) {
- if ((maxCt[level] == 1)
- && (level != pkgLevel)) {
- //
- // Remove this level. Never remove the package level
- //
- continue;
- }
- if (level == pkgLevel) {
- newPkgLevel = level;
- }
- if (level == coreLevel) {
- newCoreLevel = level;
- }
- if (level == threadLevel) {
- newThreadLevel = level;
- }
- for (proc = 0; (int)proc < nApics; proc++) {
- new_retval[proc].first.labels[new_level]
- = retval[proc].first.labels[level];
- }
- new_level++;
- }
+ if (coreLevel >= 0) {
+ __kmp_ncores = totals[coreLevel];
+ nCoresPerPkg = maxCt[coreLevel];
+ } else {
+ __kmp_ncores = nPackages;
+ nCoresPerPkg = 1;
+ }
- __kmp_free(retval);
- retval = new_retval;
- depth = new_depth;
- pkgLevel = newPkgLevel;
- coreLevel = newCoreLevel;
- threadLevel = newThreadLevel;
+ // Check to see if the machine topology is uniform
+ unsigned prod = maxCt[0];
+ for (level = 1; level < depth; level++) {
+ prod *= maxCt[level];
+ }
+ bool uniform = (prod == totals[level - 1]);
+
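The uniformity test above simply checks whether the product of the per-level maxima accounts for every leaf actually observed; a trivial sketch with hypothetical numbers:

    #include <cstdio>

    int main() {
      // Hypothetical per-level maxima, coarsest level first:
      // packages, max cores per package, max threads per core.
      unsigned maxCt[] = {2, 4, 2};
      unsigned totalLeaves = 14; // e.g. two cores have SMT disabled
      unsigned prod = 1;
      for (unsigned m : maxCt)
        prod *= m;
      printf("uniform: %s\n", prod == totalLeaves ? "yes" : "no"); // "no"
      return 0;
    }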
+ // Print the machine topology summary.
+ if (__kmp_affinity_verbose) {
+ char mask[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);
+
+ KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
}
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- __kmp_affinity_gran_levels = 0;
- if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
- __kmp_affinity_gran_levels++;
- }
- if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
- __kmp_affinity_gran_levels++;
- }
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels++;
- }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
}
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
- coreLevel, threadLevel);
- }
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_str_buf_print(&buf, "%d", totals[0]);
+ for (level = 1; level <= pkgLevel; level++) {
+ __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
+ }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+
+ __kmp_str_buf_free(&buf);
+ }
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (proc = 0; (int)proc < nApics; ++proc) {
+ __kmp_pu_os_idx[proc] = retval[proc].second;
+ }
+ if (__kmp_affinity_type == affinity_none) {
__kmp_free(last);
__kmp_free(maxCt);
__kmp_free(counts);
__kmp_free(totals);
+ __kmp_free(retval);
KMP_CPU_FREE(oldMask);
- *address2os = retval;
- return depth;
-}
-
-
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+ return 0;
+ }
+ // Find any levels with radix 1, and remove them from the map
+ // (except for the package level).
+ int new_depth = 0;
+ for (level = 0; level < depth; level++) {
+ if ((maxCt[level] == 1) && (level != pkgLevel)) {
+ continue;
+ }
+ new_depth++;
+ }
+
+ // If we are removing any levels, allocate a new vector to return,
+ // and copy the relevant information to it.
+ if (new_depth != depth) {
+ AddrUnsPair *new_retval =
+ (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
+ for (proc = 0; (int)proc < nApics; proc++) {
+ Address addr(new_depth);
+ new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
+ }
+ int new_level = 0;
+ int newPkgLevel = -1;
+ int newCoreLevel = -1;
+ int newThreadLevel = -1;
+ int i;
+ for (level = 0; level < depth; level++) {
+ if ((maxCt[level] == 1) && (level != pkgLevel)) {
+ // Remove this level. Never remove the package level
+ continue;
+ }
+ if (level == pkgLevel) {
+ newPkgLevel = level;
+ }
+ if (level == coreLevel) {
+ newCoreLevel = level;
+ }
+ if (level == threadLevel) {
+ newThreadLevel = level;
+ }
+ for (proc = 0; (int)proc < nApics; proc++) {
+ new_retval[proc].first.labels[new_level] =
+ retval[proc].first.labels[level];
+ }
+ new_level++;
+ }
-#define osIdIndex 0
-#define threadIdIndex 1
-#define coreIdIndex 2
-#define pkgIdIndex 3
-#define nodeIdIndex 4
+ __kmp_free(retval);
+ retval = new_retval;
+ depth = new_depth;
+ pkgLevel = newPkgLevel;
+ coreLevel = newCoreLevel;
+ threadLevel = newThreadLevel;
+ }
+
+ if (__kmp_affinity_gran_levels < 0) {
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ __kmp_affinity_gran_levels = 0;
+ if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
+ __kmp_affinity_gran_levels++;
+ }
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels++;
+ }
+ }
+
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
+ threadLevel);
+ }
+
+ __kmp_free(last);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ __kmp_free(totals);
+ KMP_CPU_FREE(oldMask);
+ *address2os = retval;
+ return depth;
+}
+
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+
+#define osIdIndex 0
+#define threadIdIndex 1
+#define coreIdIndex 2
+#define pkgIdIndex 3
+#define nodeIdIndex 4
typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;
-
-static int
-__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
-{
- const unsigned *aa = (const unsigned *)a;
- const unsigned *bb = (const unsigned *)b;
- if (aa[osIdIndex] < bb[osIdIndex]) return -1;
- if (aa[osIdIndex] > bb[osIdIndex]) return 1;
- return 0;
+static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
+ const unsigned *aa = (const unsigned *)a;
+ const unsigned *bb = (const unsigned *)b;
+ if (aa[osIdIndex] < bb[osIdIndex])
+ return -1;
+ if (aa[osIdIndex] > bb[osIdIndex])
+ return 1;
+ return 0;
};
-
-static int
-__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
-{
- unsigned i;
- const unsigned *aa = *((const unsigned **)a);
- const unsigned *bb = *((const unsigned **)b);
- for (i = maxIndex; ; i--) {
- if (aa[i] < bb[i]) return -1;
- if (aa[i] > bb[i]) return 1;
- if (i == osIdIndex) break;
- }
- return 0;
+static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
+ const void *b) {
+ unsigned i;
+ const unsigned *aa = *((const unsigned **)a);
+ const unsigned *bb = *((const unsigned **)b);
+ for (i = maxIndex;; i--) {
+ if (aa[i] < bb[i])
+ return -1;
+ if (aa[i] > bb[i])
+ return 1;
+ if (i == osIdIndex)
+ break;
+ }
+ return 0;
}
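For context, a minimal usage sketch of this comparator pattern with qsort (hypothetical rows; the real records are the threadInfo rows parsed from /proc/cpuinfo below). The physical-ID comparator walks from the most significant field (here pkgId) down to osId:

    #include <cstdio>
    #include <cstdlib>

    // Mirror of the field layout used below (osId, threadId, coreId, pkgId).
    enum { osIdIndex = 0, threadIdIndex, coreIdIndex, pkgIdIndex };
    static unsigned maxIndex = pkgIdIndex;

    static int cmp_phys_id(const void *a, const void *b) {
      const unsigned *aa = *(const unsigned *const *)a;
      const unsigned *bb = *(const unsigned *const *)b;
      for (unsigned i = maxIndex;; i--) { // most significant field first
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
      }
      return 0;
    }

    int main() {
      // {osId, threadId, coreId, pkgId} for three logical processors.
      unsigned r0[] = {0, 0, 1, 0}, r1[] = {1, 1, 0, 0}, r2[] = {2, 0, 0, 0};
      unsigned *rows[] = {r0, r1, r2};
      qsort(rows, 3, sizeof(*rows), cmp_phys_id);
      for (unsigned **p = rows; p != rows + 3; ++p)
        printf("os %u: pkg %u core %u thread %u\n", (*p)[osIdIndex],
               (*p)[pkgIdIndex], (*p)[coreIdIndex], (*p)[threadIdIndex]);
      return 0;
    }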
-
-//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map.
-//
-static int
-__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
- kmp_i18n_id_t *const msg_id, FILE *f)
-{
- *address2os = NULL;
- *msg_id = kmp_i18n_null;
-
- //
- // Scan of the file, and count the number of "processor" (osId) fields,
- // and find the highest value of <n> for a node_<n> field.
- //
- char buf[256];
- unsigned num_records = 0;
- while (! feof(f)) {
- buf[sizeof(buf) - 1] = 1;
- if (! fgets(buf, sizeof(buf), f)) {
- //
- // Read errors presumably because of EOF
- //
- break;
- }
-
- char s1[] = "processor";
- if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
- num_records++;
- continue;
- }
-
- //
- // FIXME - this will match "node_<n> <garbage>"
- //
- unsigned level;
- if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
- if (nodeIdIndex + level >= maxIndex) {
- maxIndex = nodeIdIndex + level;
- }
- continue;
- }
- }
-
- //
- // Check for empty file / no valid processor records, or too many.
- // The number of records can't exceed the number of valid bits in the
- // affinity mask.
- //
- if (num_records == 0) {
- *line = 0;
- *msg_id = kmp_i18n_str_NoProcRecords;
- return -1;
- }
- if (num_records > (unsigned)__kmp_xproc) {
- *line = 0;
- *msg_id = kmp_i18n_str_TooManyProcRecords;
- return -1;
- }
-
- //
- // Set the file pointer back to the begginning, so that we can scan the
- // file again, this time performing a full parse of the data.
- // Allocate a vector of ProcCpuInfo object, where we will place the data.
- // Adding an extra element at the end allows us to remove a lot of extra
- // checks for termination conditions.
- //
- if (fseek(f, 0, SEEK_SET) != 0) {
- *line = 0;
- *msg_id = kmp_i18n_str_CantRewindCpuinfo;
- return -1;
- }
-
- //
- // Allocate the array of records to store the proc info in. The dummy
- // element at the end makes the logic in filling them out easier to code.
- //
- unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
- * sizeof(unsigned *));
- unsigned i;
- for (i = 0; i <= num_records; i++) {
- threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- }
-
-#define CLEANUP_THREAD_INFO \
- for (i = 0; i <= num_records; i++) { \
- __kmp_free(threadInfo[i]); \
- } \
- __kmp_free(threadInfo);
-
- //
- // A value of UINT_MAX means that we didn't find the field
- //
- unsigned __index;
-
-#define INIT_PROC_INFO(p) \
- for (__index = 0; __index <= maxIndex; __index++) { \
- (p)[__index] = UINT_MAX; \
- }
-
- for (i = 0; i <= num_records; i++) {
- INIT_PROC_INFO(threadInfo[i]);
+static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
+ int *line,
+ kmp_i18n_id_t *const msg_id,
+ FILE *f) {
+ *address2os = NULL;
+ *msg_id = kmp_i18n_null;
+
+ // Scan the file and count the number of "processor" (osId) fields,
+ // and find the highest value of <n> for a node_<n> field.
+ char buf[256];
+ unsigned num_records = 0;
+ while (!feof(f)) {
+ buf[sizeof(buf) - 1] = 1;
+ if (!fgets(buf, sizeof(buf), f)) {
+ // Read errors presumably because of EOF
+ break;
+ }
+
+ char s1[] = "processor";
+ if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
+ num_records++;
+ continue;
+ }
+
+ // FIXME - this will match "node_<n> <garbage>"
+ unsigned level;
+ if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
+ if (nodeIdIndex + level >= maxIndex) {
+ maxIndex = nodeIdIndex + level;
+ }
+ continue;
}
+ }
- unsigned num_avail = 0;
+ // Check for empty file / no valid processor records, or too many. The number
+ // of records can't exceed the number of valid bits in the affinity mask.
+ if (num_records == 0) {
*line = 0;
- while (! feof(f)) {
- //
- // Create an inner scoping level, so that all the goto targets at the
- // end of the loop appear in an outer scoping level. This avoids
- // warnings about jumping past an initialization to a target in the
- // same block.
- //
- {
- buf[sizeof(buf) - 1] = 1;
- bool long_line = false;
- if (! fgets(buf, sizeof(buf), f)) {
- //
- // Read errors presumably because of EOF
- //
- // If there is valid data in threadInfo[num_avail], then fake
- // a blank line in ensure that the last address gets parsed.
- //
- bool valid = false;
- for (i = 0; i <= maxIndex; i++) {
- if (threadInfo[num_avail][i] != UINT_MAX) {
- valid = true;
- }
- }
- if (! valid) {
- break;
- }
- buf[0] = 0;
- } else if (!buf[sizeof(buf) - 1]) {
- //
- // The line is longer than the buffer. Set a flag and don't
- // emit an error if we were going to ignore the line, anyway.
- //
- long_line = true;
-
-#define CHECK_LINE \
- if (long_line) { \
- CLEANUP_THREAD_INFO; \
- *msg_id = kmp_i18n_str_LongLineCpuinfo; \
- return -1; \
- }
- }
- (*line)++;
+ *msg_id = kmp_i18n_str_NoProcRecords;
+ return -1;
+ }
+ if (num_records > (unsigned)__kmp_xproc) {
+ *line = 0;
+ *msg_id = kmp_i18n_str_TooManyProcRecords;
+ return -1;
+ }
+
+ // Set the file pointer back to the beginning, so that we can scan the file
+ // again, this time performing a full parse of the data. Allocate a vector of
+ // ProcCpuInfo object, where we will place the data. Adding an extra element
+ // at the end allows us to remove a lot of extra checks for termination
+ // conditions.
+ if (fseek(f, 0, SEEK_SET) != 0) {
+ *line = 0;
+ *msg_id = kmp_i18n_str_CantRewindCpuinfo;
+ return -1;
+ }
+
+ // Allocate the array of records to store the proc info in. The dummy
+ // element at the end makes the logic in filling them out easier to code.
+ unsigned **threadInfo =
+ (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
+ unsigned i;
+ for (i = 0; i <= num_records; i++) {
+ threadInfo[i] =
+ (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
+ }
+
+#define CLEANUP_THREAD_INFO \
+ for (i = 0; i <= num_records; i++) { \
+ __kmp_free(threadInfo[i]); \
+ } \
+ __kmp_free(threadInfo);
+
+ // A value of UINT_MAX means that we didn't find the field
+ unsigned __index;
+
+#define INIT_PROC_INFO(p) \
+ for (__index = 0; __index <= maxIndex; __index++) { \
+ (p)[__index] = UINT_MAX; \
+ }
+
+ for (i = 0; i <= num_records; i++) {
+ INIT_PROC_INFO(threadInfo[i]);
+ }
+
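The parsing loop below repeats one pattern per recognized field; condensed into a standalone sketch (hypothetical input line, plain sscanf standing in for KMP_SSCANF):

    #include <climits>
    #include <cstdio>
    #include <cstring>

    int main() {
      const char buf[] = "physical id\t: 3\n"; // one line from /proc/cpuinfo
      unsigned pkgId = UINT_MAX;               // UINT_MAX == field not seen yet
      const char key[] = "physical id";
      if (strncmp(buf, key, sizeof(key) - 1) == 0) {
        const char *p = strchr(buf + sizeof(key) - 1, ':'); // skip to the value
        unsigned val;
        if (p != NULL && sscanf(p + 1, "%u\n", &val) == 1 && pkgId == UINT_MAX)
          pkgId = val;
      }
      printf("pkgId = %u\n", pkgId); // prints "pkgId = 3"
      return 0;
    }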
+ unsigned num_avail = 0;
+ *line = 0;
+ while (!feof(f)) {
+ // Create an inner scoping level, so that all the goto targets at the end of
+ // the loop appear in an outer scoping level. This avoids warnings about
+ // jumping past an initialization to a target in the same block.
+ {
+ buf[sizeof(buf) - 1] = 1;
+ bool long_line = false;
+ if (!fgets(buf, sizeof(buf), f)) {
+ // Read errors presumably because of EOF
+ // If there is valid data in threadInfo[num_avail], then fake
+ // a blank line to ensure that the last address gets parsed.
+ bool valid = false;
+ for (i = 0; i <= maxIndex; i++) {
+ if (threadInfo[num_avail][i] != UINT_MAX) {
+ valid = true;
+ }
+ }
+ if (!valid) {
+ break;
+ }
+ buf[0] = 0;
+ } else if (!buf[sizeof(buf) - 1]) {
+ // The line is longer than the buffer. Set a flag and don't
+ // emit an error if we were going to ignore the line, anyway.
+ long_line = true;
+
+#define CHECK_LINE \
+ if (long_line) { \
+ CLEANUP_THREAD_INFO; \
+ *msg_id = kmp_i18n_str_LongLineCpuinfo; \
+ return -1; \
+ }
+ }
+ (*line)++;
- char s1[] = "processor";
- if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s1) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][osIdIndex] = val;
+ char s1[] = "processor";
+ if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s1) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
+ goto no_val;
+ if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
+ goto dup_field;
+ threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
- char path[256];
- KMP_SNPRINTF(path, sizeof(path),
- "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
- threadInfo[num_avail][osIdIndex]);
- __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
-
- KMP_SNPRINTF(path, sizeof(path),
- "/sys/devices/system/cpu/cpu%u/topology/core_id",
- threadInfo[num_avail][osIdIndex]);
- __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
- continue;
+ char path[256];
+ KMP_SNPRINTF(
+ path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
+ threadInfo[num_avail][osIdIndex]);
+ __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);
+
+ KMP_SNPRINTF(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%u/topology/core_id",
+ threadInfo[num_avail][osIdIndex]);
+ __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
+ continue;
#else
- }
- char s2[] = "physical id";
- if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s2) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][pkgIdIndex] = val;
- continue;
- }
- char s3[] = "core id";
- if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s3) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][coreIdIndex] = val;
- continue;
+ }
+ char s2[] = "physical id";
+ if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s2) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
+ goto no_val;
+ if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
+ goto dup_field;
+ threadInfo[num_avail][pkgIdIndex] = val;
+ continue;
+ }
+ char s3[] = "core id";
+ if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s3) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
+ goto no_val;
+ if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
+ goto dup_field;
+ threadInfo[num_avail][coreIdIndex] = val;
+ continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
- }
- char s4[] = "thread id";
- if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s4) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][threadIdIndex] = val;
- continue;
- }
- unsigned level;
- if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
- CHECK_LINE;
- char *p = strchr(buf + sizeof(s4) - 1, ':');
- unsigned val;
- if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
- KMP_ASSERT(nodeIdIndex + level <= maxIndex);
- if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
- threadInfo[num_avail][nodeIdIndex + level] = val;
- continue;
- }
-
- //
- // We didn't recognize the leading token on the line.
- // There are lots of leading tokens that we don't recognize -
- // if the line isn't empty, go on to the next line.
- //
- if ((*buf != 0) && (*buf != '\n')) {
- //
- // If the line is longer than the buffer, read characters
- // until we find a newline.
- //
- if (long_line) {
- int ch;
- while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
- }
- continue;
- }
-
- //
- // A newline has signalled the end of the processor record.
- // Check that there aren't too many procs specified.
- //
- if ((int)num_avail == __kmp_xproc) {
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_TooManyEntries;
- return -1;
- }
-
- //
- // Check for missing fields. The osId field must be there, and we
- // currently require that the physical id field is specified, also.
- //
- if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_MissingProcField;
- return -1;
- }
- if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_MissingPhysicalIDField;
- return -1;
- }
-
- //
- // Skip this proc if it is not included in the machine model.
- //
- if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) {
- INIT_PROC_INFO(threadInfo[num_avail]);
- continue;
- }
+ }
+ char s4[] = "thread id";
+ if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s4) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
+ goto no_val;
+ if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
+ goto dup_field;
+ threadInfo[num_avail][threadIdIndex] = val;
+ continue;
+ }
+ unsigned level;
+ if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
+ CHECK_LINE;
+ char *p = strchr(buf + sizeof(s4) - 1, ':');
+ unsigned val;
+ if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
+ goto no_val;
+ KMP_ASSERT(nodeIdIndex + level <= maxIndex);
+ if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
+ goto dup_field;
+ threadInfo[num_avail][nodeIdIndex + level] = val;
+ continue;
+ }
- //
- // We have a successful parse of this proc's info.
- // Increment the counter, and prepare for the next proc.
- //
- num_avail++;
- KMP_ASSERT(num_avail <= num_records);
- INIT_PROC_INFO(threadInfo[num_avail]);
+ // We didn't recognize the leading token on the line. There are lots of
+ // leading tokens that we don't recognize - if the line isn't empty, go on
+ // to the next line.
+ if ((*buf != 0) && (*buf != '\n')) {
+ // If the line is longer than the buffer, read characters
+ // until we find a newline.
+ if (long_line) {
+ int ch;
+ while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
+ ;
}
continue;
+ }
- no_val:
+ // A newline has signalled the end of the processor record.
+ // Check that there aren't too many procs specified.
+ if ((int)num_avail == __kmp_xproc) {
CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_MissingValCpuinfo;
+ *msg_id = kmp_i18n_str_TooManyEntries;
return -1;
+ }
- dup_field:
+ // Check for missing fields. The osId field must be there, and we
+ // currently require that the physical id field is specified, also.
+ if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
+ *msg_id = kmp_i18n_str_MissingProcField;
return -1;
- }
- *line = 0;
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- unsigned teamSize = 0;
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- // check for num_records == __kmp_xproc ???
-
- //
- // If there's only one thread context to bind to, form an Address object
- // with depth 1 and return immediately (or, if affinity is off, set
- // address2os to NULL and return).
- //
- // If it is configured to omit the package level when there is only a
- // single package, the logic at the end of this routine won't work if
- // there is only a single thread - it would try to form an Address
- // object with depth 0.
- //
- KMP_ASSERT(num_avail > 0);
- KMP_ASSERT(num_avail <= num_records);
- if (num_avail == 1) {
- __kmp_ncores = 1;
- __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
- if (__kmp_affinity_verbose) {
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- }
- else {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- __kmp_affin_fullMask);
- KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- }
- int index;
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
- __kmp_str_buf_print(&buf, "1");
- for (index = maxIndex - 1; index > pkgIdIndex; index--) {
- __kmp_str_buf_print(&buf, " x 1");
- }
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
- __kmp_str_buf_free(&buf);
- }
-
- if (__kmp_affinity_type == affinity_none) {
- CLEANUP_THREAD_INFO;
- return 0;
- }
-
- *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
- Address addr(1);
- addr.labels[0] = threadInfo[0][pkgIdIndex];
- (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
-
- if (__kmp_affinity_gran_levels < 0) {
- __kmp_affinity_gran_levels = 0;
- }
-
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
- }
-
+ }
+ if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
CLEANUP_THREAD_INFO;
- return 1;
- }
-
- //
- // Sort the threadInfo table by physical Id.
- //
- qsort(threadInfo, num_avail, sizeof(*threadInfo),
- __kmp_affinity_cmp_ProcCpuInfo_phys_id);
-
- //
- // The table is now sorted by pkgId / coreId / threadId, but we really
- // don't know the radix of any of the fields. pkgId's may be sparsely
- // assigned among the chips on a system. Although coreId's are usually
- // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned
- // [0..threadsPerCore-1], we don't want to make any such assumptions.
- //
- // For that matter, we don't know what coresPerPkg and threadsPerCore
- // (or the total # packages) are at this point - we want to determine
- // that now. We only have an upper bound on the first two figures.
- //
- unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
- unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
- * sizeof(unsigned));
-
- bool assign_thread_ids = false;
- unsigned threadIdCt;
- unsigned index;
+ *msg_id = kmp_i18n_str_MissingPhysicalIDField;
+ return -1;
+ }
- restart_radix_check:
- threadIdCt = 0;
+ // Skip this proc if it is not included in the machine model.
+ if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
+ __kmp_affin_fullMask)) {
+ INIT_PROC_INFO(threadInfo[num_avail]);
+ continue;
+ }
- //
- // Initialize the counter arrays with data from threadInfo[0].
- //
- if (assign_thread_ids) {
- if (threadInfo[0][threadIdIndex] == UINT_MAX) {
- threadInfo[0][threadIdIndex] = threadIdCt++;
- }
- else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
- threadIdCt = threadInfo[0][threadIdIndex] + 1;
- }
- }
- for (index = 0; index <= maxIndex; index++) {
- counts[index] = 1;
- maxCt[index] = 1;
- totals[index] = 1;
- lastId[index] = threadInfo[0][index];;
+ // We have a successful parse of this proc's info.
+ // Increment the counter, and prepare for the next proc.
+ num_avail++;
+ KMP_ASSERT(num_avail <= num_records);
+ INIT_PROC_INFO(threadInfo[num_avail]);
}
+ continue;
- //
- // Run through the rest of the OS procs.
- //
- for (i = 1; i < num_avail; i++) {
- //
- // Find the most significant index whose id differs
- // from the id for the previous OS proc.
- //
- for (index = maxIndex; index >= threadIdIndex; index--) {
- if (assign_thread_ids && (index == threadIdIndex)) {
- //
- // Auto-assign the thread id field if it wasn't specified.
- //
- if (threadInfo[i][threadIdIndex] == UINT_MAX) {
- threadInfo[i][threadIdIndex] = threadIdCt++;
- }
+ no_val:
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_MissingValCpuinfo;
+ return -1;
- //
- // Aparrently the thread id field was specified for some
- // entries and not others. Start the thread id counter
- // off at the next higher thread id.
- //
- else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
- threadIdCt = threadInfo[i][threadIdIndex] + 1;
- }
- }
- if (threadInfo[i][index] != lastId[index]) {
- //
- // Run through all indices which are less significant,
- // and reset the counts to 1.
- //
- // At all levels up to and including index, we need to
- // increment the totals and record the last id.
- //
- unsigned index2;
- for (index2 = threadIdIndex; index2 < index; index2++) {
- totals[index2]++;
- if (counts[index2] > maxCt[index2]) {
- maxCt[index2] = counts[index2];
- }
- counts[index2] = 1;
- lastId[index2] = threadInfo[i][index2];
- }
- counts[index]++;
- totals[index]++;
- lastId[index] = threadInfo[i][index];
-
- if (assign_thread_ids && (index > threadIdIndex)) {
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- //
- // The default team size is the total #threads in the machine
- // minus 1 thread for every core that has 3 or more threads.
- //
- teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- //
- // Restart the thread counter, as we are on a new core.
- //
- threadIdCt = 0;
-
- //
- // Auto-assign the thread id field if it wasn't specified.
- //
- if (threadInfo[i][threadIdIndex] == UINT_MAX) {
- threadInfo[i][threadIdIndex] = threadIdCt++;
- }
-
- //
- // Aparrently the thread id field was specified for some
- // entries and not others. Start the thread id counter
- // off at the next higher thread id.
- //
- else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
- threadIdCt = threadInfo[i][threadIdIndex] + 1;
- }
- }
- break;
- }
+ dup_field:
+ CLEANUP_THREAD_INFO;
+ *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
+ return -1;
+ }
+ *line = 0;
+
+#if KMP_MIC && REDUCE_TEAM_SIZE
+ unsigned teamSize = 0;
+#endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ // check for num_records == __kmp_xproc ???
+
+ // If there's only one thread context to bind to, form an Address object with
+ // depth 1 and return immediately (or, if affinity is off, set address2os to
+ // NULL and return).
+ //
+ // If it is configured to omit the package level when there is only a single
+ // package, the logic at the end of this routine won't work if there is only a
+ // single thread - it would try to form an Address object with depth 0.
+ KMP_ASSERT(num_avail > 0);
+ KMP_ASSERT(num_avail <= num_records);
+ if (num_avail == 1) {
+ __kmp_ncores = 1;
+ __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
+ if (__kmp_affinity_verbose) {
+ if (!KMP_AFFINITY_CAPABLE()) {
+ KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ __kmp_affin_fullMask);
+ KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
}
- if (index < threadIdIndex) {
- //
- // If thread ids were specified, it is an error if they are not
- // unique. Also, check that we waven't already restarted the
- // loop (to be safe - shouldn't need to).
- //
- if ((threadInfo[i][threadIdIndex] != UINT_MAX)
- || assign_thread_ids) {
- __kmp_free(lastId);
- __kmp_free(totals);
- __kmp_free(maxCt);
- __kmp_free(counts);
- CLEANUP_THREAD_INFO;
- *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
- return -1;
- }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ }
+ int index;
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_str_buf_print(&buf, "1");
+ for (index = maxIndex - 1; index > pkgIdIndex; index--) {
+ __kmp_str_buf_print(&buf, " x 1");
+ }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
+ __kmp_str_buf_free(&buf);
+ }
- //
- // If the thread ids were not specified and we see entries
- // entries that are duplicates, start the loop over and
- // assign the thread ids manually.
- //
- assign_thread_ids = true;
- goto restart_radix_check;
- }
+ if (__kmp_affinity_type == affinity_none) {
+ CLEANUP_THREAD_INFO;
+ return 0;
}
-# if KMP_MIC && REDUCE_TEAM_SIZE
- //
- // The default team size is the total #threads in the machine
- // minus 1 thread for every core that has 3 or more threads.
- //
- teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
+ *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
+ Address addr(1);
+ addr.labels[0] = threadInfo[0][pkgIdIndex];
+ (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);
- for (index = threadIdIndex; index <= maxIndex; index++) {
- if (counts[index] > maxCt[index]) {
- maxCt[index] = counts[index];
- }
+ if (__kmp_affinity_gran_levels < 0) {
+ __kmp_affinity_gran_levels = 0;
}
- __kmp_nThreadsPerCore = maxCt[threadIdIndex];
- nCoresPerPkg = maxCt[coreIdIndex];
- nPackages = totals[pkgIdIndex];
-
- //
- // Check to see if the machine topology is uniform
- //
- unsigned prod = totals[maxIndex];
- for (index = threadIdIndex; index < maxIndex; index++) {
- prod *= maxCt[index];
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
}
- bool uniform = (prod == totals[threadIdIndex]);
- //
- // When affinity is off, this routine will still be called to set
- // __kmp_ncores, as well as __kmp_nThreadsPerCore,
- // nCoresPerPkg, & nPackages. Make sure all these vars are set
- // correctly, and return now if affinity is not enabled.
- //
- __kmp_ncores = totals[coreIdIndex];
+ CLEANUP_THREAD_INFO;
+ return 1;
+ }
- if (__kmp_affinity_verbose) {
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
+ // Sort the threadInfo table by physical Id.
+ qsort(threadInfo, num_avail, sizeof(*threadInfo),
+ __kmp_affinity_cmp_ProcCpuInfo_phys_id);
+
+ // The table is now sorted by pkgId / coreId / threadId, but we really don't
+ // know the radix of any of the fields. pkgId's may be sparsely assigned among
+ // the chips on a system. Although coreId's are usually assigned
+ // [0 .. coresPerPkg-1] and threadId's are usually assigned
+ // [0..threadsPerCore-1], we don't want to make any such assumptions.
+ //
+ // For that matter, we don't know what coresPerPkg and threadsPerCore (or the
+ // total # packages) are at this point - we want to determine that now. We
+ // only have an upper bound on the first two figures.
+ unsigned *counts =
+ (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
+ unsigned *maxCt =
+ (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
+ unsigned *totals =
+ (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
+ unsigned *lastId =
+ (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
+
+ bool assign_thread_ids = false;
+ unsigned threadIdCt;
+ unsigned index;
+
+restart_radix_check:
+ threadIdCt = 0;
+
+ // Initialize the counter arrays with data from threadInfo[0].
+ if (assign_thread_ids) {
+ if (threadInfo[0][threadIdIndex] == UINT_MAX) {
+ threadInfo[0][threadIdIndex] = threadIdCt++;
+ } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
+ threadIdCt = threadInfo[0][threadIdIndex] + 1;
+ }
+ }
+ for (index = 0; index <= maxIndex; index++) {
+ counts[index] = 1;
+ maxCt[index] = 1;
+ totals[index] = 1;
+ lastId[index] = threadInfo[0][index];
+ }
+
+ // Run through the rest of the OS procs.
+ for (i = 1; i < num_avail; i++) {
+ // Find the most significant index whose id differs from the id for the
+ // previous OS proc.
+ for (index = maxIndex; index >= threadIdIndex; index--) {
+ if (assign_thread_ids && (index == threadIdIndex)) {
+ // Auto-assign the thread id field if it wasn't specified.
+ if (threadInfo[i][threadIdIndex] == UINT_MAX) {
+ threadInfo[i][threadIdIndex] = threadIdCt++;
+ }
+ // Apparently the thread id field was specified for some entries and not
+ // others. Start the thread id counter off at the next higher thread id.
+ else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
+ threadIdCt = threadInfo[i][threadIdIndex] + 1;
}
- else {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask);
- KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
- }
- KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
- if (uniform) {
- KMP_INFORM(Uniform, "KMP_AFFINITY");
- } else {
- KMP_INFORM(NonUniform, "KMP_AFFINITY");
- }
+ }
+ if (threadInfo[i][index] != lastId[index]) {
+ // Run through all indices which are less significant, and reset the
+ // counts to 1. At all levels up to and including index, we need to
+ // increment the totals and record the last id.
+ unsigned index2;
+ for (index2 = threadIdIndex; index2 < index; index2++) {
+ totals[index2]++;
+ if (counts[index2] > maxCt[index2]) {
+ maxCt[index2] = counts[index2];
+ }
+ counts[index2] = 1;
+ lastId[index2] = threadInfo[i][index2];
}
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
+ counts[index]++;
+ totals[index]++;
+ lastId[index] = threadInfo[i][index];
+
+ if (assign_thread_ids && (index > threadIdIndex)) {
+
+#if KMP_MIC && REDUCE_TEAM_SIZE
+ // The default team size is the total #threads in the machine
+ // minus 1 thread for every core that has 3 or more threads.
+ teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
+#endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ // Restart the thread counter, as we are on a new core.
+ threadIdCt = 0;
+
+ // Auto-assign the thread id field if it wasn't specified.
+ if (threadInfo[i][threadIdIndex] == UINT_MAX) {
+ threadInfo[i][threadIdIndex] = threadIdCt++;
+ }
- __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
- for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
- __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
+ // Apparently the thread id field was specified for some entries and
+ // not others. Start the thread id counter off at the next higher
+ // thread id.
+ else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
+ threadIdCt = threadInfo[i][threadIdIndex] + 1;
+ }
}
- KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
- maxCt[threadIdIndex], __kmp_ncores);
-
- __kmp_str_buf_free(&buf);
- }
-
-# if KMP_MIC && REDUCE_TEAM_SIZE
- //
- // Set the default team size.
- //
- if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
- __kmp_dflt_team_nth = teamSize;
- KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
- __kmp_dflt_team_nth));
- }
-# endif // KMP_MIC && REDUCE_TEAM_SIZE
-
- KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
- KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
- __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
- for (i = 0; i < num_avail; ++i) { // fill the os indices
- __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
+ break;
+ }
}
-
- if (__kmp_affinity_type == affinity_none) {
+ if (index < threadIdIndex) {
+ // If thread ids were specified, it is an error if they are not unique.
+ // Also, check that we haven't already restarted the loop (to be safe -
+ // shouldn't need to).
+ if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
__kmp_free(lastId);
__kmp_free(totals);
__kmp_free(maxCt);
__kmp_free(counts);
CLEANUP_THREAD_INFO;
- return 0;
- }
-
- //
- // Count the number of levels which have more nodes at that level than
- // at the parent's level (with there being an implicit root node of
- // the top level). This is equivalent to saying that there is at least
- // one node at this level which has a sibling. These levels are in the
- // map, and the package level is always in the map.
- //
- bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
- int level = 0;
- for (index = threadIdIndex; index < maxIndex; index++) {
- KMP_ASSERT(totals[index] >= totals[index + 1]);
- inMap[index] = (totals[index] > totals[index + 1]);
- }
- inMap[maxIndex] = (totals[maxIndex] > 1);
- inMap[pkgIdIndex] = true;
-
- int depth = 0;
- for (index = threadIdIndex; index <= maxIndex; index++) {
- if (inMap[index]) {
- depth++;
- }
- }
- KMP_ASSERT(depth > 0);
-
- //
- // Construct the data structure that is to be returned.
- //
- *address2os = (AddrUnsPair*)
- __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
- int pkgLevel = -1;
- int coreLevel = -1;
- int threadLevel = -1;
-
- for (i = 0; i < num_avail; ++i) {
- Address addr(depth);
- unsigned os = threadInfo[i][osIdIndex];
- int src_index;
- int dst_index = 0;
-
- for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
- if (! inMap[src_index]) {
- continue;
- }
- addr.labels[dst_index] = threadInfo[i][src_index];
- if (src_index == pkgIdIndex) {
- pkgLevel = dst_index;
- }
- else if (src_index == coreIdIndex) {
- coreLevel = dst_index;
- }
- else if (src_index == threadIdIndex) {
- threadLevel = dst_index;
- }
- dst_index++;
- }
- (*address2os)[i] = AddrUnsPair(addr, os);
- }
-
- if (__kmp_affinity_gran_levels < 0) {
- //
- // Set the granularity level based on what levels are modeled
- // in the machine topology map.
- //
- unsigned src_index;
- __kmp_affinity_gran_levels = 0;
- for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
- if (! inMap[src_index]) {
- continue;
- }
- switch (src_index) {
- case threadIdIndex:
- if (__kmp_affinity_gran > affinity_gran_thread) {
- __kmp_affinity_gran_levels++;
- }
-
- break;
- case coreIdIndex:
- if (__kmp_affinity_gran > affinity_gran_core) {
- __kmp_affinity_gran_levels++;
- }
- break;
+ *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
+ return -1;
+ }
- case pkgIdIndex:
- if (__kmp_affinity_gran > affinity_gran_package) {
- __kmp_affinity_gran_levels++;
- }
- break;
- }
- }
+ // If the thread ids were not specified and we see entries that
+ // are duplicates, start the loop over and assign the thread ids manually.
+ assign_thread_ids = true;
+ goto restart_radix_check;
+ }
+ }
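// A minimal standalone sketch of the radix-counting loop above, assuming a
// fixed three-level table (hypothetical names rows/nRows; level 0 = threadId,
// 1 = coreId, 2 = pkgId) with rows already sorted by (pkgId, coreId, threadId)
// and no duplicate rows (the real code restarts with assign_thread_ids in that
// case). totals[l] ends up as the number of nodes at level l and maxCt[l] as
// the largest number of children under any single parent at level l.
static void count_levels_sketch(const unsigned (*rows)[3], int nRows,
                                unsigned totals[3], unsigned maxCt[3]) {
  unsigned counts[3], lastId[3];
  for (int l = 0; l < 3; ++l) {
    counts[l] = maxCt[l] = totals[l] = 1;
    lastId[l] = rows[0][l];
  }
  for (int i = 1; i < nRows; ++i) {
    for (int l = 2; l >= 0; --l) { // most significant differing level first
      if (rows[i][l] == lastId[l])
        continue;
      for (int l2 = 0; l2 < l; ++l2) { // reset the less significant levels
        totals[l2]++;
        if (counts[l2] > maxCt[l2])
          maxCt[l2] = counts[l2];
        counts[l2] = 1;
        lastId[l2] = rows[i][l2];
      }
      counts[l]++;
      totals[l]++;
      lastId[l] = rows[i][l];
      break;
    }
  }
  for (int l = 0; l < 3; ++l) // flush the counts of the final parent
    if (counts[l] > maxCt[l])
      maxCt[l] = counts[l];
}
// For 2 packages x 3 cores x 2 threads this gives maxCt[0] = 2 (threads per
// core), maxCt[1] = 3 (cores per package) and totals[2] = 2 (packages), which
// is how __kmp_nThreadsPerCore, nCoresPerPkg and nPackages are read off below.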
+
+#if KMP_MIC && REDUCE_TEAM_SIZE
+ // The default team size is the total #threads in the machine
+ // minus 1 thread for every core that has 3 or more threads.
+ teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
+#endif // KMP_MIC && REDUCE_TEAM_SIZE
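// For example, on a 4-core MIC-style part with 4 hardware threads per core,
// each core contributes threadIdCt - 1 = 3, so teamSize ends up as 12 rather
// than the full 16 hardware threads.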
+
+ for (index = threadIdIndex; index <= maxIndex; index++) {
+ if (counts[index] > maxCt[index]) {
+ maxCt[index] = counts[index];
+ }
+ }
+
+ __kmp_nThreadsPerCore = maxCt[threadIdIndex];
+ nCoresPerPkg = maxCt[coreIdIndex];
+ nPackages = totals[pkgIdIndex];
+
+ // Check to see if the machine topology is uniform
+ unsigned prod = totals[maxIndex];
+ for (index = threadIdIndex; index < maxIndex; index++) {
+ prod *= maxCt[index];
+ }
+ bool uniform = (prod == totals[threadIdIndex]);
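// For example (assuming no extra node levels, so maxIndex is the package
// level): 2 packages x 3 cores x 2 threads gives prod = 2 * 3 * 2 = 12, which
// equals totals[threadIdIndex], so the topology is uniform; if one package
// instead had 4 cores, totals[threadIdIndex] would be 14 while
// prod = 2 * 4 * 2 = 16, and the topology would be reported as non-uniform.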
+
+ // When affinity is off, this routine will still be called to set
+ // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
+ // Make sure all these vars are set correctly, and return now if affinity is
+ // not enabled.
+ __kmp_ncores = totals[coreIdIndex];
+
+ if (__kmp_affinity_verbose) {
+ if (!KMP_AFFINITY_CAPABLE()) {
+ KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
+ } else {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ __kmp_affin_fullMask);
+ KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ }
+ KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
+ if (uniform) {
+ KMP_INFORM(Uniform, "KMP_AFFINITY");
+ } else {
+ KMP_INFORM(NonUniform, "KMP_AFFINITY");
+ }
}
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
- if (__kmp_affinity_verbose) {
- __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
- coreLevel, threadLevel);
- }
+ __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
+ for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
+ __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
+ }
+ KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
+ maxCt[threadIdIndex], __kmp_ncores);
+
+ __kmp_str_buf_free(&buf);
+ }
+
+#if KMP_MIC && REDUCE_TEAM_SIZE
+ // Set the default team size.
+ if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
+ __kmp_dflt_team_nth = teamSize;
+ KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
+ "__kmp_dflt_team_nth = %d\n",
+ __kmp_dflt_team_nth));
+ }
+#endif // KMP_MIC && REDUCE_TEAM_SIZE
+
+ KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
+ KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc);
+ __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
+ for (i = 0; i < num_avail; ++i) { // fill the os indices
+ __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
+ }
- __kmp_free(inMap);
+ if (__kmp_affinity_type == affinity_none) {
__kmp_free(lastId);
__kmp_free(totals);
__kmp_free(maxCt);
__kmp_free(counts);
CLEANUP_THREAD_INFO;
- return depth;
-}
-
+ return 0;
+ }
-//
-// Create and return a table of affinity masks, indexed by OS thread ID.
-// This routine handles OR'ing together all the affinity masks of threads
-// that are sufficiently close, if granularity > fine.
-//
-static kmp_affin_mask_t *
-__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
- AddrUnsPair *address2os, unsigned numAddrs)
-{
- //
- // First form a table of affinity masks in order of OS thread id.
- //
- unsigned depth;
- unsigned maxOsId;
- unsigned i;
+ // Count the number of levels which have more nodes at that level than at the
+ // parent's level (with there being an implicit root node of the top level).
+ // This is equivalent to saying that there is at least one node at this level
+ // which has a sibling. These levels are in the map, and the package level is
+ // always in the map.
+ bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
+ int level = 0;
+ for (index = threadIdIndex; index < maxIndex; index++) {
+ KMP_ASSERT(totals[index] >= totals[index + 1]);
+ inMap[index] = (totals[index] > totals[index + 1]);
+ }
+ inMap[maxIndex] = (totals[maxIndex] > 1);
+ inMap[pkgIdIndex] = true;
+
+ int depth = 0;
+ for (index = threadIdIndex; index <= maxIndex; index++) {
+ if (inMap[index]) {
+ depth++;
+ }
+ }
+ KMP_ASSERT(depth > 0);
+
+ // Construct the data structure that is to be returned.
+ *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
+ int pkgLevel = -1;
+ int coreLevel = -1;
+ int threadLevel = -1;
+
+ for (i = 0; i < num_avail; ++i) {
+ Address addr(depth);
+ unsigned os = threadInfo[i][osIdIndex];
+ int src_index;
+ int dst_index = 0;
- KMP_ASSERT(numAddrs > 0);
- depth = address2os[0].first.depth;
+ for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
+ if (!inMap[src_index]) {
+ continue;
+ }
+ addr.labels[dst_index] = threadInfo[i][src_index];
+ if (src_index == pkgIdIndex) {
+ pkgLevel = dst_index;
+ } else if (src_index == coreIdIndex) {
+ coreLevel = dst_index;
+ } else if (src_index == threadIdIndex) {
+ threadLevel = dst_index;
+ }
+ dst_index++;
+ }
+ (*address2os)[i] = AddrUnsPair(addr, os);
+ }
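// For example (assuming only the pkg/core/thread levels are present): with
// 2 packages, 4 cores each and 1 thread per core, the totals are {8, 8, 2}
// for thread/core/pkg, so the thread level is dropped (8 == 8) while the core
// and package levels are kept, giving depth 2 with pkgLevel = 0, coreLevel = 1
// and threadLevel left at -1.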
- maxOsId = 0;
- for (i = 0; i < numAddrs; i++) {
- unsigned osId = address2os[i].second;
- if (osId > maxOsId) {
- maxOsId = osId;
+ if (__kmp_affinity_gran_levels < 0) {
+ // Set the granularity level based on what levels are modeled
+ // in the machine topology map.
+ unsigned src_index;
+ __kmp_affinity_gran_levels = 0;
+ for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
+ if (!inMap[src_index]) {
+ continue;
+ }
+ switch (src_index) {
+ case threadIdIndex:
+ if (__kmp_affinity_gran > affinity_gran_thread) {
+ __kmp_affinity_gran_levels++;
}
- }
- kmp_affin_mask_t *osId2Mask;
- KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1));
- //
- // Sort the address2os table according to physical order. Doing so
- // will put all threads on the same core/package/node in consecutive
- // locations.
- //
- qsort(address2os, numAddrs, sizeof(*address2os),
- __kmp_affinity_cmp_Address_labels);
+ break;
+ case coreIdIndex:
+ if (__kmp_affinity_gran > affinity_gran_core) {
+ __kmp_affinity_gran_levels++;
+ }
+ break;
- KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
- if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
- KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
- }
- if (__kmp_affinity_gran_levels >= (int)depth) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffThreadsMayMigrate);
+ case pkgIdIndex:
+ if (__kmp_affinity_gran > affinity_gran_package) {
+ __kmp_affinity_gran_levels++;
}
+ break;
+ }
}
+ }
- //
- // Run through the table, forming the masks for all threads on each
- // core. Threads on the same core will have identical "Address"
- // objects, not considering the last level, which must be the thread
- // id. All threads on a core will appear consecutively.
- //
- unsigned unique = 0;
- unsigned j = 0; // index of 1st thread on core
- unsigned leader = 0;
- Address *leaderAddr = &(address2os[0].first);
- kmp_affin_mask_t *sum;
- KMP_CPU_ALLOC_ON_STACK(sum);
- KMP_CPU_ZERO(sum);
- KMP_CPU_SET(address2os[0].second, sum);
- for (i = 1; i < numAddrs; i++) {
- //
- // If this thread is sufficiently close to the leader (within the
- // granularity setting), then set the bit for this os thread in the
- // affinity mask for this group, and go on to the next thread.
- //
- if (leaderAddr->isClose(address2os[i].first,
- __kmp_affinity_gran_levels)) {
- KMP_CPU_SET(address2os[i].second, sum);
- continue;
- }
-
- //
- // For every thread in this group, copy the mask to the thread's
- // entry in the osId2Mask table. Mark the first address as a
- // leader.
- //
- for (; j < i; j++) {
- unsigned osId = address2os[j].second;
- KMP_DEBUG_ASSERT(osId <= maxOsId);
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
- KMP_CPU_COPY(mask, sum);
- address2os[j].first.leader = (j == leader);
- }
- unique++;
-
- //
- // Start a new mask.
- //
- leader = i;
- leaderAddr = &(address2os[i].first);
- KMP_CPU_ZERO(sum);
- KMP_CPU_SET(address2os[i].second, sum);
+ if (__kmp_affinity_verbose) {
+ __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
+ coreLevel, threadLevel);
+ }
+
+ __kmp_free(inMap);
+ __kmp_free(lastId);
+ __kmp_free(totals);
+ __kmp_free(maxCt);
+ __kmp_free(counts);
+ CLEANUP_THREAD_INFO;
+ return depth;
+}
+
+// Create and return a table of affinity masks, indexed by OS thread ID.
+// This routine handles OR'ing together all the affinity masks of threads
+// that are sufficiently close, if granularity > fine.
+static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
+ unsigned *numUnique,
+ AddrUnsPair *address2os,
+ unsigned numAddrs) {
+ // First form a table of affinity masks in order of OS thread id.
+ unsigned depth;
+ unsigned maxOsId;
+ unsigned i;
+
+ KMP_ASSERT(numAddrs > 0);
+ depth = address2os[0].first.depth;
+
+ maxOsId = 0;
+ for (i = 0; i < numAddrs; i++) {
+ unsigned osId = address2os[i].second;
+ if (osId > maxOsId) {
+ maxOsId = osId;
+ }
+ }
+ kmp_affin_mask_t *osId2Mask;
+ KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
+
+ // Sort the address2os table according to physical order. Doing so will put
+ // all threads on the same core/package/node in consecutive locations.
+ qsort(address2os, numAddrs, sizeof(*address2os),
+ __kmp_affinity_cmp_Address_labels);
+
+ KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
+ if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
+ KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
+ }
+ if (__kmp_affinity_gran_levels >= (int)depth) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffThreadsMayMigrate);
+ }
+ }
+
+ // Run through the table, forming the masks for all threads on each core.
+ // Threads on the same core will have identical "Address" objects, not
+ // considering the last level, which must be the thread id. All threads on a
+ // core will appear consecutively.
+ unsigned unique = 0;
+ unsigned j = 0; // index of 1st thread on core
+ unsigned leader = 0;
+ Address *leaderAddr = &(address2os[0].first);
+ kmp_affin_mask_t *sum;
+ KMP_CPU_ALLOC_ON_STACK(sum);
+ KMP_CPU_ZERO(sum);
+ KMP_CPU_SET(address2os[0].second, sum);
+ for (i = 1; i < numAddrs; i++) {
+ // If this thread is sufficiently close to the leader (within the
+ // granularity setting), then set the bit for this os thread in the
+ // affinity mask for this group, and go on to the next thread.
+ if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
+ KMP_CPU_SET(address2os[i].second, sum);
+ continue;
}
- //
- // For every thread in last group, copy the mask to the thread's
- // entry in the osId2Mask table.
- //
+ // For every thread in this group, copy the mask to the thread's entry in
+ // the osId2Mask table. Mark the first address as a leader.
for (; j < i; j++) {
- unsigned osId = address2os[j].second;
- KMP_DEBUG_ASSERT(osId <= maxOsId);
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
- KMP_CPU_COPY(mask, sum);
- address2os[j].first.leader = (j == leader);
+ unsigned osId = address2os[j].second;
+ KMP_DEBUG_ASSERT(osId <= maxOsId);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
+ KMP_CPU_COPY(mask, sum);
+ address2os[j].first.leader = (j == leader);
}
unique++;
- KMP_CPU_FREE_FROM_STACK(sum);
- *maxIndex = maxOsId;
- *numUnique = unique;
- return osId2Mask;
-}
+ // Start a new mask.
+ leader = i;
+ leaderAddr = &(address2os[i].first);
+ KMP_CPU_ZERO(sum);
+ KMP_CPU_SET(address2os[i].second, sum);
+ }
+ // For every thread in last group, copy the mask to the thread's
+ // entry in the osId2Mask table.
+ for (; j < i; j++) {
+ unsigned osId = address2os[j].second;
+ KMP_DEBUG_ASSERT(osId <= maxOsId);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
+ KMP_CPU_COPY(mask, sum);
+ address2os[j].first.leader = (j == leader);
+ }
+ unique++;
+ KMP_CPU_FREE_FROM_STACK(sum);
+
+ *maxIndex = maxOsId;
+ *numUnique = unique;
+ return osId2Mask;
+}
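// For example, with granularity set to core (__kmp_affinity_gran_levels == 1)
// and sorted addresses {pkg0,core0,thr0}->os 0, {pkg0,core0,thr1}->os 4,
// {pkg0,core1,thr0}->os 1, {pkg0,core1,thr1}->os 5, the two threads of each
// core compare as "close", so osId2Mask[0] and osId2Mask[4] both end up as
// {0,4}, osId2Mask[1] and osId2Mask[5] as {1,5}, and numUnique is 2, one mask
// per core.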
-//
// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
-//
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;
-#define ADD_MASK(_mask) \
- { \
- if (nextNewMask >= numNewMasks) { \
- int i; \
- numNewMasks *= 2; \
- kmp_affin_mask_t* temp; \
- KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
- for(i=0;i<numNewMasks/2;i++) { \
- kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); \
- kmp_affin_mask_t* dest = KMP_CPU_INDEX(temp, i); \
- KMP_CPU_COPY(dest, src); \
- } \
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks/2); \
- newMasks = temp; \
- } \
- KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
- nextNewMask++; \
- }
-
-#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
- { \
- if (((_osId) > _maxOsId) || \
- (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
- && (__kmp_affinity_type != affinity_none))) { \
- KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
- } \
- } \
- else { \
- ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
- } \
- }
-
+#define ADD_MASK(_mask) \
+ { \
+ if (nextNewMask >= numNewMasks) { \
+ int i; \
+ numNewMasks *= 2; \
+ kmp_affin_mask_t *temp; \
+ KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \
+ for (i = 0; i < numNewMasks / 2; i++) { \
+ kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \
+ kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \
+ KMP_CPU_COPY(dest, src); \
+ } \
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \
+ newMasks = temp; \
+ } \
+ KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \
+ nextNewMask++; \
+ }
+
+#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \
+ { \
+ if (((_osId) > _maxOsId) || \
+ (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
+ if (__kmp_affinity_verbose || \
+ (__kmp_affinity_warnings && \
+ (__kmp_affinity_type != affinity_none))) { \
+ KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
+ } \
+ } else { \
+ ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \
+ } \
+ }
-//
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
-//
-static void
-__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
- unsigned int *out_numMasks, const char *proclist,
- kmp_affin_mask_t *osId2Mask, int maxOsId)
-{
- int i;
- const char *scan = proclist;
- const char *next = proclist;
-
- //
- // We use malloc() for the temporary mask vector,
- // so that we can use realloc() to extend it.
- //
- numNewMasks = 2;
- KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
- nextNewMask = 0;
- kmp_affin_mask_t *sumMask;
- KMP_CPU_ALLOC(sumMask);
- int setSize = 0;
-
- for (;;) {
- int start, end, stride;
-
- SKIP_WS(scan);
- next = scan;
- if (*next == '\0') {
- break;
- }
-
- if (*next == '{') {
- int num;
- setSize = 0;
- next++; // skip '{'
- SKIP_WS(next);
- scan = next;
-
- //
- // Read the first integer in the set.
- //
- KMP_ASSERT2((*next >= '0') && (*next <= '9'),
- "bad proclist");
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(num >= 0, "bad explicit proc list");
-
- //
- // Copy the mask for that osId to the sum (union) mask.
- //
- if ((num > maxOsId) ||
- (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
- KMP_CPU_ZERO(sumMask);
- }
- else {
- KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
- setSize = 1;
- }
-
- for (;;) {
- //
- // Check for end of set.
- //
- SKIP_WS(next);
- if (*next == '}') {
- next++; // skip '}'
- break;
- }
-
- //
- // Skip optional comma.
- //
- if (*next == ',') {
- next++;
- }
- SKIP_WS(next);
-
- //
- // Read the next integer in the set.
- //
- scan = next;
- KMP_ASSERT2((*next >= '0') && (*next <= '9'),
- "bad explicit proc list");
-
- SKIP_DIGITS(next);
- num = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(num >= 0, "bad explicit proc list");
-
- //
- // Add the mask for that osId to the sum mask.
- //
- if ((num > maxOsId) ||
- (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
- }
- else {
- KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
- setSize++;
- }
- }
- if (setSize > 0) {
- ADD_MASK(sumMask);
- }
-
- SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
+static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
+ unsigned int *out_numMasks,
+ const char *proclist,
+ kmp_affin_mask_t *osId2Mask,
+ int maxOsId) {
+ int i;
+ const char *scan = proclist;
+ const char *next = proclist;
+
+ // We use malloc() for the temporary mask vector, so that we can use
+ // realloc() to extend it.
+ numNewMasks = 2;
+ KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
+ nextNewMask = 0;
+ kmp_affin_mask_t *sumMask;
+ KMP_CPU_ALLOC(sumMask);
+ int setSize = 0;
+
+ for (;;) {
+ int start, end, stride;
+
+ SKIP_WS(scan);
+ next = scan;
+ if (*next == '\0') {
+ break;
+ }
+
+ if (*next == '{') {
+ int num;
+ setSize = 0;
+ next++; // skip '{'
+ SKIP_WS(next);
+ scan = next;
+
+ // Read the first integer in the set.
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist");
+ SKIP_DIGITS(next);
+ num = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(num >= 0, "bad explicit proc list");
+
+ // Copy the mask for that osId to the sum (union) mask.
+ if ((num > maxOsId) ||
+ (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, num);
}
+ KMP_CPU_ZERO(sumMask);
+ } else {
+ KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
+ setSize = 1;
+ }
- //
- // Read the first integer.
- //
- KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(start >= 0, "bad explicit proc list");
+ for (;;) {
+ // Check for end of set.
SKIP_WS(next);
-
- //
- // If this isn't a range, then add a mask to the list and go on.
- //
- if (*next != '-') {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
-
- //
- // Skip optional comma.
- //
- if (*next == ',') {
- next++;
- }
- scan = next;
- continue;
+ if (*next == '}') {
+ next++; // skip '}'
+ break;
}
- //
- // This is a range. Skip over the '-' and read in the 2nd int.
- //
- next++; // skip '-'
+ // Skip optional comma.
+ if (*next == ',') {
+ next++;
+ }
SKIP_WS(next);
+
+ // Read the next integer in the set.
scan = next;
KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
+
SKIP_DIGITS(next);
- end = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(end >= 0, "bad explicit proc list");
+ num = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(num >= 0, "bad explicit proc list");
- //
- // Check for a stride parameter
- //
- stride = 1;
- SKIP_WS(next);
- if (*next == ':') {
- //
- // A stride is specified. Skip over the ':" and read the 3rd int.
- //
- int sign = +1;
- next++; // skip ':'
- SKIP_WS(next);
- scan = next;
- if (*next == '-') {
- sign = -1;
- next++;
- SKIP_WS(next);
- scan = next;
- }
- KMP_ASSERT2((*next >= '0') && (*next <= '9'),
- "bad explicit proc list");
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_ASSERT2(stride >= 0, "bad explicit proc list");
- stride *= sign;
- }
-
- //
- // Do some range checks.
- //
- KMP_ASSERT2(stride != 0, "bad explicit proc list");
- if (stride > 0) {
- KMP_ASSERT2(start <= end, "bad explicit proc list");
- }
- else {
- KMP_ASSERT2(start >= end, "bad explicit proc list");
- }
- KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
-
- //
- // Add the mask for each OS proc # to the list.
- //
- if (stride > 0) {
- do {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
- start += stride;
- } while (start <= end);
- }
- else {
- do {
- ADD_MASK_OSID(start, osId2Mask, maxOsId);
- start += stride;
- } while (start >= end);
+ // Add the mask for that osId to the sum mask.
+ if ((num > maxOsId) ||
+ (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, num);
+ }
+ } else {
+ KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
+ setSize++;
}
+ }
+ if (setSize > 0) {
+ ADD_MASK(sumMask);
+ }
- //
- // Skip optional comma.
- //
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ continue;
+ }
+
+ // Read the first integer.
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
+ SKIP_DIGITS(next);
+ start = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(start >= 0, "bad explicit proc list");
+ SKIP_WS(next);
+
+ // If this isn't a range, then add a mask to the list and go on.
+ if (*next != '-') {
+ ADD_MASK_OSID(start, osId2Mask, maxOsId);
+
+ // Skip optional comma.
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ continue;
+ }
+
+ // This is a range. Skip over the '-' and read in the 2nd int.
+ next++; // skip '-'
+ SKIP_WS(next);
+ scan = next;
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
+ SKIP_DIGITS(next);
+ end = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(end >= 0, "bad explicit proc list");
+
+ // Check for a stride parameter
+ stride = 1;
+ SKIP_WS(next);
+ if (*next == ':') {
+ // A stride is specified. Skip over the ':' and read the 3rd int.
+ int sign = +1;
+ next++; // skip ':'
+ SKIP_WS(next);
+ scan = next;
+ if (*next == '-') {
+ sign = -1;
+ next++;
SKIP_WS(next);
- if (*next == ',') {
- next++;
- }
scan = next;
+ }
+ KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT2(stride >= 0, "bad explicit proc list");
+ stride *= sign;
}
- *out_numMasks = nextNewMask;
- if (nextNewMask == 0) {
- *out_masks = NULL;
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- return;
- }
- KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
- for(i = 0; i < nextNewMask; i++) {
- kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
- kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
- KMP_CPU_COPY(dest, src);
+ // Do some range checks.
+ KMP_ASSERT2(stride != 0, "bad explicit proc list");
+ if (stride > 0) {
+ KMP_ASSERT2(start <= end, "bad explicit proc list");
+ } else {
+ KMP_ASSERT2(start >= end, "bad explicit proc list");
}
+ KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");
+
+ // Add the mask for each OS proc # to the list.
+ if (stride > 0) {
+ do {
+ ADD_MASK_OSID(start, osId2Mask, maxOsId);
+ start += stride;
+ } while (start <= end);
+ } else {
+ do {
+ ADD_MASK_OSID(start, osId2Mask, maxOsId);
+ start += stride;
+ } while (start >= end);
+ }
+
+ // Skip optional comma.
+ SKIP_WS(next);
+ if (*next == ',') {
+ next++;
+ }
+ scan = next;
+ }
+
+ *out_numMasks = nextNewMask;
+ if (nextNewMask == 0) {
+ *out_masks = NULL;
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- KMP_CPU_FREE(sumMask);
+ return;
+ }
+ KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
+ for (i = 0; i < nextNewMask; i++) {
+ kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
+ kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
+ KMP_CPU_COPY(dest, src);
+ }
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+ KMP_CPU_FREE(sumMask);
}
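// For example, a proclist such as "{0,4},1-3:2" (assuming fine granularity,
// so each entry of osId2Mask covers a single OS proc) yields three masks:
// {0,4} from the braced set, whose members are unioned into one mask, and
// {1} and {3} from the range 1-3 with stride 2, one mask per listed OS proc.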
-
-# if OMP_40_ENABLED
+#if OMP_40_ENABLED
/*-----------------------------------------------------------------------------
-
Re-parse the OMP_PLACES proc id list, forming the newMasks for the different
places. Again, Here is the grammar:
@@ -3044,756 +2707,574 @@ subplace := num : num : signed
signed := num
signed := + signed
signed := - signed
-
-----------------------------------------------------------------------------*/
-static void
-__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
- int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
-{
- const char *next;
-
- for (;;) {
- int start, count, stride, i;
+static void __kmp_process_subplace_list(const char **scan,
+ kmp_affin_mask_t *osId2Mask,
+ int maxOsId, kmp_affin_mask_t *tempMask,
+ int *setSize) {
+ const char *next;
- //
- // Read in the starting proc id
- //
- SKIP_WS(*scan);
- KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
- "bad explicit places list");
- next = *scan;
- SKIP_DIGITS(next);
- start = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(start >= 0);
- *scan = next;
-
- //
- // valid follow sets are ',' ':' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}' || **scan == ',') {
- if ((start > maxOsId) ||
- (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
- (*setSize)++;
- }
- if (**scan == '}') {
- break;
- }
- (*scan)++; // skip ','
- continue;
- }
- KMP_ASSERT2(**scan == ':', "bad explicit places list");
- (*scan)++; // skip ':'
+ for (;;) {
+ int start, count, stride, i;
- //
- // Read count parameter
- //
- SKIP_WS(*scan);
- KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
- "bad explicit places list");
- next = *scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(count >= 0);
- *scan = next;
-
- //
- // valid follow sets are ',' ':' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}' || **scan == ',') {
- for (i = 0; i < count; i++) {
- if ((start > maxOsId) ||
- (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
- break; // don't proliferate warnings for large count
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
- start++;
- (*setSize)++;
- }
- }
- if (**scan == '}') {
- break;
- }
- (*scan)++; // skip ','
- continue;
- }
- KMP_ASSERT2(**scan == ':', "bad explicit places list");
- (*scan)++; // skip ':'
+ // Read in the starting proc id
+ SKIP_WS(*scan);
+ KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
+ next = *scan;
+ SKIP_DIGITS(next);
+ start = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(start >= 0);
+ *scan = next;
- //
- // Read stride parameter
- //
- int sign = +1;
- for (;;) {
- SKIP_WS(*scan);
- if (**scan == '+') {
- (*scan)++; // skip '+'
- continue;
- }
- if (**scan == '-') {
- sign *= -1;
- (*scan)++; // skip '-'
- continue;
- }
- break;
- }
- SKIP_WS(*scan);
- KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
- "bad explicit places list");
- next = *scan;
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(stride >= 0);
- *scan = next;
- stride *= sign;
-
- //
- // valid follow sets are ',' and '}'
- //
- SKIP_WS(*scan);
- if (**scan == '}' || **scan == ',') {
- for (i = 0; i < count; i++) {
- if ((start > maxOsId) ||
- (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, start);
- }
- break; // don't proliferate warnings for large count
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
- start += stride;
- (*setSize)++;
- }
- }
- if (**scan == '}') {
- break;
- }
- (*scan)++; // skip ','
- continue;
+ // valid follow sets are ',' ':' and '}'
+ SKIP_WS(*scan);
+ if (**scan == '}' || **scan == ',') {
+ if ((start > maxOsId) ||
+ (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, start);
}
-
- KMP_ASSERT2(0, "bad explicit places list");
+ } else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
+ (*setSize)++;
+ }
+ if (**scan == '}') {
+ break;
+ }
+ (*scan)++; // skip ','
+ continue;
}
-}
+ KMP_ASSERT2(**scan == ':', "bad explicit places list");
+ (*scan)++; // skip ':'
+ // Read count parameter
+ SKIP_WS(*scan);
+ KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
+ next = *scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(count >= 0);
+ *scan = next;
-static void
-__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
- int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
-{
- const char *next;
-
- //
- // valid follow sets are '{' '!' and num
- //
+ // valid follow sets are ',' ':' and '}'
SKIP_WS(*scan);
- if (**scan == '{') {
- (*scan)++; // skip '{'
- __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
- setSize);
- KMP_ASSERT2(**scan == '}', "bad explicit places list");
- (*scan)++; // skip '}'
- }
- else if (**scan == '!') {
- (*scan)++; // skip '!'
- __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
- KMP_CPU_COMPLEMENT(maxOsId, tempMask);
- }
- else if ((**scan >= '0') && (**scan <= '9')) {
- next = *scan;
- SKIP_DIGITS(next);
- int num = __kmp_str_to_int(*scan, *next);
- KMP_ASSERT(num >= 0);
- if ((num > maxOsId) ||
- (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffIgnoreInvalidProcID, num);
- }
- }
- else {
- KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
- (*setSize)++;
+ if (**scan == '}' || **scan == ',') {
+ for (i = 0; i < count; i++) {
+ if ((start > maxOsId) ||
+ (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, start);
+ }
+ break; // don't proliferate warnings for large count
+ } else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
+ start++;
+ (*setSize)++;
}
- *scan = next; // skip num
- }
- else {
- KMP_ASSERT2(0, "bad explicit places list");
+ }
+ if (**scan == '}') {
+ break;
+ }
+ (*scan)++; // skip ','
+ continue;
}
-}
-
-
-//static void
-void
-__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
- unsigned int *out_numMasks, const char *placelist,
- kmp_affin_mask_t *osId2Mask, int maxOsId)
-{
- int i,j,count,stride,sign;
- const char *scan = placelist;
- const char *next = placelist;
-
- numNewMasks = 2;
- KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
- nextNewMask = 0;
-
- // tempMask is modified based on the previous or initial
- // place to form the current place
- // previousMask contains the previous place
- kmp_affin_mask_t *tempMask;
- kmp_affin_mask_t *previousMask;
- KMP_CPU_ALLOC(tempMask);
- KMP_CPU_ZERO(tempMask);
- KMP_CPU_ALLOC(previousMask);
- KMP_CPU_ZERO(previousMask);
- int setSize = 0;
+ KMP_ASSERT2(**scan == ':', "bad explicit places list");
+ (*scan)++; // skip ':'
+ // Read stride parameter
+ int sign = +1;
for (;;) {
- __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
+ SKIP_WS(*scan);
+ if (**scan == '+') {
+ (*scan)++; // skip '+'
+ continue;
+ }
+ if (**scan == '-') {
+ sign *= -1;
+ (*scan)++; // skip '-'
+ continue;
+ }
+ break;
+ }
+ SKIP_WS(*scan);
+ KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
+ next = *scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(stride >= 0);
+ *scan = next;
+ stride *= sign;
- //
- // valid follow sets are ',' ':' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0' || *scan == ',') {
- if (setSize > 0) {
- ADD_MASK(tempMask);
- }
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
- if (*scan == '\0') {
- break;
- }
- scan++; // skip ','
- continue;
+ // valid follow sets are ',' and '}'
+ SKIP_WS(*scan);
+ if (**scan == '}' || **scan == ',') {
+ for (i = 0; i < count; i++) {
+ if ((start > maxOsId) ||
+ (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, start);
+ }
+ break; // don't proliferate warnings for large count
+ } else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
+ start += stride;
+ (*setSize)++;
}
+ }
+ if (**scan == '}') {
+ break;
+ }
+ (*scan)++; // skip ','
+ continue;
+ }
- KMP_ASSERT2(*scan == ':', "bad explicit places list");
- scan++; // skip ':'
+ KMP_ASSERT2(0, "bad explicit places list");
+ }
+}
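// For example, the subplace "0:4:2" unions OS procs 0, 2, 4 and 6 into
// tempMask (start 0, count 4, stride 2), plain "0:4" unions 0 through 3, and
// a bare "5" adds just OS proc 5, assuming all of those ids are valid in
// osId2Mask.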
- //
- // Read count parameter
- //
- SKIP_WS(scan);
- KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
- "bad explicit places list");
- next = scan;
- SKIP_DIGITS(next);
- count = __kmp_str_to_int(scan, *next);
- KMP_ASSERT(count >= 0);
- scan = next;
+static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
+ int maxOsId, kmp_affin_mask_t *tempMask,
+ int *setSize) {
+ const char *next;
+
+ // valid follow sets are '{' '!' and num
+ SKIP_WS(*scan);
+ if (**scan == '{') {
+ (*scan)++; // skip '{'
+ __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
+ KMP_ASSERT2(**scan == '}', "bad explicit places list");
+ (*scan)++; // skip '}'
+ } else if (**scan == '!') {
+ (*scan)++; // skip '!'
+ __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
+ KMP_CPU_COMPLEMENT(maxOsId, tempMask);
+ } else if ((**scan >= '0') && (**scan <= '9')) {
+ next = *scan;
+ SKIP_DIGITS(next);
+ int num = __kmp_str_to_int(*scan, *next);
+ KMP_ASSERT(num >= 0);
+ if ((num > maxOsId) ||
+ (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, num);
+ }
+ } else {
+ KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
+ (*setSize)++;
+ }
+ *scan = next; // skip num
+ } else {
+ KMP_ASSERT2(0, "bad explicit places list");
+ }
+}
+
+// static void
+void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
+ unsigned int *out_numMasks,
+ const char *placelist,
+ kmp_affin_mask_t *osId2Mask,
+ int maxOsId) {
+ int i, j, count, stride, sign;
+ const char *scan = placelist;
+ const char *next = placelist;
+
+ numNewMasks = 2;
+ KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
+ nextNewMask = 0;
+
+ // tempMask is modified based on the previous or initial
+ // place to form the current place
+ // previousMask contains the previous place
+ kmp_affin_mask_t *tempMask;
+ kmp_affin_mask_t *previousMask;
+ KMP_CPU_ALLOC(tempMask);
+ KMP_CPU_ZERO(tempMask);
+ KMP_CPU_ALLOC(previousMask);
+ KMP_CPU_ZERO(previousMask);
+ int setSize = 0;
+
+ for (;;) {
+ __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
+
+ // valid follow sets are ',' ':' and EOL
+ SKIP_WS(scan);
+ if (*scan == '\0' || *scan == ',') {
+ if (setSize > 0) {
+ ADD_MASK(tempMask);
+ }
+ KMP_CPU_ZERO(tempMask);
+ setSize = 0;
+ if (*scan == '\0') {
+ break;
+ }
+ scan++; // skip ','
+ continue;
+ }
- //
- // valid follow sets are ',' ':' and EOL
- //
- SKIP_WS(scan);
- if (*scan == '\0' || *scan == ',') {
- stride = +1;
- }
- else {
- KMP_ASSERT2(*scan == ':', "bad explicit places list");
- scan++; // skip ':'
-
- //
- // Read stride parameter
- //
- sign = +1;
- for (;;) {
- SKIP_WS(scan);
- if (*scan == '+') {
- scan++; // skip '+'
- continue;
- }
- if (*scan == '-') {
- sign *= -1;
- scan++; // skip '-'
- continue;
- }
- break;
- }
- SKIP_WS(scan);
- KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
- "bad explicit places list");
- next = scan;
- SKIP_DIGITS(next);
- stride = __kmp_str_to_int(scan, *next);
- KMP_DEBUG_ASSERT(stride >= 0);
- scan = next;
- stride *= sign;
- }
+ KMP_ASSERT2(*scan == ':', "bad explicit places list");
+ scan++; // skip ':'
- // Add places determined by initial_place : count : stride
- for (i = 0; i < count; i++) {
- if (setSize == 0) {
- break;
- }
- // Add the current place, then build the next place (tempMask) from that
- KMP_CPU_COPY(previousMask, tempMask);
- ADD_MASK(previousMask);
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
- KMP_CPU_SET_ITERATE(j, previousMask) {
- if (! KMP_CPU_ISSET(j, previousMask)) {
- continue;
- }
- if ((j+stride > maxOsId) || (j+stride < 0) ||
- (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
- (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) {
- if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
- KMP_WARNING(AffIgnoreInvalidProcID, j+stride);
- }
- continue;
- }
- KMP_CPU_SET(j+stride, tempMask);
- setSize++;
- }
- }
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
+ // Read count parameter
+ SKIP_WS(scan);
+ KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
+ next = scan;
+ SKIP_DIGITS(next);
+ count = __kmp_str_to_int(scan, *next);
+ KMP_ASSERT(count >= 0);
+ scan = next;
+
+ // valid follow sets are ',' ':' and EOL
+ SKIP_WS(scan);
+ if (*scan == '\0' || *scan == ',') {
+ stride = +1;
+ } else {
+ KMP_ASSERT2(*scan == ':', "bad explicit places list");
+ scan++; // skip ':'
- //
- // valid follow sets are ',' and EOL
- //
+ // Read stride parameter
+ sign = +1;
+ for (;;) {
SKIP_WS(scan);
- if (*scan == '\0') {
- break;
- }
- if (*scan == ',') {
- scan++; // skip ','
- continue;
+ if (*scan == '+') {
+ scan++; // skip '+'
+ continue;
+ }
+ if (*scan == '-') {
+ sign *= -1;
+ scan++; // skip '-'
+ continue;
}
+ break;
+ }
+ SKIP_WS(scan);
+ KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
+ next = scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_DEBUG_ASSERT(stride >= 0);
+ scan = next;
+ stride *= sign;
+ }
- KMP_ASSERT2(0, "bad explicit places list");
+ // Add places determined by initial_place : count : stride
+ for (i = 0; i < count; i++) {
+ if (setSize == 0) {
+ break;
+ }
+ // Add the current place, then build the next place (tempMask) from that
+ KMP_CPU_COPY(previousMask, tempMask);
+ ADD_MASK(previousMask);
+ KMP_CPU_ZERO(tempMask);
+ setSize = 0;
+ KMP_CPU_SET_ITERATE(j, previousMask) {
+ if (!KMP_CPU_ISSET(j, previousMask)) {
+ continue;
+ }
+ if ((j + stride > maxOsId) || (j + stride < 0) ||
+ (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
+ (!KMP_CPU_ISSET(j + stride,
+ KMP_CPU_INDEX(osId2Mask, j + stride)))) {
+ if ((__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) &&
+ i < count - 1) {
+ KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
+ }
+ continue;
+ }
+ KMP_CPU_SET(j + stride, tempMask);
+ setSize++;
+ }
}
+ KMP_CPU_ZERO(tempMask);
+ setSize = 0;
- *out_numMasks = nextNewMask;
- if (nextNewMask == 0) {
- *out_masks = NULL;
- KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
- return;
+ // valid follow sets are ',' and EOL
+ SKIP_WS(scan);
+ if (*scan == '\0') {
+ break;
}
- KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
- KMP_CPU_FREE(tempMask);
- KMP_CPU_FREE(previousMask);
- for(i = 0; i < nextNewMask; i++) {
- kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i);
- kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i);
- KMP_CPU_COPY(dest, src);
+ if (*scan == ',') {
+ scan++; // skip ','
+ continue;
}
+
+ KMP_ASSERT2(0, "bad explicit places list");
+ }
+
+ *out_numMasks = nextNewMask;
+ if (nextNewMask == 0) {
+ *out_masks = NULL;
KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
+ return;
+ }
+ KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
+ KMP_CPU_FREE(tempMask);
+ KMP_CPU_FREE(previousMask);
+ for (i = 0; i < nextNewMask; i++) {
+ kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
+ kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
+ KMP_CPU_COPY(dest, src);
+ }
+ KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
-# endif /* OMP_40_ENABLED */
+#endif /* OMP_40_ENABLED */
#undef ADD_MASK
#undef ADD_MASK_OSID
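
For orientation, the reformatted block above expands each explicit place of the
form place:count:stride by repeatedly copying the current mask, adding it via
ADD_MASK, and shifting every OS id in it by the stride, dropping ids that fall
outside the full mask or the known OS-id range. A minimal standalone C++ sketch
of that expansion, using ordinary containers instead of kmp_affin_mask_t and a
hypothetical helper name, looks roughly like this:

#include <cstdio>
#include <set>
#include <vector>

// Hypothetical helper (illustration only, not the runtime's code): expand one
// "place : count : stride" triple over the set of OS ids allowed by the mask.
static std::vector<std::set<int>>
expand_place(std::set<int> place, int count, int stride,
             const std::set<int> &valid_ids) {
  std::vector<std::set<int>> places;
  for (int i = 0; i < count && !place.empty(); ++i) {
    places.push_back(place); // add the current place
    std::set<int> next;      // then build the next place by shifting every id
    for (int id : place) {
      int shifted = id + stride;
      if (valid_ids.count(shifted)) // drop ids outside the valid/full mask
        next.insert(shifted);
    }
    place.swap(next);
  }
  return places;
}

int main() {
  std::set<int> valid = {0, 1, 2, 3, 4, 5, 6, 7};
  // "{0,1} : 3 : 2" expands to the places {0,1}, {2,3}, {4,5}
  for (const std::set<int> &p : expand_place({0, 1}, 3, 2, valid)) {
    for (int id : p)
      std::printf("%d ", id);
    std::printf("\n");
  }
  return 0;
}

Running it with the triple {0,1}:3:2 over ids 0-7 prints the three places
{0,1}, {2,3} and {4,5}, which mirrors the sequence of masks the loop above
appends to newMasks.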
#if KMP_USE_HWLOC
-static int
-__kmp_hwloc_count_children_by_type(
- hwloc_topology_t t, hwloc_obj_t o, hwloc_obj_type_t type, hwloc_obj_t* f)
-{
- if (!hwloc_compare_types(o->type, type)) {
- if (*f == NULL)
- *f = o; // output first descendant found
- return 1;
- }
- int sum = 0;
- for (unsigned i = 0; i < o->arity; i++)
- sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
- return sum; // will be 0 if no one found (as PU arity is 0)
+static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
+ hwloc_obj_type_t type,
+ hwloc_obj_t* f) {
+ if (!hwloc_compare_types(o->type, type)) {
+ if (*f == NULL)
+ *f = o; // output first descendant found
+ return 1;
+ }
+ int sum = 0;
+ for (unsigned i = 0; i < o->arity; i++)
+ sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
+ return sum; // will be 0 if no one found (as PU arity is 0)
}
-static int
-__kmp_hwloc_count_children_by_depth(
- hwloc_topology_t t, hwloc_obj_t o, unsigned depth, hwloc_obj_t* f)
-{
- if (o->depth == depth) {
- if (*f == NULL)
- *f = o; // output first descendant found
- return 1;
- }
- int sum = 0;
- for (unsigned i = 0; i < o->arity; i++)
- sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
- return sum; // will be 0 if no one found (as PU arity is 0)
-}
-
-static int
-__kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o)
-{ // skip PUs descendants of the object o
- int skipped = 0;
- hwloc_obj_t hT = NULL;
- int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
- for (int i = 0; i < N; ++i) {
- KMP_DEBUG_ASSERT(hT);
- unsigned idx = hT->os_index;
- if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
- KMP_CPU_CLR(idx, __kmp_affin_fullMask);
- KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
- ++skipped;
- }
- hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
- }
- return skipped; // count number of skipped units
-}
-
-static int
-__kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o)
-{ // check if obj has PUs present in fullMask
- hwloc_obj_t hT = NULL;
- int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
- for (int i = 0; i < N; ++i) {
- KMP_DEBUG_ASSERT(hT);
- unsigned idx = hT->os_index;
- if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
- return 1; // found PU
- hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
- }
- return 0; // no PUs found
+static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
+ hwloc_obj_t o, unsigned depth,
+ hwloc_obj_t* f) {
+ if (o->depth == depth) {
+ if (*f == NULL)
+ *f = o; // output first descendant found
+ return 1;
+ }
+ int sum = 0;
+ for (unsigned i = 0; i < o->arity; i++)
+ sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
+ return sum; // will be 0 if no one found (as PU arity is 0)
+}
+
+static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
+ // skip PUs descendants of the object o
+ int skipped = 0;
+ hwloc_obj_t hT = NULL;
+ int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
+ for (int i = 0; i < N; ++i) {
+ KMP_DEBUG_ASSERT(hT);
+ unsigned idx = hT->os_index;
+ if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ ++skipped;
+ }
+ hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
+ }
+ return skipped; // count number of skipped units
+}
+
+static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
+ // check if obj has PUs present in fullMask
+ hwloc_obj_t hT = NULL;
+ int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
+ for (int i = 0; i < N; ++i) {
+ KMP_DEBUG_ASSERT(hT);
+ unsigned idx = hT->os_index;
+ if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
+ return 1; // found PU
+ hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
+ }
+ return 0; // no PUs found
}
#endif // KMP_USE_HWLOC
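
The four helpers above share one pattern: recursively walk an hwloc object's
children, stop at the requested type or depth, and remember the first match so
callers can keep iterating with hwloc_get_next_obj_by_type(). As a rough
standalone illustration (not part of the runtime), the same recursion can count
the PUs under the root of a freshly loaded topology:

#include <hwloc.h>
#include <cstdio>

// Same recursion as __kmp_hwloc_count_children_by_type above, shown on a
// freshly loaded topology (illustration only, separate from the runtime).
static int count_by_type(hwloc_obj_t o, hwloc_obj_type_t type,
                         hwloc_obj_t *first) {
  if (!hwloc_compare_types(o->type, type)) { // 0 means the types match
    if (*first == NULL)
      *first = o; // remember the first matching descendant
    return 1;
  }
  int sum = 0;
  for (unsigned i = 0; i < o->arity; i++)
    sum += count_by_type(o->children[i], type, first);
  return sum;
}

int main() {
  hwloc_topology_t topo;
  hwloc_topology_init(&topo);
  hwloc_topology_load(topo);
  hwloc_obj_t first_pu = NULL;
  int npus = count_by_type(hwloc_get_root_obj(topo), HWLOC_OBJ_PU, &first_pu);
  std::printf("PUs under the root object: %d\n", npus);
  hwloc_topology_destroy(topo);
  return 0;
}

hwloc_compare_types() returns 0 when the two types are equal, which is why both
the helper above and this sketch negate its result.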
-static void
-__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
-{
- AddrUnsPair *newAddr;
- if (__kmp_hws_requested == 0)
- goto _exit; // no topology limiting actions requested, exit
+static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) {
+ AddrUnsPair *newAddr;
+ if (__kmp_hws_requested == 0)
+ goto _exit; // no topology limiting actions requested, exit
#if KMP_USE_HWLOC
- if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
- // Number of subobjects calculated dynamically, this works fine for
- // any non-uniform topology.
- // L2 cache objects are determined by depth, other objects - by type.
- hwloc_topology_t tp = __kmp_hwloc_topology;
- int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
- int nCr=0, nTr=0; // number of requested units
- int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
- hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
- int L2depth, idx;
-
- // check support of extensions ----------------------------------
- int numa_support = 0, tile_support = 0;
- if (__kmp_pu_os_idx)
- hT = hwloc_get_pu_obj_by_os_index(
- tp, __kmp_pu_os_idx[__kmp_avail_proc - 1]);
- else
- hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
- if (hT == NULL) { // something's gone wrong
- KMP_WARNING(AffHWSubsetUnsupported);
+ if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
+ // Number of subobjects calculated dynamically, this works fine for
+ // any non-uniform topology.
+ // L2 cache objects are determined by depth, other objects - by type.
+ hwloc_topology_t tp = __kmp_hwloc_topology;
+ int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped
+ int nCr=0, nTr=0; // number of requested units
+ int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters
+ hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to)
+ int L2depth, idx;
+
+ // check support of extensions ----------------------------------
+ int numa_support = 0, tile_support = 0;
+ if (__kmp_pu_os_idx)
+ hT = hwloc_get_pu_obj_by_os_index(tp,
+ __kmp_pu_os_idx[__kmp_avail_proc - 1]);
+ else
+ hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
+ if (hT == NULL) { // something's gone wrong
+ KMP_WARNING(AffHWSubsetUnsupported);
+ goto _exit;
+ }
+ // check NUMA node
+ hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
+ hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
+ if (hN != NULL && hN->depth > hS->depth) {
+ numa_support = 1; // 1 in case socket includes node(s)
+ } else if (__kmp_hws_node.num > 0) {
+ // don't support sockets inside NUMA node (no such HW found for testing)
+ KMP_WARNING(AffHWSubsetUnsupported);
+ goto _exit;
+ }
+ // check L2 cache, get object by depth because of multiple caches
+ L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
+ hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
+ if (hL != NULL && __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
+ &hC) > 1) {
+ tile_support = 1; // no sense to count L2 if it includes single core
+ } else if (__kmp_hws_tile.num > 0) {
+ if (__kmp_hws_core.num == 0) {
+ __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
+ __kmp_hws_tile.num = 0;
+ } else {
+ // L2 and core are both requested, but represent same object
+ KMP_WARNING(AffHWSubsetInvalid);
goto _exit;
}
- // check NUMA node
- hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
- hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
- if (hN != NULL && hN->depth > hS->depth) {
- numa_support = 1; // 1 in case socket includes node(s)
- } else if (__kmp_hws_node.num > 0) {
- // don't support sockets inside NUMA node (no such HW found for testing)
- KMP_WARNING(AffHWSubsetUnsupported);
+ }
+ // end of check of extensions -----------------------------------
+
+ // fill in unset items, validate settings -----------------------
+ if (__kmp_hws_socket.num == 0)
+ __kmp_hws_socket.num = nPackages; // use all available sockets
+ if (__kmp_hws_socket.offset >= nPackages) {
+ KMP_WARNING(AffHWSubsetManySockets);
+ goto _exit;
+ }
+ if (numa_support) {
+ int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
+ &hN); // num nodes in socket
+ if (__kmp_hws_node.num == 0)
+ __kmp_hws_node.num = NN; // use all available nodes
+ if (__kmp_hws_node.offset >= NN) {
+ KMP_WARNING(AffHWSubsetManyNodes);
goto _exit;
}
- // check L2 cahce, get object by depth because of multiple caches
- L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
- hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
- if (hL != NULL && __kmp_hwloc_count_children_by_type(
- tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
- tile_support = 1; // no sense to count L2 if it includes single core
- } else if (__kmp_hws_tile.num > 0) {
- if (__kmp_hws_core.num == 0) {
- __kmp_hws_core = __kmp_hws_tile; // replace L2 with core
- __kmp_hws_tile.num = 0;
- } else {
- // L2 and core are both requested, but represent same object
- KMP_WARNING(AffHWSubsetInvalid);
+ if (tile_support) {
+ // get num tiles in node
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
+ if (__kmp_hws_tile.num == 0) {
+ __kmp_hws_tile.num = NL + 1;
+ } // use all available tiles, some node may have more tiles, thus +1
+ if (__kmp_hws_tile.offset >= NL) {
+ KMP_WARNING(AffHWSubsetManyTiles);
goto _exit;
}
- }
- // end of check of extensions -----------------------------------
-
- // fill in unset items, validate settings -----------------------
- if (__kmp_hws_socket.num == 0)
- __kmp_hws_socket.num = nPackages; // use all available sockets
- if (__kmp_hws_socket.offset >= nPackages) {
- KMP_WARNING(AffHWSubsetManySockets);
+ int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
+ &hC); // num cores in tile
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
goto _exit;
- }
- if (numa_support) {
- int NN = __kmp_hwloc_count_children_by_type(
- tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in socket
- if (__kmp_hws_node.num == 0)
- __kmp_hws_node.num = NN; // use all available nodes
- if (__kmp_hws_node.offset >= NN) {
- KMP_WARNING(AffHWSubsetManyNodes);
+ }
+ } else { // tile_support
+ int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
+ &hC); // num cores in node
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
goto _exit;
}
- if (tile_support) {
- // get num tiles in node
- int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
- if (__kmp_hws_tile.num == 0) {
- __kmp_hws_tile.num = NL + 1;
- } // use all available tiles, some node may have more tiles, thus +1
- if (__kmp_hws_tile.offset >= NL) {
- KMP_WARNING(AffHWSubsetManyTiles);
- goto _exit;
- }
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } else { // tile_support
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in node
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } // tile_support
- } else { // numa_support
- if (tile_support) {
- // get num tiles in socket
- int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
- if (__kmp_hws_tile.num == 0)
- __kmp_hws_tile.num = NL; // use all available tiles
- if (__kmp_hws_tile.offset >= NL) {
- KMP_WARNING(AffHWSubsetManyTiles);
- goto _exit;
- }
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } else { // tile_support
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = NC; // use all available cores
- if (__kmp_hws_core.offset >= NC) {
- KMP_WARNING(AffHWSubsetManyCores);
- goto _exit;
- }
- } // tile_support
- }
- if (__kmp_hws_proc.num == 0)
- __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
- if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
- KMP_WARNING(AffHWSubsetManyProcs);
- goto _exit;
+ } // tile_support
+ } else { // numa_support
+ if (tile_support) {
+ // get num tiles in socket
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
+ if (__kmp_hws_tile.num == 0)
+ __kmp_hws_tile.num = NL; // use all available tiles
+ if (__kmp_hws_tile.offset >= NL) {
+ KMP_WARNING(AffHWSubsetManyTiles);
+ goto _exit;
+ }
+ int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
+ &hC); // num cores in tile
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
+ goto _exit;
+ }
+ } else { // tile_support
+ int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
+ &hC); // num cores in socket
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = NC; // use all available cores
+ if (__kmp_hws_core.offset >= NC) {
+ KMP_WARNING(AffHWSubsetManyCores);
+ goto _exit;
+ }
+ } // tile_support
+ }
+ if (__kmp_hws_proc.num == 0)
+ __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs
+ if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
+ KMP_WARNING(AffHWSubsetManyProcs);
+ goto _exit;
+ }
+ // end of validation --------------------------------------------
+
+ if (pAddr) // pAddr is NULL in case of affinity_none
+ newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
+ __kmp_avail_proc); // max size
+ // main loop to form HW subset ----------------------------------
+ hS = NULL;
+ int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
+ for (int s = 0; s < NP; ++s) {
+ // Check Socket -----------------------------------------------
+ hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
+ if (!__kmp_hwloc_obj_has_PUs(tp, hS))
+ continue; // skip socket if all PUs are out of fullMask
+ ++nS; // only count objects that have PUs in the affinity mask
+ if (nS <= __kmp_hws_socket.offset ||
+ nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
+ continue; // move to next socket
}
- // end of validation --------------------------------------------
-
- if (pAddr) // pAddr is NULL in case of affinity_none
- newAddr = (AddrUnsPair *)__kmp_allocate(
- sizeof(AddrUnsPair) * __kmp_avail_proc); // max size
- // main loop to form HW subset ----------------------------------
- hS = NULL;
- int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
- for (int s = 0; s < NP; ++s) {
- // Check Socket -----------------------------------------------
- hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
- if (!__kmp_hwloc_obj_has_PUs(tp, hS))
- continue; // skip socket if all PUs are out of fullMask
- ++nS; // only count objects those have PUs in affinity mask
- if (nS <= __kmp_hws_socket.offset ||
- nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket
- continue; // move to next socket
- }
- nCr = 0; // count number of cores per socket
- // socket requested, go down the topology tree
- // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
- if (numa_support) {
- nN = 0;
- hN = NULL;
- int NN = __kmp_hwloc_count_children_by_type(
- tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in current socket
- for (int n = 0; n < NN; ++n) {
- // Check NUMA Node ----------------------------------------
- if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
- hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
- continue; // skip node if all PUs are out of fullMask
- }
- ++nN;
- if (nN <= __kmp_hws_node.offset ||
- nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
- // skip node as not requested
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
- hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
- continue; // move to next node
- }
- // node requested, go down the topology tree
- if (tile_support) {
- nL = 0;
- hL = NULL;
- int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
- for (int l = 0; l < NL; ++l) {
- // Check L2 (tile) ------------------------------------
- if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
- hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
- continue; // skip tile if all PUs are out of fullMask
- }
- ++nL;
- if (nL <= __kmp_hws_tile.offset ||
- nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
- // skip tile as not requested
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
- hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
- continue; // move to next tile
- }
- // tile requested, go down the topology tree
- nC = 0;
- hC = NULL;
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in current tile
- for (int c = 0; c < NC; ++c) {
- // Check Core ---------------------------------------
- if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
- hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
- continue; // skip core if all PUs are out of fullMask
- }
- ++nC;
- if (nC <= __kmp_hws_core.offset ||
- nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
- // skip node as not requested
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
- hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
- continue; // move to next node
- }
- // core requested, go down to PUs
- nT = 0;
- nTr = 0;
- hT = NULL;
- int NT = __kmp_hwloc_count_children_by_type(
- tp, hC, HWLOC_OBJ_PU, &hT); // num procs in current core
- for (int t = 0; t < NT; ++t) {
- // Check PU ---------------------------------------
- idx = hT->os_index;
- if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
- hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
- continue; // skip PU if not in fullMask
- }
- ++nT;
- if (nT <= __kmp_hws_proc.offset ||
- nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
- // skip PU
- KMP_CPU_CLR(idx, __kmp_affin_fullMask);
- ++n_old;
- KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
- hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
- continue; // move to next node
- }
- ++nTr;
- if (pAddr) // collect requested thread's data
- newAddr[n_new] = (*pAddr)[n_old];
- ++n_new;
- ++n_old;
- hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
- } // threads loop
- if (nTr > 0) {
- ++nCr; // num cores per socket
- ++nCo; // total num cores
- if (nTr > nTpC)
- nTpC = nTr; // calc max threads per core
- }
- hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
- } // cores loop
- hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
- } // tiles loop
- } else { // tile_support
- // no tiles, check cores
- nC = 0;
- hC = NULL;
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in current node
- for (int c = 0; c < NC; ++c) {
- // Check Core ---------------------------------------
- if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
- hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
- continue; // skip core if all PUs are out of fullMask
- }
- ++nC;
- if (nC <= __kmp_hws_core.offset ||
- nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
- // skip node as not requested
- n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
- hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
- continue; // move to next node
- }
- // core requested, go down to PUs
- nT = 0;
- nTr = 0;
- hT = NULL;
- int NT = __kmp_hwloc_count_children_by_type(
- tp, hC, HWLOC_OBJ_PU, &hT);
- for (int t = 0; t < NT; ++t) {
- // Check PU ---------------------------------------
- idx = hT->os_index;
- if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
- hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
- continue; // skip PU if not in fullMask
- }
- ++nT;
- if (nT <= __kmp_hws_proc.offset ||
- nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
- // skip PU
- KMP_CPU_CLR(idx, __kmp_affin_fullMask);
- ++n_old;
- KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
- hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
- continue; // move to next node
- }
- ++nTr;
- if (pAddr) // collect requested thread's data
- newAddr[n_new] = (*pAddr)[n_old];
- ++n_new;
- ++n_old;
- hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
- } // threads loop
- if (nTr > 0) {
- ++nCr; // num cores per socket
- ++nCo; // total num cores
- if (nTr > nTpC)
- nTpC = nTr; // calc max threads per core
- }
- hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
- } // cores loop
- } // tiles support
+ nCr = 0; // count number of cores per socket
+ // socket requested, go down the topology tree
+ // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile)
+ if (numa_support) {
+ nN = 0;
+ hN = NULL;
+ // num nodes in current socket
+ int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
+ &hN);
+ for (int n = 0; n < NN; ++n) {
+ // Check NUMA Node ----------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
+ hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
+ continue; // skip node if all PUs are out of fullMask
+ }
+ ++nN;
+ if (nN <= __kmp_hws_node.offset ||
+ nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
+ // skip node as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node
hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
- } // nodes loop
- } else { // numa_support
- // no NUMA support
+ continue; // move to next node
+ }
+ // node requested, go down the topology tree
if (tile_support) {
nL = 0;
hL = NULL;
- int NL = __kmp_hwloc_count_children_by_depth(
- tp, hS, L2depth, &hL); // num tiles in current socket
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
for (int l = 0; l < NL; ++l) {
// Check L2 (tile) ------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
@@ -3811,8 +3292,9 @@ __kmp_apply_thread_places(AddrUnsPair **
// tile requested, go down the topology tree
nC = 0;
hC = NULL;
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hL, HWLOC_OBJ_CORE, &hC); // num cores per tile
+ // num cores in current tile
+ int NC = __kmp_hwloc_count_children_by_type(tp, hL,
+ HWLOC_OBJ_CORE, &hC);
for (int c = 0; c < NC; ++c) {
// Check Core ---------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
@@ -3831,8 +3313,9 @@ __kmp_apply_thread_places(AddrUnsPair **
nT = 0;
nTr = 0;
hT = NULL;
- int NT = __kmp_hwloc_count_children_by_type(
- tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
+ // num procs in current core
+ int NT = __kmp_hwloc_count_children_by_type(tp, hC,
+ HWLOC_OBJ_PU, &hT);
for (int t = 0; t < NT; ++t) {
// Check PU ---------------------------------------
idx = hT->os_index;
@@ -3871,10 +3354,11 @@ __kmp_apply_thread_places(AddrUnsPair **
// no tiles, check cores
nC = 0;
hC = NULL;
- int NC = __kmp_hwloc_count_children_by_type(
- tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket
+ // num cores in current node
+ int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
+ &hC);
for (int c = 0; c < NC; ++c) {
- // Check Core -------------------------------------------
+ // Check Core ---------------------------------------
if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
continue; // skip core if all PUs are out of fullMask
@@ -3891,8 +3375,8 @@ __kmp_apply_thread_places(AddrUnsPair **
nT = 0;
nTr = 0;
hT = NULL;
- int NT = __kmp_hwloc_count_children_by_type(
- tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core
+ int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
+ &hT);
for (int t = 0; t < NT; ++t) {
// Check PU ---------------------------------------
idx = hT->os_index;
@@ -3926,85 +3410,232 @@ __kmp_apply_thread_places(AddrUnsPair **
hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
} // cores loop
} // tiles support
- } // numa_support
- if (nCr > 0) { // found cores?
- ++nPkg; // num sockets
- if (nCr > nCpP)
- nCpP = nCr; // calc max cores per socket
- }
- } // sockets loop
-
- // check the subset is valid
- KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
- KMP_DEBUG_ASSERT(nPkg > 0);
- KMP_DEBUG_ASSERT(nCpP > 0);
- KMP_DEBUG_ASSERT(nTpC > 0);
- KMP_DEBUG_ASSERT(nCo > 0);
- KMP_DEBUG_ASSERT(nPkg <= nPackages);
- KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
- KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
- KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
-
- nPackages = nPkg; // correct num sockets
- nCoresPerPkg = nCpP; // correct num cores per socket
- __kmp_nThreadsPerCore = nTpC; // correct num threads per core
- __kmp_avail_proc = n_new; // correct num procs
- __kmp_ncores = nCo; // correct num cores
- // hwloc topology method end
- } else
-#endif // KMP_USE_HWLOC
- {
- int n_old = 0, n_new = 0, proc_num = 0;
- if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
- KMP_WARNING(AffHWSubsetNoHWLOC);
- goto _exit;
+ hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
+ } // nodes loop
+ } else { // numa_support
+ // no NUMA support
+ if (tile_support) {
+ nL = 0;
+ hL = NULL;
+ // num tiles in current socket
+ int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
+ for (int l = 0; l < NL; ++l) {
+ // Check L2 (tile) ------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ continue; // skip tile if all PUs are out of fullMask
+ }
+ ++nL;
+ if (nL <= __kmp_hws_tile.offset ||
+ nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
+ // skip tile as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ continue; // move to next tile
+ }
+ // tile requested, go down the topology tree
+ nC = 0;
+ hC = NULL;
+ // num cores per tile
+ int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
+ &hC);
+ for (int c = 0; c < NC; ++c) {
+ // Check Core ---------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // skip core if all PUs are out of fullMask
+ }
+ ++nC;
+ if (nC <= __kmp_hws_core.offset ||
+ nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
+ // skip node as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // move to next node
+ }
+ // core requested, go down to PUs
+ nT = 0;
+ nTr = 0;
+ hT = NULL;
+ // num procs per core
+ int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
+ &hT);
+ for (int t = 0; t < NT; ++t) {
+ // Check PU ---------------------------------------
+ idx = hT->os_index;
+ if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // skip PU if not in fullMask
+ }
+ ++nT;
+ if (nT <= __kmp_hws_proc.offset ||
+ nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
+ // skip PU
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ ++n_old;
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // move to next node
+ }
+ ++nTr;
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ ++n_new;
+ ++n_old;
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ } // threads loop
+ if (nTr > 0) {
+ ++nCr; // num cores per socket
+ ++nCo; // total num cores
+ if (nTr > nTpC)
+ nTpC = nTr; // calc max threads per core
+ }
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ } // cores loop
+ hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
+ } // tiles loop
+ } else { // tile_support
+ // no tiles, check cores
+ nC = 0;
+ hC = NULL;
+ // num cores in socket
+ int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
+ &hC);
+ for (int c = 0; c < NC; ++c) {
+ // Check Core -------------------------------------------
+ if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // skip core if all PUs are out of fullMask
+ }
+ ++nC;
+ if (nC <= __kmp_hws_core.offset ||
+ nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
+ // skip node as not requested
+ n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ continue; // move to next node
+ }
+ // core requested, go down to PUs
+ nT = 0;
+ nTr = 0;
+ hT = NULL;
+ // num procs per core
+ int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU,
+ &hT);
+ for (int t = 0; t < NT; ++t) {
+ // Check PU ---------------------------------------
+ idx = hT->os_index;
+ if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // skip PU if not in fullMask
+ }
+ ++nT;
+ if (nT <= __kmp_hws_proc.offset ||
+ nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
+ // skip PU
+ KMP_CPU_CLR(idx, __kmp_affin_fullMask);
+ ++n_old;
+ KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx));
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ continue; // move to next node
+ }
+ ++nTr;
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ ++n_new;
+ ++n_old;
+ hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
+ } // threads loop
+ if (nTr > 0) {
+ ++nCr; // num cores per socket
+ ++nCo; // total num cores
+ if (nTr > nTpC)
+ nTpC = nTr; // calc max threads per core
+ }
+ hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
+ } // cores loop
+ } // tiles support
+ } // numa_support
+ if (nCr > 0) { // found cores?
+ ++nPkg; // num sockets
+ if (nCr > nCpP)
+ nCpP = nCr; // calc max cores per socket
}
- if (__kmp_hws_socket.num == 0)
- __kmp_hws_socket.num = nPackages; // use all available sockets
- if (__kmp_hws_core.num == 0)
- __kmp_hws_core.num = nCoresPerPkg; // use all available cores
- if (__kmp_hws_proc.num == 0 ||
+ } // sockets loop
+
+ // check the subset is valid
+ KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
+ KMP_DEBUG_ASSERT(nPkg > 0);
+ KMP_DEBUG_ASSERT(nCpP > 0);
+ KMP_DEBUG_ASSERT(nTpC > 0);
+ KMP_DEBUG_ASSERT(nCo > 0);
+ KMP_DEBUG_ASSERT(nPkg <= nPackages);
+ KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
+ KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
+ KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
+
+ nPackages = nPkg; // correct num sockets
+ nCoresPerPkg = nCpP; // correct num cores per socket
+ __kmp_nThreadsPerCore = nTpC; // correct num threads per core
+ __kmp_avail_proc = n_new; // correct num procs
+ __kmp_ncores = nCo; // correct num cores
+ // hwloc topology method end
+ } else
+#endif // KMP_USE_HWLOC
+ {
+ int n_old = 0, n_new = 0, proc_num = 0;
+ if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
+ KMP_WARNING(AffHWSubsetNoHWLOC);
+ goto _exit;
+ }
+ if (__kmp_hws_socket.num == 0)
+ __kmp_hws_socket.num = nPackages; // use all available sockets
+ if (__kmp_hws_core.num == 0)
+ __kmp_hws_core.num = nCoresPerPkg; // use all available cores
+ if (__kmp_hws_proc.num == 0 ||
__kmp_hws_proc.num > __kmp_nThreadsPerCore)
- __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
- if ( !__kmp_affinity_uniform_topology() ) {
- KMP_WARNING( AffHWSubsetNonUniform );
- goto _exit; // don't support non-uniform topology
- }
- if ( depth > 3 ) {
- KMP_WARNING( AffHWSubsetNonThreeLevel );
- goto _exit; // don't support not-3-level topology
- }
- if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
- KMP_WARNING(AffHWSubsetManySockets);
- goto _exit;
- }
- if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
- KMP_WARNING( AffHWSubsetManyCores );
- goto _exit;
- }
- // Form the requested subset
- if (pAddr) // pAddr is NULL in case of affinity_none
- newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
- __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num);
- for (int i = 0; i < nPackages; ++i) {
- if (i < __kmp_hws_socket.offset ||
- i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
- // skip not-requested socket
- n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
- if (__kmp_pu_os_idx != NULL) {
- // walk through skipped socket
- for (int j = 0; j < nCoresPerPkg; ++j) {
- for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
- KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
- ++proc_num;
- }
+ __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts
+ if ( !__kmp_affinity_uniform_topology() ) {
+ KMP_WARNING( AffHWSubsetNonUniform );
+ goto _exit; // don't support non-uniform topology
+ }
+ if ( depth > 3 ) {
+ KMP_WARNING( AffHWSubsetNonThreeLevel );
+ goto _exit; // don't support not-3-level topology
+ }
+ if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
+ KMP_WARNING(AffHWSubsetManySockets);
+ goto _exit;
+ }
+ if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) {
+ KMP_WARNING( AffHWSubsetManyCores );
+ goto _exit;
+ }
+ // Form the requested subset
+ if (pAddr) // pAddr is NULL in case of affinity_none
+ newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) *
+ __kmp_hws_socket.num *
+ __kmp_hws_core.num *
+ __kmp_hws_proc.num);
+ for (int i = 0; i < nPackages; ++i) {
+ if (i < __kmp_hws_socket.offset ||
+ i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
+ // skip not-requested socket
+ n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
+ if (__kmp_pu_os_idx != NULL) {
+ // walk through skipped socket
+ for (int j = 0; j < nCoresPerPkg; ++j) {
+ for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
+ ++proc_num;
}
}
- } else {
- // walk through requested socket
- for (int j = 0; j < nCoresPerPkg; ++j) {
- if (j < __kmp_hws_core.offset ||
- j >= __kmp_hws_core.offset + __kmp_hws_core.num)
+ }
+ } else {
+ // walk through requested socket
+ for (int j = 0; j < nCoresPerPkg; ++j) {
+ if (j < __kmp_hws_core.offset ||
+ j >= __kmp_hws_core.offset + __kmp_hws_core.num)
{ // skip not-requested core
n_old += __kmp_nThreadsPerCore;
if (__kmp_pu_os_idx != NULL) {
@@ -4014,1428 +3645,1350 @@ __kmp_apply_thread_places(AddrUnsPair **
}
}
} else {
- // walk through requested core
- for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
- if (k < __kmp_hws_proc.num) {
- if (pAddr) // collect requested thread's data
- newAddr[n_new] = (*pAddr)[n_old];
- n_new++;
- } else {
- if (__kmp_pu_os_idx != NULL)
- KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
- }
- n_old++;
- ++proc_num;
+ // walk through requested core
+ for (int k = 0; k < __kmp_nThreadsPerCore; ++k) {
+ if (k < __kmp_hws_proc.num) {
+ if (pAddr) // collect requested thread's data
+ newAddr[n_new] = (*pAddr)[n_old];
+ n_new++;
+ } else {
+ if (__kmp_pu_os_idx != NULL)
+ KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
}
+ n_old++;
+ ++proc_num;
}
}
}
- }
- KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
- KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
- __kmp_hws_proc.num);
- nPackages = __kmp_hws_socket.num; // correct nPackages
- nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
- __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
- __kmp_avail_proc = n_new; // correct avail_proc
- __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
- } // non-hwloc topology method
- if (pAddr) {
- __kmp_free( *pAddr );
- *pAddr = newAddr; // replace old topology with new one
- }
- if (__kmp_affinity_verbose) {
- char m[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
- if (__kmp_affinity_respect_mask) {
- KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
- } else {
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
- }
- KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
- kmp_str_buf_t buf;
- __kmp_str_buf_init(&buf);
- __kmp_str_buf_print(&buf, "%d", nPackages);
- KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
- __kmp_nThreadsPerCore, __kmp_ncores);
- __kmp_str_buf_free(&buf);
+ }
}
-_exit:
- if (__kmp_pu_os_idx != NULL) {
- __kmp_free(__kmp_pu_os_idx);
- __kmp_pu_os_idx = NULL;
+ KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
+ KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num *
+ __kmp_hws_proc.num);
+ nPackages = __kmp_hws_socket.num; // correct nPackages
+ nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg
+ __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore
+ __kmp_avail_proc = n_new; // correct avail_proc
+ __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores
+ } // non-hwloc topology method
+ if (pAddr) {
+ __kmp_free( *pAddr );
+ *pAddr = newAddr; // replace old topology with new one
+ }
+ if (__kmp_affinity_verbose) {
+ char m[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask);
+ if (__kmp_affinity_respect_mask) {
+ KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m);
+ } else {
+ KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m);
}
-}
-
-//
-// This function figures out the deepest level at which there is at least one cluster/core
-// with more than one processing unit bound to it.
-//
-static int
-__kmp_affinity_find_core_level(const AddrUnsPair *address2os, int nprocs, int bottom_level)
-{
- int core_level = 0;
-
- for( int i = 0; i < nprocs; i++ ) {
- for( int j = bottom_level; j > 0; j-- ) {
- if( address2os[i].first.labels[j] > 0 ) {
- if( core_level < ( j - 1 ) ) {
- core_level = j - 1;
- }
- }
+ KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc);
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_str_buf_print(&buf, "%d", nPackages);
+ KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg,
+ __kmp_nThreadsPerCore, __kmp_ncores);
+ __kmp_str_buf_free(&buf);
+ }
+ _exit:
+ if (__kmp_pu_os_idx != NULL) {
+ __kmp_free(__kmp_pu_os_idx);
+ __kmp_pu_os_idx = NULL;
+ }
+}
+
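
The socket/node/tile/core/thread limits and offsets applied by
__kmp_apply_thread_places() come from the KMP_HW_SUBSET environment variable.
As a purely illustrative example of the intended effect (the settings parser is
the authoritative reference for the syntax), a value along the lines of
KMP_HW_SUBSET=2s,4c,2t asks the runtime to keep 2 sockets, 4 cores per socket
and 2 threads per core, and an offset such as 4c@2 corresponds to the
__kmp_hws_core.offset checks above.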
+// This function figures out the deepest level at which there is at least one
+// cluster/core with more than one processing unit bound to it.
+static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os,
+ int nprocs, int bottom_level) {
+ int core_level = 0;
+
+ for (int i = 0; i < nprocs; i++) {
+ for (int j = bottom_level; j > 0; j--) {
+ if (address2os[i].first.labels[j] > 0) {
+ if (core_level < (j - 1)) {
+ core_level = j - 1;
}
+ }
}
- return core_level;
+ }
+ return core_level;
}
-//
// This function counts number of clusters/cores at given level.
-//
-static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level)
-{
- int ncores = 0;
- int i, j;
-
- j = bottom_level;
- for( i = 0; i < nprocs; i++ ) {
- for ( j = bottom_level; j > core_level; j-- ) {
- if( ( i + 1 ) < nprocs ) {
- if( address2os[i + 1].first.labels[j] > 0 ) {
- break;
- }
- }
- }
- if( j == core_level ) {
- ncores++;
+static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os,
+ int nprocs, int bottom_level,
+ int core_level) {
+ int ncores = 0;
+ int i, j;
+
+ j = bottom_level;
+ for (i = 0; i < nprocs; i++) {
+ for (j = bottom_level; j > core_level; j--) {
+ if ((i + 1) < nprocs) {
+ if (address2os[i + 1].first.labels[j] > 0) {
+ break;
}
+ }
}
- if( j > core_level ) {
- //
- // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one core.
- // May occur when called from __kmp_affinity_find_core().
- //
- ncores++;
+ if (j == core_level) {
+ ncores++;
}
- return ncores;
+ }
+ if (j > core_level) {
+ // In case of ( nprocs < __kmp_avail_proc ) we may end up too deep and miss
+ // one core. This may occur when called from __kmp_affinity_find_core().
+ ncores++;
+ }
+ return ncores;
}
-//
// This function finds which cluster/core a given processing unit is bound to.
-//
-static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, int bottom_level, int core_level)
-{
- return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, core_level) - 1;
-}
+static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc,
+ int bottom_level, int core_level) {
+ return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
+ core_level) - 1;
+}
+
+// This function finds maximal number of processing units bound to a
+// cluster/core at given level.
+static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os,
+ int nprocs, int bottom_level,
+ int core_level) {
+ int maxprocpercore = 0;
+
+ if (core_level < bottom_level) {
+ for (int i = 0; i < nprocs; i++) {
+ int percore = address2os[i].first.labels[core_level + 1] + 1;
-//
-// This function finds maximal number of processing units bound to a cluster/core at given level.
-//
-static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level)
-{
- int maxprocpercore = 0;
-
- if( core_level < bottom_level ) {
- for( int i = 0; i < nprocs; i++ ) {
- int percore = address2os[i].first.labels[core_level + 1] + 1;
-
- if( percore > maxprocpercore ) {
- maxprocpercore = percore;
- }
- }
- } else {
- maxprocpercore = 1;
+ if (percore > maxprocpercore) {
+ maxprocpercore = percore;
+ }
}
- return maxprocpercore;
+ } else {
+ maxprocpercore = 1;
+ }
+ return maxprocpercore;
}
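
A small worked example may help here. Assume a uniform machine with 2 packages,
2 cores per package and 2 threads per core, whose addresses carry the labels
{package, core, thread}, so nprocs = 8 and bottom_level = 2. The thread label
(labels[2]) equals 1 for the second PU of every core, so
__kmp_affinity_find_core_level() returns 1. At that core level,
__kmp_affinity_compute_ncores() counts 4 cores: the inner loop reaches
j == core_level exactly once for the last PU of each core, and the trailing
j > core_level check after the loop does not fire in this uniform case.
Consequently __kmp_affinity_find_core() for PU 5 returns 2, the zero-based
index of its core, and __kmp_affinity_max_proc_per_core() returns
labels[core_level + 1] + 1 = 2.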
static AddrUnsPair *address2os = NULL;
-static int * procarr = NULL;
-static int __kmp_aff_depth = 0;
+static int *procarr = NULL;
+static int __kmp_aff_depth = 0;
-#define KMP_EXIT_AFF_NONE \
- KMP_ASSERT(__kmp_affinity_type == affinity_none); \
- KMP_ASSERT(address2os == NULL); \
- __kmp_apply_thread_places(NULL, 0); \
- return;
-
-static int
-__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
-{
- const Address *aa = (const Address *)&(((AddrUnsPair *)a)
- ->first);
- const Address *bb = (const Address *)&(((AddrUnsPair *)b)
- ->first);
- unsigned depth = aa->depth;
- unsigned i;
- KMP_DEBUG_ASSERT(depth == bb->depth);
- KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
- KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
- for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
- int j = depth - i - 1;
- if (aa->childNums[j] < bb->childNums[j]) return -1;
- if (aa->childNums[j] > bb->childNums[j]) return 1;
- }
- for (; i < depth; i++) {
- int j = i - __kmp_affinity_compact;
- if (aa->childNums[j] < bb->childNums[j]) return -1;
- if (aa->childNums[j] > bb->childNums[j]) return 1;
- }
- return 0;
+#define KMP_EXIT_AFF_NONE \
+ KMP_ASSERT(__kmp_affinity_type == affinity_none); \
+ KMP_ASSERT(address2os == NULL); \
+ __kmp_apply_thread_places(NULL, 0); \
+ return;
+
+static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) {
+ const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
+ const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
+ unsigned depth = aa->depth;
+ unsigned i;
+ KMP_DEBUG_ASSERT(depth == bb->depth);
+ KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
+ KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
+ for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
+ int j = depth - i - 1;
+ if (aa->childNums[j] < bb->childNums[j])
+ return -1;
+ if (aa->childNums[j] > bb->childNums[j])
+ return 1;
+ }
+ for (; i < depth; i++) {
+ int j = i - __kmp_affinity_compact;
+ if (aa->childNums[j] < bb->childNums[j])
+ return -1;
+ if (aa->childNums[j] > bb->childNums[j])
+ return 1;
+ }
+ return 0;
}
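
The comparator above realizes a rotated lexicographic order: the
__kmp_affinity_compact deepest levels of childNums are compared first, then the
remaining levels from the root down, which (per the comment near
__kmp_affinity_assign_child_nums() below) is how the sortAddresses path orders
address2os. A self-contained C++ sketch of the same idea on a plain three-level
label array, with hypothetical types standing in for Address, is:

#include <cstdio>
#include <cstdlib>

// Hypothetical stand-in for Address/AddrUnsPair (illustration only).
enum { DEPTH = 3 };
static int compact = 1; // number of deepest levels promoted to the front

struct addr_t {
  int labels[DEPTH];
};

static int cmp_compact(const void *a, const void *b) {
  const int *la = static_cast<const addr_t *>(a)->labels;
  const int *lb = static_cast<const addr_t *>(b)->labels;
  int i;
  for (i = 0; i < compact; i++) { // compare the promoted (deepest) levels first
    int j = DEPTH - i - 1;
    if (la[j] != lb[j])
      return la[j] < lb[j] ? -1 : 1;
  }
  for (; i < DEPTH; i++) { // then the remaining levels, outermost first
    int j = i - compact;
    if (la[j] != lb[j])
      return la[j] < lb[j] ? -1 : 1;
  }
  return 0;
}

int main() {
  addr_t v[] = {{{0, 1, 1}}, {{1, 0, 0}}, {{0, 0, 1}}, {{1, 1, 0}}};
  std::qsort(v, 4, sizeof(addr_t), cmp_compact);
  for (int i = 0; i < 4; i++)
    std::printf("{%d,%d,%d}\n", v[i].labels[0], v[i].labels[1], v[i].labels[2]);
  return 0;
}

With compact = 1 the elements are grouped by the innermost label first and only
then by the outer levels, the same effect the runtime gets when
__kmp_affinity_compact promotes the deepest topology levels.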
-static void
-__kmp_aux_affinity_initialize(void)
-{
- if (__kmp_affinity_masks != NULL) {
- KMP_ASSERT(__kmp_affin_fullMask != NULL);
- return;
- }
-
- //
- // Create the "full" mask - this defines all of the processors that we
- // consider to be in the machine model. If respect is set, then it is
- // the initialization thread's affinity mask. Otherwise, it is all
- // processors that we know about on the machine.
- //
- if (__kmp_affin_fullMask == NULL) {
- KMP_CPU_ALLOC(__kmp_affin_fullMask);
- }
- if (KMP_AFFINITY_CAPABLE()) {
- if (__kmp_affinity_respect_mask) {
- __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
+static void __kmp_aux_affinity_initialize(void) {
+ if (__kmp_affinity_masks != NULL) {
+ KMP_ASSERT(__kmp_affin_fullMask != NULL);
+ return;
+ }
- //
- // Count the number of available processors.
- //
- unsigned i;
- __kmp_avail_proc = 0;
- KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
- if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
- continue;
- }
- __kmp_avail_proc++;
- }
- if (__kmp_avail_proc > __kmp_xproc) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(ErrorInitializeAffinity);
- }
- __kmp_affinity_type = affinity_none;
- KMP_AFFINITY_DISABLE();
- return;
- }
+ // Create the "full" mask - this defines all of the processors that we
+ // consider to be in the machine model. If respect is set, then it is the
+ // initialization thread's affinity mask. Otherwise, it is all processors that
+ // we know about on the machine.
+ if (__kmp_affin_fullMask == NULL) {
+ KMP_CPU_ALLOC(__kmp_affin_fullMask);
+ }
+ if (KMP_AFFINITY_CAPABLE()) {
+ if (__kmp_affinity_respect_mask) {
+ __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
+
+ // Count the number of available processors.
+ unsigned i;
+ __kmp_avail_proc = 0;
+ KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
+ if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
+ continue;
}
- else {
- __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
- __kmp_avail_proc = __kmp_xproc;
+ __kmp_avail_proc++;
+ }
+ if (__kmp_avail_proc > __kmp_xproc) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings &&
+ (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(ErrorInitializeAffinity);
}
+ __kmp_affinity_type = affinity_none;
+ KMP_AFFINITY_DISABLE();
+ return;
+ }
+ } else {
+ __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
+ __kmp_avail_proc = __kmp_xproc;
}
+ }
- int depth = -1;
- kmp_i18n_id_t msg_id = kmp_i18n_null;
+ int depth = -1;
+ kmp_i18n_id_t msg_id = kmp_i18n_null;
- //
- // For backward compatibility, setting KMP_CPUINFO_FILE =>
- // KMP_TOPOLOGY_METHOD=cpuinfo
- //
- if ((__kmp_cpuinfo_file != NULL) &&
+ // For backward compatibility, setting KMP_CPUINFO_FILE =>
+ // KMP_TOPOLOGY_METHOD=cpuinfo
+ if ((__kmp_cpuinfo_file != NULL) &&
(__kmp_affinity_top_method == affinity_top_method_all)) {
- __kmp_affinity_top_method = affinity_top_method_cpuinfo;
- }
-
- if (__kmp_affinity_top_method == affinity_top_method_all) {
- //
- // In the default code path, errors are not fatal - we just try using
- // another method. We only emit a warning message if affinity is on,
- // or the verbose flag is set, an the nowarnings flag was not set.
- //
- const char *file_name = NULL;
- int line = 0;
-# if KMP_USE_HWLOC
- if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- }
- if(!__kmp_hwloc_error) {
- depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- } else if(depth < 0 && __kmp_affinity_verbose) {
- KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
- }
- } else if(__kmp_affinity_verbose) {
- KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
- }
- }
-# endif
-
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
-
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
- }
-
- file_name = NULL;
- depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
-
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- if (msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
- KMP_I18N_STR(DecodingLegacyAPIC));
- }
- else {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
- }
- }
-
- file_name = NULL;
- depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
- }
- }
-
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-
-# if KMP_OS_LINUX
-
- if (depth < 0) {
- if (__kmp_affinity_verbose) {
- if (msg_id != kmp_i18n_null) {
- KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
- }
- else {
- KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
- }
- }
-
- FILE *f = fopen("/proc/cpuinfo", "r");
- if (f == NULL) {
- msg_id = kmp_i18n_str_CantOpenCpuinfo;
- }
- else {
- file_name = "/proc/cpuinfo";
- depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
- fclose(f);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
- }
- }
-
-# endif /* KMP_OS_LINUX */
-
-# if KMP_GROUP_AFFINITY
-
- if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
- }
-
- depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
- KMP_ASSERT(depth != 0);
- }
-
-# endif /* KMP_GROUP_AFFINITY */
-
- if (depth < 0) {
- if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
- if (file_name == NULL) {
- KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
- }
- else if (line == 0) {
- KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
- }
- else {
- KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
- }
- }
- // FIXME - print msg if msg_id = kmp_i18n_null ???
+ __kmp_affinity_top_method = affinity_top_method_cpuinfo;
+ }
- file_name = "";
- depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
- KMP_ASSERT(depth > 0);
- KMP_ASSERT(address2os != NULL);
+ if (__kmp_affinity_top_method == affinity_top_method_all) {
+ // In the default code path, errors are not fatal - we just try using
+ // another method. We only emit a warning message if affinity is on, or the
+ // verbose flag is set, and the nowarnings flag was not set.
+ const char *file_name = NULL;
+ int line = 0;
+#if KMP_USE_HWLOC
+ if (depth < 0 &&
+ __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ }
+ if (!__kmp_hwloc_error) {
+ depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ } else if (depth < 0 && __kmp_affinity_verbose) {
+ KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
}
+ } else if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+ }
}
+#endif
- //
- // If the user has specified that a paricular topology discovery method
- // is to be used, then we abort if that method fails. The exception is
- // group affinity, which might have been implicitly set.
- //
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-# if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ if (depth < 0) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
+ }
- else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
- KMP_I18N_STR(Decodingx2APIC));
- }
+ file_name = NULL;
+ depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ }
- depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
- }
- }
- else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
+ if (depth < 0) {
if (__kmp_affinity_verbose) {
+ if (msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY",
+ __kmp_i18n_catgets(msg_id),
+ KMP_I18N_STR(DecodingLegacyAPIC));
+ } else {
KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
- KMP_I18N_STR(DecodingLegacyAPIC));
+ KMP_I18N_STR(DecodingLegacyAPIC));
+ }
}
+ file_name = NULL;
depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
+ KMP_EXIT_AFF_NONE;
}
+ }
}
-# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
- else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
- const char *filename;
- if (__kmp_cpuinfo_file != NULL) {
- filename = __kmp_cpuinfo_file;
- }
- else {
- filename = "/proc/cpuinfo";
- }
+#if KMP_OS_LINUX
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
+ if (depth < 0) {
+ if (__kmp_affinity_verbose) {
+ if (msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
+ __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
+ } else {
+ KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
}
+ }
- FILE *f = fopen(filename, "r");
- if (f == NULL) {
- int code = errno;
- if (__kmp_cpuinfo_file != NULL) {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(CantOpenFileForReading, filename),
- KMP_ERR(code),
- KMP_HNT(NameComesFrom_CPUINFO_FILE),
- __kmp_msg_null
- );
- }
- else {
- __kmp_msg(
- kmp_ms_fatal,
- KMP_MSG(CantOpenFileForReading, filename),
- KMP_ERR(code),
- __kmp_msg_null
- );
- }
- }
- int line = 0;
- depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
+ FILE *f = fopen("/proc/cpuinfo", "r");
+ if (f == NULL) {
+ msg_id = kmp_i18n_str_CantOpenCpuinfo;
+ } else {
+ file_name = "/proc/cpuinfo";
+ depth =
+ __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
fclose(f);
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- if (line > 0) {
- KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
- }
- else {
- KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
- }
- }
- if (__kmp_affinity_type == affinity_none) {
- KMP_ASSERT(depth == 0);
- KMP_EXIT_AFF_NONE;
- }
- }
-
-# if KMP_GROUP_AFFINITY
-
- else if (__kmp_affinity_top_method == affinity_top_method_group) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
- }
-
- depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
- KMP_ASSERT(depth != 0);
- if (depth < 0) {
- KMP_ASSERT(msg_id != kmp_i18n_null);
- KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
- }
- }
-
-# endif /* KMP_GROUP_AFFINITY */
-
- else if (__kmp_affinity_top_method == affinity_top_method_flat) {
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
- }
-
- depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
if (depth == 0) {
- KMP_EXIT_AFF_NONE;
+ KMP_EXIT_AFF_NONE;
}
- // should not fail
- KMP_ASSERT(depth > 0);
- KMP_ASSERT(address2os != NULL);
+ }
}
-# if KMP_USE_HWLOC
- else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
- KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
- if (__kmp_affinity_verbose) {
- KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
- }
- depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_EXIT_AFF_NONE;
- }
- }
-# endif // KMP_USE_HWLOC
+#endif /* KMP_OS_LINUX */
- if (address2os == NULL) {
- if (KMP_AFFINITY_CAPABLE()
- && (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)))) {
- KMP_WARNING(ErrorInitializeAffinity);
- }
- __kmp_affinity_type = affinity_none;
- KMP_AFFINITY_DISABLE();
- return;
- }
+#if KMP_GROUP_AFFINITY
- __kmp_apply_thread_places(&address2os, depth);
+ if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
+ }
- //
- // Create the table of masks, indexed by thread Id.
- //
- unsigned maxIndex;
- unsigned numUnique;
- kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
- address2os, __kmp_avail_proc);
- if (__kmp_affinity_gran_levels == 0) {
- KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
+ depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
+ KMP_ASSERT(depth != 0);
}
- //
- // Set the childNums vector in all Address objects. This must be done
- // before we can sort using __kmp_affinity_cmp_Address_child_num(),
- // which takes into account the setting of __kmp_affinity_compact.
- //
- __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
-
- switch (__kmp_affinity_type) {
+#endif /* KMP_GROUP_AFFINITY */
- case affinity_explicit:
- KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
-# if OMP_40_ENABLED
- if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
-# endif
- {
- __kmp_affinity_process_proclist(&__kmp_affinity_masks,
- &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
- maxIndex);
- }
-# if OMP_40_ENABLED
- else {
- __kmp_affinity_process_placelist(&__kmp_affinity_masks,
- &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
- maxIndex);
- }
-# endif
- if (__kmp_affinity_num_masks == 0) {
- if (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none))) {
- KMP_WARNING(AffNoValidProcID);
- }
- __kmp_affinity_type = affinity_none;
- return;
- }
- break;
-
- //
- // The other affinity types rely on sorting the Addresses according
- // to some permutation of the machine topology tree. Set
- // __kmp_affinity_compact and __kmp_affinity_offset appropriately,
- // then jump to a common code fragment to do the sort and create
- // the array of affinity masks.
- //
-
- case affinity_logical:
- __kmp_affinity_compact = 0;
- if (__kmp_affinity_offset) {
- __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
- % __kmp_avail_proc;
- }
- goto sortAddresses;
-
- case affinity_physical:
- if (__kmp_nThreadsPerCore > 1) {
- __kmp_affinity_compact = 1;
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = 0;
- }
+ if (depth < 0) {
+ if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
+ if (file_name == NULL) {
+ KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
+ } else if (line == 0) {
+ KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
} else {
- __kmp_affinity_compact = 0;
- }
- if (__kmp_affinity_offset) {
- __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
- % __kmp_avail_proc;
- }
- goto sortAddresses;
-
- case affinity_scatter:
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = 0;
- }
- else {
- __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
- }
- goto sortAddresses;
-
- case affinity_compact:
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = depth - 1;
- }
- goto sortAddresses;
-
- case affinity_balanced:
- if( depth <= 1 ) {
- if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
- KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
- }
- __kmp_affinity_type = affinity_none;
- return;
- } else if( __kmp_affinity_uniform_topology() ) {
- break;
- } else { // Non-uniform topology
-
- // Save the depth for further usage
- __kmp_aff_depth = depth;
-
- int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, depth - 1);
- int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, depth - 1, core_level);
- int maxprocpercore = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, depth - 1, core_level);
-
- int nproc = ncores * maxprocpercore;
- if( ( nproc < 2 ) || ( nproc < __kmp_avail_proc ) ) {
- if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
- KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
- }
- __kmp_affinity_type = affinity_none;
- return;
- }
-
- procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
- for( int i = 0; i < nproc; i++ ) {
- procarr[ i ] = -1;
- }
-
- int lastcore = -1;
- int inlastcore = 0;
- for( int i = 0; i < __kmp_avail_proc; i++ ) {
- int proc = address2os[ i ].second;
- int core = __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
-
- if ( core == lastcore ) {
- inlastcore++;
- } else {
- inlastcore = 0;
- }
- lastcore = core;
-
- procarr[ core * maxprocpercore + inlastcore ] = proc;
- }
-
- break;
- }
-
- sortAddresses:
- //
- // Allocate the gtid->affinity mask table.
- //
- if (__kmp_affinity_dups) {
- __kmp_affinity_num_masks = __kmp_avail_proc;
- }
- else {
- __kmp_affinity_num_masks = numUnique;
- }
-
-# if OMP_40_ENABLED
- if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
- && ( __kmp_affinity_num_places > 0 )
- && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
- __kmp_affinity_num_masks = __kmp_affinity_num_places;
- }
-# endif
-
- KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
-
- //
- // Sort the address2os table according to the current setting of
- // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
- //
- qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
- __kmp_affinity_cmp_Address_child_num);
- {
- int i;
- unsigned j;
- for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
- if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
- continue;
- }
- unsigned osId = address2os[i].second;
- kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
- kmp_affin_mask_t *dest
- = KMP_CPU_INDEX(__kmp_affinity_masks, j);
- KMP_ASSERT(KMP_CPU_ISSET(osId, src));
- KMP_CPU_COPY(dest, src);
- if (++j >= __kmp_affinity_num_masks) {
- break;
- }
- }
- KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
- }
- break;
-
- default:
- KMP_ASSERT2(0, "Unexpected affinity setting");
- }
-
- KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex+1);
- machine_hierarchy.init(address2os, __kmp_avail_proc);
-}
-#undef KMP_EXIT_AFF_NONE
-
-
-void
-__kmp_affinity_initialize(void)
-{
- //
- // Much of the code above was written assumming that if a machine was not
- // affinity capable, then __kmp_affinity_type == affinity_none. We now
- // explicitly represent this as __kmp_affinity_type == affinity_disabled.
- //
- // There are too many checks for __kmp_affinity_type == affinity_none
- // in this code. Instead of trying to change them all, check if
- // __kmp_affinity_type == affinity_disabled, and if so, slam it with
- // affinity_none, call the real initialization routine, then restore
- // __kmp_affinity_type to affinity_disabled.
- //
- int disabled = (__kmp_affinity_type == affinity_disabled);
- if (! KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(disabled);
- }
- if (disabled) {
- __kmp_affinity_type = affinity_none;
- }
- __kmp_aux_affinity_initialize();
- if (disabled) {
- __kmp_affinity_type = affinity_disabled;
- }
-}
-
-
-void
-__kmp_affinity_uninitialize(void)
-{
- if (__kmp_affinity_masks != NULL) {
- KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
- __kmp_affinity_masks = NULL;
- }
- if (__kmp_affin_fullMask != NULL) {
- KMP_CPU_FREE(__kmp_affin_fullMask);
- __kmp_affin_fullMask = NULL;
- }
- __kmp_affinity_num_masks = 0;
- __kmp_affinity_type = affinity_default;
-# if OMP_40_ENABLED
- __kmp_affinity_num_places = 0;
-# endif
- if (__kmp_affinity_proclist != NULL) {
- __kmp_free(__kmp_affinity_proclist);
- __kmp_affinity_proclist = NULL;
- }
- if( address2os != NULL ) {
- __kmp_free( address2os );
- address2os = NULL;
- }
- if( procarr != NULL ) {
- __kmp_free( procarr );
- procarr = NULL;
- }
-# if KMP_USE_HWLOC
- if (__kmp_hwloc_topology != NULL) {
- hwloc_topology_destroy(__kmp_hwloc_topology);
- __kmp_hwloc_topology = NULL;
- }
-# endif
- KMPAffinity::destroy_api();
-}
-
-
-void
-__kmp_affinity_set_init_mask(int gtid, int isa_root)
-{
- if (! KMP_AFFINITY_CAPABLE()) {
- return;
- }
-
- kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
- if (th->th.th_affin_mask == NULL) {
- KMP_CPU_ALLOC(th->th.th_affin_mask);
- }
- else {
- KMP_CPU_ZERO(th->th.th_affin_mask);
- }
-
- //
- // Copy the thread mask to the kmp_info_t strucuture.
- // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
- // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
- // is set, then the full mask is the same as the mask of the initialization
- // thread.
- //
- kmp_affin_mask_t *mask;
- int i;
-
-# if OMP_40_ENABLED
- if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
-# endif
- {
- if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced)
- ) {
-# if KMP_GROUP_AFFINITY
- if (__kmp_num_proc_groups > 1) {
- return;
- }
-# endif
- KMP_ASSERT(__kmp_affin_fullMask != NULL);
- i = KMP_PLACE_ALL;
- mask = __kmp_affin_fullMask;
- }
- else {
- KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
- i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
- mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
+ KMP_INFORM(UsingFlatOSFileLine, file_name, line,
+ __kmp_i18n_catgets(msg_id));
}
+ }
+ // FIXME - print msg if msg_id = kmp_i18n_null ???
+
+ file_name = "";
+ depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ }
+ KMP_ASSERT(depth > 0);
+ KMP_ASSERT(address2os != NULL);
}
-# if OMP_40_ENABLED
- else {
- if ((! isa_root)
- || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
-# if KMP_GROUP_AFFINITY
- if (__kmp_num_proc_groups > 1) {
- return;
- }
-# endif
- KMP_ASSERT(__kmp_affin_fullMask != NULL);
- i = KMP_PLACE_ALL;
- mask = __kmp_affin_fullMask;
- }
- else {
- //
- // int i = some hash function or just a counter that doesn't
- // always start at 0. Use gtid for now.
- //
- KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
- i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
- mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
- }
- }
-# endif
-
-# if OMP_40_ENABLED
- th->th.th_current_place = i;
- if (isa_root) {
- th->th.th_new_place = i;
- th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
- }
-
- if (i == KMP_PLACE_ALL) {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
- gtid));
- }
- else {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
- gtid, i));
- }
-# else
- if (i == -1) {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
- gtid));
- }
- else {
- KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
- gtid, i));
- }
-# endif /* OMP_40_ENABLED */
+ }
- KMP_CPU_COPY(th->th.th_affin_mask, mask);
+// If the user has specified that a particular topology discovery method is to be
+// used, then we abort if that method fails. The exception is group affinity,
+// which might have been implicitly set.
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+
+ else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_gettid(), gtid, buf);
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
}
-# if KMP_OS_WINDOWS
- //
- // On Windows* OS, the process affinity mask might have changed.
- // If the user didn't request affinity and this call fails,
- // just continue silently. See CQ171393.
- //
- if ( __kmp_affinity_type == affinity_none ) {
- __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
+ depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ }
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
+ }
+ } else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
}
- else
-# endif
- __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
-}
+ depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ }
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
+ }
+ }
-# if OMP_40_ENABLED
+#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
-void
-__kmp_affinity_set_place(int gtid)
-{
- int retval;
+ else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
+ const char *filename;
+ if (__kmp_cpuinfo_file != NULL) {
+ filename = __kmp_cpuinfo_file;
+ } else {
+ filename = "/proc/cpuinfo";
+ }
- if (! KMP_AFFINITY_CAPABLE()) {
- return;
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
}
- kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+ FILE *f = fopen(filename, "r");
+ if (f == NULL) {
+ int code = errno;
+ if (__kmp_cpuinfo_file != NULL) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
+ KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
+ __kmp_msg_null);
+ } else {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
+ KMP_ERR(code), __kmp_msg_null);
+ }
+ }
+ int line = 0;
+ depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
+ fclose(f);
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ if (line > 0) {
+ KMP_FATAL(FileLineMsgExiting, filename, line,
+ __kmp_i18n_catgets(msg_id));
+ } else {
+ KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
+ }
+ }
+ if (__kmp_affinity_type == affinity_none) {
+ KMP_ASSERT(depth == 0);
+ KMP_EXIT_AFF_NONE;
+ }
+ }
- KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
- gtid, th->th.th_new_place, th->th.th_current_place));
+#if KMP_GROUP_AFFINITY
- //
- // Check that the new place is within this thread's partition.
- //
- KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
- KMP_ASSERT(th->th.th_new_place >= 0);
- KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
- if (th->th.th_first_place <= th->th.th_last_place) {
- KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
- && (th->th.th_new_place <= th->th.th_last_place));
+ else if (__kmp_affinity_top_method == affinity_top_method_group) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
}
- else {
- KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
- || (th->th.th_new_place >= th->th.th_last_place));
+
+ depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
+ KMP_ASSERT(depth != 0);
+ if (depth < 0) {
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
}
+ }
- //
- // Copy the thread mask to the kmp_info_t strucuture,
- // and set this thread's affinity.
- //
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
- th->th.th_new_place);
- KMP_CPU_COPY(th->th.th_affin_mask, mask);
- th->th.th_current_place = th->th.th_new_place;
+#endif /* KMP_GROUP_AFFINITY */
+ else if (__kmp_affinity_top_method == affinity_top_method_flat) {
if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
+ KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
}
- __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
-}
-
-# endif /* OMP_40_ENABLED */
+ depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ }
+ // should not fail
+ KMP_ASSERT(depth > 0);
+ KMP_ASSERT(address2os != NULL);
+ }
-int
-__kmp_aux_set_affinity(void **mask)
-{
- int gtid;
- kmp_info_t *th;
- int retval;
+#if KMP_USE_HWLOC
+ else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
+ KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
+ }
+ depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
+ if (depth == 0) {
+ KMP_EXIT_AFF_NONE;
+ }
+ }
+#endif // KMP_USE_HWLOC
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
+ if (address2os == NULL) {
+ if (KMP_AFFINITY_CAPABLE() &&
+ (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
+ KMP_WARNING(ErrorInitializeAffinity);
}
+ __kmp_affinity_type = affinity_none;
+ KMP_AFFINITY_DISABLE();
+ return;
+ }
- gtid = __kmp_entry_gtid();
- KA_TRACE(1000, ;{
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
- gtid, buf);
- });
-
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- else {
- unsigned proc;
- int num_procs = 0;
-
- KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) {
- if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
- continue;
- }
- num_procs++;
- }
- if (num_procs == 0) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
+ __kmp_apply_thread_places(&address2os, depth);
-# if KMP_GROUP_AFFINITY
- if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
-# endif /* KMP_GROUP_AFFINITY */
+ // Create the table of masks, indexed by thread Id.
+ unsigned maxIndex;
+ unsigned numUnique;
+ kmp_affin_mask_t *osId2Mask =
+ __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
+ if (__kmp_affinity_gran_levels == 0) {
+ KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
+ }
+
+ // Set the childNums vector in all Address objects. This must be done before
+ // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into
+ // account the setting of __kmp_affinity_compact.
+ __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
+
+ switch (__kmp_affinity_type) {
+
+ case affinity_explicit:
+ KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
+#if OMP_40_ENABLED
+ if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
+#endif
+ {
+ __kmp_affinity_process_proclist(
+ &__kmp_affinity_masks, &__kmp_affinity_num_masks,
+ __kmp_affinity_proclist, osId2Mask, maxIndex);
+ }
+#if OMP_40_ENABLED
+ else {
+ __kmp_affinity_process_placelist(
+ &__kmp_affinity_masks, &__kmp_affinity_num_masks,
+ __kmp_affinity_proclist, osId2Mask, maxIndex);
+ }
+#endif
+ if (__kmp_affinity_num_masks == 0) {
+ if (__kmp_affinity_verbose ||
+ (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffNoValidProcID);
+ }
+ __kmp_affinity_type = affinity_none;
+ return;
+ }
+ break;
- }
+ // The other affinity types rely on sorting the Addresses according to some
+ // permutation of the machine topology tree. Set __kmp_affinity_compact and
+ // __kmp_affinity_offset appropriately, then jump to a common code fragment
+ // to do the sort and create the array of affinity masks.
+
+ case affinity_logical:
+ __kmp_affinity_compact = 0;
+ if (__kmp_affinity_offset) {
+ __kmp_affinity_offset =
+ __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
+ }
+ goto sortAddresses;
+
+ case affinity_physical:
+ if (__kmp_nThreadsPerCore > 1) {
+ __kmp_affinity_compact = 1;
+ if (__kmp_affinity_compact >= depth) {
+ __kmp_affinity_compact = 0;
+ }
+ } else {
+ __kmp_affinity_compact = 0;
+ }
+ if (__kmp_affinity_offset) {
+ __kmp_affinity_offset =
+ __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
}
+ goto sortAddresses;
- th = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
- retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
- if (retval == 0) {
- KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
+ case affinity_scatter:
+ if (__kmp_affinity_compact >= depth) {
+ __kmp_affinity_compact = 0;
+ } else {
+ __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
}
+ goto sortAddresses;
-# if OMP_40_ENABLED
- th->th.th_current_place = KMP_PLACE_UNDEFINED;
- th->th.th_new_place = KMP_PLACE_UNDEFINED;
- th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
+ case affinity_compact:
+ if (__kmp_affinity_compact >= depth) {
+ __kmp_affinity_compact = depth - 1;
+ }
+ goto sortAddresses;
- //
- // Turn off 4.0 affinity for the current tread at this parallel level.
- //
- th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
-# endif
+ case affinity_balanced:
+ if (depth <= 1) {
+ if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
+ KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
+ }
+ __kmp_affinity_type = affinity_none;
+ return;
+ } else if (__kmp_affinity_uniform_topology()) {
+ break;
+ } else { // Non-uniform topology
- return retval;
-}
+ // Save the depth for further usage
+ __kmp_aff_depth = depth;
+ int core_level = __kmp_affinity_find_core_level(
+ address2os, __kmp_avail_proc, depth - 1);
+ int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
+ depth - 1, core_level);
+ int maxprocpercore = __kmp_affinity_max_proc_per_core(
+ address2os, __kmp_avail_proc, depth - 1, core_level);
+
+ int nproc = ncores * maxprocpercore;
+ if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
+ if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
+ KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
+ }
+ __kmp_affinity_type = affinity_none;
+ return;
+ }
-int
-__kmp_aux_get_affinity(void **mask)
-{
- int gtid;
- int retval;
- kmp_info_t *th;
+ procarr = (int *)__kmp_allocate(sizeof(int) * nproc);
+ for (int i = 0; i < nproc; i++) {
+ procarr[i] = -1;
+ }
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
+ int lastcore = -1;
+ int inlastcore = 0;
+ for (int i = 0; i < __kmp_avail_proc; i++) {
+ int proc = address2os[i].second;
+ int core =
+ __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
- gtid = __kmp_entry_gtid();
- th = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+ if (core == lastcore) {
+ inlastcore++;
+ } else {
+ inlastcore = 0;
+ }
+ lastcore = core;
- KA_TRACE(1000, ;{
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- th->th.th_affin_mask);
- __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
- });
+ procarr[core * maxprocpercore + inlastcore] = proc;
+ }
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
- }
+ break;
}
-# if !KMP_OS_WINDOWS
+ sortAddresses:
+ // Allocate the gtid->affinity mask table.
+ if (__kmp_affinity_dups) {
+ __kmp_affinity_num_masks = __kmp_avail_proc;
+ } else {
+ __kmp_affinity_num_masks = numUnique;
+ }
- retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
- KA_TRACE(1000, ;{
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
- });
- return retval;
+#if OMP_40_ENABLED
+ if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
+ (__kmp_affinity_num_places > 0) &&
+ ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
+ __kmp_affinity_num_masks = __kmp_affinity_num_places;
+ }
+#endif
-# else
+ KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
- KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
- return 0;
+ // Sort the address2os table according to the current setting of
+ // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
+ qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
+ __kmp_affinity_cmp_Address_child_num);
+ {
+ int i;
+ unsigned j;
+ for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
+ if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
+ continue;
+ }
+ unsigned osId = address2os[i].second;
+ kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
+ kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
+ KMP_ASSERT(KMP_CPU_ISSET(osId, src));
+ KMP_CPU_COPY(dest, src);
+ if (++j >= __kmp_affinity_num_masks) {
+ break;
+ }
+ }
+ KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
+ }
+ break;
-# endif /* KMP_OS_WINDOWS */
+ default:
+ KMP_ASSERT2(0, "Unexpected affinity setting");
+ }
+ KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
+ machine_hierarchy.init(address2os, __kmp_avail_proc);
}
+#undef KMP_EXIT_AFF_NONE
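
For the scatter and compact affinity types handled above, the requested compact
level is first adjusted: scatter mirrors an in-range level to depth - 1 - compact
(and collapses an out-of-range request to 0), while compact only clamps an
out-of-range request to depth - 1. A minimal standalone sketch of just that
adjustment, with a hypothetical helper name and an assumed 3-level topology:

    #include <cstdio>

    // Hypothetical helper mirroring the affinity_scatter / affinity_compact
    // cases above: given the topology depth and the requested compact level,
    // return the adjusted __kmp_affinity_compact value used for sorting.
    static int effective_compact(int depth, int requested, bool scatter) {
      int compact = requested;
      if (scatter) {
        if (compact >= depth)
          compact = 0;                   // out-of-range request collapses to 0
        else
          compact = depth - 1 - compact; // in-range request is mirrored
      } else {
        if (compact >= depth)
          compact = depth - 1;           // compact only clamps to the last level
      }
      return compact;
    }

    int main() {
      // Assumed 3-level topology (package/core/thread):
      std::printf("scatter, requested 0 -> %d\n", effective_compact(3, 0, true));  // 2
      std::printf("compact, requested 0 -> %d\n", effective_compact(3, 0, false)); // 0
      std::printf("scatter, requested 5 -> %d\n", effective_compact(3, 5, true));  // 0
      std::printf("compact, requested 5 -> %d\n", effective_compact(3, 5, false)); // 2
      return 0;
    }
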
-int
-__kmp_aux_get_affinity_max_proc() {
- if (! KMP_AFFINITY_CAPABLE()) {
- return 0;
- }
-#if KMP_GROUP_AFFINITY
- if ( __kmp_num_proc_groups > 1 ) {
- return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT);
- }
+void __kmp_affinity_initialize(void) {
+  // Much of the code above was written assuming that if a machine was not
+ // affinity capable, then __kmp_affinity_type == affinity_none. We now
+ // explicitly represent this as __kmp_affinity_type == affinity_disabled.
+ // There are too many checks for __kmp_affinity_type == affinity_none
+ // in this code. Instead of trying to change them all, check if
+ // __kmp_affinity_type == affinity_disabled, and if so, slam it with
+ // affinity_none, call the real initialization routine, then restore
+ // __kmp_affinity_type to affinity_disabled.
+ int disabled = (__kmp_affinity_type == affinity_disabled);
+ if (!KMP_AFFINITY_CAPABLE()) {
+ KMP_ASSERT(disabled);
+ }
+ if (disabled) {
+ __kmp_affinity_type = affinity_none;
+ }
+ __kmp_aux_affinity_initialize();
+ if (disabled) {
+ __kmp_affinity_type = affinity_disabled;
+ }
+}
+
+void __kmp_affinity_uninitialize(void) {
+ if (__kmp_affinity_masks != NULL) {
+ KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
+ __kmp_affinity_masks = NULL;
+ }
+ if (__kmp_affin_fullMask != NULL) {
+ KMP_CPU_FREE(__kmp_affin_fullMask);
+ __kmp_affin_fullMask = NULL;
+ }
+ __kmp_affinity_num_masks = 0;
+ __kmp_affinity_type = affinity_default;
+#if OMP_40_ENABLED
+ __kmp_affinity_num_places = 0;
+#endif
+ if (__kmp_affinity_proclist != NULL) {
+ __kmp_free(__kmp_affinity_proclist);
+ __kmp_affinity_proclist = NULL;
+ }
+ if (address2os != NULL) {
+ __kmp_free(address2os);
+ address2os = NULL;
+ }
+ if (procarr != NULL) {
+ __kmp_free(procarr);
+ procarr = NULL;
+ }
+#if KMP_USE_HWLOC
+ if (__kmp_hwloc_topology != NULL) {
+ hwloc_topology_destroy(__kmp_hwloc_topology);
+ __kmp_hwloc_topology = NULL;
+ }
#endif
- return __kmp_xproc;
+ KMPAffinity::destroy_api();
}
-int
-__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
-{
- int retval;
-
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
+void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return;
+ }
- KA_TRACE(1000, ;{
- int gtid = __kmp_entry_gtid();
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
- proc, gtid, buf);
- });
+ kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+ if (th->th.th_affin_mask == NULL) {
+ KMP_CPU_ALLOC(th->th.th_affin_mask);
+ } else {
+ KMP_CPU_ZERO(th->th.th_affin_mask);
+ }
+
+  // Copy the thread mask to the kmp_info_t structure. If
+ // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
+ // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
+ // then the full mask is the same as the mask of the initialization thread.
+ kmp_affin_mask_t *mask;
+ int i;
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
- }
+#if OMP_40_ENABLED
+ if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
+#endif
+ {
+ if ((__kmp_affinity_type == affinity_none) ||
+ (__kmp_affinity_type == affinity_balanced)) {
+#if KMP_GROUP_AFFINITY
+ if (__kmp_num_proc_groups > 1) {
+ return;
+ }
+#endif
+ KMP_ASSERT(__kmp_affin_fullMask != NULL);
+ i = KMP_PLACE_ALL;
+ mask = __kmp_affin_fullMask;
+ } else {
+ KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
+ i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
+ mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
+ }
+ }
+#if OMP_40_ENABLED
+ else {
+ if ((!isa_root) ||
+ (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
+#if KMP_GROUP_AFFINITY
+ if (__kmp_num_proc_groups > 1) {
+ return;
+ }
+#endif
+ KMP_ASSERT(__kmp_affin_fullMask != NULL);
+ i = KMP_PLACE_ALL;
+ mask = __kmp_affin_fullMask;
+ } else {
+ // int i = some hash function or just a counter that doesn't
+ // always start at 0. Use gtid for now.
+ KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
+ i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
+ mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
}
+ }
+#endif
- if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
- return -1;
- }
- if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
- return -2;
- }
+#if OMP_40_ENABLED
+ th->th.th_current_place = i;
+ if (isa_root) {
+ th->th.th_new_place = i;
+ th->th.th_first_place = 0;
+ th->th.th_last_place = __kmp_affinity_num_masks - 1;
+ }
- KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
- return 0;
+ if (i == KMP_PLACE_ALL) {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
+ gtid));
+ } else {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
+ gtid, i));
+ }
+#else
+ if (i == -1) {
+ KA_TRACE(
+ 100,
+ ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
+ gtid));
+ } else {
+ KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
+ gtid, i));
+ }
+#endif /* OMP_40_ENABLED */
+
+ KMP_CPU_COPY(th->th.th_affin_mask, mask);
+
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ th->th.th_affin_mask);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ __kmp_gettid(), gtid, buf);
+ }
+
+#if KMP_OS_WINDOWS
+ // On Windows* OS, the process affinity mask might have changed. If the user
+ // didn't request affinity and this call fails, just continue silently.
+ // See CQ171393.
+ if (__kmp_affinity_type == affinity_none) {
+ __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
+ } else
+#endif
+ __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
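
The place chosen above is just a round robin over the mask table, offset by
__kmp_affinity_offset. A minimal sketch with assumed values (the helper name
is made up for illustration):

    #include <cstdio>

    // Mirrors i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks
    // from __kmp_affinity_set_init_mask; names here are illustrative only.
    static int place_index(int gtid, int offset, unsigned num_masks) {
      return (gtid + offset) % (int)num_masks;
    }

    int main() {
      const unsigned num_masks = 4; // assumed mask-table size
      const int offset = 1;         // assumed affinity offset
      for (int gtid = 0; gtid < 6; ++gtid)
        std::printf("T#%d -> place %d\n", gtid,
                    place_index(gtid, offset, num_masks));
      // Prints places 1,2,3,0,1,2: threads wrap around the mask table.
      return 0;
    }
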
+#if OMP_40_ENABLED
-int
-__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
-{
- int retval;
+void __kmp_affinity_set_place(int gtid) {
+ int retval;
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return;
+ }
- KA_TRACE(1000, ;{
- int gtid = __kmp_entry_gtid();
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
- proc, gtid, buf);
- });
+ kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
+
+ KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
+ "place = %d)\n",
+ gtid, th->th.th_new_place, th->th.th_current_place));
+
+ // Check that the new place is within this thread's partition.
+ KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+ KMP_ASSERT(th->th.th_new_place >= 0);
+ KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
+ if (th->th.th_first_place <= th->th.th_last_place) {
+ KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
+ (th->th.th_new_place <= th->th.th_last_place));
+ } else {
+ KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
+ (th->th.th_new_place >= th->th.th_last_place));
+ }
+
+  // Copy the thread mask to the kmp_info_t structure,
+ // and set this thread's affinity.
+ kmp_affin_mask_t *mask =
+ KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
+ KMP_CPU_COPY(th->th.th_affin_mask, mask);
+ th->th.th_current_place = th->th.th_new_place;
+
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ th->th.th_affin_mask);
+ KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
+ __kmp_gettid(), gtid, buf);
+ }
+ __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
+}
+
+#endif /* OMP_40_ENABLED */
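
The partition assertions in __kmp_affinity_set_place above allow for both a
contiguous window (first_place <= last_place) and one that wraps past the end
of the place list. As an illustration only, an exact membership test for such
a window could look like the sketch below; this is not the runtime's
assertion, just the wrap-around interpretation spelled out:

    #include <cstdio>

    // Illustrative only: exact membership test for a place window that may
    // wrap around the end of the place list (first > last).
    static bool place_in_partition(int place, int first, int last) {
      if (first <= last)
        return place >= first && place <= last; // contiguous window
      return place >= first || place <= last;   // window wraps past the end
    }

    int main() {
      // Assumed 8 places; a wrapped partition {6,7,0,1,2} is first=6, last=2.
      for (int p = 0; p < 8; ++p)
        std::printf("place %d in partition: %s\n", p,
                    place_in_partition(p, 6, 2) ? "yes" : "no");
      return 0;
    }
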
+
+int __kmp_aux_set_affinity(void **mask) {
+ int gtid;
+ kmp_info_t *th;
+ int retval;
+
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ gtid = __kmp_entry_gtid();
+ KA_TRACE(1000, ; {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf(
+ "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
+ buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ } else {
+ unsigned proc;
+ int num_procs = 0;
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
+ KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
+ if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
}
- }
+ if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
+ continue;
+ }
+ num_procs++;
+ }
+ if (num_procs == 0) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
- if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
- return -1;
- }
- if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
- return -2;
+#if KMP_GROUP_AFFINITY
+ if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+#endif /* KMP_GROUP_AFFINITY */
}
+ }
- KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
- return 0;
-}
+ th = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+ retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
+ if (retval == 0) {
+ KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
+ }
+
+#if OMP_40_ENABLED
+ th->th.th_current_place = KMP_PLACE_UNDEFINED;
+ th->th.th_new_place = KMP_PLACE_UNDEFINED;
+ th->th.th_first_place = 0;
+ th->th.th_last_place = __kmp_affinity_num_masks - 1;
+  // Turn off 4.0 affinity for the current thread at this parallel level.
+ th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
+#endif
-int
-__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
-{
- int retval;
+ return retval;
+}
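
These __kmp_aux_* routines sit behind the kmp_* affinity entry points that the
runtime exports to user code. A minimal usage sketch, assuming the usual
declarations from this runtime's omp.h (kmp_create_affinity_mask,
kmp_set_affinity_mask_proc, kmp_set_affinity, kmp_destroy_affinity_mask) and
the return conventions visible in the code above:

    #include <omp.h>
    #include <cstdio>

    int main() {
      kmp_affinity_mask_t mask;
      kmp_create_affinity_mask(&mask);
      // Try to add logical processor 0 to the mask; per the code above,
      // -1 means out of range or affinity not capable, and -2 means the
      // proc is not in the process's full affinity mask.
      if (kmp_set_affinity_mask_proc(0, &mask) != 0) {
        std::printf("could not add proc 0 to the mask\n");
      } else if (kmp_set_affinity(&mask) != 0) {
        // 0 on success; nonzero if the system affinity call failed.
        std::printf("kmp_set_affinity failed\n");
      }
      kmp_destroy_affinity_mask(&mask);
      return 0;
    }
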
- if (! KMP_AFFINITY_CAPABLE()) {
- return -1;
- }
+int __kmp_aux_get_affinity(void **mask) {
+ int gtid;
+ int retval;
+ kmp_info_t *th;
+
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ gtid = __kmp_entry_gtid();
+ th = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
+
+ KA_TRACE(1000, ; {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ th->th.th_affin_mask);
+ __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
+ gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
+ }
+ }
+
+#if !KMP_OS_WINDOWS
+
+ retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
+ KA_TRACE(1000, ; {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
+ gtid, buf);
+ });
+ return retval;
- KA_TRACE(1000, ;{
- int gtid = __kmp_entry_gtid();
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
- (kmp_affin_mask_t *)(*mask));
- __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
- proc, gtid, buf);
- });
+#else
- if (__kmp_env_consistency_check) {
- if ((mask == NULL) || (*mask == NULL)) {
- KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
- }
- }
+ KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
+ return 0;
- if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
- return -1;
- }
- if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
- return 0;
- }
+#endif /* KMP_OS_WINDOWS */
+}
- return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
+int __kmp_aux_get_affinity_max_proc() {
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return 0;
+ }
+#if KMP_GROUP_AFFINITY
+ if (__kmp_num_proc_groups > 1) {
+ return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT);
+ }
+#endif
+ return __kmp_xproc;
}
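
For the group-affinity branch above, the limit is counted in whole processor
groups rather than actual logical processors: on 64-bit Windows, for example,
two groups give 2 * sizeof(DWORD_PTR) * CHAR_BIT = 2 * 8 * 8 = 128 as the
maximum proc id space, even if fewer logical processors are present.
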
+int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) {
+ int retval;
+
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ KA_TRACE(1000, ; {
+ int gtid = __kmp_entry_gtid();
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in "
+ "affinity mask for thread %d = %s\n",
+ proc, gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
+ }
+ }
+
+ if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
+ return -1;
+ }
+ if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
+ return -2;
+ }
+
+ KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
+ return 0;
+}
+
+int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) {
+ int retval;
+
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ KA_TRACE(1000, ; {
+ int gtid = __kmp_entry_gtid();
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in "
+ "affinity mask for thread %d = %s\n",
+ proc, gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
+ }
+ }
+
+ if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
+ return -1;
+ }
+ if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
+ return -2;
+ }
+
+ KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
+ return 0;
+}
+
+int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
+ int retval;
+
+ if (!KMP_AFFINITY_CAPABLE()) {
+ return -1;
+ }
+
+ KA_TRACE(1000, ; {
+ int gtid = __kmp_entry_gtid();
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
+ (kmp_affin_mask_t *)(*mask));
+ __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in "
+ "affinity mask for thread %d = %s\n",
+ proc, gtid, buf);
+ });
+
+ if (__kmp_env_consistency_check) {
+ if ((mask == NULL) || (*mask == NULL)) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
+ }
+ }
+
+ if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
+ return -1;
+ }
+ if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
+ return 0;
+ }
+
+ return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
+}
// Dynamic affinity settings - Affinity balanced
-void __kmp_balanced_affinity( int tid, int nthreads )
-{
- bool fine_gran = true;
+void __kmp_balanced_affinity(int tid, int nthreads) {
+ bool fine_gran = true;
- switch (__kmp_affinity_gran) {
- case affinity_gran_fine:
- case affinity_gran_thread:
- break;
- case affinity_gran_core:
- if( __kmp_nThreadsPerCore > 1) {
- fine_gran = false;
- }
- break;
- case affinity_gran_package:
- if( nCoresPerPkg > 1) {
- fine_gran = false;
- }
- break;
- default:
- fine_gran = false;
+ switch (__kmp_affinity_gran) {
+ case affinity_gran_fine:
+ case affinity_gran_thread:
+ break;
+ case affinity_gran_core:
+ if (__kmp_nThreadsPerCore > 1) {
+ fine_gran = false;
+ }
+ break;
+ case affinity_gran_package:
+ if (nCoresPerPkg > 1) {
+ fine_gran = false;
+ }
+ break;
+ default:
+ fine_gran = false;
+ }
+
+ if (__kmp_affinity_uniform_topology()) {
+ int coreID;
+ int threadID;
+ // Number of hyper threads per core in HT machine
+ int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
+ // Number of cores
+ int ncores = __kmp_ncores;
+ if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
+ __kmp_nth_per_core = __kmp_avail_proc / nPackages;
+ ncores = nPackages;
+ }
+ // How many threads will be bound to each core
+ int chunk = nthreads / ncores;
+    // How many cores will have an additional thread bound to them - "big cores"
+ int big_cores = nthreads % ncores;
+ // Number of threads on the big cores
+ int big_nth = (chunk + 1) * big_cores;
+ if (tid < big_nth) {
+ coreID = tid / (chunk + 1);
+ threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
+ } else { // tid >= big_nth
+ coreID = (tid - big_cores) / chunk;
+ threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
}
- if( __kmp_affinity_uniform_topology() ) {
- int coreID;
- int threadID;
- // Number of hyper threads per core in HT machine
- int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
- // Number of cores
- int ncores = __kmp_ncores;
- if( ( nPackages > 1 ) && ( __kmp_nth_per_core <= 1 ) ) {
- __kmp_nth_per_core = __kmp_avail_proc / nPackages;
- ncores = nPackages;
- }
- // How many threads will be bound to each core
- int chunk = nthreads / ncores;
- // How many cores will have an additional thread bound to it - "big cores"
- int big_cores = nthreads % ncores;
- // Number of threads on the big cores
- int big_nth = ( chunk + 1 ) * big_cores;
- if( tid < big_nth ) {
- coreID = tid / (chunk + 1 );
- threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
- } else { //tid >= big_nth
- coreID = ( tid - big_cores ) / chunk;
- threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
- }
-
- KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
-
- kmp_affin_mask_t *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
-
- if( fine_gran ) {
- int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
- KMP_CPU_SET( osID, mask);
- } else {
- for( int i = 0; i < __kmp_nth_per_core; i++ ) {
- int osID;
- osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
- KMP_CPU_SET( osID, mask);
- }
- }
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- __kmp_gettid(), tid, buf);
- }
- __kmp_set_system_affinity( mask, TRUE );
- KMP_CPU_FREE_FROM_STACK(mask);
- } else { // Non-uniform topology
+ KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal set affinity operation when not capable");
- kmp_affin_mask_t *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
-
- int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
- int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
- int nth_per_core = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
-
- // For performance gain consider the special case nthreads == __kmp_avail_proc
- if( nthreads == __kmp_avail_proc ) {
- if( fine_gran ) {
- int osID = address2os[ tid ].second;
- KMP_CPU_SET( osID, mask);
- } else {
- int core = __kmp_affinity_find_core(address2os, tid, __kmp_aff_depth - 1, core_level);
- for( int i = 0; i < __kmp_avail_proc; i++ ) {
- int osID = address2os[ i ].second;
- if( __kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1, core_level) == core ) {
- KMP_CPU_SET( osID, mask);
- }
- }
- }
- } else if( nthreads <= ncores ) {
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
- int core = 0;
- for( int i = 0; i < ncores; i++ ) {
- // Check if this core from procarr[] is in the mask
- int in_mask = 0;
- for( int j = 0; j < nth_per_core; j++ ) {
- if( procarr[ i * nth_per_core + j ] != - 1 ) {
- in_mask = 1;
- break;
- }
- }
- if( in_mask ) {
- if( tid == core ) {
- for( int j = 0; j < nth_per_core; j++ ) {
- int osID = procarr[ i * nth_per_core + j ];
- if( osID != -1 ) {
- KMP_CPU_SET( osID, mask );
- // For fine granularity it is enough to set the first available osID for this core
- if( fine_gran) {
- break;
- }
- }
- }
- break;
- } else {
- core++;
- }
- }
- }
+ if (fine_gran) {
+ int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
+ KMP_CPU_SET(osID, mask);
+ } else {
+ for (int i = 0; i < __kmp_nth_per_core; i++) {
+ int osID;
+ osID = address2os[coreID * __kmp_nth_per_core + i].second;
+ KMP_CPU_SET(osID, mask);
+ }
+ }
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ __kmp_gettid(), tid, buf);
+ }
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
+ } else { // Non-uniform topology
- } else { // nthreads > ncores
+ kmp_affin_mask_t *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
- // Array to save the number of processors at each core
- int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores);
- // Array to save the number of cores with "x" available processors;
- int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
- // Array to save the number of cores with # procs from x to nth_per_core
- int* ncores_with_x_to_max_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1));
-
- for( int i = 0; i <= nth_per_core; i++ ) {
- ncores_with_x_procs[ i ] = 0;
- ncores_with_x_to_max_procs[ i ] = 0;
- }
+ int core_level = __kmp_affinity_find_core_level(
+ address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
+ int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
+ __kmp_aff_depth - 1, core_level);
+ int nth_per_core = __kmp_affinity_max_proc_per_core(
+ address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
+
+ // For performance gain consider the special case nthreads ==
+ // __kmp_avail_proc
+ if (nthreads == __kmp_avail_proc) {
+ if (fine_gran) {
+ int osID = address2os[tid].second;
+ KMP_CPU_SET(osID, mask);
+ } else {
+ int core = __kmp_affinity_find_core(address2os, tid,
+ __kmp_aff_depth - 1, core_level);
+ for (int i = 0; i < __kmp_avail_proc; i++) {
+ int osID = address2os[i].second;
+ if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
+ core_level) == core) {
+ KMP_CPU_SET(osID, mask);
+ }
+ }
+ }
+ } else if (nthreads <= ncores) {
- for( int i = 0; i < ncores; i++ ) {
- int cnt = 0;
- for( int j = 0; j < nth_per_core; j++ ) {
- if( procarr[ i * nth_per_core + j ] != -1 ) {
- cnt++;
- }
+ int core = 0;
+ for (int i = 0; i < ncores; i++) {
+ // Check if this core from procarr[] is in the mask
+ int in_mask = 0;
+ for (int j = 0; j < nth_per_core; j++) {
+ if (procarr[i * nth_per_core + j] != -1) {
+ in_mask = 1;
+ break;
+ }
+ }
+ if (in_mask) {
+ if (tid == core) {
+ for (int j = 0; j < nth_per_core; j++) {
+ int osID = procarr[i * nth_per_core + j];
+ if (osID != -1) {
+ KMP_CPU_SET(osID, mask);
+ // For fine granularity it is enough to set the first available
+ // osID for this core
+ if (fine_gran) {
+ break;
}
- nproc_at_core[ i ] = cnt;
- ncores_with_x_procs[ cnt ]++;
+ }
}
+ break;
+ } else {
+ core++;
+ }
+ }
+ }
+ } else { // nthreads > ncores
+ // Array to save the number of processors at each core
+ int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores);
+ // Array to save the number of cores with "x" available processors;
+ int *ncores_with_x_procs =
+ (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
+ // Array to save the number of cores with # procs from x to nth_per_core
+ int *ncores_with_x_to_max_procs =
+ (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1));
+
+ for (int i = 0; i <= nth_per_core; i++) {
+ ncores_with_x_procs[i] = 0;
+ ncores_with_x_to_max_procs[i] = 0;
+ }
- for( int i = 0; i <= nth_per_core; i++ ) {
- for( int j = i; j <= nth_per_core; j++ ) {
- ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
- }
- }
+ for (int i = 0; i < ncores; i++) {
+ int cnt = 0;
+ for (int j = 0; j < nth_per_core; j++) {
+ if (procarr[i * nth_per_core + j] != -1) {
+ cnt++;
+ }
+ }
+ nproc_at_core[i] = cnt;
+ ncores_with_x_procs[cnt]++;
+ }
- // Max number of processors
- int nproc = nth_per_core * ncores;
- // An array to keep number of threads per each context
- int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
- for( int i = 0; i < nproc; i++ ) {
- newarr[ i ] = 0;
- }
+ for (int i = 0; i <= nth_per_core; i++) {
+ for (int j = i; j <= nth_per_core; j++) {
+ ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
+ }
+ }
- int nth = nthreads;
- int flag = 0;
- while( nth > 0 ) {
- for( int j = 1; j <= nth_per_core; j++ ) {
- int cnt = ncores_with_x_to_max_procs[ j ];
- for( int i = 0; i < ncores; i++ ) {
- // Skip the core with 0 processors
- if( nproc_at_core[ i ] == 0 ) {
- continue;
- }
- for( int k = 0; k < nth_per_core; k++ ) {
- if( procarr[ i * nth_per_core + k ] != -1 ) {
- if( newarr[ i * nth_per_core + k ] == 0 ) {
- newarr[ i * nth_per_core + k ] = 1;
- cnt--;
- nth--;
- break;
- } else {
- if( flag != 0 ) {
- newarr[ i * nth_per_core + k ] ++;
- cnt--;
- nth--;
- break;
- }
- }
- }
- }
- if( cnt == 0 || nth == 0 ) {
- break;
- }
- }
- if( nth == 0 ) {
- break;
- }
- }
- flag = 1;
- }
- int sum = 0;
- for( int i = 0; i < nproc; i++ ) {
- sum += newarr[ i ];
- if( sum > tid ) {
- if( fine_gran) {
- int osID = procarr[ i ];
- KMP_CPU_SET( osID, mask);
- } else {
- int coreID = i / nth_per_core;
- for( int ii = 0; ii < nth_per_core; ii++ ) {
- int osID = procarr[ coreID * nth_per_core + ii ];
- if( osID != -1 ) {
- KMP_CPU_SET( osID, mask);
- }
- }
- }
+ // Max number of processors
+ int nproc = nth_per_core * ncores;
+ // An array to keep number of threads per each context
+ int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc);
+ for (int i = 0; i < nproc; i++) {
+ newarr[i] = 0;
+ }
+
+ int nth = nthreads;
+ int flag = 0;
+ while (nth > 0) {
+ for (int j = 1; j <= nth_per_core; j++) {
+ int cnt = ncores_with_x_to_max_procs[j];
+ for (int i = 0; i < ncores; i++) {
+ // Skip the core with 0 processors
+ if (nproc_at_core[i] == 0) {
+ continue;
+ }
+ for (int k = 0; k < nth_per_core; k++) {
+ if (procarr[i * nth_per_core + k] != -1) {
+ if (newarr[i * nth_per_core + k] == 0) {
+ newarr[i * nth_per_core + k] = 1;
+ cnt--;
+ nth--;
+ break;
+ } else {
+ if (flag != 0) {
+ newarr[i * nth_per_core + k]++;
+ cnt--;
+ nth--;
break;
+ }
}
+ }
+ }
+ if (cnt == 0 || nth == 0) {
+ break;
}
- __kmp_free( newarr );
+ }
+ if (nth == 0) {
+ break;
+ }
}
-
- if (__kmp_affinity_verbose) {
- char buf[KMP_AFFIN_MASK_PRINT_LEN];
- __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- __kmp_gettid(), tid, buf);
+ flag = 1;
+ }
+ int sum = 0;
+ for (int i = 0; i < nproc; i++) {
+ sum += newarr[i];
+ if (sum > tid) {
+ if (fine_gran) {
+ int osID = procarr[i];
+ KMP_CPU_SET(osID, mask);
+ } else {
+ int coreID = i / nth_per_core;
+ for (int ii = 0; ii < nth_per_core; ii++) {
+ int osID = procarr[coreID * nth_per_core + ii];
+ if (osID != -1) {
+ KMP_CPU_SET(osID, mask);
+ }
+ }
+ }
+ break;
}
- __kmp_set_system_affinity( mask, TRUE );
- KMP_CPU_FREE_FROM_STACK(mask);
+ }
+ __kmp_free(newarr);
}
+
+ if (__kmp_affinity_verbose) {
+ char buf[KMP_AFFIN_MASK_PRINT_LEN];
+ __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ __kmp_gettid(), tid, buf);
+ }
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
+ }
}
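
The uniform-topology branch of __kmp_balanced_affinity above distributes
nthreads over ncores, giving the first nthreads % ncores cores (the "big
cores") one extra thread each. A standalone sketch of just that index math
under assumed machine parameters (the helper is hypothetical and ignores the
actual mask handling):

    #include <cstdio>

    // Mirrors the uniform-topology index math in __kmp_balanced_affinity:
    // distribute nthreads over ncores, giving the first (nthreads % ncores)
    // "big" cores one extra thread each.
    static void balanced_core_thread(int tid, int nthreads, int ncores,
                                     int nth_per_core, int *coreID,
                                     int *threadID) {
      int chunk = nthreads / ncores;         // threads per "small" core
      int big_cores = nthreads % ncores;     // cores carrying one extra thread
      int big_nth = (chunk + 1) * big_cores; // threads living on big cores
      if (tid < big_nth) {
        *coreID = tid / (chunk + 1);
        *threadID = (tid % (chunk + 1)) % nth_per_core;
      } else {
        *coreID = (tid - big_cores) / chunk;
        *threadID = ((tid - big_cores) % chunk) % nth_per_core;
      }
    }

    int main() {
      // Assumed machine: 4 cores, 2 hardware threads per core, 6 OpenMP threads.
      for (int tid = 0; tid < 6; ++tid) {
        int c, t;
        balanced_core_thread(tid, /*nthreads=*/6, /*ncores=*/4,
                             /*nth_per_core=*/2, &c, &t);
        std::printf("tid %d -> core %d, thread %d\n", tid, c, t);
      }
      return 0;
    }

With these numbers, threads 0-3 land two per core on cores 0 and 1, while
threads 4 and 5 get cores 2 and 3 to themselves.
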
#if KMP_OS_LINUX
@@ -5451,28 +5004,29 @@ void __kmp_balanced_affinity( int tid, i
#ifdef __cplusplus
extern "C"
#endif
-int
-kmp_set_thread_affinity_mask_initial()
+ int
+ kmp_set_thread_affinity_mask_initial()
// the function returns 0 on success,
// -1 if we cannot bind thread
// >0 (errno) if an error happened during binding
{
- int gtid = __kmp_get_gtid();
- if (gtid < 0) {
- // Do not touch non-omp threads
- KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
- "non-omp thread, returning\n"));
- return -1;
- }
- if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
- KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
- "affinity not initialized, returning\n"));
- return -1;
- }
- KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: "
- "set full mask for thread %d\n", gtid));
- KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
- return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
+ int gtid = __kmp_get_gtid();
+ if (gtid < 0) {
+ // Do not touch non-omp threads
+ KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
+ "non-omp thread, returning\n"));
+ return -1;
+ }
+ if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
+ KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
+ "affinity not initialized, returning\n"));
+ return -1;
+ }
+ KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: "
+ "set full mask for thread %d\n",
+ gtid));
+ KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
+ return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
}
#endif
Modified: openmp/trunk/runtime/src/kmp_affinity.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_affinity.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_affinity.h (original)
+++ openmp/trunk/runtime/src/kmp_affinity.h Fri May 12 13:01:32 2017
@@ -12,765 +12,827 @@
//
//===----------------------------------------------------------------------===//
+
#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H
-#include "kmp_os.h"
#include "kmp.h"
+#include "kmp_os.h"
#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
-class KMPHwlocAffinity: public KMPAffinity {
+class KMPHwlocAffinity : public KMPAffinity {
public:
- class Mask : public KMPAffinity::Mask {
- hwloc_cpuset_t mask;
- public:
- Mask() { mask = hwloc_bitmap_alloc(); this->zero(); }
- ~Mask() { hwloc_bitmap_free(mask); }
- void set(int i) override { hwloc_bitmap_set(mask, i); }
- bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
- void clear(int i) override { hwloc_bitmap_clr(mask, i); }
- void zero() override { hwloc_bitmap_zero(mask); }
- void copy(const KMPAffinity::Mask* src) override {
- const Mask* convert = static_cast<const Mask*>(src);
- hwloc_bitmap_copy(mask, convert->mask);
- }
- void bitwise_and(const KMPAffinity::Mask* rhs) override {
- const Mask* convert = static_cast<const Mask*>(rhs);
- hwloc_bitmap_and(mask, mask, convert->mask);
- }
- void bitwise_or(const KMPAffinity::Mask * rhs) override {
- const Mask* convert = static_cast<const Mask*>(rhs);
- hwloc_bitmap_or(mask, mask, convert->mask);
- }
- void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
- int begin() const override { return hwloc_bitmap_first(mask); }
- int end() const override { return -1; }
- int next(int previous) const override { return hwloc_bitmap_next(mask, previous); }
- int get_system_affinity(bool abort_on_error) override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
- int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
- }
- return error;
- }
- int set_system_affinity(bool abort_on_error) const override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
- int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
- }
- return error;
- }
- int get_proc_group() const override {
- int i;
- int group = -1;
-# if KMP_OS_WINDOWS
- if (__kmp_num_proc_groups == 1) {
- return 1;
- }
- for (i = 0; i < __kmp_num_proc_groups; i++) {
- // On windows, the long type is always 32 bits
- unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2);
- unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1);
- if (first_32_bits == 0 && second_32_bits == 0) {
- continue;
- }
- if (group >= 0) {
- return -1;
- }
- group = i;
- }
-# endif /* KMP_OS_WINDOWS */
- return group;
- }
- };
- void determine_capable(const char* var) override {
- const hwloc_topology_support* topology_support;
- if(__kmp_hwloc_topology == NULL) {
- if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if(__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
- }
- if(hwloc_topology_load(__kmp_hwloc_topology) < 0) {
- __kmp_hwloc_error = TRUE;
- if(__kmp_affinity_verbose)
- KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
- }
+ class Mask : public KMPAffinity::Mask {
+ hwloc_cpuset_t mask;
+
+ public:
+ Mask() {
+ mask = hwloc_bitmap_alloc();
+ this->zero();
+ }
+ ~Mask() { hwloc_bitmap_free(mask); }
+ void set(int i) override { hwloc_bitmap_set(mask, i); }
+ bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
+ void clear(int i) override { hwloc_bitmap_clr(mask, i); }
+ void zero() override { hwloc_bitmap_zero(mask); }
+ void copy(const KMPAffinity::Mask *src) override {
+ const Mask *convert = static_cast<const Mask *>(src);
+ hwloc_bitmap_copy(mask, convert->mask);
+ }
+ void bitwise_and(const KMPAffinity::Mask *rhs) override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ hwloc_bitmap_and(mask, mask, convert->mask);
+ }
+ void bitwise_or(const KMPAffinity::Mask *rhs) override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ hwloc_bitmap_or(mask, mask, convert->mask);
+ }
+ void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
+ int begin() const override { return hwloc_bitmap_first(mask); }
+ int end() const override { return -1; }
+ int next(int previous) const override {
+ return hwloc_bitmap_next(mask, previous);
+ }
+ int get_system_affinity(bool abort_on_error) override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval =
+ hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
+ __kmp_msg_null);
+ }
+ return error;
+ }
+ int set_system_affinity(bool abort_on_error) const override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval =
+ hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
+ __kmp_msg_null);
+ }
+ return error;
+ }
+ int get_proc_group() const override {
+ int i;
+ int group = -1;
+#if KMP_OS_WINDOWS
+ if (__kmp_num_proc_groups == 1) {
+ return 1;
+ }
+ for (i = 0; i < __kmp_num_proc_groups; i++) {
+      // On Windows, the long type is always 32 bits
+ unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
+ unsigned long second_32_bits =
+ hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
+ if (first_32_bits == 0 && second_32_bits == 0) {
+ continue;
}
- topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
- // Is the system capable of setting/getting this thread's affinity?
- // also, is topology discovery possible? (pu indicates ability to discover processing units)
- // and finally, were there no errors when calling any hwloc_* API functions?
- if(topology_support && topology_support->cpubind->set_thisthread_cpubind &&
- topology_support->cpubind->get_thisthread_cpubind &&
- topology_support->discovery->pu &&
- !__kmp_hwloc_error)
- {
- // enables affinity according to KMP_AFFINITY_CAPABLE() macro
- KMP_AFFINITY_ENABLE(TRUE);
- } else {
- // indicate that hwloc didn't work and disable affinity
- __kmp_hwloc_error = TRUE;
- KMP_AFFINITY_DISABLE();
+ if (group >= 0) {
+ return -1;
}
+ group = i;
+ }
+#endif /* KMP_OS_WINDOWS */
+ return group;
}
- void bind_thread(int which) override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal set affinity operation when not capable");
- KMPAffinity::Mask *mask;
- KMP_CPU_ALLOC_ON_STACK(mask);
- KMP_CPU_ZERO(mask);
- KMP_CPU_SET(which, mask);
- __kmp_set_system_affinity(mask, TRUE);
- KMP_CPU_FREE_FROM_STACK(mask);
- }
- KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
- void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
- KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
- void deallocate_mask_array(KMPAffinity::Mask* array) override {
- Mask* hwloc_array = static_cast<Mask*>(array);
- delete[] hwloc_array;
- }
- KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
- Mask* hwloc_array = static_cast<Mask*>(array);
- return &(hwloc_array[index]);
- }
- api_type get_api_type() const override { return HWLOC; }
+ };
+ void determine_capable(const char *var) override {
+ const hwloc_topology_support *topology_support;
+ if (__kmp_hwloc_topology == NULL) {
+ if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
+ __kmp_hwloc_error = TRUE;
+ if (__kmp_affinity_verbose)
+ KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
+ }
+ if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
+ __kmp_hwloc_error = TRUE;
+ if (__kmp_affinity_verbose)
+ KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
+ }
+ }
+ topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
+ // Is the system capable of setting/getting this thread's affinity?
+ // Also, is topology discovery possible? (pu indicates ability to discover
+ // processing units). And finally, were there no errors when calling any
+ // hwloc_* API functions?
+ if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
+ topology_support->cpubind->get_thisthread_cpubind &&
+ topology_support->discovery->pu && !__kmp_hwloc_error) {
+ // enables affinity according to KMP_AFFINITY_CAPABLE() macro
+ KMP_AFFINITY_ENABLE(TRUE);
+ } else {
+ // indicate that hwloc didn't work and disable affinity
+ __kmp_hwloc_error = TRUE;
+ KMP_AFFINITY_DISABLE();
+ }
+ }
+ void bind_thread(int which) override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal set affinity operation when not capable");
+ KMPAffinity::Mask *mask;
+ KMP_CPU_ALLOC_ON_STACK(mask);
+ KMP_CPU_ZERO(mask);
+ KMP_CPU_SET(which, mask);
+ __kmp_set_system_affinity(mask, TRUE);
+ KMP_CPU_FREE_FROM_STACK(mask);
+ }
+ KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
+ void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
+ KMPAffinity::Mask *allocate_mask_array(int num) override {
+ return new Mask[num];
+ }
+ void deallocate_mask_array(KMPAffinity::Mask *array) override {
+ Mask *hwloc_array = static_cast<Mask *>(array);
+ delete[] hwloc_array;
+ }
+ KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
+ int index) override {
+ Mask *hwloc_array = static_cast<Mask *>(array);
+ return &(hwloc_array[index]);
+ }
+ api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
#if KMP_OS_LINUX
-/*
- * On some of the older OS's that we build on, these constants aren't present
- * in <asm/unistd.h> #included from <sys.syscall.h>. They must be the same on
- * all systems of the same arch where they are defined, and they cannot change.
- * stone forever.
- */
+/* On some of the older OS's that we build on, these constants aren't present
+   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
+   all systems of the same arch where they are defined, and they cannot
+   change; they are set in stone forever. */
#include <sys/syscall.h>
-# if KMP_ARCH_X86 || KMP_ARCH_ARM
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 241
-# elif __NR_sched_setaffinity != 241
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 242
-# elif __NR_sched_getaffinity != 242
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-# elif KMP_ARCH_AARCH64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 122
-# elif __NR_sched_setaffinity != 122
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 123
-# elif __NR_sched_getaffinity != 123
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-# elif KMP_ARCH_X86_64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 203
-# elif __NR_sched_setaffinity != 203
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 204
-# elif __NR_sched_getaffinity != 204
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-# elif KMP_ARCH_PPC64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 222
-# elif __NR_sched_setaffinity != 222
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 223
-# elif __NR_sched_getaffinity != 223
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-# elif KMP_ARCH_MIPS
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 4239
-# elif __NR_sched_setaffinity != 4239
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 4240
-# elif __NR_sched_getaffinity != 4240
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-# elif KMP_ARCH_MIPS64
-# ifndef __NR_sched_setaffinity
-# define __NR_sched_setaffinity 5195
-# elif __NR_sched_setaffinity != 5195
-# error Wrong code for setaffinity system call.
-# endif /* __NR_sched_setaffinity */
-# ifndef __NR_sched_getaffinity
-# define __NR_sched_getaffinity 5196
-# elif __NR_sched_getaffinity != 5196
-# error Wrong code for getaffinity system call.
-# endif /* __NR_sched_getaffinity */
-# else
-# error Unknown or unsupported architecture
-# endif /* KMP_ARCH_* */
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 241
+#elif __NR_sched_setaffinity != 241
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 242
+#elif __NR_sched_getaffinity != 242
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
+#elif KMP_ARCH_AARCH64
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 122
+#elif __NR_sched_setaffinity != 122
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 123
+#elif __NR_sched_getaffinity != 123
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
+#elif KMP_ARCH_X86_64
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 203
+#elif __NR_sched_setaffinity != 203
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 204
+#elif __NR_sched_getaffinity != 204
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
+#elif KMP_ARCH_PPC64
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 222
+#elif __NR_sched_setaffinity != 222
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 223
+#elif __NR_sched_getaffinity != 223
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
+#elif KMP_ARCH_MIPS
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 4239
+#elif __NR_sched_setaffinity != 4239
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 4240
+#elif __NR_sched_getaffinity != 4240
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
+#elif KMP_ARCH_MIPS64
+#ifndef __NR_sched_setaffinity
+#define __NR_sched_setaffinity 5195
+#elif __NR_sched_setaffinity != 5195
+#error Wrong code for setaffinity system call.
+#endif /* __NR_sched_setaffinity */
+#ifndef __NR_sched_getaffinity
+#define __NR_sched_getaffinity 5196
+#elif __NR_sched_getaffinity != 5196
+#error Wrong code for getaffinity system call.
+#endif /* __NR_sched_getaffinity */
+#else
+#error Unknown or unsupported architecture
+#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
- class Mask : public KMPAffinity::Mask {
- typedef unsigned char mask_t;
- static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
- public:
- mask_t* mask;
- Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); }
- ~Mask() { if (mask) __kmp_free(mask); }
- void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
- bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
- void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
- void zero() override {
- for (size_t i=0; i<__kmp_affin_mask_size; ++i)
- mask[i] = 0;
- }
- void copy(const KMPAffinity::Mask* src) override {
- const Mask * convert = static_cast<const Mask*>(src);
- for (size_t i=0; i<__kmp_affin_mask_size; ++i)
- mask[i] = convert->mask[i];
- }
- void bitwise_and(const KMPAffinity::Mask* rhs) override {
- const Mask * convert = static_cast<const Mask*>(rhs);
- for (size_t i=0; i<__kmp_affin_mask_size; ++i)
- mask[i] &= convert->mask[i];
- }
- void bitwise_or(const KMPAffinity::Mask* rhs) override {
- const Mask * convert = static_cast<const Mask*>(rhs);
- for (size_t i=0; i<__kmp_affin_mask_size; ++i)
- mask[i] |= convert->mask[i];
- }
- void bitwise_not() override {
- for (size_t i=0; i<__kmp_affin_mask_size; ++i)
- mask[i] = ~(mask[i]);
- }
- int begin() const override {
- int retval = 0;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; }
- int next(int previous) const override {
- int retval = previous+1;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int get_system_affinity(bool abort_on_error) override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
- int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask );
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
- }
- return error;
- }
- int set_system_affinity(bool abort_on_error) const override {
- KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
- "Illegal get affinity operation when not capable");
- int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask );
- if (retval >= 0) {
- return 0;
- }
- int error = errno;
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null);
- }
- return error;
- }
- };
- void determine_capable(const char* env_var) override {
- __kmp_affinity_determine_capable(env_var);
- }
- void bind_thread(int which) override {
- __kmp_affinity_bind_thread(which);
- }
- KMPAffinity::Mask* allocate_mask() override {
- KMPNativeAffinity::Mask* retval = new Mask();
- return retval;
- }
- void deallocate_mask(KMPAffinity::Mask* m) override {
- KMPNativeAffinity::Mask* native_mask = static_cast<KMPNativeAffinity::Mask*>(m);
- delete m;
- }
- KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
- void deallocate_mask_array(KMPAffinity::Mask* array) override {
- Mask* linux_array = static_cast<Mask*>(array);
- delete[] linux_array;
- }
- KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
- Mask* linux_array = static_cast<Mask*>(array);
- return &(linux_array[index]);
- }
- api_type get_api_type() const override { return NATIVE_OS; }
+ class Mask : public KMPAffinity::Mask {
+ typedef unsigned char mask_t;
+ static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
+
+ public:
+ mask_t *mask;
+ Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
+ ~Mask() {
+ if (mask)
+ __kmp_free(mask);
+ }
+ void set(int i) override {
+ mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
+ }
+ bool is_set(int i) const override {
+ return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
+ }
+ void clear(int i) override {
+ mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
+ }
+ void zero() override {
+ for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
+ mask[i] = 0;
+ }
+ void copy(const KMPAffinity::Mask *src) override {
+ const Mask *convert = static_cast<const Mask *>(src);
+ for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
+ mask[i] = convert->mask[i];
+ }
+ void bitwise_and(const KMPAffinity::Mask *rhs) override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
+ mask[i] &= convert->mask[i];
+ }
+ void bitwise_or(const KMPAffinity::Mask *rhs) override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
+ mask[i] |= convert->mask[i];
+ }
+ void bitwise_not() override {
+ for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
+ mask[i] = ~(mask[i]);
+ }
+ int begin() const override {
+ int retval = 0;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
+ int next(int previous) const override {
+ int retval = previous + 1;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int get_system_affinity(bool abort_on_error) override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval =
+ syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
+ __kmp_msg_null);
+ }
+ return error;
+ }
+ int set_system_affinity(bool abort_on_error) const override {
+ KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
+ "Illegal get affinity operation when not capable");
+ int retval =
+ syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
+ if (retval >= 0) {
+ return 0;
+ }
+ int error = errno;
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
+ __kmp_msg_null);
+ }
+ return error;
+ }
+ };
+ void determine_capable(const char *env_var) override {
+ __kmp_affinity_determine_capable(env_var);
+ }
+ void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
+ KMPAffinity::Mask *allocate_mask() override {
+ KMPNativeAffinity::Mask *retval = new Mask();
+ return retval;
+ }
+ void deallocate_mask(KMPAffinity::Mask *m) override {
+ KMPNativeAffinity::Mask *native_mask =
+ static_cast<KMPNativeAffinity::Mask *>(m);
+ delete m;
+ }
+ KMPAffinity::Mask *allocate_mask_array(int num) override {
+ return new Mask[num];
+ }
+ void deallocate_mask_array(KMPAffinity::Mask *array) override {
+ Mask *linux_array = static_cast<Mask *>(array);
+ delete[] linux_array;
+ }
+ KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
+ int index) override {
+ Mask *linux_array = static_cast<Mask *>(array);
+ return &(linux_array[index]);
+ }
+ api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
- class Mask : public KMPAffinity::Mask {
- typedef ULONG_PTR mask_t;
- static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT;
- mask_t* mask;
- public:
- Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); }
- ~Mask() { if (mask) __kmp_free(mask); }
- void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); }
- bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); }
- void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); }
- void zero() override {
- for (size_t i=0; i<__kmp_num_proc_groups; ++i)
- mask[i] = 0;
- }
- void copy(const KMPAffinity::Mask* src) override {
- const Mask * convert = static_cast<const Mask*>(src);
- for (size_t i=0; i<__kmp_num_proc_groups; ++i)
- mask[i] = convert->mask[i];
- }
- void bitwise_and(const KMPAffinity::Mask* rhs) override {
- const Mask * convert = static_cast<const Mask*>(rhs);
- for (size_t i=0; i<__kmp_num_proc_groups; ++i)
- mask[i] &= convert->mask[i];
- }
- void bitwise_or(const KMPAffinity::Mask* rhs) override {
- const Mask * convert = static_cast<const Mask*>(rhs);
- for (size_t i=0; i<__kmp_num_proc_groups; ++i)
- mask[i] |= convert->mask[i];
- }
- void bitwise_not() override {
- for (size_t i=0; i<__kmp_num_proc_groups; ++i)
- mask[i] = ~(mask[i]);
- }
- int begin() const override {
- int retval = 0;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; }
- int next(int previous) const override {
- int retval = previous+1;
- while (retval < end() && !is_set(retval))
- ++retval;
- return retval;
- }
- int set_system_affinity(bool abort_on_error) const override {
- if (__kmp_num_proc_groups > 1) {
- // Check for a valid mask.
- GROUP_AFFINITY ga;
- int group = get_proc_group();
- if (group < 0) {
- if (abort_on_error) {
- KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
- }
- return -1;
- }
- // Transform the bit vector into a GROUP_AFFINITY struct
- // and make the system call to set affinity.
- ga.Group = group;
- ga.Mask = mask[group];
- ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
-
- KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
- if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ), __kmp_msg_null);
- }
- return error;
- }
- } else {
- if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ),
- KMP_ERR( error ), __kmp_msg_null);
- }
- return error;
- }
- }
- return 0;
- }
- int get_system_affinity(bool abort_on_error) override {
- if (__kmp_num_proc_groups > 1) {
- this->zero();
- GROUP_AFFINITY ga;
- KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
- if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) {
- return -1;
- }
- mask[ga.Group] = ga.Mask;
- } else {
- mask_t newMask, sysMask, retval;
- if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
- if (! retval) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- return error;
- }
- newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
- if (! newMask) {
- DWORD error = GetLastError();
- if (abort_on_error) {
- __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
- KMP_ERR(error), __kmp_msg_null);
- }
- }
- *mask = retval;
- }
- return 0;
- }
- int get_proc_group() const override {
- int group = -1;
- if (__kmp_num_proc_groups == 1) {
- return 1;
- }
- for (int i = 0; i < __kmp_num_proc_groups; i++) {
- if (mask[i] == 0)
- continue;
- if (group >= 0)
- return -1;
- group = i;
- }
- return group;
- }
- };
- void determine_capable(const char* env_var) override {
- __kmp_affinity_determine_capable(env_var);
- }
- void bind_thread(int which) override {
- __kmp_affinity_bind_thread(which);
- }
- KMPAffinity::Mask* allocate_mask() override { return new Mask(); }
- void deallocate_mask(KMPAffinity::Mask* m) override { delete m; }
- KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; }
- void deallocate_mask_array(KMPAffinity::Mask* array) override {
- Mask* windows_array = static_cast<Mask*>(array);
- delete[] windows_array;
- }
- KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override {
- Mask* windows_array = static_cast<Mask*>(array);
- return &(windows_array[index]);
- }
- api_type get_api_type() const override { return NATIVE_OS; }
+ class Mask : public KMPAffinity::Mask {
+ typedef ULONG_PTR mask_t;
+ static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
+ mask_t *mask;
+
+ public:
+ Mask() {
+ mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
+ }
+ ~Mask() {
+ if (mask)
+ __kmp_free(mask);
+ }
+ void set(int i) override {
+ mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
+ }
+ bool is_set(int i) const override {
+ return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
+ }
+ void clear(int i) override {
+ mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
+ }
+ void zero() override {
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ mask[i] = 0;
+ }
+ void copy(const KMPAffinity::Mask *src) override {
+ const Mask *convert = static_cast<const Mask *>(src);
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ mask[i] = convert->mask[i];
+ }
+ void bitwise_and(const KMPAffinity::Mask *rhs) override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ mask[i] &= convert->mask[i];
+ }
+ void bitwise_or(const KMPAffinity::Mask *rhs) override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ mask[i] |= convert->mask[i];
+ }
+ void bitwise_not() override {
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ mask[i] = ~(mask[i]);
+ }
+ int begin() const override {
+ int retval = 0;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
+ int next(int previous) const override {
+ int retval = previous + 1;
+ while (retval < end() && !is_set(retval))
+ ++retval;
+ return retval;
+ }
+ int set_system_affinity(bool abort_on_error) const override {
+ if (__kmp_num_proc_groups > 1) {
+ // Check for a valid mask.
+ GROUP_AFFINITY ga;
+ int group = get_proc_group();
+ if (group < 0) {
+ if (abort_on_error) {
+ KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
+ }
+ return -1;
+ }
+ // Transform the bit vector into a GROUP_AFFINITY struct
+ // and make the system call to set affinity.
+ ga.Group = group;
+ ga.Mask = mask[group];
+ ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
+
+ KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
+ if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ } else {
+ if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ }
+ return 0;
+ }
+ int get_system_affinity(bool abort_on_error) override {
+ if (__kmp_num_proc_groups > 1) {
+ this->zero();
+ GROUP_AFFINITY ga;
+ KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
+ if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal,
+ KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
+ (ga.Mask == 0)) {
+ return -1;
+ }
+ mask[ga.Group] = ga.Mask;
+ } else {
+ mask_t newMask, sysMask, retval;
+ if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal,
+ KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
+ if (!retval) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal,
+ KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ return error;
+ }
+ newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
+ if (!newMask) {
+ DWORD error = GetLastError();
+ if (abort_on_error) {
+ __kmp_msg(kmp_ms_fatal,
+ KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
+ KMP_ERR(error), __kmp_msg_null);
+ }
+ }
+ *mask = retval;
+ }
+ return 0;
+ }
+ int get_proc_group() const override {
+ int group = -1;
+ if (__kmp_num_proc_groups == 1) {
+ return 1;
+ }
+ for (int i = 0; i < __kmp_num_proc_groups; i++) {
+ if (mask[i] == 0)
+ continue;
+ if (group >= 0)
+ return -1;
+ group = i;
+ }
+ return group;
+ }
+ };
+ void determine_capable(const char *env_var) override {
+ __kmp_affinity_determine_capable(env_var);
+ }
+ void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
+ KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
+ void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
+ KMPAffinity::Mask *allocate_mask_array(int num) override {
+ return new Mask[num];
+ }
+ void deallocate_mask_array(KMPAffinity::Mask *array) override {
+ Mask *windows_array = static_cast<Mask *>(array);
+ delete[] windows_array;
+ }
+ KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
+ int index) override {
+ Mask *windows_array = static_cast<Mask *>(array);
+ return &(windows_array[index]);
+ }
+ api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
class Address {
public:
- static const unsigned maxDepth = 32;
- unsigned labels[maxDepth];
- unsigned childNums[maxDepth];
- unsigned depth;
- unsigned leader;
- Address(unsigned _depth)
- : depth(_depth), leader(FALSE) {
- }
- Address &operator=(const Address &b) {
- depth = b.depth;
- for (unsigned i = 0; i < depth; i++) {
- labels[i] = b.labels[i];
- childNums[i] = b.childNums[i];
- }
- leader = FALSE;
- return *this;
- }
- bool operator==(const Address &b) const {
- if (depth != b.depth)
- return false;
- for (unsigned i = 0; i < depth; i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
- }
- bool isClose(const Address &b, int level) const {
- if (depth != b.depth)
- return false;
- if ((unsigned)level >= depth)
- return true;
- for (unsigned i = 0; i < (depth - level); i++)
- if(labels[i] != b.labels[i])
- return false;
- return true;
- }
- bool operator!=(const Address &b) const {
- return !operator==(b);
- }
- void print() const {
- unsigned i;
- printf("Depth: %u --- ", depth);
- for(i=0;i<depth;i++) {
- printf("%u ", labels[i]);
- }
+ static const unsigned maxDepth = 32;
+ unsigned labels[maxDepth];
+ unsigned childNums[maxDepth];
+ unsigned depth;
+ unsigned leader;
+ Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
+ Address &operator=(const Address &b) {
+ depth = b.depth;
+ for (unsigned i = 0; i < depth; i++) {
+ labels[i] = b.labels[i];
+ childNums[i] = b.childNums[i];
+ }
+ leader = FALSE;
+ return *this;
+ }
+ bool operator==(const Address &b) const {
+ if (depth != b.depth)
+ return false;
+ for (unsigned i = 0; i < depth; i++)
+ if (labels[i] != b.labels[i])
+ return false;
+ return true;
+ }
+ bool isClose(const Address &b, int level) const {
+ if (depth != b.depth)
+ return false;
+ if ((unsigned)level >= depth)
+ return true;
+ for (unsigned i = 0; i < (depth - level); i++)
+ if (labels[i] != b.labels[i])
+ return false;
+ return true;
+ }
+ bool operator!=(const Address &b) const { return !operator==(b); }
+ void print() const {
+ unsigned i;
+ printf("Depth: %u --- ", depth);
+ for (i = 0; i < depth; i++) {
+ printf("%u ", labels[i]);
}
+ }
};
class AddrUnsPair {
public:
- Address first;
- unsigned second;
- AddrUnsPair(Address _first, unsigned _second)
- : first(_first), second(_second) {
- }
- AddrUnsPair &operator=(const AddrUnsPair &b)
- {
- first = b.first;
- second = b.second;
- return *this;
- }
- void print() const {
- printf("first = "); first.print();
- printf(" --- second = %u", second);
- }
- bool operator==(const AddrUnsPair &b) const {
- if(first != b.first) return false;
- if(second != b.second) return false;
- return true;
- }
- bool operator!=(const AddrUnsPair &b) const {
- return !operator==(b);
- }
+ Address first;
+ unsigned second;
+ AddrUnsPair(Address _first, unsigned _second)
+ : first(_first), second(_second) {}
+ AddrUnsPair &operator=(const AddrUnsPair &b) {
+ first = b.first;
+ second = b.second;
+ return *this;
+ }
+ void print() const {
+ printf("first = ");
+ first.print();
+ printf(" --- second = %u", second);
+ }
+ bool operator==(const AddrUnsPair &b) const {
+ if (first != b.first)
+ return false;
+ if (second != b.second)
+ return false;
+ return true;
+ }
+ bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};
-
-static int
-__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
-{
- const Address *aa = (const Address *)&(((AddrUnsPair *)a)
- ->first);
- const Address *bb = (const Address *)&(((AddrUnsPair *)b)
- ->first);
- unsigned depth = aa->depth;
- unsigned i;
- KMP_DEBUG_ASSERT(depth == bb->depth);
- for (i = 0; i < depth; i++) {
- if (aa->labels[i] < bb->labels[i]) return -1;
- if (aa->labels[i] > bb->labels[i]) return 1;
- }
- return 0;
+static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
+ const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
+ const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
+ unsigned depth = aa->depth;
+ unsigned i;
+ KMP_DEBUG_ASSERT(depth == bb->depth);
+ for (i = 0; i < depth; i++) {
+ if (aa->labels[i] < bb->labels[i])
+ return -1;
+ if (aa->labels[i] > bb->labels[i])
+ return 1;
+ }
+ return 0;
}
-
-/** A structure for holding machine-specific hierarchy info to be computed once at init.
- This structure represents a mapping of threads to the actual machine hierarchy, or to
- our best guess at what the hierarchy might be, for the purpose of performing an
- efficient barrier. In the worst case, when there is no machine hierarchy information,
- it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */
+/* A structure for holding machine-specific hierarchy info to be computed once
+ at init. This structure represents a mapping of threads to the actual machine
+ hierarchy, or to our best guess at what the hierarchy might be, for the
+ purpose of performing an efficient barrier. In the worst case, when there is
+ no machine hierarchy information, it produces a tree suitable for a barrier,
+ similar to the tree used in the hyper barrier. */
class hierarchy_info {
public:
- /** Good default values for number of leaves and branching factor, given no affinity information.
- Behaves a bit like hyper barrier. */
- static const kmp_uint32 maxLeaves=4;
- static const kmp_uint32 minBranch=4;
- /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package
- or socket, packages/node, nodes/machine, etc. We don't want to get specific with
- nomenclature. When the machine is oversubscribed we add levels to duplicate the
- hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */
- kmp_uint32 maxLevels;
-
- /** This is specifically the depth of the machine configuration hierarchy, in terms of the
- number of levels along the longest path from root to any leaf. It corresponds to the
- number of entries in numPerLevel if we exclude all but one trailing 1. */
- kmp_uint32 depth;
- kmp_uint32 base_num_threads;
- enum init_status { initialized=0, not_initialized=1, initializing=2 };
- volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
- volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
-
- /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
- node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
- and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
- kmp_uint32 *numPerLevel;
- kmp_uint32 *skipPerLevel;
-
- void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
- int hier_depth = adr2os[0].first.depth;
- int level = 0;
- for (int i=hier_depth-1; i>=0; --i) {
- int max = -1;
- for (int j=0; j<num_addrs; ++j) {
- int next = adr2os[j].first.childNums[i];
- if (next > max) max = next;
- }
- numPerLevel[level] = max+1;
- ++level;
- }
- }
-
- hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
-
- void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
-
- void init(AddrUnsPair *adr2os, int num_addrs)
- {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
- if (bool_result == 0) { // Wait for initialization
- while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
- return;
- }
- KMP_DEBUG_ASSERT(bool_result==1);
-
- /* Added explicit initialization of the data fields here to prevent usage of dirty value
- observed when static library is re-initialized multiple times (e.g. when
- non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
- depth = 1;
- resizing = 0;
- maxLevels = 7;
- numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
- for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
+ /* Good default values for number of leaves and branching factor, given no
+ affinity information. Behaves a bit like hyper barrier. */
+ static const kmp_uint32 maxLeaves = 4;
+ static const kmp_uint32 minBranch = 4;
+ /** Number of levels in the hierarchy. Typical levels are threads/core,
+ cores/package or socket, packages/node, nodes/machine, etc. We don't want
+ to get specific with nomenclature. When the machine is oversubscribed we
+ add levels to duplicate the hierarchy, doubling the thread capacity of the
+ hierarchy each time we add a level. */
+ kmp_uint32 maxLevels;
+
+ /** This is specifically the depth of the machine configuration hierarchy, in
+ terms of the number of levels along the longest path from root to any
+ leaf. It corresponds to the number of entries in numPerLevel if we exclude
+ all but one trailing 1. */
+ kmp_uint32 depth;
+ kmp_uint32 base_num_threads;
+ enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
+ volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
+ // 2=initialization in progress
+ volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
+
+ /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
+ the parent of a node at level i has. For example, if we have a machine
+ with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel =
+ {2, 4, 4, 1, 1}. All empty levels are set to 1. */
+ kmp_uint32 *numPerLevel;
+ kmp_uint32 *skipPerLevel;
+
+ void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
+ int hier_depth = adr2os[0].first.depth;
+ int level = 0;
+ for (int i = hier_depth - 1; i >= 0; --i) {
+ int max = -1;
+ for (int j = 0; j < num_addrs; ++j) {
+ int next = adr2os[j].first.childNums[i];
+ if (next > max)
+ max = next;
+ }
+ numPerLevel[level] = max + 1;
+ ++level;
+ }
+ }
+
+ hierarchy_info()
+ : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
+
+ void fini() {
+ if (!uninitialized && numPerLevel)
+ __kmp_free(numPerLevel);
+ }
+
+ void init(AddrUnsPair *adr2os, int num_addrs) {
+ kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
+ &uninitialized, not_initialized, initializing);
+ if (bool_result == 0) { // Wait for initialization
+ while (TCR_1(uninitialized) != initialized)
+ KMP_CPU_PAUSE();
+ return;
+ }
+ KMP_DEBUG_ASSERT(bool_result == 1);
+
+ /* Added explicit initialization of the data fields here to prevent usage of
+ dirty value observed when static library is re-initialized multiple times
+ (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses
+ OpenMP). */
+ depth = 1;
+ resizing = 0;
+ maxLevels = 7;
+ numPerLevel =
+ (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
+ skipPerLevel = &(numPerLevel[maxLevels]);
+ for (kmp_uint32 i = 0; i < maxLevels;
+ ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ skipPerLevel[i] = 1;
+ }
+
+ // Sort table by physical ID
+ if (adr2os) {
+ qsort(adr2os, num_addrs, sizeof(*adr2os),
+ __kmp_affinity_cmp_Address_labels);
+ deriveLevels(adr2os, num_addrs);
+ } else {
+ numPerLevel[0] = maxLeaves;
+ numPerLevel[1] = num_addrs / maxLeaves;
+ if (num_addrs % maxLeaves)
+ numPerLevel[1]++;
+ }
+
+ base_num_threads = num_addrs;
+ for (int i = maxLevels - 1; i >= 0;
+ --i) // count non-empty levels to get depth
+ if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
+ depth++;
+
+ kmp_uint32 branch = minBranch;
+ if (numPerLevel[0] == 1)
+ branch = num_addrs / maxLeaves;
+ if (branch < minBranch)
+ branch = minBranch;
+ for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
+ while (numPerLevel[d] > branch ||
+ (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
+ if (numPerLevel[d] & 1)
+ numPerLevel[d]++;
+ numPerLevel[d] = numPerLevel[d] >> 1;
+ if (numPerLevel[d + 1] == 1)
+ depth++;
+ numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
+ }
+ if (numPerLevel[0] == 1) {
+ branch = branch >> 1;
+ if (branch < 4)
+ branch = minBranch;
+ }
+ }
+
+ for (kmp_uint32 i = 1; i < depth; ++i)
+ skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
+ // Fill in hierarchy in the case of oversubscription
+ for (kmp_uint32 i = depth; i < maxLevels; ++i)
+ skipPerLevel[i] = 2 * skipPerLevel[i - 1];
+
+ uninitialized = initialized; // One writer
+
+ }
+
+ // Resize the hierarchy if nproc changes to something larger than before
+ void resize(kmp_uint32 nproc) {
+ kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+ while (bool_result == 0) { // someone else is trying to resize
+ KMP_CPU_PAUSE();
+ if (nproc <= base_num_threads) // happy with other thread's resize
+ return;
+ else // try to resize
+ bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
+ }
+ KMP_DEBUG_ASSERT(bool_result != 0);
+ if (nproc <= base_num_threads)
+ return; // happy with other thread's resize
+
+ // Calculate new maxLevels
+ kmp_uint32 old_sz = skipPerLevel[depth - 1];
+ kmp_uint32 incs = 0, old_maxLevels = maxLevels;
+ // First see if old maxLevels is enough to contain new size
+ for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
+ skipPerLevel[i] = 2 * skipPerLevel[i - 1];
+ numPerLevel[i - 1] *= 2;
+ old_sz *= 2;
+ depth++;
+ }
+ if (nproc > old_sz) { // Not enough space, need to expand hierarchy
+ while (nproc > old_sz) {
+ old_sz *= 2;
+ incs++;
+ depth++;
+ }
+ maxLevels += incs;
+
+ // Resize arrays
+ kmp_uint32 *old_numPerLevel = numPerLevel;
+ kmp_uint32 *old_skipPerLevel = skipPerLevel;
+ numPerLevel = skipPerLevel = NULL;
+ numPerLevel =
+ (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
+ skipPerLevel = &(numPerLevel[maxLevels]);
+
+ // Copy old elements from old arrays
+ for (kmp_uint32 i = 0; i < old_maxLevels;
+ ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = old_numPerLevel[i];
+ skipPerLevel[i] = old_skipPerLevel[i];
+ }
+
+ // Init new elements in arrays to 1
+ for (kmp_uint32 i = old_maxLevels; i < maxLevels;
+ ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ skipPerLevel[i] = 1;
+ }
+
+ // Free old arrays
+ __kmp_free(old_numPerLevel);
+ }
+
+ // Fill in oversubscription levels of hierarchy
+ for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
+ skipPerLevel[i] = 2 * skipPerLevel[i - 1];
- // Sort table by physical ID
- if (adr2os) {
- qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
- deriveLevels(adr2os, num_addrs);
- }
- else {
- numPerLevel[0] = maxLeaves;
- numPerLevel[1] = num_addrs/maxLeaves;
- if (num_addrs%maxLeaves) numPerLevel[1]++;
- }
-
- base_num_threads = num_addrs;
- for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
- if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
- depth++;
-
- kmp_uint32 branch = minBranch;
- if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
- if (branch<minBranch) branch=minBranch;
- for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
- while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
- if (numPerLevel[d] & 1) numPerLevel[d]++;
- numPerLevel[d] = numPerLevel[d] >> 1;
- if (numPerLevel[d+1] == 1) depth++;
- numPerLevel[d+1] = numPerLevel[d+1] << 1;
- }
- if(numPerLevel[0] == 1) {
- branch = branch >> 1;
- if (branch<4) branch = minBranch;
- }
- }
+ base_num_threads = nproc;
+ resizing = 0; // One writer
- for (kmp_uint32 i=1; i<depth; ++i)
- skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
- // Fill in hierarchy in the case of oversubscription
- for (kmp_uint32 i=depth; i<maxLevels; ++i)
- skipPerLevel[i] = 2*skipPerLevel[i-1];
-
- uninitialized = initialized; // One writer
-
- }
-
- // Resize the hierarchy if nproc changes to something larger than before
- void resize(kmp_uint32 nproc)
- {
- kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- while (bool_result == 0) { // someone else is trying to resize
- KMP_CPU_PAUSE();
- if (nproc <= base_num_threads) // happy with other thread's resize
- return;
- else // try to resize
- bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
- }
- KMP_DEBUG_ASSERT(bool_result!=0);
- if (nproc <= base_num_threads) return; // happy with other thread's resize
-
- // Calculate new maxLevels
- kmp_uint32 old_sz = skipPerLevel[depth-1];
- kmp_uint32 incs = 0, old_maxLevels = maxLevels;
- // First see if old maxLevels is enough to contain new size
- for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
- skipPerLevel[i] = 2*skipPerLevel[i-1];
- numPerLevel[i-1] *= 2;
- old_sz *= 2;
- depth++;
- }
- if (nproc > old_sz) { // Not enough space, need to expand hierarchy
- while (nproc > old_sz) {
- old_sz *=2;
- incs++;
- depth++;
- }
- maxLevels += incs;
-
- // Resize arrays
- kmp_uint32 *old_numPerLevel = numPerLevel;
- kmp_uint32 *old_skipPerLevel = skipPerLevel;
- numPerLevel = skipPerLevel = NULL;
- numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
- skipPerLevel = &(numPerLevel[maxLevels]);
-
- // Copy old elements from old arrays
- for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = old_numPerLevel[i];
- skipPerLevel[i] = old_skipPerLevel[i];
- }
-
- // Init new elements in arrays to 1
- for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
- numPerLevel[i] = 1;
- skipPerLevel[i] = 1;
- }
-
- // Free old arrays
- __kmp_free(old_numPerLevel);
- }
-
- // Fill in oversubscription levels of hierarchy
- for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
- skipPerLevel[i] = 2*skipPerLevel[i-1];
-
- base_num_threads = nproc;
- resizing = 0; // One writer
-
- }
+ }
};
#endif // KMP_AFFINITY_H
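
A side note on the hierarchy_info code reformatted above: level 0 of the
hierarchy holds the leaves, numPerLevel[i] is the fan-out of a node at level i,
and init() derives skipPerLevel[i], the number of leaves spanned by one node at
level i. The standalone C++ sketch below (illustrative only, not part of this
patch, and omitting init()'s branch-width optimization pass) works that out for
the example machine named in the comments: 2 hardware threads per core, 4 cores
per package, 4 packages, i.e. numPerLevel = {2, 4, 4, 1, ...}.

#include <cstdio>

int main() {
  const int maxLevels = 7;
  // Level 0 = leaves (hardware threads); numPerLevel[i] = children per node at
  // level i; unused levels default to 1, as in hierarchy_info::init().
  unsigned numPerLevel[maxLevels] = {2, 4, 4, 1, 1, 1, 1};
  unsigned skipPerLevel[maxLevels] = {1, 1, 1, 1, 1, 1, 1};

  // Count non-empty levels to get the depth (only one trailing '1' counts).
  unsigned depth = 1;
  for (int i = maxLevels - 1; i >= 0; --i)
    if (numPerLevel[i] != 1 || depth > 1)
      depth++;

  // skipPerLevel[i] = number of leaves spanned by one node at level i.
  for (unsigned i = 1; i < depth; ++i)
    skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];

  for (unsigned i = 0; i < depth; ++i)
    printf("level %u: fan-out %u, spans %u leaves\n", i, numPerLevel[i],
           skipPerLevel[i]);
  // Prints spans of 1, 2, 8 and 32 leaves; 32 matches the 2*4*4 = 32 hardware
  // threads of the example machine.
  return 0;
}

For levels beyond this depth, the oversubscription loop in init() simply keeps
doubling the stride.
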
Modified: openmp/trunk/runtime/src/kmp_alloc.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_alloc.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_alloc.cpp (original)
+++ openmp/trunk/runtime/src/kmp_alloc.cpp Fri May 12 13:01:32 2017
@@ -14,742 +14,679 @@
#include "kmp.h"
-#include "kmp_wrapper_malloc.h"
#include "kmp_io.h"
+#include "kmp_wrapper_malloc.h"
// Disable bget when it is not used
#if KMP_USE_BGET
/* Thread private buffer management code */
-typedef int (*bget_compact_t)(size_t, int);
+typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
-typedef void (*bget_release_t)(void *);
+typedef void (*bget_release_t)(void *);
/* NOTE: bufsize must be a signed datatype */
#if KMP_OS_WINDOWS
-# if KMP_ARCH_X86 || KMP_ARCH_ARM
- typedef kmp_int32 bufsize;
-# else
- typedef kmp_int64 bufsize;
-# endif
+#if KMP_ARCH_X86 || KMP_ARCH_ARM
+typedef kmp_int32 bufsize;
+#else
+typedef kmp_int64 bufsize;
+#endif
#else
- typedef ssize_t bufsize;
+typedef ssize_t bufsize;
#endif
/* The three modes of operation are, fifo search, lifo search, and best-fit */
typedef enum bget_mode {
- bget_mode_fifo = 0,
- bget_mode_lifo = 1,
- bget_mode_best = 2
+ bget_mode_fifo = 0,
+ bget_mode_lifo = 1,
+ bget_mode_best = 2
} bget_mode_t;
-
-static void bpool( kmp_info_t *th, void *buffer, bufsize len);
-static void *bget( kmp_info_t *th, bufsize size);
-static void *bgetz( kmp_info_t *th, bufsize size);
-static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
-static void brel( kmp_info_t *th, void *buf);
-static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
+static void bpool(kmp_info_t *th, void *buffer, bufsize len);
+static void *bget(kmp_info_t *th, bufsize size);
+static void *bgetz(kmp_info_t *th, bufsize size);
+static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
+static void brel(kmp_info_t *th, void *buf);
+static void bectl(kmp_info_t *th, bget_compact_t compact,
+ bget_acquire_t acquire, bget_release_t release,
+ bufsize pool_incr);
#ifdef KMP_DEBUG
-static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
-static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
-static void bufdump( kmp_info_t *th, void *buf);
-static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
-static int bpoolv( kmp_info_t *th, void *pool);
+static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
+ bufsize *maxfree, long *nget, long *nrel);
+static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
+ long *npget, long *nprel, long *ndget, long *ndrel);
+static void bufdump(kmp_info_t *th, void *buf);
+static void bpoold(kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
+static int bpoolv(kmp_info_t *th, void *pool);
#endif
/* BGET CONFIGURATION */
- /* Buffer allocation size quantum:
- all buffers allocated are a
- multiple of this size. This
- MUST be a power of two. */
-
- /* On IA-32 architecture with Linux* OS,
- malloc() does not
- ensure 16 byte alignmnent */
+/* Buffer allocation size quantum: all buffers allocated are a
+ multiple of this size. This MUST be a power of two. */
+
+/* On IA-32 architecture with Linux* OS, malloc() does not
+   ensure 16-byte alignment */
#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
-#define SizeQuant 8
-#define AlignType double
+#define SizeQuant 8
+#define AlignType double
#else
-#define SizeQuant 16
-#define AlignType _Quad
+#define SizeQuant 16
+#define AlignType _Quad
#endif
-#define BufStats 1 /* Define this symbol to enable the
- bstats() function which calculates
- the total free space in the buffer
- pool, the largest available
- buffer, and the total space
- currently allocated. */
+// Define this symbol to enable the bstats() function which calculates the
+// total free space in the buffer pool, the largest available buffer, and the
+// total space currently allocated.
+#define BufStats 1
#ifdef KMP_DEBUG
-#define BufDump 1 /* Define this symbol to enable the
- bpoold() function which dumps the
- buffers in a buffer pool. */
-
-#define BufValid 1 /* Define this symbol to enable the
- bpoolv() function for validating
- a buffer pool. */
-
-#define DumpData 1 /* Define this symbol to enable the
- bufdump() function which allows
- dumping the contents of an allocated
- or free buffer. */
+// Define this symbol to enable the bpoold() function which dumps the buffers
+// in a buffer pool.
+#define BufDump 1
+
+// Define this symbol to enable the bpoolv() function for validating a buffer
+// pool.
+#define BufValid 1
+
+// Define this symbol to enable the bufdump() function which allows dumping the
+// contents of an allocated or free buffer.
+#define DumpData 1
+
#ifdef NOT_USED_NOW
-#define FreeWipe 1 /* Wipe free buffers to a guaranteed
- pattern of garbage to trip up
- miscreants who attempt to use
- pointers into released buffers. */
-
-#define BestFit 1 /* Use a best fit algorithm when
- searching for space for an
- allocation request. This uses
- memory more efficiently, but
- allocation will be much slower. */
+// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
+// who attempt to use pointers into released buffers.
+#define FreeWipe 1
+
+// Use a best fit algorithm when searching for space for an allocation request.
+// This uses memory more efficiently, but allocation will be much slower.
+#define BestFit 1
+
#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */
-
-static bufsize bget_bin_size[ ] = {
+static bufsize bget_bin_size[] = {
0,
-// 1 << 6, /* .5 Cache line */
- 1 << 7, /* 1 Cache line, new */
- 1 << 8, /* 2 Cache lines */
- 1 << 9, /* 4 Cache lines, new */
- 1 << 10, /* 8 Cache lines */
- 1 << 11, /* 16 Cache lines, new */
- 1 << 12,
- 1 << 13, /* new */
- 1 << 14,
- 1 << 15, /* new */
- 1 << 16,
- 1 << 17,
- 1 << 18,
- 1 << 19,
- 1 << 20, /* 1MB */
- 1 << 21, /* 2MB */
- 1 << 22, /* 4MB */
- 1 << 23, /* 8MB */
- 1 << 24, /* 16MB */
- 1 << 25, /* 32MB */
+ // 1 << 6, /* .5 Cache line */
+ 1 << 7, /* 1 Cache line, new */
+ 1 << 8, /* 2 Cache lines */
+ 1 << 9, /* 4 Cache lines, new */
+ 1 << 10, /* 8 Cache lines */
+ 1 << 11, /* 16 Cache lines, new */
+ 1 << 12, 1 << 13, /* new */
+ 1 << 14, 1 << 15, /* new */
+ 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
+ 1 << 21, /* 2MB */
+ 1 << 22, /* 4MB */
+ 1 << 23, /* 8MB */
+ 1 << 24, /* 16MB */
+ 1 << 25, /* 32MB */
};
-#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
+#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
struct bfhead;
-/* Declare the interface, including the requested buffer size type,
- bufsize. */
+// Declare the interface, including the requested buffer size type, bufsize.
/* Queue links */
-
typedef struct qlinks {
- struct bfhead *flink; /* Forward link */
- struct bfhead *blink; /* Backward link */
+ struct bfhead *flink; /* Forward link */
+ struct bfhead *blink; /* Backward link */
} qlinks_t;
/* Header in allocated and free buffers */
-
typedef struct bhead2 {
- kmp_info_t *bthr; /* The thread which owns the buffer pool */
- bufsize prevfree; /* Relative link back to previous
- free buffer in memory or 0 if
- previous buffer is allocated. */
- bufsize bsize; /* Buffer size: positive if free,
- negative if allocated. */
+ kmp_info_t *bthr; /* The thread which owns the buffer pool */
+ bufsize prevfree; /* Relative link back to previous free buffer in memory or
+ 0 if previous buffer is allocated. */
+ bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;
/* Make sure the bhead structure is a multiple of SizeQuant in size. */
-
typedef union bhead {
- KMP_ALIGN( SizeQuant )
- AlignType b_align;
- char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
- bhead2_t bb;
+ KMP_ALIGN(SizeQuant)
+ AlignType b_align;
+ char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
+ bhead2_t bb;
} bhead_t;
-#define BH(p) ((bhead_t *) (p))
+#define BH(p) ((bhead_t *)(p))
/* Header in directly allocated buffers (by acqfcn) */
-
-typedef struct bdhead
-{
- bufsize tsize; /* Total size, including overhead */
- bhead_t bh; /* Common header */
+typedef struct bdhead {
+ bufsize tsize; /* Total size, including overhead */
+ bhead_t bh; /* Common header */
} bdhead_t;
-#define BDH(p) ((bdhead_t *) (p))
+#define BDH(p) ((bdhead_t *)(p))
/* Header in free buffers */
-
typedef struct bfhead {
- bhead_t bh; /* Common allocated/free header */
- qlinks_t ql; /* Links on free list */
+ bhead_t bh; /* Common allocated/free header */
+ qlinks_t ql; /* Links on free list */
} bfhead_t;
-#define BFH(p) ((bfhead_t *) (p))
+#define BFH(p) ((bfhead_t *)(p))
typedef struct thr_data {
- bfhead_t freelist[ MAX_BGET_BINS ];
+ bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
- size_t totalloc; /* Total space currently allocated */
- long numget, numrel; /* Number of bget() and brel() calls */
- long numpblk; /* Number of pool blocks */
- long numpget, numprel; /* Number of block gets and rels */
- long numdget, numdrel; /* Number of direct gets and rels */
+ size_t totalloc; /* Total space currently allocated */
+ long numget, numrel; /* Number of bget() and brel() calls */
+ long numpblk; /* Number of pool blocks */
+ long numpget, numprel; /* Number of block gets and rels */
+ long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */
- /* Automatic expansion block management functions */
- bget_compact_t compfcn;
- bget_acquire_t acqfcn;
- bget_release_t relfcn;
-
- bget_mode_t mode; /* what allocation mode to use? */
-
- bufsize exp_incr; /* Expansion block size */
- bufsize pool_len; /* 0: no bpool calls have been made
- -1: not all pool blocks are
- the same size
- >0: (common) block size for all
- bpool calls made so far
- */
- bfhead_t * last_pool; /* Last pool owned by this thread (delay dealocation) */
+ /* Automatic expansion block management functions */
+ bget_compact_t compfcn;
+ bget_acquire_t acqfcn;
+ bget_release_t relfcn;
+
+ bget_mode_t mode; /* what allocation mode to use? */
+
+ bufsize exp_incr; /* Expansion block size */
+ bufsize pool_len; /* 0: no bpool calls have been made
+ -1: not all pool blocks are the same size
+ >0: (common) block size for all bpool calls made so far
+ */
+  bfhead_t *last_pool; /* Last pool owned by this thread (delayed deallocation) */
} thr_data_t;
/* Minimum allocation quantum: */
-
-#define QLSize (sizeof(qlinks_t))
-#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
-#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
- // Maximun for the requested size.
+#define QLSize (sizeof(qlinks_t))
+#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
+#define MaxSize \
+ (bufsize)( \
+ ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
+// Maximum for the requested size.
/* End sentinel: value placed in bsize field of dummy block delimiting
end of pool block. The most negative number which will fit in a
bufsize, defined in a way that the compiler will accept. */
-#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
-
-/* ------------------------------------------------------------------------ */
+#define ESent \
+ ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
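
For a concrete sense of the two limits above, assume (purely for illustration) a 32-bit signed bufsize and SizeQuant == 8; both are configuration-dependent. MaxSize then works out to ~(0x80000000 | 0x7) = 0x7FFFFFF8, the largest SizeQuant-aligned positive value, so bget() rejects any request whose size plus header overhead would exceed it. ESent works out to -(((1 << 30) - 1) * 2) - 2 = -0x7FFFFFFE - 2 = -0x80000000, the most negative representable bufsize, a value that can never collide with the negated size of a real allocated buffer.
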
/* Thread Data management routines */
+static int bget_get_bin(bufsize size) {
+ // binary chop bins
+ int lo = 0, hi = MAX_BGET_BINS - 1;
-static int
-bget_get_bin( bufsize size )
-{
- // binary chop bins
- int lo = 0, hi = MAX_BGET_BINS - 1;
-
- KMP_DEBUG_ASSERT( size > 0 );
+ KMP_DEBUG_ASSERT(size > 0);
- while ( (hi - lo) > 1 ) {
- int mid = (lo + hi) >> 1;
- if (size < bget_bin_size[ mid ])
- hi = mid - 1;
- else
- lo = mid;
- }
+ while ((hi - lo) > 1) {
+ int mid = (lo + hi) >> 1;
+ if (size < bget_bin_size[mid])
+ hi = mid - 1;
+ else
+ lo = mid;
+ }
- KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
+ KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
- return lo;
+ return lo;
}
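
As a self-contained sketch of the bin search above (the table here is a hypothetical, trimmed-down stand-in for bget_bin_size, and the types are simplified), the same binary chop looks like this. Note that the chop is deliberately approximate: it can land one bin below the tightest power-of-two class, which is harmless because insertion and lookup use the same mapping and bget() scans bins upward from its starting bin.

  #include <assert.h>
  #include <stdio.h>

  /* Hypothetical, trimmed-down bin table: entry i is the smallest
     buffer size that belongs in bin i. */
  static const long bin_size[] = {0, 1 << 7, 1 << 8, 1 << 9, 1 << 10, 1 << 11};
  #define NBINS ((int)(sizeof(bin_size) / sizeof(bin_size[0])))

  /* Binary chop for a starting bin, mirroring bget_get_bin(). */
  static int get_bin(long size) {
    int lo = 0, hi = NBINS - 1;
    assert(size > 0);
    while (hi - lo > 1) {
      int mid = (lo + hi) >> 1;
      if (size < bin_size[mid])
        hi = mid - 1;
      else
        lo = mid;
    }
    return lo;
  }

  int main(void) {
    /* Prints "0 0 4" with this toy table; 200 lands in bin 0 even though
       128 <= 200, one bin below the tight fit, so a lookup for that size
       merely scans one extra bin. */
    printf("%d %d %d\n", get_bin(1), get_bin(200), get_bin(4096));
    return 0;
  }
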
-static void
-set_thr_data( kmp_info_t *th )
-{
- int i;
- thr_data_t *data;
+static void set_thr_data(kmp_info_t *th) {
+ int i;
+ thr_data_t *data;
- data =
- (thr_data_t *)(
- ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
- );
+ data = (thr_data_t *)((!th->th.th_local.bget_data)
+ ? __kmp_allocate(sizeof(*data))
+ : th->th.th_local.bget_data);
- memset( data, '\0', sizeof( *data ) );
+ memset(data, '\0', sizeof(*data));
- for (i = 0; i < MAX_BGET_BINS; ++i) {
- data->freelist[ i ].ql.flink = & data->freelist[ i ];
- data->freelist[ i ].ql.blink = & data->freelist[ i ];
- }
+ for (i = 0; i < MAX_BGET_BINS; ++i) {
+ data->freelist[i].ql.flink = &data->freelist[i];
+ data->freelist[i].ql.blink = &data->freelist[i];
+ }
- th->th.th_local.bget_data = data;
- th->th.th_local.bget_list = 0;
-#if ! USE_CMP_XCHG_FOR_BGET
+ th->th.th_local.bget_data = data;
+ th->th.th_local.bget_list = 0;
+#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_init_lock( & th->th.th_local.bget_lock );
+ __kmp_init_lock(&th->th.th_local.bget_lock);
#else
- __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
+ __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}
-static thr_data_t *
-get_thr_data( kmp_info_t *th )
-{
- thr_data_t *data;
+static thr_data_t *get_thr_data(kmp_info_t *th) {
+ thr_data_t *data;
- data = (thr_data_t *) th->th.th_local.bget_data;
+ data = (thr_data_t *)th->th.th_local.bget_data;
- KMP_DEBUG_ASSERT( data != 0 );
+ KMP_DEBUG_ASSERT(data != 0);
- return data;
+ return data;
}
-
#ifdef KMP_DEBUG
-static void
-__kmp_bget_validate_queue( kmp_info_t *th )
-{
- /* NOTE: assume that the global_lock is held */
+static void __kmp_bget_validate_queue(kmp_info_t *th) {
+ /* NOTE: assume that the global_lock is held */
- void *p = (void *) th->th.th_local.bget_list;
+ void *p = (void *)th->th.th_local.bget_list;
- while (p != 0) {
- bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
+ while (p != 0) {
+ bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
- KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
- p = (void *) b->ql.flink;
- }
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
+ p = (void *)b->ql.flink;
+ }
}
#endif
/* Walk the free list and release the enqueued buffers */
+static void __kmp_bget_dequeue(kmp_info_t *th) {
+ void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
-static void
-__kmp_bget_dequeue( kmp_info_t *th )
-{
- void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
-
- if (p != 0) {
- #if USE_CMP_XCHG_FOR_BGET
- {
- volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- & th->th.th_local.bget_list, old_value, NULL ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
- }
- p = (void *) old_value;
- }
- #else /* ! USE_CMP_XCHG_FOR_BGET */
- #ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_acquire_lock( & th->th.th_local.bget_lock,
- __kmp_gtid_from_thread(th) );
- #else
- __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
- #endif /* USE_QUEUING_LOCK_FOR_BGET */
-
- p = (void *) th->th.th_local.bget_list;
- th->th.th_local.bget_list = 0;
-
- #ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_release_lock( & th->th.th_local.bget_lock,
- __kmp_gtid_from_thread(th) );
- #else
- __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
- #endif
- #endif /* USE_CMP_XCHG_FOR_BGET */
-
- /* Check again to make sure the list is not empty */
-
- while (p != 0) {
- void *buf = p;
- bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
-
- KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
- KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
- (kmp_uintptr_t)th ); // clear possible mark
- KMP_DEBUG_ASSERT( b->ql.blink == 0 );
+ if (p != 0) {
+#if USE_CMP_XCHG_FOR_BGET
+ {
+ volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
+ while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, old_value,
+ NULL)) {
+ KMP_CPU_PAUSE();
+ old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
+ }
+ p = (void *)old_value;
+ }
+#else /* ! USE_CMP_XCHG_FOR_BGET */
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
+#else
+ __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
+#endif /* USE_QUEUING_LOCK_FOR_BGET */
- p = (void *) b->ql.flink;
+ p = (void *)th->th.th_local.bget_list;
+ th->th.th_local.bget_list = 0;
- brel( th, buf );
- }
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
+#else
+ __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
+#endif
+#endif /* USE_CMP_XCHG_FOR_BGET */
+
+ /* Check again to make sure the list is not empty */
+ while (p != 0) {
+ void *buf = p;
+ bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
+
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
+ KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
+ (kmp_uintptr_t)th); // clear possible mark
+ KMP_DEBUG_ASSERT(b->ql.blink == 0);
+
+ p = (void *)b->ql.flink;
+
+ brel(th, buf);
}
+ }
}
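
The drain above follows a common pattern: atomically detach the entire deferred-free list, then walk the now-private list at leisure. A minimal stand-alone model, using C11 atomics in place of the runtime's TCR_SYNC_PTR / KMP_COMPARE_AND_STORE_PTR macros (node_t, deferred_head and drain are hypothetical names, not runtime APIs):

  #include <stdatomic.h>
  #include <stddef.h>

  /* Hypothetical node standing in for a buffer queued for deferred release. */
  typedef struct node {
    struct node *next;
  } node_t;

  /* List head that other threads push freed buffers onto. */
  static _Atomic(node_t *) deferred_head;

  /* Detach the whole list atomically (the exchange plays the role of the
     CAS-to-NULL loop above), then release each node.  The next pointer is
     read before release, just as the code above reads ql.flink before
     calling brel(). */
  static void drain(void (*release)(node_t *)) {
    node_t *p = atomic_exchange(&deferred_head, NULL);
    while (p != NULL) {
      node_t *next = p->next;
      release(p);
      p = next;
    }
  }
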
/* Chain together the free buffers by using the thread owner field */
-
-static void
-__kmp_bget_enqueue( kmp_info_t *th, void *buf
+static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
- , kmp_int32 rel_gtid
+ ,
+ kmp_int32 rel_gtid
#endif
- )
-{
- bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
+ ) {
+ bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
- KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
- KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
- (kmp_uintptr_t)th ); // clear possible mark
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
+ KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
+ (kmp_uintptr_t)th); // clear possible mark
- b->ql.blink = 0;
+ b->ql.blink = 0;
- KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
- __kmp_gtid_from_thread( th ) ) );
+ KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
+ __kmp_gtid_from_thread(th)));
#if USE_CMP_XCHG_FOR_BGET
- {
- volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
- /* the next pointer must be set before setting bget_list to buf to avoid
- exposing a broken list to other threads, even for an instant. */
- b->ql.flink = BFH( old_value );
-
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- & th->th.th_local.bget_list, old_value, buf ) )
- {
- KMP_CPU_PAUSE();
- old_value = TCR_PTR(th->th.th_local.bget_list);
- /* the next pointer must be set before setting bget_list to buf to avoid
- exposing a broken list to other threads, even for an instant. */
- b->ql.flink = BFH( old_value );
- }
+ {
+ volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
+ /* the next pointer must be set before setting bget_list to buf to avoid
+ exposing a broken list to other threads, even for an instant. */
+ b->ql.flink = BFH(old_value);
+
+ while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, old_value,
+ buf)) {
+ KMP_CPU_PAUSE();
+ old_value = TCR_PTR(th->th.th_local.bget_list);
+ /* the next pointer must be set before setting bget_list to buf to avoid
+ exposing a broken list to other threads, even for an instant. */
+ b->ql.flink = BFH(old_value);
}
+ }
#else /* ! USE_CMP_XCHG_FOR_BGET */
-# ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
-# else
- __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
- # endif
-
- b->ql.flink = BFH( th->th.th_local.bget_list );
- th->th.th_local.bget_list = (void *) buf;
-
-# ifdef USE_QUEUING_LOCK_FOR_BGET
- __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
-# else
- __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
-# endif
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
+#else
+ __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
+#endif
+
+ b->ql.flink = BFH(th->th.th_local.bget_list);
+ th->th.th_local.bget_list = (void *)buf;
+
+#ifdef USE_QUEUING_LOCK_FOR_BGET
+ __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
+#else
+ __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
+#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}
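
The enqueue is a lock-free push onto a singly linked list threaded through the buffers themselves. A minimal model with C11 atomics (again, node_t, deferred_head and push are hypothetical names); the point it illustrates is the one the comments above stress, namely that the forward link must be written before every publication attempt:

  #include <stdatomic.h>

  typedef struct node {
    struct node *next;
  } node_t;

  static _Atomic(node_t *) deferred_head;

  /* Push one node onto the owning thread's deferred-free list. */
  static void push(node_t *n) {
    node_t *old = atomic_load(&deferred_head);
    do {
      n->next = old; /* link first, then try to publish */
    } while (!atomic_compare_exchange_weak(&deferred_head, &old, n));
    /* On failure the CAS reloads 'old', so the link is rewritten before
       the next attempt and a winning reader never sees a stale next. */
  }
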
/* insert buffer back onto a new freelist */
+static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
+ int bin;
-static void
-__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
-{
- int bin;
+ KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
- KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
- KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
+ bin = bget_get_bin(b->bh.bb.bsize);
- bin = bget_get_bin( b->bh.bb.bsize );
+ KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
+ &thr->freelist[bin]);
+ KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
+ &thr->freelist[bin]);
- KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
- KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
+ b->ql.flink = &thr->freelist[bin];
+ b->ql.blink = thr->freelist[bin].ql.blink;
- b->ql.flink = &thr->freelist[ bin ];
- b->ql.blink = thr->freelist[ bin ].ql.blink;
-
- thr->freelist[ bin ].ql.blink = b;
- b->ql.blink->ql.flink = b;
+ thr->freelist[bin].ql.blink = b;
+ b->ql.blink->ql.flink = b;
}
/* unlink the buffer from the old freelist */
+static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
+ KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
+ KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
-static void
-__kmp_bget_remove_from_freelist( bfhead_t *b )
-{
- KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
- KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
-
- b->ql.blink->ql.flink = b->ql.flink;
- b->ql.flink->ql.blink = b->ql.blink;
+ b->ql.blink->ql.flink = b->ql.flink;
+ b->ql.flink->ql.blink = b->ql.blink;
}
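
Each freelist[] bin is a circular doubly-linked list whose head doubles as a sentinel, which is what the paired asserts above are checking. A stripped-down model of the two operations (link_t and the function names are illustrative, not runtime APIs):

  #include <assert.h>

  /* Circular doubly-linked list with a sentinel head: an empty list
     points at itself, so insert and remove need no special cases. */
  typedef struct link {
    struct link *flink, *blink;
  } link_t;

  static void list_init(link_t *head) { head->flink = head->blink = head; }

  /* Insert at the tail (just before the sentinel), as the bin insert does. */
  static void list_insert_tail(link_t *head, link_t *n) {
    n->flink = head;
    n->blink = head->blink;
    head->blink = n;
    n->blink->flink = n;
  }

  /* Unlink a node after checking the same neighbour invariants. */
  static void list_remove(link_t *n) {
    assert(n->blink->flink == n && n->flink->blink == n);
    n->blink->flink = n->flink;
    n->flink->blink = n->blink;
  }
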
-/* ------------------------------------------------------------------------ */
-
/* GET STATS -- check info on free list */
+static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
+ thr_data_t *thr = get_thr_data(th);
+ int bin;
-static void
-bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
-{
- thr_data_t *thr = get_thr_data( th );
- int bin;
-
- *total_free = *max_free = 0;
+ *total_free = *max_free = 0;
- for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
- bfhead_t *b, *best;
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b, *best;
- best = &thr->freelist[ bin ];
- b = best->ql.flink;
+ best = &thr->freelist[bin];
+ b = best->ql.flink;
- while (b != &thr->freelist[ bin ]) {
- *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
- if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
- best = b;
-
- /* Link to next buffer */
- b = b->ql.flink;
- }
+ while (b != &thr->freelist[bin]) {
+ *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
+ if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
+ best = b;
- if (*max_free < best->bh.bb.bsize)
- *max_free = best->bh.bb.bsize;
+ /* Link to next buffer */
+ b = b->ql.flink;
}
- if (*max_free > (bufsize)sizeof( bhead_t ))
- *max_free -= sizeof( bhead_t );
-}
+ if (*max_free < best->bh.bb.bsize)
+ *max_free = best->bh.bb.bsize;
+ }
-/* ------------------------------------------------------------------------ */
+ if (*max_free > (bufsize)sizeof(bhead_t))
+ *max_free -= sizeof(bhead_t);
+}
/* BGET -- Allocate a buffer. */
+static void *bget(kmp_info_t *th, bufsize requested_size) {
+ thr_data_t *thr = get_thr_data(th);
+ bufsize size = requested_size;
+ bfhead_t *b;
+ void *buf;
+ int compactseq = 0;
+ int use_blink = 0;
+ /* For BestFit */
+ bfhead_t *best;
-static void *
-bget( kmp_info_t *th, bufsize requested_size )
-{
- thr_data_t *thr = get_thr_data( th );
- bufsize size = requested_size;
- bfhead_t *b;
- void *buf;
- int compactseq = 0;
- int use_blink = 0;
-/* For BestFit */
- bfhead_t *best;
-
- if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
- return NULL;
- }; // if
-
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
-
- if (size < (bufsize)SizeQ) { /* Need at least room for the */
- size = SizeQ; /* queue links. */
- }
- #if defined( SizeQuant ) && ( SizeQuant > 1 )
- size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
- #endif
-
- size += sizeof(bhead_t); /* Add overhead in allocated buffer
- to size required. */
- KMP_DEBUG_ASSERT( size >= 0 );
- KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
-
- use_blink = ( thr->mode == bget_mode_lifo );
-
- /* If a compact function was provided in the call to bectl(), wrap
- a loop around the allocation process to allow compaction to
- intervene in case we don't find a suitable buffer in the chain. */
-
- for (;;) {
- int bin;
-
- for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
- /* Link to next buffer */
- b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
-
- if (thr->mode == bget_mode_best) {
- best = &thr->freelist[ bin ];
-
- /* Scan the free list searching for the first buffer big enough
- to hold the requested size buffer. */
-
- while (b != &thr->freelist[ bin ]) {
- if (b->bh.bb.bsize >= (bufsize) size) {
- if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
- best = b;
- }
- }
-
- /* Link to next buffer */
- b = ( use_blink ? b->ql.blink : b->ql.flink );
- }
- b = best;
+ if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
+ return NULL;
+ }; // if
+
+ __kmp_bget_dequeue(th); /* Release any queued buffers */
+
+ if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
+ size = SizeQ;
+ }
+#if defined(SizeQuant) && (SizeQuant > 1)
+ size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
+#endif
+
+ size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
+ KMP_DEBUG_ASSERT(size >= 0);
+ KMP_DEBUG_ASSERT(size % SizeQuant == 0);
+
+ use_blink = (thr->mode == bget_mode_lifo);
+
+ /* If a compact function was provided in the call to bectl(), wrap
+ a loop around the allocation process to allow compaction to
+ intervene in case we don't find a suitable buffer in the chain. */
+
+ for (;;) {
+ int bin;
+
+ for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
+ /* Link to next buffer */
+ b = (use_blink ? thr->freelist[bin].ql.blink
+ : thr->freelist[bin].ql.flink);
+
+ if (thr->mode == bget_mode_best) {
+ best = &thr->freelist[bin];
+
+ /* Scan the free list searching for the first buffer big enough
+ to hold the requested size buffer. */
+ while (b != &thr->freelist[bin]) {
+ if (b->bh.bb.bsize >= (bufsize)size) {
+ if ((best == &thr->freelist[bin]) ||
+ (b->bh.bb.bsize < best->bh.bb.bsize)) {
+ best = b;
}
+ }
- while (b != &thr->freelist[ bin ]) {
- if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
+ /* Link to next buffer */
+ b = (use_blink ? b->ql.blink : b->ql.flink);
+ }
+ b = best;
+ }
- /* Buffer is big enough to satisfy the request. Allocate it
- to the caller. We must decide whether the buffer is large
- enough to split into the part given to the caller and a
- free buffer that remains on the free list, or whether the
- entire buffer should be removed from the free list and
- given to the caller in its entirety. We only split the
- buffer if enough room remains for a header plus the minimum
- quantum of allocation. */
-
- if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
- bhead_t *ba, *bn;
-
- ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
- bn = BH(((char *) ba) + size);
-
- KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
-
- /* Subtract size from length of free block. */
- b->bh.bb.bsize -= (bufsize) size;
-
- /* Link allocated buffer to the previous free buffer. */
- ba->bb.prevfree = b->bh.bb.bsize;
-
- /* Plug negative size into user buffer. */
- ba->bb.bsize = -size;
-
- /* Mark this buffer as owned by this thread. */
- TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
- /* Mark buffer after this one not preceded by free block. */
- bn->bb.prevfree = 0;
-
- /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
- __kmp_bget_remove_from_freelist( b );
- __kmp_bget_insert_into_freelist( thr, b );
+ while (b != &thr->freelist[bin]) {
+ if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
+
+ // Buffer is big enough to satisfy the request. Allocate it to the
+ // caller. We must decide whether the buffer is large enough to split
+ // into the part given to the caller and a free buffer that remains
+ // on the free list, or whether the entire buffer should be removed
+ // from the free list and given to the caller in its entirety. We
+ // only split the buffer if enough room remains for a header plus the
+ // minimum quantum of allocation.
+ if ((b->bh.bb.bsize - (bufsize)size) >
+ (bufsize)(SizeQ + (sizeof(bhead_t)))) {
+ bhead_t *ba, *bn;
+
+ ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
+ bn = BH(((char *)ba) + size);
+
+ KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
+
+ /* Subtract size from length of free block. */
+ b->bh.bb.bsize -= (bufsize)size;
+
+ /* Link allocated buffer to the previous free buffer. */
+ ba->bb.prevfree = b->bh.bb.bsize;
+
+ /* Plug negative size into user buffer. */
+ ba->bb.bsize = -size;
+
+ /* Mark this buffer as owned by this thread. */
+ TCW_PTR(ba->bb.bthr,
+ th); // not an allocated address (do not mark it)
+ /* Mark buffer after this one not preceded by free block. */
+ bn->bb.prevfree = 0;
+
+ // unlink buffer from old freelist, and reinsert into new freelist
+ __kmp_bget_remove_from_freelist(b);
+ __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
- thr->totalloc += (size_t) size;
- thr->numget++; /* Increment number of bget() calls */
+ thr->totalloc += (size_t)size;
+ thr->numget++; /* Increment number of bget() calls */
#endif
- buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
- return buf;
- } else {
- bhead_t *ba;
+ buf = (void *)((((char *)ba) + sizeof(bhead_t)));
+ KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
+ return buf;
+ } else {
+ bhead_t *ba;
- ba = BH(((char *) b) + b->bh.bb.bsize);
+ ba = BH(((char *)b) + b->bh.bb.bsize);
- KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
+ KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
- /* The buffer isn't big enough to split. Give the whole
- shebang to the caller and remove it from the free list. */
+ /* The buffer isn't big enough to split. Give the whole
+ shebang to the caller and remove it from the free list. */
- __kmp_bget_remove_from_freelist( b );
+ __kmp_bget_remove_from_freelist(b);
#if BufStats
- thr->totalloc += (size_t) b->bh.bb.bsize;
- thr->numget++; /* Increment number of bget() calls */
+ thr->totalloc += (size_t)b->bh.bb.bsize;
+ thr->numget++; /* Increment number of bget() calls */
#endif
- /* Negate size to mark buffer allocated. */
- b->bh.bb.bsize = -(b->bh.bb.bsize);
+ /* Negate size to mark buffer allocated. */
+ b->bh.bb.bsize = -(b->bh.bb.bsize);
- /* Mark this buffer as owned by this thread. */
- TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
- /* Zero the back pointer in the next buffer in memory
- to indicate that this buffer is allocated. */
- ba->bb.prevfree = 0;
-
- /* Give user buffer starting at queue links. */
- buf = (void *) &(b->ql);
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
- return buf;
- }
- }
+ /* Mark this buffer as owned by this thread. */
+ TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
+ /* Zero the back pointer in the next buffer in memory
+ to indicate that this buffer is allocated. */
+ ba->bb.prevfree = 0;
- /* Link to next buffer */
- b = ( use_blink ? b->ql.blink : b->ql.flink );
- }
+ /* Give user buffer starting at queue links. */
+ buf = (void *)&(b->ql);
+ KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
+ return buf;
+ }
}
- /* We failed to find a buffer. If there's a compact function
- defined, notify it of the size requested. If it returns
- TRUE, try the allocation again. */
-
- if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
- break;
- }
+ /* Link to next buffer */
+ b = (use_blink ? b->ql.blink : b->ql.flink);
+ }
}
- /* No buffer available with requested size free. */
-
- /* Don't give up yet -- look in the reserve supply. */
+ /* We failed to find a buffer. If there's a compact function defined,
+ notify it of the size requested. If it returns TRUE, try the allocation
+ again. */
- if (thr->acqfcn != 0) {
- if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
+ if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
+ break;
+ }
+ }
- /* Request is too large to fit in a single expansion
- block. Try to satisy it by a direct buffer acquisition. */
+ /* No buffer available with requested size free. */
- bdhead_t *bdh;
+ /* Don't give up yet -- look in the reserve supply. */
+ if (thr->acqfcn != 0) {
+ if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
+ /* Request is too large to fit in a single expansion block.
+         Try to satisfy it by a direct buffer acquisition. */
+ bdhead_t *bdh;
- size += sizeof(bdhead_t) - sizeof(bhead_t);
+ size += sizeof(bdhead_t) - sizeof(bhead_t);
- KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
+ KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
- /* richryan */
- bdh = BDH((*thr->acqfcn)((bufsize) size));
- if (bdh != NULL) {
+ /* richryan */
+ bdh = BDH((*thr->acqfcn)((bufsize)size));
+ if (bdh != NULL) {
- /* Mark the buffer special by setting the size field
- of its header to zero. */
- bdh->bh.bb.bsize = 0;
+ // Mark the buffer special by setting size field of its header to zero.
+ bdh->bh.bb.bsize = 0;
- /* Mark this buffer as owned by this thread. */
- TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
- // because direct buffer never goes to free list
- bdh->bh.bb.prevfree = 0;
- bdh->tsize = size;
+ /* Mark this buffer as owned by this thread. */
+ TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
+ // because direct buffer never goes to free list
+ bdh->bh.bb.prevfree = 0;
+ bdh->tsize = size;
#if BufStats
- thr->totalloc += (size_t) size;
- thr->numget++; /* Increment number of bget() calls */
- thr->numdget++; /* Direct bget() call count */
-#endif
- buf = (void *) (bdh + 1);
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
- return buf;
- }
+ thr->totalloc += (size_t)size;
+ thr->numget++; /* Increment number of bget() calls */
+ thr->numdget++; /* Direct bget() call count */
+#endif
+ buf = (void *)(bdh + 1);
+ KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
+ return buf;
+ }
- } else {
-
- /* Try to obtain a new expansion block */
+ } else {
- void *newpool;
+ /* Try to obtain a new expansion block */
+ void *newpool;
- KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
+ KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
- /* richryan */
- newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
- KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
- if (newpool != NULL) {
- bpool( th, newpool, thr->exp_incr);
- buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
- return buf;
- }
- }
+ /* richryan */
+ newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
+ KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
+ if (newpool != NULL) {
+ bpool(th, newpool, thr->exp_incr);
+ buf = bget(
+ th, requested_size); /* This can't, I say, can't get into a loop. */
+ return buf;
+ }
}
+ }
- /* Still no buffer available */
+ /* Still no buffer available */
- return NULL;
+ return NULL;
}
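
The size arithmetic at the top of bget() and the split test in the middle are easy to check in isolation. A sketch with made-up constants (SIZE_QUANT, HDR and MIN_PAYLOAD stand in for SizeQuant, sizeof(bhead_t) and SizeQ; the real values depend on the build):

  #include <stdio.h>

  #define SIZE_QUANT 8    /* stand-in for SizeQuant           */
  #define HDR 16          /* stand-in for sizeof(bhead_t)     */
  #define MIN_PAYLOAD 16  /* stand-in for SizeQ (queue links) */

  /* Normalise a request the way bget() does: enforce the minimum,
     round up to the quantum, then add the header overhead. */
  static long normalise(long requested) {
    long size = requested;
    if (size < MIN_PAYLOAD)
      size = MIN_PAYLOAD;
    size = (size + (SIZE_QUANT - 1)) & ~(long)(SIZE_QUANT - 1);
    return size + HDR;
  }

  /* A free block of 'avail' bytes is split only if the remainder can
     still hold a header plus the minimum payload. */
  static int should_split(long avail, long needed) {
    return (avail - needed) > (long)(MIN_PAYLOAD + HDR);
  }

  int main(void) {
    long need = normalise(100); /* 100 -> 104 (rounded) -> 120 (with header) */
    printf("need=%ld split(256)=%d split(140)=%d\n", need,
           should_split(256, need),  /* remainder 136 > 32: split            */
           should_split(140, need)); /* remainder 20 <= 32: hand over whole  */
    return 0;
  }
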
/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
the entire contents of the buffer to zero, not just the
region requested by the caller. */
-static void *
-bgetz( kmp_info_t *th, bufsize size )
-{
- char *buf = (char *) bget( th, size);
-
- if (buf != NULL) {
- bhead_t *b;
- bufsize rsize;
-
- b = BH(buf - sizeof(bhead_t));
- rsize = -(b->bb.bsize);
- if (rsize == 0) {
- bdhead_t *bd;
-
- bd = BDH(buf - sizeof(bdhead_t));
- rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
- } else {
- rsize -= sizeof(bhead_t);
- }
+static void *bgetz(kmp_info_t *th, bufsize size) {
+ char *buf = (char *)bget(th, size);
+
+ if (buf != NULL) {
+ bhead_t *b;
+ bufsize rsize;
- KMP_DEBUG_ASSERT(rsize >= size);
+ b = BH(buf - sizeof(bhead_t));
+ rsize = -(b->bb.bsize);
+ if (rsize == 0) {
+ bdhead_t *bd;
- (void) memset(buf, 0, (bufsize) rsize);
+ bd = BDH(buf - sizeof(bdhead_t));
+ rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
+ } else {
+ rsize -= sizeof(bhead_t);
}
- return ((void *) buf);
+
+ KMP_DEBUG_ASSERT(rsize >= size);
+
+ (void)memset(buf, 0, (bufsize)rsize);
+ }
+ return ((void *)buf);
}
/* BGETR -- Reallocate a buffer. This is a minimal implementation,
@@ -757,392 +694,372 @@ bgetz( kmp_info_t *th, bufsize size )
enhanced to allow the buffer to grow into adjacent free
blocks and to avoid moving data unnecessarily. */
-static void *
-bgetr( kmp_info_t *th, void *buf, bufsize size)
-{
- void *nbuf;
- bufsize osize; /* Old size of buffer */
- bhead_t *b;
-
- nbuf = bget( th, size );
- if ( nbuf == NULL ) { /* Acquire new buffer */
- return NULL;
- }
- if ( buf == NULL ) {
- return nbuf;
- }
- b = BH(((char *) buf) - sizeof(bhead_t));
- osize = -b->bb.bsize;
- if (osize == 0) {
- /* Buffer acquired directly through acqfcn. */
- bdhead_t *bd;
-
- bd = BDH(((char *) buf) - sizeof(bdhead_t));
- osize = bd->tsize - (bufsize) sizeof(bdhead_t);
- } else {
- osize -= sizeof(bhead_t);
- };
-
- KMP_DEBUG_ASSERT(osize > 0);
-
- (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
- (size_t) ((size < osize) ? size : osize));
- brel( th, buf );
+static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
+ void *nbuf;
+ bufsize osize; /* Old size of buffer */
+ bhead_t *b;
+ nbuf = bget(th, size);
+ if (nbuf == NULL) { /* Acquire new buffer */
+ return NULL;
+ }
+ if (buf == NULL) {
return nbuf;
+ }
+ b = BH(((char *)buf) - sizeof(bhead_t));
+ osize = -b->bb.bsize;
+ if (osize == 0) {
+ /* Buffer acquired directly through acqfcn. */
+ bdhead_t *bd;
+
+ bd = BDH(((char *)buf) - sizeof(bdhead_t));
+ osize = bd->tsize - (bufsize)sizeof(bdhead_t);
+ } else {
+ osize -= sizeof(bhead_t);
+ };
+
+ KMP_DEBUG_ASSERT(osize > 0);
+
+ (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
+ (size_t)((size < osize) ? size : osize));
+ brel(th, buf);
+
+ return nbuf;
}
/* BREL -- Release a buffer. */
+static void brel(kmp_info_t *th, void *buf) {
+ thr_data_t *thr = get_thr_data(th);
+ bfhead_t *b, *bn;
+ kmp_info_t *bth;
-static void
-brel( kmp_info_t *th, void *buf )
-{
- thr_data_t *thr = get_thr_data( th );
- bfhead_t *b, *bn;
- kmp_info_t *bth;
-
- KMP_DEBUG_ASSERT(buf != NULL);
- KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
+ KMP_DEBUG_ASSERT(buf != NULL);
+ KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
- b = BFH(((char *) buf) - sizeof(bhead_t));
+ b = BFH(((char *)buf) - sizeof(bhead_t));
- if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
- bdhead_t *bdh;
+ if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
+ bdhead_t *bdh;
- bdh = BDH(((char *) buf) - sizeof(bdhead_t));
- KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
+ bdh = BDH(((char *)buf) - sizeof(bdhead_t));
+ KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
- thr->totalloc -= (size_t) bdh->tsize;
- thr->numdrel++; /* Number of direct releases */
- thr->numrel++; /* Increment number of brel() calls */
+ thr->totalloc -= (size_t)bdh->tsize;
+ thr->numdrel++; /* Number of direct releases */
+ thr->numrel++; /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
- (void) memset((char *) buf, 0x55,
- (size_t) (bdh->tsize - sizeof(bdhead_t)));
+ (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */
- KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
+ KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
- KMP_DEBUG_ASSERT( thr->relfcn != 0 );
- (*thr->relfcn)((void *) bdh); /* Release it directly. */
- return;
- }
-
- bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
- if ( bth != th ) {
- /* Add this buffer to be released by the owning thread later */
- __kmp_bget_enqueue( bth, buf
+ KMP_DEBUG_ASSERT(thr->relfcn != 0);
+ (*thr->relfcn)((void *)bdh); /* Release it directly. */
+ return;
+ }
+
+ bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
+ ~1); // clear possible mark before comparison
+ if (bth != th) {
+ /* Add this buffer to be released by the owning thread later */
+ __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
- , __kmp_gtid_from_thread( th )
+ ,
+ __kmp_gtid_from_thread(th)
#endif
- );
- return;
- }
+ );
+ return;
+ }
+
+ /* Buffer size must be negative, indicating that the buffer is allocated. */
+ if (b->bh.bb.bsize >= 0) {
+ bn = NULL;
+ }
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
- /* Buffer size must be negative, indicating that the buffer is
- allocated. */
-
- if (b->bh.bb.bsize >= 0) {
- bn = NULL;
- }
- KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
+ /* Back pointer in next buffer must be zero, indicating the same thing: */
- /* Back pointer in next buffer must be zero, indicating the
- same thing: */
-
- KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
+ KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
#if BufStats
- thr->numrel++; /* Increment number of brel() calls */
- thr->totalloc += (size_t) b->bh.bb.bsize;
+ thr->numrel++; /* Increment number of brel() calls */
+ thr->totalloc += (size_t)b->bh.bb.bsize;
#endif
- /* If the back link is nonzero, the previous buffer is free. */
-
- if (b->bh.bb.prevfree != 0) {
- /* The previous buffer is free. Consolidate this buffer with it
- by adding the length of this buffer to the previous free
- buffer. Note that we subtract the size in the buffer being
- released, since it's negative to indicate that the buffer is
- allocated. */
-
- register bufsize size = b->bh.bb.bsize;
-
- /* Make the previous buffer the one we're working on. */
- KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
- b = BFH(((char *) b) - b->bh.bb.prevfree);
- b->bh.bb.bsize -= size;
-
- /* unlink the buffer from the old freelist */
- __kmp_bget_remove_from_freelist( b );
- }
- else {
- /* The previous buffer isn't allocated. Mark this buffer
- size as positive (i.e. free) and fall through to place
- the buffer on the free list as an isolated free block. */
-
- b->bh.bb.bsize = -b->bh.bb.bsize;
- }
-
- /* insert buffer back onto a new freelist */
- __kmp_bget_insert_into_freelist( thr, b );
-
+ /* If the back link is nonzero, the previous buffer is free. */
- /* Now we look at the next buffer in memory, located by advancing from
- the start of this buffer by its size, to see if that buffer is
- free. If it is, we combine this buffer with the next one in
- memory, dechaining the second buffer from the free list. */
-
- bn = BFH(((char *) b) + b->bh.bb.bsize);
- if (bn->bh.bb.bsize > 0) {
-
- /* The buffer is free. Remove it from the free list and add
- its size to that of our buffer. */
-
- KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
-
- __kmp_bget_remove_from_freelist( bn );
-
- b->bh.bb.bsize += bn->bh.bb.bsize;
-
- /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
-
- __kmp_bget_remove_from_freelist( b );
- __kmp_bget_insert_into_freelist( thr, b );
-
- /* Finally, advance to the buffer that follows the newly
- consolidated free block. We must set its backpointer to the
- head of the consolidated free block. We know the next block
- must be an allocated block because the process of recombination
- guarantees that two free blocks will never be contiguous in
- memory. */
-
- bn = BFH(((char *) b) + b->bh.bb.bsize);
- }
+ if (b->bh.bb.prevfree != 0) {
+ /* The previous buffer is free. Consolidate this buffer with it by adding
+ the length of this buffer to the previous free buffer. Note that we
+ subtract the size in the buffer being released, since it's negative to
+ indicate that the buffer is allocated. */
+ register bufsize size = b->bh.bb.bsize;
+
+ /* Make the previous buffer the one we're working on. */
+ KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
+ b->bh.bb.prevfree);
+ b = BFH(((char *)b) - b->bh.bb.prevfree);
+ b->bh.bb.bsize -= size;
+
+ /* unlink the buffer from the old freelist */
+ __kmp_bget_remove_from_freelist(b);
+ } else {
+    /* The previous buffer is allocated. Mark this buffer size as positive
+       (i.e. free) and fall through to place the buffer on the free list as an
+       isolated free block. */
+ b->bh.bb.bsize = -b->bh.bb.bsize;
+ }
+
+ /* insert buffer back onto a new freelist */
+ __kmp_bget_insert_into_freelist(thr, b);
+
+ /* Now we look at the next buffer in memory, located by advancing from
+ the start of this buffer by its size, to see if that buffer is
+ free. If it is, we combine this buffer with the next one in
+ memory, dechaining the second buffer from the free list. */
+ bn = BFH(((char *)b) + b->bh.bb.bsize);
+ if (bn->bh.bb.bsize > 0) {
+
+ /* The buffer is free. Remove it from the free list and add
+ its size to that of our buffer. */
+ KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
+ bn->bh.bb.bsize);
+
+ __kmp_bget_remove_from_freelist(bn);
+
+ b->bh.bb.bsize += bn->bh.bb.bsize;
+
+ /* unlink the buffer from the old freelist, and reinsert it into the new
+ * freelist */
+ __kmp_bget_remove_from_freelist(b);
+ __kmp_bget_insert_into_freelist(thr, b);
+
+ /* Finally, advance to the buffer that follows the newly
+ consolidated free block. We must set its backpointer to the
+ head of the consolidated free block. We know the next block
+ must be an allocated block because the process of recombination
+ guarantees that two free blocks will never be contiguous in
+ memory. */
+ bn = BFH(((char *)b) + b->bh.bb.bsize);
+ }
#ifdef FreeWipe
- (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
- (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
+ (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
+ (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
- KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
-
- /* The next buffer is allocated. Set the backpointer in it to point
- to this buffer; the previous free buffer in memory. */
-
- bn->bh.bb.prevfree = b->bh.bb.bsize;
+ KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
- /* If a block-release function is defined, and this free buffer
- constitutes the entire block, release it. Note that pool_len
- is defined in such a way that the test will fail unless all
- pool blocks are the same size. */
+ /* The next buffer is allocated. Set the backpointer in it to point
+ to this buffer; the previous free buffer in memory. */
- if (thr->relfcn != 0 &&
- b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
- {
+ bn->bh.bb.prevfree = b->bh.bb.bsize;
+
+ /* If a block-release function is defined, and this free buffer
+ constitutes the entire block, release it. Note that pool_len
+ is defined in such a way that the test will fail unless all
+ pool blocks are the same size. */
+ if (thr->relfcn != 0 &&
+ b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
#if BufStats
- if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
+ if (thr->numpblk !=
+ 1) { /* Do not release the last buffer until finalization time */
#endif
- KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
+ KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
+ KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
+ KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
+ b->bh.bb.bsize);
- /* Unlink the buffer from the free list */
- __kmp_bget_remove_from_freelist( b );
+ /* Unlink the buffer from the free list */
+ __kmp_bget_remove_from_freelist(b);
- KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
+ KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
- (*thr->relfcn)(b);
+ (*thr->relfcn)(b);
#if BufStats
- thr->numprel++; /* Nr of expansion block releases */
- thr->numpblk--; /* Total number of blocks */
- KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
-
- /* avoid leaving stale last_pool pointer around if it is being dealloced */
- if (thr->last_pool == b) thr->last_pool = 0;
- }
- else {
- thr->last_pool = b;
- }
-#endif /* BufStats */
+ thr->numprel++; /* Nr of expansion block releases */
+ thr->numpblk--; /* Total number of blocks */
+ KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
+
+ // avoid leaving stale last_pool pointer around if it is being dealloced
+ if (thr->last_pool == b)
+ thr->last_pool = 0;
+ } else {
+ thr->last_pool = b;
}
+#endif /* BufStats */
+ }
}
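
Everything brel() needs for coalescing is encoded in two header fields: bsize (positive for free, negative for allocated) and prevfree (size of the free block immediately before this one in memory, or 0). A small sketch of the pointer arithmetic, with a simplified two-field header (hdr_t and the helpers are illustrative only):

  typedef struct hdr {
    long prevfree; /* size of preceding free block, or 0 */
    long bsize;    /* +size if free, -size if allocated  */
  } hdr_t;

  /* Previous block in memory; only meaningful when prevfree != 0. */
  static hdr_t *prev_block(hdr_t *h) {
    return (hdr_t *)((char *)h - h->prevfree);
  }

  /* Next block in memory; valid for free blocks (bsize > 0). */
  static hdr_t *next_block(hdr_t *h) {
    return (hdr_t *)((char *)h + h->bsize);
  }

  /* Merge a just-released block (bsize still negative) into the free
     block before it: subtracting a negative size grows the previous
     block by the released length, the same sign trick used above. */
  static void coalesce_with_prev(hdr_t *released) {
    hdr_t *prev = prev_block(released);
    prev->bsize -= released->bsize;
  }
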
/* BECTL -- Establish automatic pool expansion control */
-
-static void
-bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
-{
- thr_data_t *thr = get_thr_data( th );
-
- thr->compfcn = compact;
- thr->acqfcn = acquire;
- thr->relfcn = release;
- thr->exp_incr = pool_incr;
+static void bectl(kmp_info_t *th, bget_compact_t compact,
+ bget_acquire_t acquire, bget_release_t release,
+ bufsize pool_incr) {
+ thr_data_t *thr = get_thr_data(th);
+
+ thr->compfcn = compact;
+ thr->acqfcn = acquire;
+ thr->relfcn = release;
+ thr->exp_incr = pool_incr;
}
/* BPOOL -- Add a region of memory to the buffer pool. */
+static void bpool(kmp_info_t *th, void *buf, bufsize len) {
+ /* int bin = 0; */
+ thr_data_t *thr = get_thr_data(th);
+ bfhead_t *b = BFH(buf);
+ bhead_t *bn;
-static void
-bpool( kmp_info_t *th, void *buf, bufsize len)
-{
-/* int bin = 0; */
- thr_data_t *thr = get_thr_data( th );
- bfhead_t *b = BFH(buf);
- bhead_t *bn;
-
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
+ __kmp_bget_dequeue(th); /* Release any queued buffers */
#ifdef SizeQuant
- len &= ~(SizeQuant - 1);
+ len &= ~(SizeQuant - 1);
#endif
- if (thr->pool_len == 0) {
- thr->pool_len = len;
- } else if (len != thr->pool_len) {
- thr->pool_len = -1;
- }
+ if (thr->pool_len == 0) {
+ thr->pool_len = len;
+ } else if (len != thr->pool_len) {
+ thr->pool_len = -1;
+ }
#if BufStats
- thr->numpget++; /* Number of block acquisitions */
- thr->numpblk++; /* Number of blocks total */
- KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
+ thr->numpget++; /* Number of block acquisitions */
+ thr->numpblk++; /* Number of blocks total */
+ KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */
- /* Since the block is initially occupied by a single free buffer,
- it had better not be (much) larger than the largest buffer
- whose size we can store in bhead.bb.bsize. */
-
- KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
-
- /* Clear the backpointer at the start of the block to indicate that
- there is no free block prior to this one. That blocks
- recombination when the first block in memory is released. */
-
- b->bh.bb.prevfree = 0;
-
- /* Create a dummy allocated buffer at the end of the pool. This dummy
- buffer is seen when a buffer at the end of the pool is released and
- blocks recombination of the last buffer with the dummy buffer at
- the end. The length in the dummy buffer is set to the largest
- negative number to denote the end of the pool for diagnostic
- routines (this specific value is not counted on by the actual
- allocation and release functions). */
-
- len -= sizeof(bhead_t);
- b->bh.bb.bsize = (bufsize) len;
- /* Set the owner of this buffer */
- TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
+ /* Since the block is initially occupied by a single free buffer,
+ it had better not be (much) larger than the largest buffer
+ whose size we can store in bhead.bb.bsize. */
+ KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
+
+ /* Clear the backpointer at the start of the block to indicate that
+ there is no free block prior to this one. That blocks
+ recombination when the first block in memory is released. */
+ b->bh.bb.prevfree = 0;
+
+ /* Create a dummy allocated buffer at the end of the pool. This dummy
+ buffer is seen when a buffer at the end of the pool is released and
+ blocks recombination of the last buffer with the dummy buffer at
+ the end. The length in the dummy buffer is set to the largest
+ negative number to denote the end of the pool for diagnostic
+ routines (this specific value is not counted on by the actual
+ allocation and release functions). */
+ len -= sizeof(bhead_t);
+ b->bh.bb.bsize = (bufsize)len;
+ /* Set the owner of this buffer */
+ TCW_PTR(b->bh.bb.bthr,
+ (kmp_info_t *)((kmp_uintptr_t)th |
+ 1)); // mark the buffer as allocated address
- /* Chain the new block to the free list. */
- __kmp_bget_insert_into_freelist( thr, b );
+ /* Chain the new block to the free list. */
+ __kmp_bget_insert_into_freelist(thr, b);
#ifdef FreeWipe
- (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
- (size_t) (len - sizeof(bfhead_t)));
+ (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
+ (size_t)(len - sizeof(bfhead_t)));
#endif
- bn = BH(((char *) b) + len);
- bn->bb.prevfree = (bufsize) len;
- /* Definition of ESent assumes two's complement! */
- KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
+ bn = BH(((char *)b) + len);
+ bn->bb.prevfree = (bufsize)len;
+ /* Definition of ESent assumes two's complement! */
+ KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
- bn->bb.bsize = ESent;
+ bn->bb.bsize = ESent;
}
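
What bpool() leaves behind is one big free buffer bracketed by two guards: a zeroed prevfree at the front and a dummy header holding ESent at the back, so coalescing can never walk off either end of the block. A scaled-down model with a simplified two-field header (hdr_t, ESENT and model_bpool are illustrative names; the real bhead_t also carries the owning thread and alignment padding):

  #include <assert.h>

  typedef struct hdr {
    long prevfree; /* size of preceding free block, 0 if none or allocated */
    long bsize;    /* +size if free; ESENT marks the end of the pool       */
  } hdr_t;

  #define ESENT (-0x7fffffffL - 1) /* illustrative end-sentinel value */

  /* Lay out a fresh pool the way bpool() does: one free buffer covering
     the block, plus a dummy header at the very end. */
  static void model_bpool(char *buf, long len) {
    hdr_t *b = (hdr_t *)buf;
    hdr_t *end;
    b->prevfree = 0;            /* nothing free before the block     */
    len -= (long)sizeof(hdr_t); /* reserve room for the end sentinel */
    b->bsize = len;             /* one big free buffer               */
    end = (hdr_t *)(buf + len);
    end->prevfree = len;        /* back-link to the free buffer      */
    end->bsize = ESENT;         /* coalescing can never pass this    */
  }

  int main(void) {
    static long backing[512]; /* suitably aligned backing store */
    char *pool = (char *)backing;
    long len = (long)sizeof(backing);
    model_bpool(pool, len);
    assert(((hdr_t *)pool)->bsize == len - (long)sizeof(hdr_t));
    assert(((hdr_t *)(pool + len - sizeof(hdr_t)))->bsize == ESENT);
    return 0;
  }
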
-/* ------------------------------------------------------------------------ */
-
/* BFREED -- Dump the free lists for this thread. */
-
-static void
-bfreed( kmp_info_t *th )
-{
- int bin = 0, count = 0;
- int gtid = __kmp_gtid_from_thread( th );
- thr_data_t *thr = get_thr_data( th );
+static void bfreed(kmp_info_t *th) {
+ int bin = 0, count = 0;
+ int gtid = __kmp_gtid_from_thread(th);
+ thr_data_t *thr = get_thr_data(th);
#if BufStats
- __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
- KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
- " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
- gtid, (kmp_uint64) thr->totalloc,
- (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
- (kmp_int64) thr->numpblk,
- (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
- (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
+ __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
+ " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
+ " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
+ " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
+ " drel=%" KMP_INT64_SPEC "\n",
+ gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
+ (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
+ (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
+ (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
#endif
- for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
- bfhead_t *b;
-
- for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
- bufsize bs = b->bh.bb.bsize;
-
- KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
- KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
- KMP_DEBUG_ASSERT( bs > 0 );
-
- count += 1;
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b;
- __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
+ for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
+ b = b->ql.flink) {
+ bufsize bs = b->bh.bb.bsize;
+
+ KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
+ KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
+ KMP_DEBUG_ASSERT(bs > 0);
+
+ count += 1;
+
+ __kmp_printf_no_lock(
+ "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
+ (long)bs);
#ifdef FreeWipe
- {
- char *lerr = ((char *) b) + sizeof(bfhead_t);
- if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
- __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
- }
- }
-#endif
+ {
+ char *lerr = ((char *)b) + sizeof(bfhead_t);
+ if ((bs > sizeof(bfhead_t)) &&
+ ((*lerr != 0x55) ||
+ (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
+ 0))) {
+ __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
+ "free block have been overstored.)\n",
+ gtid);
}
+ }
+#endif
}
+ }
- if (count == 0)
- __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
+ if (count == 0)
+ __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}
-/* ------------------------------------------------------------------------ */
-
#ifdef KMP_DEBUG
#if BufStats
/* BSTATS -- Return buffer allocation free space statistics. */
-
-static void
-bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
-{
- int bin = 0;
- thr_data_t *thr = get_thr_data( th );
-
- *nget = thr->numget;
- *nrel = thr->numrel;
- *curalloc = (bufsize) thr->totalloc;
- *totfree = 0;
- *maxfree = -1;
-
- for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
- bfhead_t *b = thr->freelist[ bin ].ql.flink;
-
- while (b != &thr->freelist[ bin ]) {
- KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
- *totfree += b->bh.bb.bsize;
- if (b->bh.bb.bsize > *maxfree) {
- *maxfree = b->bh.bb.bsize;
- }
- b = b->ql.flink; /* Link to next buffer */
- }
+static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree,
+ bufsize *maxfree, long *nget, long *nrel) {
+ int bin = 0;
+ thr_data_t *thr = get_thr_data(th);
+
+ *nget = thr->numget;
+ *nrel = thr->numrel;
+ *curalloc = (bufsize)thr->totalloc;
+ *totfree = 0;
+ *maxfree = -1;
+
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b = thr->freelist[bin].ql.flink;
+
+ while (b != &thr->freelist[bin]) {
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
+ *totfree += b->bh.bb.bsize;
+ if (b->bh.bb.bsize > *maxfree) {
+ *maxfree = b->bh.bb.bsize;
+ }
+ b = b->ql.flink; /* Link to next buffer */
}
+ }
}
/* BSTATSE -- Return extended statistics */
-
-static void
-bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
-{
- thr_data_t *thr = get_thr_data( th );
-
- *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
- *npool = thr->numpblk;
- *npget = thr->numpget;
- *nprel = thr->numprel;
- *ndget = thr->numdget;
- *ndrel = thr->numdrel;
+static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool,
+ long *npget, long *nprel, long *ndget, long *ndrel) {
+ thr_data_t *thr = get_thr_data(th);
+
+ *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
+ *npool = thr->numpblk;
+ *npget = thr->numpget;
+ *nprel = thr->numprel;
+ *ndget = thr->numdget;
+ *ndrel = thr->numdrel;
}
#endif /* BufStats */
@@ -1150,59 +1067,56 @@ bstatse( kmp_info_t *th, bufsize *pool_
/* BUFDUMP -- Dump the data in a buffer. This is called with the user
data pointer, and backs up to the buffer header. It will
dump either a free block or an allocated one. */
-
-static void
-bufdump( kmp_info_t *th, void *buf )
-{
- bfhead_t *b;
- unsigned char *bdump;
- bufsize bdlen;
-
- b = BFH(((char *) buf) - sizeof(bhead_t));
- KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
- if (b->bh.bb.bsize < 0) {
- bdump = (unsigned char *) buf;
- bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
- } else {
- bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
- bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
- }
-
- while (bdlen > 0) {
- int i, dupes = 0;
- bufsize l = bdlen;
- char bhex[50], bascii[20];
-
- if (l > 16) {
- l = 16;
- }
-
- for (i = 0; i < l; i++) {
- (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
- if (bdump[i] > 0x20 && bdump[i] < 0x7F)
- bascii[ i ] = bdump[ i ];
- else
- bascii[ i ] = ' ';
- }
- bascii[i] = 0;
- (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
- bdump += l;
- bdlen -= l;
- while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
- (char *) bdump, 16) == 0)) {
- dupes++;
- bdump += 16;
- bdlen -= 16;
- }
- if (dupes > 1) {
- (void) __kmp_printf_no_lock(
- " (%d lines [%d bytes] identical to above line skipped)\n",
- dupes, dupes * 16);
- } else if (dupes == 1) {
- bdump -= 16;
- bdlen += 16;
- }
+static void bufdump(kmp_info_t *th, void *buf) {
+ bfhead_t *b;
+ unsigned char *bdump;
+ bufsize bdlen;
+
+ b = BFH(((char *)buf) - sizeof(bhead_t));
+ KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
+ if (b->bh.bb.bsize < 0) {
+ bdump = (unsigned char *)buf;
+ bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t);
+ } else {
+ bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t));
+ bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t);
+ }
+
+ while (bdlen > 0) {
+ int i, dupes = 0;
+ bufsize l = bdlen;
+ char bhex[50], bascii[20];
+
+ if (l > 16) {
+ l = 16;
+ }
+
+ for (i = 0; i < l; i++) {
+ (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]);
+ if (bdump[i] > 0x20 && bdump[i] < 0x7F)
+ bascii[i] = bdump[i];
+ else
+ bascii[i] = ' ';
+ }
+ bascii[i] = 0;
+ (void)__kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
+ bdump += l;
+ bdlen -= l;
+ while ((bdlen > 16) &&
+ (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) {
+ dupes++;
+ bdump += 16;
+ bdlen -= 16;
+ }
+ if (dupes > 1) {
+ (void)__kmp_printf_no_lock(
+ " (%d lines [%d bytes] identical to above line skipped)\n", dupes,
+ dupes * 16);
+ } else if (dupes == 1) {
+ bdump -= 16;
+ bdlen += 16;
}
+ }
}
/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
@@ -1210,611 +1124,519 @@ bufdump( kmp_info_t *th, void *buf )
are dumped. If DUMPFREE is nonzero, free blocks are
dumped as well. If FreeWipe checking is enabled, free
blocks which have been clobbered will always be dumped. */
+static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) {
+ bfhead_t *b = BFH((char *)buf - sizeof(bhead_t));
-static void
-bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
-{
- bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
-
- while (b->bh.bb.bsize != ESent) {
- bufsize bs = b->bh.bb.bsize;
-
- if (bs < 0) {
- bs = -bs;
- (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
- if (dumpalloc) {
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- }
- } else {
- const char *lerr = "";
+ while (b->bh.bb.bsize != ESent) {
+ bufsize bs = b->bh.bb.bsize;
- KMP_DEBUG_ASSERT(bs > 0);
- if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
- lerr = " (Bad free list links)";
- }
- (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
- (long) bs, lerr);
+ if (bs < 0) {
+ bs = -bs;
+ (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n",
+ (long)bs);
+ if (dumpalloc) {
+ bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
+ }
+ } else {
+ const char *lerr = "";
+
+ KMP_DEBUG_ASSERT(bs > 0);
+ if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
+ lerr = " (Bad free list links)";
+ }
+ (void)__kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
+ (long)bs, lerr);
#ifdef FreeWipe
- lerr = ((char *) b) + sizeof(bfhead_t);
- if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
- (memcmp(lerr, lerr + 1,
- (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
- (void) __kmp_printf_no_lock(
- "(Contents of above free block have been overstored.)\n");
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- } else
-#endif
- if (dumpfree) {
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- }
- }
- b = BFH(((char *) b) + bs);
+ lerr = ((char *)b) + sizeof(bfhead_t);
+ if ((bs > sizeof(bfhead_t)) &&
+ ((*lerr != 0x55) ||
+ (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
+ 0))) {
+ (void)__kmp_printf_no_lock(
+ "(Contents of above free block have been overstored.)\n");
+ bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
+ } else
+#endif
+ if (dumpfree) {
+ bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
+ }
}
+ b = BFH(((char *)b) + bs);
+ }
}
/* BPOOLV -- Validate a buffer pool. */
+static int bpoolv(kmp_info_t *th, void *buf) {
+ bfhead_t *b = BFH(buf);
+
+ while (b->bh.bb.bsize != ESent) {
+ bufsize bs = b->bh.bb.bsize;
-static int
-bpoolv( kmp_info_t *th, void *buf )
-{
- bfhead_t *b = BFH(buf);
-
- while (b->bh.bb.bsize != ESent) {
- bufsize bs = b->bh.bb.bsize;
-
- if (bs < 0) {
- bs = -bs;
- } else {
+ if (bs < 0) {
+ bs = -bs;
+ } else {
#ifdef FreeWipe
- char *lerr = "";
+ char *lerr = "";
#endif
- KMP_DEBUG_ASSERT(bs > 0);
- if (bs <= 0) {
- return 0;
- }
- if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
- (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
- (long) bs);
- KMP_DEBUG_ASSERT(0);
- return 0;
- }
+ KMP_DEBUG_ASSERT(bs > 0);
+ if (bs <= 0) {
+ return 0;
+ }
+ if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
+ (void)__kmp_printf_no_lock(
+ "Free block: size %6ld bytes. (Bad free list links)\n", (long)bs);
+ KMP_DEBUG_ASSERT(0);
+ return 0;
+ }
#ifdef FreeWipe
- lerr = ((char *) b) + sizeof(bfhead_t);
- if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
- (memcmp(lerr, lerr + 1,
- (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
- (void) __kmp_printf_no_lock(
- "(Contents of above free block have been overstored.)\n");
- bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
- KMP_DEBUG_ASSERT(0);
- return 0;
- }
+ lerr = ((char *)b) + sizeof(bfhead_t);
+ if ((bs > sizeof(bfhead_t)) &&
+ ((*lerr != 0x55) ||
+ (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
+ 0))) {
+ (void)__kmp_printf_no_lock(
+ "(Contents of above free block have been overstored.)\n");
+ bufdump(th, (void *)(((char *)b) + sizeof(bhead_t)));
+ KMP_DEBUG_ASSERT(0);
+ return 0;
+ }
#endif /* FreeWipe */
- }
- b = BFH(((char *) b) + bs);
}
- return 1;
+ b = BFH(((char *)b) + bs);
+ }
+ return 1;
}
#endif /* KMP_DEBUG */
-/* ------------------------------------------------------------------------ */
-
-void
-__kmp_initialize_bget( kmp_info_t *th )
-{
- KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
+void __kmp_initialize_bget(kmp_info_t *th) {
+ KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
- set_thr_data( th );
+ set_thr_data(th);
- bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
- (bufsize) __kmp_malloc_pool_incr );
+ bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
+ (bufsize)__kmp_malloc_pool_incr);
}
-void
-__kmp_finalize_bget( kmp_info_t *th )
-{
- thr_data_t *thr;
- bfhead_t *b;
+void __kmp_finalize_bget(kmp_info_t *th) {
+ thr_data_t *thr;
+ bfhead_t *b;
- KMP_DEBUG_ASSERT( th != 0 );
+ KMP_DEBUG_ASSERT(th != 0);
#if BufStats
- thr = (thr_data_t *) th->th.th_local.bget_data;
- KMP_DEBUG_ASSERT( thr != NULL );
- b = thr->last_pool;
-
- /* If a block-release function is defined, and this free buffer
- constitutes the entire block, release it. Note that pool_len
- is defined in such a way that the test will fail unless all
- pool blocks are the same size. */
-
- /* Deallocate the last pool if one exists because we no longer do it in brel() */
- if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
- b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
- {
- KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
- KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
-
- /* Unlink the buffer from the free list */
- __kmp_bget_remove_from_freelist( b );
-
- KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
-
- (*thr->relfcn)(b);
- thr->numprel++; /* Nr of expansion block releases */
- thr->numpblk--; /* Total number of blocks */
- KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
- }
+ thr = (thr_data_t *)th->th.th_local.bget_data;
+ KMP_DEBUG_ASSERT(thr != NULL);
+ b = thr->last_pool;
+
+ /* If a block-release function is defined, and this free buffer constitutes
+ the entire block, release it. Note that pool_len is defined in such a way
+ that the test will fail unless all pool blocks are the same size. */
+
+ // Deallocate the last pool if one exists because we no longer do it in brel()
+ if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
+ b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
+ KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
+ KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
+ KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
+ b->bh.bb.bsize);
+
+ /* Unlink the buffer from the free list */
+ __kmp_bget_remove_from_freelist(b);
+
+ KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
+
+ (*thr->relfcn)(b);
+ thr->numprel++; /* Nr of expansion block releases */
+ thr->numpblk--; /* Total number of blocks */
+ KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
+ }
#endif /* BufStats */
- /* Deallocate bget_data */
- if ( th->th.th_local.bget_data != NULL ) {
- __kmp_free( th->th.th_local.bget_data );
- th->th.th_local.bget_data = NULL;
- }; // if
+ /* Deallocate bget_data */
+ if (th->th.th_local.bget_data != NULL) {
+ __kmp_free(th->th.th_local.bget_data);
+ th->th.th_local.bget_data = NULL;
+ }; // if
}
-void
-kmpc_set_poolsize( size_t size )
-{
- bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
- (bget_release_t) free, (bufsize) size );
+void kmpc_set_poolsize(size_t size) {
+ bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
+ (bget_release_t)free, (bufsize)size);
}
-size_t
-kmpc_get_poolsize( void )
-{
- thr_data_t *p;
+size_t kmpc_get_poolsize(void) {
+ thr_data_t *p;
- p = get_thr_data( __kmp_get_thread() );
+ p = get_thr_data(__kmp_get_thread());
- return p->exp_incr;
+ return p->exp_incr;
}
-void
-kmpc_set_poolmode( int mode )
-{
- thr_data_t *p;
+void kmpc_set_poolmode(int mode) {
+ thr_data_t *p;
- if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
- p = get_thr_data( __kmp_get_thread() );
- p->mode = (bget_mode_t) mode;
- }
+ if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
+ mode == bget_mode_best) {
+ p = get_thr_data(__kmp_get_thread());
+ p->mode = (bget_mode_t)mode;
+ }
}
-int
-kmpc_get_poolmode( void )
-{
- thr_data_t *p;
+int kmpc_get_poolmode(void) {
+ thr_data_t *p;
- p = get_thr_data( __kmp_get_thread() );
+ p = get_thr_data(__kmp_get_thread());
- return p->mode;
+ return p->mode;
}
-void
-kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
-{
- kmp_info_t *th = __kmp_get_thread();
- bufsize a, b;
+void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
+ kmp_info_t *th = __kmp_get_thread();
+ bufsize a, b;
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
+ __kmp_bget_dequeue(th); /* Release any queued buffers */
- bcheck( th, &a, &b );
+ bcheck(th, &a, &b);
- *maxmem = a;
- *allmem = b;
+ *maxmem = a;
+ *allmem = b;
}
-void
-kmpc_poolprint( void )
-{
- kmp_info_t *th = __kmp_get_thread();
+void kmpc_poolprint(void) {
+ kmp_info_t *th = __kmp_get_thread();
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
+ __kmp_bget_dequeue(th); /* Release any queued buffers */
- bfreed( th );
+ bfreed(th);
}
#endif // #if KMP_USE_BGET
-/* ------------------------------------------------------------------------ */
-
-void *
-kmpc_malloc( size_t size )
-{
- void * ptr;
- ptr = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
- if( ptr != NULL ) {
- // save allocated pointer just before one returned to user
- *(void**)ptr = ptr;
- ptr = (void**)ptr + 1;
- }
- return ptr;
-}
-
-#define IS_POWER_OF_TWO(n) (((n)&((n)-1))==0)
-
-void *
-kmpc_aligned_malloc( size_t size, size_t alignment )
-{
- void * ptr;
- void * ptr_allocated;
- KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big
- if( !IS_POWER_OF_TWO(alignment) ) {
- // AC: do we need to issue a warning here?
- errno = EINVAL;
- return NULL;
- }
- size = size + sizeof( void* ) + alignment;
- ptr_allocated = bget( __kmp_entry_thread(), (bufsize)size );
- if( ptr_allocated != NULL ) {
- // save allocated pointer just before one returned to user
- ptr = (void*)(((kmp_uintptr_t)ptr_allocated + sizeof( void* ) + alignment) & ~(alignment - 1));
- *((void**)ptr - 1) = ptr_allocated;
- } else {
- ptr = NULL;
- }
- return ptr;
-}
-
-void *
-kmpc_calloc( size_t nelem, size_t elsize )
-{
- void * ptr;
- ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize + sizeof(ptr)) );
- if( ptr != NULL ) {
- // save allocated pointer just before one returned to user
- *(void**)ptr = ptr;
- ptr = (void**)ptr + 1;
- }
- return ptr;
-}
-
-void *
-kmpc_realloc( void * ptr, size_t size )
-{
- void * result = NULL;
- if ( ptr == NULL ) {
- // If pointer is NULL, realloc behaves like malloc.
- result = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) );
- // save allocated pointer just before one returned to user
- if( result != NULL ) {
- *(void**)result = result;
- result = (void**)result + 1;
- }
- } else if ( size == 0 ) {
- // If size is 0, realloc behaves like free.
- // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
- // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
- KMP_ASSERT(*((void**)ptr - 1));
- brel( __kmp_get_thread(), *((void**)ptr - 1) );
- } else {
- result = bgetr( __kmp_entry_thread(), *((void**)ptr - 1), (bufsize)(size + sizeof(ptr)) );
- if( result != NULL ) {
- *(void**)result = result;
- result = (void**)result + 1;
- }
- }; // if
- return result;
-}
-
-/* NOTE: the library must have already been initialized by a previous allocate */
-
-void
-kmpc_free( void * ptr )
-{
- if ( ! __kmp_init_serial ) {
- return;
- }; // if
- if ( ptr != NULL ) {
- kmp_info_t *th = __kmp_get_thread();
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
- // extract allocated pointer and free it
- KMP_ASSERT(*((void**)ptr - 1));
- brel( th, *((void**)ptr - 1) );
- };
-}
-
-
-/* ------------------------------------------------------------------------ */
-
-void *
-___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
-{
- void * ptr;
- KE_TRACE( 30, (
- "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
- th,
- (int) size
- KMP_SRC_LOC_PARM
- ) );
- ptr = bget( th, (bufsize) size );
- KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
- return ptr;
-}
-
-void *
-___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
-{
- void * ptr;
- KE_TRACE( 30, (
- "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
- th,
- (int) nelem,
- (int) elsize
- KMP_SRC_LOC_PARM
- ) );
- ptr = bgetz( th, (bufsize) (nelem * elsize) );
- KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
- return ptr;
-}
-
-void *
-___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
-{
- KE_TRACE( 30, (
- "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
- th,
- ptr,
- (int) size
- KMP_SRC_LOC_PARM
- ) );
- ptr = bgetr( th, ptr, (bufsize) size );
- KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
- return ptr;
-}
-
-void
-___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
-{
- KE_TRACE( 30, (
- "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
- th,
- ptr
- KMP_SRC_LOC_PARM
- ) );
- if ( ptr != NULL ) {
- __kmp_bget_dequeue( th ); /* Release any queued buffers */
- brel( th, ptr );
- }
- KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
-}
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-/*
- If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
- may be useful for debugging memory corruptions, used freed pointers, etc.
-*/
+void *kmpc_malloc(size_t size) {
+ void *ptr;
+ ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
+ if (ptr != NULL) {
+ // save allocated pointer just before one returned to user
+ *(void **)ptr = ptr;
+ ptr = (void **)ptr + 1;
+ }
+ return ptr;
+}
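
kmpc_malloc() above over-allocates by sizeof(void *) and stores the pointer
returned by bget() in the word immediately preceding the address handed back
to the caller, so kmpc_free() and kmpc_realloc() can recover it later with
*((void **)ptr - 1). A minimal standalone sketch of that header-pointer
convention, built on plain malloc/free rather than bget/brel (sketch_malloc
and sketch_free are hypothetical names, not part of the runtime):

#include <stdlib.h>

/* Sketch: the "original pointer stored just before the user block" idiom,
   with malloc/free standing in for bget/brel. */
static void *sketch_malloc(size_t size) {
  void *raw = malloc(size + sizeof(void *));
  if (raw == NULL)
    return NULL;
  *(void **)raw = raw;     /* remember the block that must eventually be freed */
  return (void **)raw + 1; /* caller sees the memory just past the header */
}

static void sketch_free(void *ptr) {
  if (ptr != NULL)
    free(*((void **)ptr - 1)); /* recover the original allocation */
}
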
+
+#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
+
+void *kmpc_aligned_malloc(size_t size, size_t alignment) {
+ void *ptr;
+ void *ptr_allocated;
+ KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
+ if (!IS_POWER_OF_TWO(alignment)) {
+ // AC: do we need to issue a warning here?
+ errno = EINVAL;
+ return NULL;
+ }
+ size = size + sizeof(void *) + alignment;
+ ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
+ if (ptr_allocated != NULL) {
+ // save allocated pointer just before one returned to user
+ ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
+ ~(alignment - 1));
+ *((void **)ptr - 1) = ptr_allocated;
+ } else {
+ ptr = NULL;
+ }
+ return ptr;
+}
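
The alignment expression above, ((kmp_uintptr_t)ptr_allocated + sizeof(void *)
+ alignment) & ~(alignment - 1), only works when alignment is a power of two,
which is why IS_POWER_OF_TWO() is checked first; the extra sizeof(void *) in
the sum guarantees there is always room below the returned address for the
back-pointer. A small self-contained check of that arithmetic
(align_up_with_header and the sample numbers are illustrative only):

#include <assert.h>
#include <stdint.h>

/* Sketch: round an address up to an alignment boundary while reserving room
   for one back-pointer, mirroring the expression in kmpc_aligned_malloc.
   Assumes alignment is a power of two. */
static uintptr_t align_up_with_header(uintptr_t base, uintptr_t alignment) {
  return (base + sizeof(void *) + alignment) & ~(alignment - 1);
}

int main(void) {
  uintptr_t a = align_up_with_header(1001, 64);
  assert(a % 64 == 0);                /* aligned as requested */
  assert(a - 1001 >= sizeof(void *)); /* room for the saved pointer */
  return 0;
}
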
+
+void *kmpc_calloc(size_t nelem, size_t elsize) {
+ void *ptr;
+ ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
+ if (ptr != NULL) {
+ // save allocated pointer just before one returned to user
+ *(void **)ptr = ptr;
+ ptr = (void **)ptr + 1;
+ }
+ return ptr;
+}
+
+void *kmpc_realloc(void *ptr, size_t size) {
+ void *result = NULL;
+ if (ptr == NULL) {
+ // If pointer is NULL, realloc behaves like malloc.
+ result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
+ // save allocated pointer just before one returned to user
+ if (result != NULL) {
+ *(void **)result = result;
+ result = (void **)result + 1;
+ }
+ } else if (size == 0) {
+ // If size is 0, realloc behaves like free.
+    // The thread must have been registered by an earlier call to
+    // kmpc_malloc() or kmpc_calloc(), so it should be safe to call
+    // __kmp_get_thread() rather than __kmp_entry_thread().
+ KMP_ASSERT(*((void **)ptr - 1));
+ brel(__kmp_get_thread(), *((void **)ptr - 1));
+ } else {
+ result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
+ (bufsize)(size + sizeof(ptr)));
+ if (result != NULL) {
+ *(void **)result = result;
+ result = (void **)result + 1;
+ }
+ }; // if
+ return result;
+}
+
+// NOTE: the library must have already been initialized by a previous allocate
+void kmpc_free(void *ptr) {
+ if (!__kmp_init_serial) {
+ return;
+ }; // if
+ if (ptr != NULL) {
+ kmp_info_t *th = __kmp_get_thread();
+ __kmp_bget_dequeue(th); /* Release any queued buffers */
+ // extract allocated pointer and free it
+ KMP_ASSERT(*((void **)ptr - 1));
+ brel(th, *((void **)ptr - 1));
+ };
+}
+
+void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
+ void *ptr;
+ KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
+ (int)size KMP_SRC_LOC_PARM));
+ ptr = bget(th, (bufsize)size);
+ KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
+ return ptr;
+}
+
+void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
+ size_t elsize KMP_SRC_LOC_DECL) {
+ void *ptr;
+ KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
+ (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
+ ptr = bgetz(th, (bufsize)(nelem * elsize));
+ KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
+ return ptr;
+}
+
+void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
+ size_t size KMP_SRC_LOC_DECL) {
+ KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
+ ptr, (int)size KMP_SRC_LOC_PARM));
+ ptr = bgetr(th, ptr, (bufsize)size);
+ KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
+ return ptr;
+}
+
+void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
+ KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
+ ptr KMP_SRC_LOC_PARM));
+ if (ptr != NULL) {
+ __kmp_bget_dequeue(th); /* Release any queued buffers */
+ brel(th, ptr);
+ }
+ KE_TRACE(30, ("<- __kmp_thread_free()\n"));
+}
+
+/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
+   memory leaks, but it may be useful for debugging memory corruption, use of
+   freed pointers, etc. */
/* #define LEAK_MEMORY */
-
-struct kmp_mem_descr { // Memory block descriptor.
- void * ptr_allocated; // Pointer returned by malloc(), subject for free().
- size_t size_allocated; // Size of allocated memory block.
- void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
- size_t size_aligned; // Size of aligned memory block.
+struct kmp_mem_descr { // Memory block descriptor.
+ void *ptr_allocated; // Pointer returned by malloc(), subject for free().
+ size_t size_allocated; // Size of allocated memory block.
+ void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
+ size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;
-/*
- Allocate memory on requested boundary, fill allocated memory with 0x00.
- NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
- Must use __kmp_free when freeing memory allocated by this routine!
- */
-static
-void *
-___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
-{
- /*
- __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
- return properly aligned pointer. Original pointer returned by malloc() and size of allocated
- block is saved in descriptor just before the aligned pointer. This information used by
- __kmp_free() -- it has to pass to free() original pointer, not aligned one.
-
- +---------+------------+-----------------------------------+---------+
- | padding | descriptor | aligned block | padding |
- +---------+------------+-----------------------------------+---------+
- ^ ^
- | |
- | +- Aligned pointer returned to caller
- +- Pointer returned by malloc()
-
- Aligned block is filled with zeros, paddings are filled with 0xEF.
- */
-
- kmp_mem_descr_t descr;
- kmp_uintptr_t addr_allocated; // Address returned by malloc().
- kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
- kmp_uintptr_t addr_descr; // Address of memory block descriptor.
-
- KE_TRACE( 25, (
- "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
- (int) size,
- (int) alignment
- KMP_SRC_LOC_PARM
- ) );
-
- KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too
- KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
- // Make sure kmp_uintptr_t is enough to store addresses.
-
- descr.size_aligned = size;
- descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
+/* Allocate memory on requested boundary, fill allocated memory with 0x00.
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
+ error. Must use __kmp_free when freeing memory allocated by this routine! */
+static void *___kmp_allocate_align(size_t size,
+ size_t alignment KMP_SRC_LOC_DECL) {
+ /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
+ requested to return properly aligned pointer. Original pointer returned
+ by malloc() and size of allocated block is saved in descriptor just
+ before the aligned pointer. This information used by __kmp_free() -- it
+ has to pass to free() original pointer, not aligned one.
+
+ +---------+------------+-----------------------------------+---------+
+ | padding | descriptor | aligned block | padding |
+ +---------+------------+-----------------------------------+---------+
+ ^ ^
+ | |
+ | +- Aligned pointer returned to caller
+ +- Pointer returned by malloc()
+
+ Aligned block is filled with zeros, paddings are filled with 0xEF. */
+
+ kmp_mem_descr_t descr;
+ kmp_uintptr_t addr_allocated; // Address returned by malloc().
+ kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
+ kmp_uintptr_t addr_descr; // Address of memory block descriptor.
+
+ KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
+ (int)size, (int)alignment KMP_SRC_LOC_PARM));
+
+  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
+ KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
+ // Make sure kmp_uintptr_t is enough to store addresses.
+
+ descr.size_aligned = size;
+ descr.size_allocated =
+ descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
#if KMP_DEBUG
- descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
+ descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
- descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
+ descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
- KE_TRACE( 10, (
- " malloc( %d ) returned %p\n",
- (int) descr.size_allocated,
- descr.ptr_allocated
- ) );
- if ( descr.ptr_allocated == NULL ) {
- KMP_FATAL( OutOfHeapMemory );
- };
-
- addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
- addr_aligned =
- ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
- & ~ ( alignment - 1 );
- addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
-
- descr.ptr_aligned = (void *) addr_aligned;
-
- KE_TRACE( 26, (
- " ___kmp_allocate_align: "
- "ptr_allocated=%p, size_allocated=%d, "
- "ptr_aligned=%p, size_aligned=%d\n",
- descr.ptr_allocated,
- (int) descr.size_allocated,
- descr.ptr_aligned,
- (int) descr.size_aligned
- ) );
-
- KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
- KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
- KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
- KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
+ KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
+ descr.ptr_allocated));
+ if (descr.ptr_allocated == NULL) {
+ KMP_FATAL(OutOfHeapMemory);
+ };
+
+ addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
+ addr_aligned =
+ (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
+ addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
+
+ descr.ptr_aligned = (void *)addr_aligned;
+
+ KE_TRACE(26, (" ___kmp_allocate_align: "
+ "ptr_allocated=%p, size_allocated=%d, "
+ "ptr_aligned=%p, size_aligned=%d\n",
+ descr.ptr_allocated, (int)descr.size_allocated,
+ descr.ptr_aligned, (int)descr.size_aligned));
+
+ KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
+ KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
+ KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
+ addr_allocated + descr.size_allocated);
+ KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
- memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
- // Fill allocated memory block with 0xEF.
+ memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
+// Fill allocated memory block with 0xEF.
#endif
- memset( descr.ptr_aligned, 0x00, descr.size_aligned );
- // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
- // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
- // bytes remain filled with 0xEF in debugging library.)
- * ( (kmp_mem_descr_t *) addr_descr ) = descr;
+ memset(descr.ptr_aligned, 0x00, descr.size_aligned);
+  // Fill the aligned memory block (which is intended for use by the caller)
+  // with 0x00. Do not put this filling under KMP_DEBUG condition! Many
+  // callers expect zeroed memory. (Padding bytes remain filled with 0xEF in
+  // the debugging library.)
+ *((kmp_mem_descr_t *)addr_descr) = descr;
- KMP_MB();
+ KMP_MB();
- KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
- return descr.ptr_aligned;
+ KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
+ return descr.ptr_aligned;
} // func ___kmp_allocate_align
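
The layout sketched in the comment above reduces to a few lines of pointer
arithmetic: round the malloc() result up past a descriptor to the alignment
boundary, store the descriptor in the bytes immediately below the aligned
address, and let the free path read it back from the same place. A
self-contained illustration of that scheme, with demo_descr_t standing in for
kmp_mem_descr_t and plain malloc/free instead of malloc_src_loc/_free_src_loc
(all demo_* names are hypothetical):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
  void *ptr_allocated;   /* pointer returned by malloc(), passed to free() */
  size_t size_allocated; /* total size of the malloc()ed block */
  void *ptr_aligned;     /* aligned pointer handed to the caller */
  size_t size_aligned;   /* usable size of the aligned block */
} demo_descr_t;

/* Assumes alignment is a power of two. */
static void *demo_allocate_align(size_t size, size_t alignment) {
  size_t total = size + sizeof(demo_descr_t) + alignment;
  void *raw = malloc(total);
  if (raw == NULL)
    return NULL; /* the runtime aborts instead; a sketch just returns NULL */
  uintptr_t addr_aligned = ((uintptr_t)raw + sizeof(demo_descr_t) + alignment) &
                           ~((uintptr_t)alignment - 1);
  demo_descr_t *descr = (demo_descr_t *)(addr_aligned - sizeof(demo_descr_t));
  descr->ptr_allocated = raw;
  descr->size_allocated = total;
  descr->ptr_aligned = (void *)addr_aligned;
  descr->size_aligned = size;
  memset(descr->ptr_aligned, 0, size); /* callers expect zeroed memory */
  return descr->ptr_aligned;
}

static void demo_free(void *ptr) {
  demo_descr_t *descr =
      (demo_descr_t *)((uintptr_t)ptr - sizeof(demo_descr_t));
  free(descr->ptr_allocated); /* free() must see the original pointer */
}
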
-
-/*
- Allocate memory on cache line boundary, fill allocated memory with 0x00.
- Do not call this func directly! Use __kmp_allocate macro instead.
- NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
- Must use __kmp_free when freeing memory allocated by this routine!
- */
-void *
-___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
-{
- void * ptr;
- KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
- ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
- KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
- return ptr;
+/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
+ Do not call this func directly! Use __kmp_allocate macro instead.
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
+ error. Must use __kmp_free when freeing memory allocated by this routine! */
+void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
+ void *ptr;
+ KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
+ (int)size KMP_SRC_LOC_PARM));
+ ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
+ KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
+ return ptr;
} // func ___kmp_allocate
-#if (BUILD_MEMORY==FIRST_TOUCH)
-void *
-__kmp_ft_page_allocate(size_t size)
-{
+#if (BUILD_MEMORY == FIRST_TOUCH)
+void *__kmp_ft_page_allocate(size_t size) {
void *adr, *aadr;
const int page_size = KMP_GET_PAGE_SIZE();
- adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
+ adr = (void *)__kmp_thread_malloc(__kmp_get_thread(),
size + page_size + KMP_PTR_SKIP);
- if ( adr == 0 )
- KMP_FATAL( OutOfHeapMemory );
+ if (adr == 0)
+ KMP_FATAL(OutOfHeapMemory);
/* check to see if adr is on a page boundary. */
- if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
+ if (((kmp_uintptr_t)adr & (page_size - 1)) == 0)
/* nothing to do if adr is already on a page boundary. */
aadr = adr;
else
/* else set aadr to the first page boundary in the allocated memory. */
- aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
+ aadr = (void *)(((kmp_uintptr_t)adr + page_size) & ~(page_size - 1));
/* the first touch by the owner thread. */
- *((void**)aadr) = adr;
+ *((void **)aadr) = adr;
/* skip the memory space used for storing adr above. */
- return (void*)((char*)aadr + KMP_PTR_SKIP);
+ return (void *)((char *)aadr + KMP_PTR_SKIP);
}
#endif
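
__kmp_ft_page_allocate() above rounds the block up to the next page boundary
so the owning thread performs the first touch, stores the original pointer in
the first word of that page, and returns the address KMP_PTR_SKIP bytes past
it. The boundary rounding on its own looks like the following sketch, where
DEMO_PAGE_SIZE is a hypothetical fixed page size standing in for
KMP_GET_PAGE_SIZE():

#include <stdint.h>

#define DEMO_PAGE_SIZE 4096 /* illustrative; the runtime queries the OS */

static void *demo_round_to_page(void *adr) {
  if (((uintptr_t)adr & (DEMO_PAGE_SIZE - 1)) == 0)
    return adr; /* already on a page boundary */
  /* otherwise advance to the first page boundary inside the block */
  return (void *)(((uintptr_t)adr + DEMO_PAGE_SIZE) &
                  ~(uintptr_t)(DEMO_PAGE_SIZE - 1));
}
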
-/*
- Allocate memory on page boundary, fill allocated memory with 0x00.
- Does not call this func directly! Use __kmp_page_allocate macro instead.
- NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
- Must use __kmp_free when freeing memory allocated by this routine!
- */
-void *
-___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
-{
- int page_size = 8 * 1024;
- void * ptr;
-
- KE_TRACE( 25, (
- "-> __kmp_page_allocate( %d ) called from %s:%d\n",
- (int) size
- KMP_SRC_LOC_PARM
- ) );
- ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
- KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
- return ptr;
+/* Allocate memory on page boundary, fill allocated memory with 0x00.
+   Do not call this func directly! Use __kmp_page_allocate macro instead.
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
+ error. Must use __kmp_free when freeing memory allocated by this routine! */
+void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
+ int page_size = 8 * 1024;
+ void *ptr;
+
+ KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
+ (int)size KMP_SRC_LOC_PARM));
+ ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
+ KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
+ return ptr;
} // ___kmp_page_allocate
-/*
- Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
- In debug mode, fill the memory block with 0xEF before call to free().
-*/
-void
-___kmp_free( void * ptr KMP_SRC_LOC_DECL )
-{
- kmp_mem_descr_t descr;
- kmp_uintptr_t addr_allocated; // Address returned by malloc().
- kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
-
- KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
- KMP_ASSERT( ptr != NULL );
-
- descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
-
- KE_TRACE( 26, ( " __kmp_free: "
- "ptr_allocated=%p, size_allocated=%d, "
- "ptr_aligned=%p, size_aligned=%d\n",
- descr.ptr_allocated, (int) descr.size_allocated,
- descr.ptr_aligned, (int) descr.size_aligned ));
-
- addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
- addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
-
- KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
- KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
- KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
- KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
- KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
-
- #ifdef KMP_DEBUG
- memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
- // Fill memory block with 0xEF, it helps catch using freed memory.
- #endif
-
- #ifndef LEAK_MEMORY
- KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
- # ifdef KMP_DEBUG
- _free_src_loc( descr.ptr_allocated, _file_, _line_ );
- # else
- free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
- # endif
- #endif
- KMP_MB();
- KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
-} // func ___kmp_free
+/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
+ In debug mode, fill the memory block with 0xEF before call to free(). */
+void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
+ kmp_mem_descr_t descr;
+ kmp_uintptr_t addr_allocated; // Address returned by malloc().
+ kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
+
+ KE_TRACE(25,
+ ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
+ KMP_ASSERT(ptr != NULL);
+
+ descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
+
+ KE_TRACE(26, (" __kmp_free: "
+ "ptr_allocated=%p, size_allocated=%d, "
+ "ptr_aligned=%p, size_aligned=%d\n",
+ descr.ptr_allocated, (int)descr.size_allocated,
+ descr.ptr_aligned, (int)descr.size_aligned));
+
+ addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
+ addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
+
+ KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
+ KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
+ KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
+ KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
+ KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
+ addr_allocated + descr.size_allocated);
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+#ifdef KMP_DEBUG
+ memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
+// Fill memory block with 0xEF; it helps catch use of freed memory.
+#endif
+
+#ifndef LEAK_MEMORY
+ KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
+#ifdef KMP_DEBUG
+ _free_src_loc(descr.ptr_allocated, _file_, _line_);
+#else
+ free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
+#endif
+#endif
+ KMP_MB();
+ KE_TRACE(25, ("<- __kmp_free() returns\n"));
+} // func ___kmp_free
#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists
@@ -1825,254 +1647,257 @@ ___kmp_free( void * ptr KMP_SRC_LOC_DECL
#define KMP_FREE_LIST_LIMIT 16
// Always use 128 bytes for determining buckets for caching memory blocks
-#define DCACHE_LINE 128
+#define DCACHE_LINE 128
-void *
-___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
-{
- void * ptr;
- int num_lines;
- int idx;
- int index;
- void * alloc_ptr;
- size_t alloc_size;
- kmp_mem_descr_t * descr;
-
- KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
- __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
-
- num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
- idx = num_lines - 1;
- KMP_DEBUG_ASSERT( idx >= 0 );
- if ( idx < 2 ) {
- index = 0; // idx is [ 0, 1 ], use first free list
- num_lines = 2; // 1, 2 cache lines or less than cache line
- } else if ( ( idx >>= 2 ) == 0 ) {
- index = 1; // idx is [ 2, 3 ], use second free list
- num_lines = 4; // 3, 4 cache lines
- } else if ( ( idx >>= 2 ) == 0 ) {
- index = 2; // idx is [ 4, 15 ], use third free list
- num_lines = 16; // 5, 6, ..., 16 cache lines
- } else if ( ( idx >>= 2 ) == 0 ) {
- index = 3; // idx is [ 16, 63 ], use fourth free list
- num_lines = 64; // 17, 18, ..., 64 cache lines
- } else {
- goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
- }
+void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
+ void *ptr;
+ int num_lines;
+ int idx;
+ int index;
+ void *alloc_ptr;
+ size_t alloc_size;
+ kmp_mem_descr_t *descr;
+
+ KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
+ __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
+
+ num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
+ idx = num_lines - 1;
+ KMP_DEBUG_ASSERT(idx >= 0);
+ if (idx < 2) {
+ index = 0; // idx is [ 0, 1 ], use first free list
+ num_lines = 2; // 1, 2 cache lines or less than cache line
+ } else if ((idx >>= 2) == 0) {
+ index = 1; // idx is [ 2, 3 ], use second free list
+ num_lines = 4; // 3, 4 cache lines
+ } else if ((idx >>= 2) == 0) {
+ index = 2; // idx is [ 4, 15 ], use third free list
+ num_lines = 16; // 5, 6, ..., 16 cache lines
+ } else if ((idx >>= 2) == 0) {
+ index = 3; // idx is [ 16, 63 ], use fourth free list
+ num_lines = 64; // 17, 18, ..., 64 cache lines
+ } else {
+ goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
+ }
+
+ ptr = this_thr->th.th_free_lists[index].th_free_list_self;
+ if (ptr != NULL) {
+ // pop the head of no-sync free list
+ this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
+ KMP_DEBUG_ASSERT(
+ this_thr ==
+ ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
+ ->ptr_aligned);
+ goto end;
+ };
+ ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
+ if (ptr != NULL) {
+    // no-sync free list is empty, use sync free list (filled in by other
+    // threads only); pop the head of the sync free list, push NULL instead
+ while (!KMP_COMPARE_AND_STORE_PTR(
+ &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL)) {
+ KMP_CPU_PAUSE();
+ ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
+ }
+    // push the rest of the chain into the no-sync free list (can be NULL if
+    // there was only one block)
+ this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
+ KMP_DEBUG_ASSERT(
+ this_thr ==
+ ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
+ ->ptr_aligned);
+ goto end;
+ }
- ptr = this_thr->th.th_free_lists[index].th_free_list_self;
- if ( ptr != NULL ) {
- // pop the head of no-sync free list
- this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
- KMP_DEBUG_ASSERT( this_thr ==
- ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
- goto end;
- };
- ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
- if ( ptr != NULL ) {
- // no-sync free list is empty, use sync free list (filled in by other threads only)
- // pop the head of the sync free list, push NULL instead
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
- {
- KMP_CPU_PAUSE();
- ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
- }
- // push the rest of chain into no-sync free list (can be NULL if there was the only block)
- this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
- KMP_DEBUG_ASSERT( this_thr ==
- ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
- goto end;
- }
-
- alloc_call:
- // haven't found block in the free lists, thus allocate it
- size = num_lines * DCACHE_LINE;
-
- alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
- KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
- __kmp_gtid_from_thread( this_thr ), alloc_size ) );
- alloc_ptr = bget( this_thr, (bufsize) alloc_size );
-
- // align ptr to DCACHE_LINE
- ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
- descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
-
- descr->ptr_allocated = alloc_ptr; // remember allocated pointer
- // we don't need size_allocated
- descr->ptr_aligned = (void *)this_thr; // remember allocating thread
- // (it is already saved in bget buffer,
- // but we may want to use another allocator in future)
- descr->size_aligned = size;
-
- end:
- KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
- __kmp_gtid_from_thread( this_thr ), ptr ) );
- return ptr;
+alloc_call:
+ // haven't found block in the free lists, thus allocate it
+ size = num_lines * DCACHE_LINE;
+
+ alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
+ KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
+ "alloc_size %d\n",
+ __kmp_gtid_from_thread(this_thr), alloc_size));
+ alloc_ptr = bget(this_thr, (bufsize)alloc_size);
+
+ // align ptr to DCACHE_LINE
+ ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
+ DCACHE_LINE) &
+ ~(DCACHE_LINE - 1));
+ descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
+
+ descr->ptr_allocated = alloc_ptr; // remember allocated pointer
+ // we don't need size_allocated
+ descr->ptr_aligned = (void *)this_thr; // remember allocating thread
+  // (it is already saved in the bget buffer, but we may want to use another
+  // allocator in the future)
+ descr->size_aligned = size;
+
+end:
+ KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
+ __kmp_gtid_from_thread(this_thr), ptr));
+ return ptr;
} // func __kmp_fast_allocate
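
The idx-shifting chain above maps a request, expressed in 128-byte cache
lines, onto one of four free-list buckets holding blocks of 2, 4, 16 or 64
lines; anything larger than 64 lines (8KB) bypasses the free lists and goes
straight to bget(). The same mapping, pulled out into a small illustrative
helper (demo_bucket_for and DEMO_DCACHE_LINE are not runtime names):

#include <stddef.h>

#define DEMO_DCACHE_LINE 128

/* Returns the bucket index (0..3) and the rounded size in cache lines,
   or -1 when the request is too large for the per-thread free lists. */
static int demo_bucket_for(size_t size, int *num_lines_out) {
  int num_lines = (int)((size + DEMO_DCACHE_LINE - 1) / DEMO_DCACHE_LINE);
  int idx = num_lines - 1;
  if (idx < 2) {
    *num_lines_out = 2; /* up to 2 cache lines */
    return 0;
  } else if ((idx >>= 2) == 0) {
    *num_lines_out = 4; /* 3..4 cache lines */
    return 1;
  } else if ((idx >>= 2) == 0) {
    *num_lines_out = 16; /* 5..16 cache lines */
    return 2;
  } else if ((idx >>= 2) == 0) {
    *num_lines_out = 64; /* 17..64 cache lines */
    return 3;
  }
  return -1; /* > 64 cache lines (> 8KB): allocate directly via bget() */
}

For example, a 300-byte request needs 3 cache lines, lands in bucket 1, and is
rounded up to 4 lines (512 bytes), which is exactly the block size
__kmp_fast_free() later expects for that bucket.
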
// Free fast memory and place it on the thread's free list if it is of
// the correct size.
-void
-___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
-{
- kmp_mem_descr_t * descr;
- kmp_info_t * alloc_thr;
- size_t size;
- size_t idx;
- int index;
-
- KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
- __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
- KMP_ASSERT( ptr != NULL );
-
- descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
-
- KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
- (int) descr->size_aligned ) );
-
- size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
-
- idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
- if ( idx == size ) {
- index = 0; // 2 cache lines
- } else if ( ( idx <<= 1 ) == size ) {
- index = 1; // 4 cache lines
- } else if ( ( idx <<= 2 ) == size ) {
- index = 2; // 16 cache lines
- } else if ( ( idx <<= 2 ) == size ) {
- index = 3; // 64 cache lines
+void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
+ kmp_mem_descr_t *descr;
+ kmp_info_t *alloc_thr;
+ size_t size;
+ size_t idx;
+ int index;
+
+ KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
+ __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
+ KMP_ASSERT(ptr != NULL);
+
+ descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
+
+ KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
+ (int)descr->size_aligned));
+
+ size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
+
+ idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
+ if (idx == size) {
+ index = 0; // 2 cache lines
+ } else if ((idx <<= 1) == size) {
+ index = 1; // 4 cache lines
+ } else if ((idx <<= 2) == size) {
+ index = 2; // 16 cache lines
+ } else if ((idx <<= 2) == size) {
+ index = 3; // 64 cache lines
+ } else {
+ KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
+ goto free_call; // 65 or more cache lines ( > 8KB )
+ }
+
+ alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
+ if (alloc_thr == this_thr) {
+ // push block to self no-sync free list, linking previous head (LIFO)
+ *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
+ this_thr->th.th_free_lists[index].th_free_list_self = ptr;
+ } else {
+ void *head = this_thr->th.th_free_lists[index].th_free_list_other;
+ if (head == NULL) {
+ // Create new free list
+ this_thr->th.th_free_lists[index].th_free_list_other = ptr;
+ *((void **)ptr) = NULL; // mark the tail of the list
+ descr->size_allocated = (size_t)1; // head of the list keeps its length
} else {
- KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
- goto free_call; // 65 or more cache lines ( > 8KB )
- }
-
- alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
- if ( alloc_thr == this_thr ) {
- // push block to self no-sync free list, linking previous head (LIFO)
- *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
- this_thr->th.th_free_lists[index].th_free_list_self = ptr;
- } else {
- void * head = this_thr->th.th_free_lists[index].th_free_list_other;
- if ( head == NULL ) {
- // Create new free list
- this_thr->th.th_free_lists[index].th_free_list_other = ptr;
- *((void **)ptr) = NULL; // mark the tail of the list
- descr->size_allocated = (size_t)1; // head of the list keeps its length
- } else {
- // need to check existed "other" list's owner thread and size of queue
- kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
- kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
- size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task
- if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
- // we can add current task to "other" list, no sync needed
- *((void **)ptr) = head;
- descr->size_allocated = q_sz;
- this_thr->th.th_free_lists[index].th_free_list_other = ptr;
- } else {
- // either queue blocks owner is changing or size limit exceeded
- // return old queue to allocating thread (q_th) synchroneously,
- // and start new list for alloc_thr's tasks
- void * old_ptr;
- void * tail = head;
- void * next = *((void **)head);
- while ( next != NULL ) {
- KMP_DEBUG_ASSERT(
- // queue size should decrease by 1 each step through the list
- ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
- ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
- tail = next; // remember tail node
- next = *((void **)next);
- }
- KMP_DEBUG_ASSERT( q_th != NULL );
- // push block to owner's sync free list
- old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
- /* the next pointer must be set before setting free_list to ptr to avoid
- exposing a broken list to other threads, even for an instant. */
- *((void **)tail) = old_ptr;
-
- while ( ! KMP_COMPARE_AND_STORE_PTR(
- &q_th->th.th_free_lists[index].th_free_list_sync,
- old_ptr,
- head ) )
- {
- KMP_CPU_PAUSE();
- old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
- *((void **)tail) = old_ptr;
- }
-
- // start new list of not-selt tasks
- this_thr->th.th_free_lists[index].th_free_list_other = ptr;
- *((void **)ptr) = NULL;
- descr->size_allocated = (size_t)1; // head of queue keeps its length
- }
- }
- }
- goto end;
+      // need to check the existing "other" list's owner thread and queue size
+ kmp_mem_descr_t *dsc =
+ (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
+ // allocating thread, same for all queue nodes
+ kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
+ size_t q_sz =
+ dsc->size_allocated + 1; // new size in case we add current task
+ if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
+ // we can add current task to "other" list, no sync needed
+ *((void **)ptr) = head;
+ descr->size_allocated = q_sz;
+ this_thr->th.th_free_lists[index].th_free_list_other = ptr;
+ } else {
+        // either the queue blocks' owner is changing or the size limit was
+        // exceeded; return the old queue to the allocating thread (q_th)
+        // synchronously, and start a new list for alloc_thr's tasks
+ void *old_ptr;
+ void *tail = head;
+ void *next = *((void **)head);
+ while (next != NULL) {
+ KMP_DEBUG_ASSERT(
+ // queue size should decrease by 1 each step through the list
+ ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
+ ->size_allocated +
+ 1 ==
+ ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
+ ->size_allocated);
+ tail = next; // remember tail node
+ next = *((void **)next);
+ }
+ KMP_DEBUG_ASSERT(q_th != NULL);
+ // push block to owner's sync free list
+ old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
+ /* the next pointer must be set before setting free_list to ptr to avoid
+ exposing a broken list to other threads, even for an instant. */
+ *((void **)tail) = old_ptr;
- free_call:
- KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
- __kmp_gtid_from_thread( this_thr), size ) );
- __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
- brel( this_thr, descr->ptr_allocated );
+ while (!KMP_COMPARE_AND_STORE_PTR(
+ &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
+ KMP_CPU_PAUSE();
+ old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
+ *((void **)tail) = old_ptr;
+ }
+
+      // start a new list of not-self tasks
+ this_thr->th.th_free_lists[index].th_free_list_other = ptr;
+ *((void **)ptr) = NULL;
+ descr->size_allocated = (size_t)1; // head of queue keeps its length
+ }
+ }
+ }
+ goto end;
+
+free_call:
+ KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
+ __kmp_gtid_from_thread(this_thr), size));
+ __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
+ brel(this_thr, descr->ptr_allocated);
- end:
- KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
+end:
+ KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
} // func __kmp_fast_free
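
The cross-thread return path above is a lock-free stack push: the tail of the
chain is linked to the current head of the owner's th_free_list_sync before
every KMP_COMPARE_AND_STORE_PTR attempt, so other threads never observe a
chain with a dangling tail. The same idiom expressed with C11 atomics instead
of the runtime's macros (demo_node_t and demo_push_chain are illustrative
names only):

#include <stdatomic.h>

typedef struct demo_node {
  struct demo_node *next; /* the first word of each block is the link */
} demo_node_t;

static void demo_push_chain(_Atomic(demo_node_t *) *free_list_sync,
                            demo_node_t *head, demo_node_t *tail) {
  demo_node_t *old_head = atomic_load(free_list_sync);
  do {
    /* link the old list behind our tail *before* publishing the new head,
       so readers never see a chain whose tail points to garbage */
    tail->next = old_head;
  } while (!atomic_compare_exchange_weak(free_list_sync, &old_head, head));
}
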
-
// Initialize the thread free lists related to fast memory
// Only do this when a thread is initially created.
-void
-__kmp_initialize_fast_memory( kmp_info_t *this_thr )
-{
- KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
+void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
+ KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
- memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
+ memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}
// Free the memory in the thread free lists related to fast memory
// Only do this when a thread is being reaped (destroyed).
-void
-__kmp_free_fast_memory( kmp_info_t *th )
-{
- // Suppose we use BGET underlying allocator, walk through its structures...
- int bin;
- thr_data_t * thr = get_thr_data( th );
- void ** lst = NULL;
-
- KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
- __kmp_gtid_from_thread( th ) ) );
-
- __kmp_bget_dequeue( th ); // Release any queued buffers
-
- // Dig through free lists and extract all allocated blocks
- for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
- bfhead_t * b = thr->freelist[ bin ].ql.flink;
- while ( b != &thr->freelist[ bin ] ) {
- if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // if the buffer is an allocated address?
- *((void**)b) = lst; // link the list (override bthr, but keep flink yet)
- lst = (void**)b; // push b into lst
- }
- b = b->ql.flink; // get next buffer
- }
- }
- while ( lst != NULL ) {
- void * next = *lst;
- KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
- lst, next, th, __kmp_gtid_from_thread( th ) ) );
- (*thr->relfcn)(lst);
- #if BufStats
- // count blocks to prevent problems in __kmp_finalize_bget()
- thr->numprel++; /* Nr of expansion block releases */
- thr->numpblk--; /* Total number of blocks */
- #endif
- lst = (void**)next;
- }
+void __kmp_free_fast_memory(kmp_info_t *th) {
+ // Suppose we use BGET underlying allocator, walk through its structures...
+ int bin;
+ thr_data_t *thr = get_thr_data(th);
+ void **lst = NULL;
+
+ KE_TRACE(
+ 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
+
+ __kmp_bget_dequeue(th); // Release any queued buffers
+
+ // Dig through free lists and extract all allocated blocks
+ for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
+ bfhead_t *b = thr->freelist[bin].ql.flink;
+ while (b != &thr->freelist[bin]) {
+ if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
+ *((void **)b) =
+ lst; // link the list (override bthr, but keep flink yet)
+ lst = (void **)b; // push b into lst
+ }
+ b = b->ql.flink; // get next buffer
+ }
+ }
+ while (lst != NULL) {
+ void *next = *lst;
+ KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
+ lst, next, th, __kmp_gtid_from_thread(th)));
+ (*thr->relfcn)(lst);
+#if BufStats
+ // count blocks to prevent problems in __kmp_finalize_bget()
+ thr->numprel++; /* Nr of expansion block releases */
+ thr->numpblk--; /* Total number of blocks */
+#endif
+ lst = (void **)next;
+ }
- KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
- __kmp_gtid_from_thread( th ) ) );
+ KE_TRACE(
+ 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}
#endif // USE_FAST_MEMORY