[Openmp-commits] [openmp] r302929 - Clang-format and whitespace cleanup of source code
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Fri May 12 11:01:35 PDT 2017
Modified: openmp/trunk/runtime/src/kmp_stats.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats.cpp Fri May 12 13:01:32 2017
@@ -12,196 +12,186 @@
//
//===----------------------------------------------------------------------===//
+
#include "kmp.h"
-#include "kmp_str.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
+#include "kmp_str.h"
#include <algorithm>
-#include <sstream>
-#include <iomanip>
-#include <stdlib.h> // for atexit
#include <ctime>
+#include <iomanip>
+#include <sstream>
+#include <stdlib.h> // for atexit
#define STRINGIZE2(x) #x
#define STRINGIZE(x) STRINGIZE2(x)
-#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
+#define expandName(name, flags, ignore) {STRINGIZE(name), flags},
statInfo timeStat::timerInfo[] = {
- KMP_FOREACH_TIMER(expandName,0)
- {"TIMER_LAST", 0}
-};
+ KMP_FOREACH_TIMER(expandName, 0){"TIMER_LAST", 0}};
const statInfo counter::counterInfo[] = {
- KMP_FOREACH_COUNTER(expandName,0)
- {"COUNTER_LAST", 0}
-};
+ KMP_FOREACH_COUNTER(expandName, 0){"COUNTER_LAST", 0}};
#undef expandName
-#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0},
+#define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0},
kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
- KMP_FOREACH_TIMER(expandName,0)
- {0.0,0.0,0.0}
-};
+ KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
#undef expandName
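The expandName/KMP_FOREACH_* pattern above (and the ENUMERATE macro in kmp_stats.h further down) is the classic X-macro idiom: one list macro is expanded with different per-entry macros so the enum, the name table, and the flag table can never drift apart. A minimal, self-contained sketch of the idiom, using an invented FRUIT list rather than the real timer list:

  #include <cstdio>

  // One list macro names every entry exactly once...
  #define FOREACH_FRUIT(macro, arg)                                            \
    macro(APPLE, 0, arg) macro(PEAR, 1, arg) macro(PLUM, 2, arg)

  // ...expanded once to build an enum...
  #define AS_ENUM(name, flags, prefix) prefix##name,
  enum fruit_e { FOREACH_FRUIT(AS_ENUM, FRUIT_) FRUIT_LAST };
  #undef AS_ENUM

  // ...and again to build the matching name/flags table, just as timerInfo[]
  // and counterInfo[] are built above.
  #define AS_INFO(name, flags, ignore) {#name, flags},
  static const struct { const char *name; int flags; } fruitInfo[] = {
      FOREACH_FRUIT(AS_INFO, 0){"FRUIT_LAST", 0}};
  #undef AS_INFO

  int main() {
    for (int i = 0; i < FRUIT_LAST; i++)
      std::printf("%d: %s (flags=%d)\n", i, fruitInfo[i].name,
                  fruitInfo[i].flags);
    return 0;
  }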
-const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = {
- {1.0, 0.0, 0.0}, // red
- {1.0, 0.6, 0.0}, // orange
- {1.0, 1.0, 0.0}, // yellow
- {0.0, 1.0, 0.0}, // green
- {0.0, 0.0, 1.0}, // blue
- {0.6, 0.2, 0.8}, // purple
- {1.0, 0.0, 1.0}, // magenta
- {0.0, 0.4, 0.2}, // dark green
- {1.0, 1.0, 0.6}, // light yellow
- {0.6, 0.4, 0.6}, // dirty purple
- {0.0, 1.0, 1.0}, // cyan
- {1.0, 0.4, 0.8}, // pink
- {0.5, 0.5, 0.5}, // grey
- {0.8, 0.7, 0.5}, // brown
- {0.6, 0.6, 1.0}, // light blue
- {1.0, 0.7, 0.5}, // peach
- {0.8, 0.5, 1.0}, // lavender
- {0.6, 0.0, 0.0}, // dark red
- {0.7, 0.6, 0.0}, // gold
- {0.0, 0.0, 0.0} // black
+const kmp_stats_output_module::rgb_color
+ kmp_stats_output_module::globalColorArray[] = {
+ {1.0, 0.0, 0.0}, // red
+ {1.0, 0.6, 0.0}, // orange
+ {1.0, 1.0, 0.0}, // yellow
+ {0.0, 1.0, 0.0}, // green
+ {0.0, 0.0, 1.0}, // blue
+ {0.6, 0.2, 0.8}, // purple
+ {1.0, 0.0, 1.0}, // magenta
+ {0.0, 0.4, 0.2}, // dark green
+ {1.0, 1.0, 0.6}, // light yellow
+ {0.6, 0.4, 0.6}, // dirty purple
+ {0.0, 1.0, 1.0}, // cyan
+ {1.0, 0.4, 0.8}, // pink
+ {0.5, 0.5, 0.5}, // grey
+ {0.8, 0.7, 0.5}, // brown
+ {0.6, 0.6, 1.0}, // light blue
+ {1.0, 0.7, 0.5}, // peach
+ {0.8, 0.5, 1.0}, // lavender
+ {0.6, 0.0, 0.0}, // dark red
+ {0.7, 0.6, 0.0}, // gold
+ {0.0, 0.0, 0.0} // black
};
// Ensure that the atexit handler only runs once.
static uint32_t statsPrinted = 0;
// output interface
-static kmp_stats_output_module* __kmp_stats_global_output = NULL;
+static kmp_stats_output_module *__kmp_stats_global_output = NULL;
-/* ****************************************************** */
/* ************* statistic member functions ************* */
-void statistic::addSample(double sample)
-{
- double delta = sample - meanVal;
-
- sampleCount = sampleCount + 1;
- meanVal = meanVal + delta/sampleCount;
- m2 = m2 + delta*(sample - meanVal);
-
- minVal = std::min(minVal, sample);
- maxVal = std::max(maxVal, sample);
-}
-
-statistic & statistic::operator+= (const statistic & other)
-{
- if (sampleCount == 0)
- {
- *this = other;
- return *this;
- }
+void statistic::addSample(double sample) {
+ double delta = sample - meanVal;
- uint64_t newSampleCount = sampleCount + other.sampleCount;
- double dnsc = double(newSampleCount);
- double dsc = double(sampleCount);
- double dscBydnsc = dsc/dnsc;
- double dosc = double(other.sampleCount);
- double delta = other.meanVal - meanVal;
-
- // Try to order these calculations to avoid overflows.
- // If this were Fortran, then the compiler would not be able to re-order over brackets.
- // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition
- // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation
- // really is associative (which floating addition isn't...)).
- meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc);
- m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta;
- minVal = std::min (minVal, other.minVal);
- maxVal = std::max (maxVal, other.maxVal);
- sampleCount = newSampleCount;
+ sampleCount = sampleCount + 1;
+ meanVal = meanVal + delta / sampleCount;
+ m2 = m2 + delta * (sample - meanVal);
-
- return *this;
+ minVal = std::min(minVal, sample);
+ maxVal = std::max(maxVal, sample);
}
-void statistic::scale(double factor)
-{
- minVal = minVal*factor;
- maxVal = maxVal*factor;
- meanVal= meanVal*factor;
- m2 = m2*factor*factor;
- return;
-}
+statistic &statistic::operator+=(const statistic &other) {
+ if (sampleCount == 0) {
+ *this = other;
+ return *this;
+ }
-std::string statistic::format(char unit, bool total) const
-{
- std::string result = formatSI(sampleCount,9,' ');
-
- if (sampleCount == 0)
- {
- result = result + std::string(", ") + formatSI(0.0, 9, unit);
- result = result + std::string(", ") + formatSI(0.0, 9, unit);
- result = result + std::string(", ") + formatSI(0.0, 9, unit);
- if (total)
- result = result + std::string(", ") + formatSI(0.0, 9, unit);
- result = result + std::string(", ") + formatSI(0.0, 9, unit);
- }
- else
- {
- result = result + std::string(", ") + formatSI(minVal, 9, unit);
- result = result + std::string(", ") + formatSI(meanVal, 9, unit);
- result = result + std::string(", ") + formatSI(maxVal, 9, unit);
- if (total)
- result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit);
- result = result + std::string(", ") + formatSI(getSD(), 9, unit);
- }
- return result;
+ uint64_t newSampleCount = sampleCount + other.sampleCount;
+ double dnsc = double(newSampleCount);
+ double dsc = double(sampleCount);
+ double dscBydnsc = dsc / dnsc;
+ double dosc = double(other.sampleCount);
+ double delta = other.meanVal - meanVal;
+
+ // Try to order these calculations to avoid overflows. If this were Fortran,
+ // then the compiler would not be able to re-order over brackets. In C++ it
+ // may be legal to do that (we certainly hope it doesn't, and The C++ Programming
+ // Language 2nd edition suggests it shouldn't, since it says that exploitation
+ // of associativity can only be made if the operation really is associative
+ // (which floating addition isn't...)).
+ meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
+ m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
+ minVal = std::min(minVal, other.minVal);
+ maxVal = std::max(maxVal, other.maxVal);
+ sampleCount = newSampleCount;
+
+ return *this;
+}
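Spelled out, addSample() is the standard online (Welford) update and operator+= is the usual pairwise combination of two partial statistics; in LaTeX notation, matching the assignments above (dscBydnsc is $n_a/n$):

  % online update for a new sample x (addSample):
  \delta = x - \mu, \qquad n \leftarrow n + 1, \qquad
  \mu \leftarrow \mu + \delta / n, \qquad
  M_2 \leftarrow M_2 + \delta\,(x - \mu) \quad \text{(last factor uses the updated } \mu\text{)}

  % merging partial statistics a and b (operator+=):
  n = n_a + n_b, \qquad \delta = \mu_b - \mu_a, \qquad
  \mu = \mu_a \frac{n_a}{n} + \mu_b \Bigl(1 - \frac{n_a}{n}\Bigr), \qquad
  M_2 = M_{2,a} + M_{2,b} + \frac{n_a}{n}\, n_b\, \delta^2

Grouping the last term as $(n_a/n) \cdot n_b \cdot \delta^2$ rather than $n_a n_b \delta^2 / n$ appears to be the ordering the comment above is concerned with preserving.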
+
+void statistic::scale(double factor) {
+ minVal = minVal * factor;
+ maxVal = maxVal * factor;
+ meanVal = meanVal * factor;
+ m2 = m2 * factor * factor;
+ return;
+}
+
+std::string statistic::format(char unit, bool total) const {
+ std::string result = formatSI(sampleCount, 9, ' ');
+
+ if (sampleCount == 0) {
+ result = result + std::string(", ") + formatSI(0.0, 9, unit);
+ result = result + std::string(", ") + formatSI(0.0, 9, unit);
+ result = result + std::string(", ") + formatSI(0.0, 9, unit);
+ if (total)
+ result = result + std::string(", ") + formatSI(0.0, 9, unit);
+ result = result + std::string(", ") + formatSI(0.0, 9, unit);
+ } else {
+ result = result + std::string(", ") + formatSI(minVal, 9, unit);
+ result = result + std::string(", ") + formatSI(meanVal, 9, unit);
+ result = result + std::string(", ") + formatSI(maxVal, 9, unit);
+ if (total)
+ result =
+ result + std::string(", ") + formatSI(meanVal * sampleCount, 9, unit);
+ result = result + std::string(", ") + formatSI(getSD(), 9, unit);
+ }
+ return result;
}
-/* ********************************************************** */
/* ************* explicitTimer member functions ************* */
void explicitTimer::start(timer_e timerEnumValue) {
- startTime = tsc_tick_count::now();
- totalPauseTime = 0;
- if(timeStat::logEvent(timerEnumValue)) {
- __kmp_stats_thread_ptr->incrementNestValue();
- }
- return;
+ startTime = tsc_tick_count::now();
+ totalPauseTime = 0;
+ if (timeStat::logEvent(timerEnumValue)) {
+ __kmp_stats_thread_ptr->incrementNestValue();
+ }
+ return;
}
-void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) {
- if (startTime.getValue() == 0)
- return;
-
- tsc_tick_count finishTime = tsc_tick_count::now();
-
- //stat->addSample ((tsc_tick_count::now() - startTime).ticks());
- stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
-
- if(timeStat::logEvent(timerEnumValue)) {
- if(!stats_ptr)
- stats_ptr = __kmp_stats_thread_ptr;
- stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
- stats_ptr->decrementNestValue();
- }
-
- /* We accept the risk that we drop a sample because it really did start at t==0. */
- startTime = 0;
+void explicitTimer::stop(timer_e timerEnumValue,
+ kmp_stats_list *stats_ptr /* = nullptr */) {
+ if (startTime.getValue() == 0)
return;
+
+ tsc_tick_count finishTime = tsc_tick_count::now();
+
+ // stat->addSample ((tsc_tick_count::now() - startTime).ticks());
+ stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
+
+ if (timeStat::logEvent(timerEnumValue)) {
+ if (!stats_ptr)
+ stats_ptr = __kmp_stats_thread_ptr;
+ stats_ptr->push_event(
+ startTime.getValue() - __kmp_stats_start_time.getValue(),
+ finishTime.getValue() - __kmp_stats_start_time.getValue(),
+ __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
+ stats_ptr->decrementNestValue();
+ }
+
+ /* We accept the risk that we drop a sample because it really did start at
+ t==0. */
+ startTime = 0;
+ return;
}
-/* ************************************************************** */
/* ************* partitionedTimers member functions ************* */
-partitionedTimers::partitionedTimers() {
- timer_stack.reserve(8);
-}
+partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
// add a timer to this collection of partitioned timers.
-void partitionedTimers::add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer) {
- KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST+1);
- timers[timer_index] = timer_pointer;
+void partitionedTimers::add_timer(explicit_timer_e timer_index,
+ explicitTimer *timer_pointer) {
+ KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST + 1);
+ timers[timer_index] = timer_pointer;
}
// initialize the partitioned timers to an initial timer
void partitionedTimers::init(timerPair init_timer_pair) {
- KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
- timer_stack.push_back(init_timer_pair);
- timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
+ KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
+ timer_stack.push_back(init_timer_pair);
+ timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
}
// stop/save the current timer, and start the new timer (timer_pair)
@@ -209,33 +199,33 @@ void partitionedTimers::init(timerPair i
// the one you are trying to push, then it only manipulates the stack,
// and it won't stop/start the currently running timer.
void partitionedTimers::push(timerPair timer_pair) {
- // get the current timer
- // stop current timer
- // push new timer
- // start the new timer
- KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
- timerPair current_timer = timer_stack.back();
- timer_stack.push_back(timer_pair);
- if(current_timer != timer_pair) {
- timers[current_timer.get_index()]->pause();
- timers[timer_pair.get_index()]->start(timer_pair.get_timer());
- }
+ // get the current timer
+ // stop current timer
+ // push new timer
+ // start the new timer
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
+ timerPair current_timer = timer_stack.back();
+ timer_stack.push_back(timer_pair);
+ if (current_timer != timer_pair) {
+ timers[current_timer.get_index()]->pause();
+ timers[timer_pair.get_index()]->start(timer_pair.get_timer());
+ }
}
// stop/discard the current timer, and start the previously saved timer
void partitionedTimers::pop() {
- // get the current timer
- // stop current timer
- // pop current timer
- // get the new current timer and start it back up
- KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
- timerPair current_timer = timer_stack.back();
- timer_stack.pop_back();
- timerPair new_timer = timer_stack.back();
- if(current_timer != new_timer) {
- timers[current_timer.get_index()]->stop(current_timer.get_timer());
- timers[new_timer.get_index()]->resume();
- }
+ // get the current timer
+ // stop current timer
+ // pop current timer
+ // get the new current timer and start it back up
+ KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
+ timerPair current_timer = timer_stack.back();
+ timer_stack.pop_back();
+ timerPair new_timer = timer_stack.back();
+ if (current_timer != new_timer) {
+ timers[current_timer.get_index()]->stop(current_timer.get_timer());
+ timers[new_timer.get_index()]->resume();
+ }
}
// Wind up all the currently running timers.
@@ -243,481 +233,483 @@ void partitionedTimers::pop() {
// After this is called, init() must be run again to initialize the
// stack of timers
void partitionedTimers::windup() {
- while(timer_stack.size() > 1) {
- this->pop();
- }
- if(timer_stack.size() > 0) {
- timerPair last_timer = timer_stack.back();
- timer_stack.pop_back();
- timers[last_timer.get_index()]->stop(last_timer.get_timer());
- }
+ while (timer_stack.size() > 1) {
+ this->pop();
+ }
+ if (timer_stack.size() > 0) {
+ timerPair last_timer = timer_stack.back();
+ timer_stack.pop_back();
+ timers[last_timer.get_index()]->stop(last_timer.get_timer());
+ }
}
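The push()/pop() pair is what makes these timers "partitioned": time spent in a nested region is excluded from the enclosing timer by pausing it, rather than letting the two overlap. A minimal, self-contained sketch of the same idea (Timer and the printed trace are stand-ins for illustration, not the runtime's explicitTimer):

  #include <cstdio>
  #include <vector>

  struct Timer {
    const char *name;
    void start() { std::printf("start  %s\n", name); }
    void pause() { std::printf("pause  %s\n", name); }
    void resume() { std::printf("resume %s\n", name); }
    void stop() { std::printf("stop   %s\n", name); }
  };

  struct PartitionedTimers {
    std::vector<Timer *> stack;
    void init(Timer *t) { stack.push_back(t); t->start(); }
    void push(Timer *t) { // enter a nested region
      Timer *current = stack.back();
      stack.push_back(t);
      if (current != t) { current->pause(); t->start(); } // exclude nested time
    }
    void pop() { // leave the nested region
      Timer *current = stack.back();
      stack.pop_back();
      if (current != stack.back()) { current->stop(); stack.back()->resume(); }
    }
  };

  int main() {
    Timer serial{"serial"}, parallel{"parallel"};
    PartitionedTimers pt;
    pt.init(&serial);   // serial starts accumulating
    pt.push(&parallel); // serial pauses, parallel starts
    pt.pop();           // parallel stops, serial resumes
    return 0;
  }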
-/* ******************************************************************* */
/* ************* kmp_stats_event_vector member functions ************* */
void kmp_stats_event_vector::deallocate() {
- __kmp_free(events);
- internal_size = 0;
- allocated_size = 0;
- events = NULL;
+ __kmp_free(events);
+ internal_size = 0;
+ allocated_size = 0;
+ events = NULL;
}
// This function is for qsort() which requires the compare function to return
-// either a negative number if event1 < event2, a positive number if event1 > event2
-// or zero if event1 == event2.
-// This sorts by start time (lowest to highest).
-int compare_two_events(const void* event1, const void* event2) {
- kmp_stats_event* ev1 = (kmp_stats_event*)event1;
- kmp_stats_event* ev2 = (kmp_stats_event*)event2;
-
- if(ev1->getStart() < ev2->getStart()) return -1;
- else if(ev1->getStart() > ev2->getStart()) return 1;
- else return 0;
+// either a negative number if event1 < event2, a positive number if event1 >
+// event2 or zero if event1 == event2. This sorts by start time (lowest to
+// highest).
+int compare_two_events(const void *event1, const void *event2) {
+ kmp_stats_event *ev1 = (kmp_stats_event *)event1;
+ kmp_stats_event *ev2 = (kmp_stats_event *)event2;
+
+ if (ev1->getStart() < ev2->getStart())
+ return -1;
+ else if (ev1->getStart() > ev2->getStart())
+ return 1;
+ else
+ return 0;
}
void kmp_stats_event_vector::sort() {
- qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
+ qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
}
-/* *********************************************************** */
/* ************* kmp_stats_list member functions ************* */
// returns a pointer to newly created stats node
-kmp_stats_list* kmp_stats_list::push_back(int gtid) {
- kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list));
- // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used)
- new (newnode) kmp_stats_list();
- newnode->setGtid(gtid);
- newnode->prev = this->prev;
- newnode->next = this;
- newnode->prev->next = newnode;
- newnode->next->prev = newnode;
- return newnode;
+kmp_stats_list *kmp_stats_list::push_back(int gtid) {
+ kmp_stats_list *newnode =
+ (kmp_stats_list *)__kmp_allocate(sizeof(kmp_stats_list));
+ // placement new, only requires space and pointer and initializes (so
+ // __kmp_allocate instead of C++ new[] is used)
+ new (newnode) kmp_stats_list();
+ newnode->setGtid(gtid);
+ newnode->prev = this->prev;
+ newnode->next = this;
+ newnode->prev->next = newnode;
+ newnode->next->prev = newnode;
+ return newnode;
}
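The allocation pattern here (raw bytes from __kmp_allocate, a constructor run via placement new, and a matching explicit destructor call in deallocate() below) is easy to get wrong, so here is a self-contained sketch of just that pattern, with malloc standing in for the runtime allocator:

  #include <cstdlib>
  #include <new> // placement new

  struct Node {
    Node *prev, *next;
    int gtid;
    Node() : prev(this), next(this), gtid(0) {}
  };

  int main() {
    void *raw = std::malloc(sizeof(Node)); // allocate storage only
    if (!raw)
      return 1;
    Node *n = new (raw) Node(); // placement new: construct in place
    n->gtid = 42;
    n->~Node();     // placement new means the destructor is called explicitly
    std::free(raw); // ...before the raw storage is released
    return 0;
  }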
void kmp_stats_list::deallocate() {
- kmp_stats_list* ptr = this->next;
- kmp_stats_list* delptr = this->next;
- while(ptr != this) {
- delptr = ptr;
- ptr=ptr->next;
- // placement new means we have to explicitly call destructor.
- delptr->_event_vector.deallocate();
- delptr->~kmp_stats_list();
- __kmp_free(delptr);
- }
+ kmp_stats_list *ptr = this->next;
+ kmp_stats_list *delptr = this->next;
+ while (ptr != this) {
+ delptr = ptr;
+ ptr = ptr->next;
+ // placement new means we have to explicitly call destructor.
+ delptr->_event_vector.deallocate();
+ delptr->~kmp_stats_list();
+ __kmp_free(delptr);
+ }
}
kmp_stats_list::iterator kmp_stats_list::begin() {
- kmp_stats_list::iterator it;
- it.ptr = this->next;
- return it;
+ kmp_stats_list::iterator it;
+ it.ptr = this->next;
+ return it;
}
kmp_stats_list::iterator kmp_stats_list::end() {
- kmp_stats_list::iterator it;
- it.ptr = this;
- return it;
+ kmp_stats_list::iterator it;
+ it.ptr = this;
+ return it;
}
int kmp_stats_list::size() {
- int retval;
- kmp_stats_list::iterator it;
- for(retval=0, it=begin(); it!=end(); it++, retval++) {}
- return retval;
+ int retval;
+ kmp_stats_list::iterator it;
+ for (retval = 0, it = begin(); it != end(); it++, retval++) {
+ }
+ return retval;
}
-/* ********************************************************************* */
/* ************* kmp_stats_list::iterator member functions ************* */
kmp_stats_list::iterator::iterator() : ptr(NULL) {}
kmp_stats_list::iterator::~iterator() {}
kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
- this->ptr = this->ptr->next;
- return *this;
+ this->ptr = this->ptr->next;
+ return *this;
}
kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
- this->ptr = this->ptr->next;
- return *this;
+ this->ptr = this->ptr->next;
+ return *this;
}
kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
- this->ptr = this->ptr->prev;
- return *this;
+ this->ptr = this->ptr->prev;
+ return *this;
}
kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
- this->ptr = this->ptr->prev;
- return *this;
+ this->ptr = this->ptr->prev;
+ return *this;
}
-bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) {
- return this->ptr!=rhs.ptr;
+bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator &rhs) {
+ return this->ptr != rhs.ptr;
}
-bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) {
- return this->ptr==rhs.ptr;
+bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator &rhs) {
+ return this->ptr == rhs.ptr;
}
-kmp_stats_list* kmp_stats_list::iterator::operator*() const {
- return this->ptr;
+kmp_stats_list *kmp_stats_list::iterator::operator*() const {
+ return this->ptr;
}
-/* *************************************************************** */
/* ************* kmp_stats_output_module functions ************** */
-const char* kmp_stats_output_module::eventsFileName = NULL;
-const char* kmp_stats_output_module::plotFileName = NULL;
-int kmp_stats_output_module::printPerThreadFlag = 0;
+const char *kmp_stats_output_module::eventsFileName = NULL;
+const char *kmp_stats_output_module::plotFileName = NULL;
+int kmp_stats_output_module::printPerThreadFlag = 0;
int kmp_stats_output_module::printPerThreadEventsFlag = 0;
-// init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output
-void kmp_stats_output_module::init()
-{
- char * statsFileName = getenv("KMP_STATS_FILE");
- eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
- plotFileName = getenv("KMP_STATS_PLOT_FILE");
- char * threadStats = getenv("KMP_STATS_THREADS");
- char * threadEvents = getenv("KMP_STATS_EVENTS");
-
- // set the stats output filenames based on environment variables and defaults
- if(statsFileName) {
- // append the process id to the output filename
- // events.csv --> events-pid.csv
- size_t index;
- std::string baseFileName, pid, suffix;
- std::stringstream ss;
- outputFileName = std::string(statsFileName);
- index = outputFileName.find_last_of('.');
- if(index == std::string::npos) {
- baseFileName = outputFileName;
- } else {
- baseFileName = outputFileName.substr(0, index);
- suffix = outputFileName.substr(index);
- }
- ss << getpid();
- pid = ss.str();
- outputFileName = baseFileName + "-" + pid + suffix;
- }
- eventsFileName = eventsFileName ? eventsFileName : "events.dat";
- plotFileName = plotFileName ? plotFileName : "events.plt";
-
- // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes
- printPerThreadFlag = __kmp_str_match_true(threadStats);
- printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
-
- if(printPerThreadEventsFlag) {
- // assigns a color to each timer for printing
- setupEventColors();
+// init() is called very near the beginning of execution time in the constructor
+// of __kmp_stats_global_output
+void kmp_stats_output_module::init() {
+ char *statsFileName = getenv("KMP_STATS_FILE");
+ eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
+ plotFileName = getenv("KMP_STATS_PLOT_FILE");
+ char *threadStats = getenv("KMP_STATS_THREADS");
+ char *threadEvents = getenv("KMP_STATS_EVENTS");
+
+ // set the stats output filenames based on environment variables and defaults
+ if (statsFileName) {
+ // append the process id to the output filename
+ // events.csv --> events-pid.csv
+ size_t index;
+ std::string baseFileName, pid, suffix;
+ std::stringstream ss;
+ outputFileName = std::string(statsFileName);
+ index = outputFileName.find_last_of('.');
+ if (index == std::string::npos) {
+ baseFileName = outputFileName;
} else {
- // will clear flag so that no event will be logged
- timeStat::clearEventFlags();
+ baseFileName = outputFileName.substr(0, index);
+ suffix = outputFileName.substr(index);
}
+ ss << getpid();
+ pid = ss.str();
+ outputFileName = baseFileName + "-" + pid + suffix;
+ }
+ eventsFileName = eventsFileName ? eventsFileName : "events.dat";
+ plotFileName = plotFileName ? plotFileName : "events.plt";
+
+ // set the flags based on environment variables matching: true, on, 1, .true.
+ // , .t. , yes
+ printPerThreadFlag = __kmp_str_match_true(threadStats);
+ printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
+
+ if (printPerThreadEventsFlag) {
+ // assigns a color to each timer for printing
+ setupEventColors();
+ } else {
+ // will clear flag so that no event will be logged
+ timeStat::clearEventFlags();
+ }
- return;
+ return;
}
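As a concrete illustration of the behaviour coded above (for a library built with KMP_STATS_ENABLED): setting KMP_STATS_FILE=stats.csv makes a run whose pid happens to be 12345 append its report to stats-12345.csv instead of stderr; KMP_STATS_THREADS=true (or on, 1, .true., .t., yes) turns on the per-thread sections; and KMP_STATS_EVENTS=true enables event logging, with the timeline written to KMP_STATS_EVENTS_FILE (default events.dat) and the Ploticus script to KMP_STATS_PLOT_FILE (default events.plt).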
void kmp_stats_output_module::setupEventColors() {
- int i;
- int globalColorIndex = 0;
- int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
- for(i=0;i<TIMER_LAST;i++) {
- if(timeStat::logEvent((timer_e)i)) {
- timerColorInfo[i] = globalColorArray[globalColorIndex];
- globalColorIndex = (globalColorIndex+1)%numGlobalColors;
- }
- }
- return;
-}
-
-void kmp_stats_output_module::printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats)
-{
- fprintf (statsOut, "Timer, SampleCount, Min, Mean, Max, Total, SD\n");
- for (timer_e s = timer_e(0); s<TIMER_LAST; s = timer_e(s+1)) {
- statistic const * stat = &theStats[s];
- char tag = timeStat::noUnits(s) ? ' ' : 'T';
-
- fprintf (statsOut, "%-28s, %s\n", timeStat::name(s), stat->format(tag, true).c_str());
- }
- // Also print the Total_ versions of times.
- for (timer_e s = timer_e(0); s<TIMER_LAST; s = timer_e(s+1)) {
- char tag = timeStat::noUnits(s) ? ' ' : 'T';
- if (totalStats && !timeStat::noTotal(s))
- fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s), totalStats[s].format(tag, true).c_str());
- }
-}
-
-void kmp_stats_output_module::printCounterStats(FILE *statsOut, statistic const * theStats)
-{
- fprintf (statsOut, "Counter, ThreadCount, Min, Mean, Max, Total, SD\n");
- for (int s = 0; s<COUNTER_LAST; s++) {
- statistic const * stat = &theStats[s];
- fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str());
- }
-}
-
-void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters)
-{
- // We print all the counters even if they are zero.
- // That makes it easier to slice them into a spreadsheet if you need to.
- fprintf (statsOut, "\nCounter, Count\n");
- for (int c = 0; c<COUNTER_LAST; c++) {
- counter const * stat = &theCounters[c];
- fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str());
- }
-}
-
-void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) {
- // sort by start time before printing
- theEvents->sort();
- for (int i = 0; i < theEvents->size(); i++) {
- kmp_stats_event ev = theEvents->at(i);
- rgb_color color = getEventColor(ev.getTimerName());
- fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n",
- gtid,
- ev.getStart(),
- ev.getStop(),
- 1.2 - (ev.getNestLevel() * 0.2),
- color.r, color.g, color.b,
- timeStat::name(ev.getTimerName())
- );
- }
- return;
-}
-
-void kmp_stats_output_module::windupExplicitTimers()
-{
- // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explicit timers in all threads
- // and say "it's over".
- // If the timer wasn't running, this won't record anything anyway.
- kmp_stats_list::iterator it;
- for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
- kmp_stats_list* ptr = *it;
- ptr->getPartitionedTimers()->windup();
- for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
- ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
- }
+ int i;
+ int globalColorIndex = 0;
+ int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
+ for (i = 0; i < TIMER_LAST; i++) {
+ if (timeStat::logEvent((timer_e)i)) {
+ timerColorInfo[i] = globalColorArray[globalColorIndex];
+ globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
+ }
+ }
+ return;
+}
+
+void kmp_stats_output_module::printTimerStats(FILE *statsOut,
+ statistic const *theStats,
+ statistic const *totalStats) {
+ fprintf(statsOut, "Timer, SampleCount, Min, "
+ "Mean, Max, Total, SD\n");
+ for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
+ statistic const *stat = &theStats[s];
+ char tag = timeStat::noUnits(s) ? ' ' : 'T';
+
+ fprintf(statsOut, "%-28s, %s\n", timeStat::name(s),
+ stat->format(tag, true).c_str());
+ }
+ // Also print the Total_ versions of times.
+ for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
+ char tag = timeStat::noUnits(s) ? ' ' : 'T';
+ if (totalStats && !timeStat::noTotal(s))
+ fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s),
+ totalStats[s].format(tag, true).c_str());
+ }
+}
+
+void kmp_stats_output_module::printCounterStats(FILE *statsOut,
+ statistic const *theStats) {
+ fprintf(statsOut, "Counter, ThreadCount, Min, Mean, "
+ " Max, Total, SD\n");
+ for (int s = 0; s < COUNTER_LAST; s++) {
+ statistic const *stat = &theStats[s];
+ fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)),
+ stat->format(' ', true).c_str());
+ }
+}
+
+void kmp_stats_output_module::printCounters(FILE *statsOut,
+ counter const *theCounters) {
+ // We print all the counters even if they are zero.
+ // That makes it easier to slice them into a spreadsheet if you need to.
+ fprintf(statsOut, "\nCounter, Count\n");
+ for (int c = 0; c < COUNTER_LAST; c++) {
+ counter const *stat = &theCounters[c];
+ fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(c)),
+ formatSI(stat->getValue(), 9, ' ').c_str());
+ }
+}
+
+void kmp_stats_output_module::printEvents(FILE *eventsOut,
+ kmp_stats_event_vector *theEvents,
+ int gtid) {
+ // sort by start time before printing
+ theEvents->sort();
+ for (int i = 0; i < theEvents->size(); i++) {
+ kmp_stats_event ev = theEvents->at(i);
+ rgb_color color = getEventColor(ev.getTimerName());
+ fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
+ ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2),
+ color.r, color.g, color.b, timeStat::name(ev.getTimerName()));
+ }
+ return;
+}
+
+void kmp_stats_output_module::windupExplicitTimers() {
+ // Wind up any explicit timers. We assume that it's fair at this point to just
+ // walk all the explicit timers in all threads and say "it's over".
+ // If the timer wasn't running, this won't record anything anyway.
+ kmp_stats_list::iterator it;
+ for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
+ kmp_stats_list *ptr = *it;
+ ptr->getPartitionedTimers()->windup();
+ for (int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) {
+ ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
}
+ }
}
void kmp_stats_output_module::printPloticusFile() {
- int i;
- int size = __kmp_stats_list->size();
- FILE* plotOut = fopen(plotFileName, "w+");
-
- fprintf(plotOut, "#proc page\n"
- " pagesize: 15 10\n"
- " scale: 1.0\n\n");
-
- fprintf(plotOut, "#proc getdata\n"
- " file: %s\n\n",
- eventsFileName);
-
- fprintf(plotOut, "#proc areadef\n"
- " title: OpenMP Sampling Timeline\n"
- " titledetails: align=center size=16\n"
- " rectangle: 1 1 13 9\n"
- " xautorange: datafield=2,3\n"
- " yautorange: -1 %d\n\n",
- size);
-
- fprintf(plotOut, "#proc xaxis\n"
- " stubs: inc\n"
- " stubdetails: size=12\n"
- " label: Time (ticks)\n"
- " labeldetails: size=14\n\n");
-
- fprintf(plotOut, "#proc yaxis\n"
- " stubs: inc 1\n"
- " stubrange: 0 %d\n"
- " stubdetails: size=12\n"
- " label: Thread #\n"
- " labeldetails: size=14\n\n",
- size-1);
-
- fprintf(plotOut, "#proc bars\n"
- " exactcolorfield: 5\n"
- " axis: x\n"
- " locfield: 1\n"
- " segmentfields: 2 3\n"
- " barwidthfield: 4\n\n");
-
- // create legend entries corresponding to the timer color
- for(i=0;i<TIMER_LAST;i++) {
- if(timeStat::logEvent((timer_e)i)) {
- rgb_color c = getEventColor((timer_e)i);
- fprintf(plotOut, "#proc legendentry\n"
- " sampletype: color\n"
- " label: %s\n"
- " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
- timeStat::name((timer_e)i),
- c.r, c.g, c.b);
-
- }
- }
-
- fprintf(plotOut, "#proc legend\n"
- " format: down\n"
- " location: max max\n\n");
- fclose(plotOut);
- return;
-}
-
-/*
- * Print some useful information about
- * * the date and time this experiment ran.
- * * the machine on which it ran.
- * We output all of this as stylised comments, though we may decide to parse some of it.
- */
-void kmp_stats_output_module::printHeaderInfo(FILE * statsOut)
-{
- std::time_t now = std::time(0);
- char buffer[40];
- char hostName[80];
-
- std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
- fprintf (statsOut, "# Time of run: %s\n", &buffer[0]);
- if (gethostname(&hostName[0], sizeof(hostName)) == 0)
- fprintf (statsOut,"# Hostname: %s\n", &hostName[0]);
+ int i;
+ int size = __kmp_stats_list->size();
+ FILE *plotOut = fopen(plotFileName, "w+");
+
+ fprintf(plotOut, "#proc page\n"
+ " pagesize: 15 10\n"
+ " scale: 1.0\n\n");
+
+ fprintf(plotOut, "#proc getdata\n"
+ " file: %s\n\n",
+ eventsFileName);
+
+ fprintf(plotOut, "#proc areadef\n"
+ " title: OpenMP Sampling Timeline\n"
+ " titledetails: align=center size=16\n"
+ " rectangle: 1 1 13 9\n"
+ " xautorange: datafield=2,3\n"
+ " yautorange: -1 %d\n\n",
+ size);
+
+ fprintf(plotOut, "#proc xaxis\n"
+ " stubs: inc\n"
+ " stubdetails: size=12\n"
+ " label: Time (ticks)\n"
+ " labeldetails: size=14\n\n");
+
+ fprintf(plotOut, "#proc yaxis\n"
+ " stubs: inc 1\n"
+ " stubrange: 0 %d\n"
+ " stubdetails: size=12\n"
+ " label: Thread #\n"
+ " labeldetails: size=14\n\n",
+ size - 1);
+
+ fprintf(plotOut, "#proc bars\n"
+ " exactcolorfield: 5\n"
+ " axis: x\n"
+ " locfield: 1\n"
+ " segmentfields: 2 3\n"
+ " barwidthfield: 4\n\n");
+
+ // create legend entries corresponding to the timer color
+ for (i = 0; i < TIMER_LAST; i++) {
+ if (timeStat::logEvent((timer_e)i)) {
+ rgb_color c = getEventColor((timer_e)i);
+ fprintf(plotOut, "#proc legendentry\n"
+ " sampletype: color\n"
+ " label: %s\n"
+ " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
+ timeStat::name((timer_e)i), c.r, c.g, c.b);
+ }
+ }
+
+ fprintf(plotOut, "#proc legend\n"
+ " format: down\n"
+ " location: max max\n\n");
+ fclose(plotOut);
+ return;
+}
+
+/* Print some useful information about
+ * the date and time this experiment ran.
+ * the machine on which it ran.
+ We output all of this as stylised comments, though we may decide to parse
+ some of it. */
+void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
+ std::time_t now = std::time(0);
+ char buffer[40];
+ char hostName[80];
+
+ std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
+ fprintf(statsOut, "# Time of run: %s\n", &buffer[0]);
+ if (gethostname(&hostName[0], sizeof(hostName)) == 0)
+ fprintf(statsOut, "# Hostname: %s\n", &hostName[0]);
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- fprintf (statsOut, "# CPU: %s\n", &__kmp_cpuinfo.name[0]);
- fprintf (statsOut, "# Family: %d, Model: %d, Stepping: %d\n", __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
- if (__kmp_cpuinfo.frequency == 0)
- fprintf (statsOut, "# Nominal frequency: Unknown\n");
- else
- fprintf (statsOut, "# Nominal frequency: %sz\n", formatSI(double(__kmp_cpuinfo.frequency),9,'H').c_str());
+ fprintf(statsOut, "# CPU: %s\n", &__kmp_cpuinfo.name[0]);
+ fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n",
+ __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
+ if (__kmp_cpuinfo.frequency == 0)
+ fprintf(statsOut, "# Nominal frequency: Unknown\n");
+ else
+ fprintf(statsOut, "# Nominal frequency: %sz\n",
+ formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str());
#endif
}
-void kmp_stats_output_module::outputStats(const char* heading)
-{
- // Stop all the explicit timers in all threads
- // Do this before declaring the local statistics because they have constructors so will take time to create.
- windupExplicitTimers();
-
- statistic allStats[TIMER_LAST];
- statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of normal timer stats */
- statistic allCounters[COUNTER_LAST];
-
- FILE * statsOut = !outputFileName.empty() ? fopen (outputFileName.c_str(), "a+") : stderr;
- if (!statsOut)
- statsOut = stderr;
-
- FILE * eventsOut;
- if (eventPrintingEnabled()) {
- eventsOut = fopen(eventsFileName, "w+");
- }
-
- printHeaderInfo (statsOut);
- fprintf(statsOut, "%s\n",heading);
- // Accumulate across threads.
- kmp_stats_list::iterator it;
- for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
- int t = (*it)->getGtid();
- // Output per thread stats if requested.
- if (printPerThreadFlag) {
- fprintf (statsOut, "Thread %d\n", t);
- printTimerStats (statsOut, (*it)->getTimers(), 0);
- printCounters (statsOut, (*it)->getCounters());
- fprintf (statsOut,"\n");
- }
- // Output per thread events if requested.
- if (eventPrintingEnabled()) {
- kmp_stats_event_vector events = (*it)->getEventVector();
- printEvents(eventsOut, &events, t);
- }
-
- // Accumulate timers.
- for (timer_e s = timer_e(0); s<TIMER_LAST; s = timer_e(s+1)) {
- // See if we should ignore this timer when aggregating
- if ((timeStat::masterOnly(s) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
- (timeStat::workerOnly(s) && (t == 0)) // Timer is only valid on a worker and this thread is the master
- )
- {
- continue;
- }
-
- statistic * threadStat = (*it)->getTimer(s);
- allStats[s] += *threadStat;
-
- // Add Total stats for timers that are valid in more than one thread
- if (!timeStat::noTotal(s))
- totalStats[s].addSample(threadStat->getTotal());
- }
-
- // Accumulate counters.
- for (counter_e c = counter_e(0); c<COUNTER_LAST; c = counter_e(c+1)) {
- if (counter::masterOnly(c) && t != 0)
- continue;
- allCounters[c].addSample ((*it)->getCounter(c)->getValue());
- }
+void kmp_stats_output_module::outputStats(const char *heading) {
+ // Stop all the explicit timers in all threads
+ // Do this before declaring the local statistics because they have
+ // constructors so will take time to create.
+ windupExplicitTimers();
+
+ statistic allStats[TIMER_LAST];
+ statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of
+ normal timer stats */
+ statistic allCounters[COUNTER_LAST];
+
+ FILE *statsOut =
+ !outputFileName.empty() ? fopen(outputFileName.c_str(), "a+") : stderr;
+ if (!statsOut)
+ statsOut = stderr;
+
+ FILE *eventsOut;
+ if (eventPrintingEnabled()) {
+ eventsOut = fopen(eventsFileName, "w+");
+ }
+
+ printHeaderInfo(statsOut);
+ fprintf(statsOut, "%s\n", heading);
+ // Accumulate across threads.
+ kmp_stats_list::iterator it;
+ for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
+ int t = (*it)->getGtid();
+ // Output per thread stats if requested.
+ if (printPerThreadFlag) {
+ fprintf(statsOut, "Thread %d\n", t);
+ printTimerStats(statsOut, (*it)->getTimers(), 0);
+ printCounters(statsOut, (*it)->getCounters());
+ fprintf(statsOut, "\n");
}
-
+ // Output per thread events if requested.
if (eventPrintingEnabled()) {
- printPloticusFile();
- fclose(eventsOut);
+ kmp_stats_event_vector events = (*it)->getEventVector();
+ printEvents(eventsOut, &events, t);
}
- fprintf (statsOut, "Aggregate for all threads\n");
- printTimerStats (statsOut, &allStats[0], &totalStats[0]);
- fprintf (statsOut, "\n");
- printCounterStats (statsOut, &allCounters[0]);
+ // Accumulate timers.
+ for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
+ // See if we should ignore this timer when aggregating
+ if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master
+ // and this thread is worker
+ (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker
+ // and this thread is the master
+ ) {
+ continue;
+ }
+
+ statistic *threadStat = (*it)->getTimer(s);
+ allStats[s] += *threadStat;
+
+ // Add Total stats for timers that are valid in more than one thread
+ if (!timeStat::noTotal(s))
+ totalStats[s].addSample(threadStat->getTotal());
+ }
+
+ // Accumulate counters.
+ for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
+ if (counter::masterOnly(c) && t != 0)
+ continue;
+ allCounters[c].addSample((*it)->getCounter(c)->getValue());
+ }
+ }
+
+ if (eventPrintingEnabled()) {
+ printPloticusFile();
+ fclose(eventsOut);
+ }
+
+ fprintf(statsOut, "Aggregate for all threads\n");
+ printTimerStats(statsOut, &allStats[0], &totalStats[0]);
+ fprintf(statsOut, "\n");
+ printCounterStats(statsOut, &allCounters[0]);
- if (statsOut != stderr)
- fclose(statsOut);
+ if (statsOut != stderr)
+ fclose(statsOut);
}
-/* ************************************************** */
/* ************* exported C functions ************** */
-// no name mangling for these functions, we want the c files to be able to get at these functions
+// no name mangling for these functions, we want the c files to be able to get
+// at these functions
extern "C" {
-void __kmp_reset_stats()
-{
- kmp_stats_list::iterator it;
- for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
- timeStat * timers = (*it)->getTimers();
- counter * counters = (*it)->getCounters();
- explicitTimer * eTimers = (*it)->getExplicitTimers();
-
- for (int t = 0; t<TIMER_LAST; t++)
- timers[t].reset();
+void __kmp_reset_stats() {
+ kmp_stats_list::iterator it;
+ for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
+ timeStat *timers = (*it)->getTimers();
+ counter *counters = (*it)->getCounters();
+ explicitTimer *eTimers = (*it)->getExplicitTimers();
- for (int c = 0; c<COUNTER_LAST; c++)
- counters[c].reset();
+ for (int t = 0; t < TIMER_LAST; t++)
+ timers[t].reset();
- for (int t=0; t<EXPLICIT_TIMER_LAST; t++)
- eTimers[t].reset();
+ for (int c = 0; c < COUNTER_LAST; c++)
+ counters[c].reset();
- // reset the event vector so all previous events are "erased"
- (*it)->resetEventVector();
- }
+ for (int t = 0; t < EXPLICIT_TIMER_LAST; t++)
+ eTimers[t].reset();
+
+ // reset the event vector so all previous events are "erased"
+ (*it)->resetEventVector();
+ }
}
-// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
-void __kmp_output_stats(const char * heading)
-{
- __kmp_stats_global_output->outputStats(heading);
- __kmp_reset_stats();
-}
-
-void __kmp_accumulate_stats_at_exit(void)
-{
- // Only do this once.
- if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
- return;
-
- __kmp_output_stats("Statistics on exit");
-}
-
-void __kmp_stats_init(void)
-{
- __kmp_init_tas_lock( & __kmp_stats_lock );
- __kmp_stats_start_time = tsc_tick_count::now();
- __kmp_stats_global_output = new kmp_stats_output_module();
- __kmp_stats_list = new kmp_stats_list();
-}
-
-void __kmp_stats_fini(void)
-{
- __kmp_accumulate_stats_at_exit();
- __kmp_stats_list->deallocate();
- delete __kmp_stats_global_output;
- delete __kmp_stats_list;
+// This function will reset all stats and stop all threads' explicit timers if
+// they haven't been stopped already.
+void __kmp_output_stats(const char *heading) {
+ __kmp_stats_global_output->outputStats(heading);
+ __kmp_reset_stats();
}
-} // extern "C"
+void __kmp_accumulate_stats_at_exit(void) {
+ // Only do this once.
+ if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
+ return;
+
+ __kmp_output_stats("Statistics on exit");
+}
+void __kmp_stats_init(void) {
+ __kmp_init_tas_lock(&__kmp_stats_lock);
+ __kmp_stats_start_time = tsc_tick_count::now();
+ __kmp_stats_global_output = new kmp_stats_output_module();
+ __kmp_stats_list = new kmp_stats_list();
+}
+
+void __kmp_stats_fini(void) {
+ __kmp_accumulate_stats_at_exit();
+ __kmp_stats_list->deallocate();
+ delete __kmp_stats_global_output;
+ delete __kmp_stats_list;
+}
+
+} // extern "C"
Modified: openmp/trunk/runtime/src/kmp_stats.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.h (original)
+++ openmp/trunk/runtime/src/kmp_stats.h Fri May 12 13:01:32 2017
@@ -15,28 +15,29 @@
//
//===----------------------------------------------------------------------===//
+
#include "kmp_config.h"
#if KMP_STATS_ENABLED
-/*
- * Statistics accumulator.
- * Accumulates number of samples and computes min, max, mean, standard deviation on the fly.
- *
- * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
+/* Statistics accumulator.
+ Accumulates number of samples and computes min, max, mean, standard deviation
+ on the fly.
+
+ Online variance calculation algorithm from
+ http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
*/
+#include "kmp_stats_timing.h"
#include <limits>
#include <math.h>
-#include <vector>
-#include <string>
-#include <stdint.h>
#include <new> // placement new
-#include "kmp_stats_timing.h"
+#include <stdint.h>
+#include <string>
+#include <vector>
-/*
- * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
- * are intended for the runtime library developer.
- */
+/* Enable developer statistics here if you want them. They are more detailed
+ than is useful for application characterisation and are intended for the
+ runtime library developer. */
// #define KMP_DEVELOPER_STATS 1
/*!
@@ -45,11 +46,13 @@
*
*/
enum stats_flags_e {
- noTotal = 1<<0, //!< do not show a TOTAL_aggregation for this statistic
- onlyInMaster = 1<<1, //!< statistic is valid only for master
- noUnits = 1<<2, //!< statistic doesn't need units printed next to it in output
- notInMaster = 1<<3, //!< statistic is valid only for non-master threads
- logEvent = 1<<4 //!< statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers)
+ noTotal = 1 << 0, //!< do not show a TOTAL_aggregation for this statistic
+ onlyInMaster = 1 << 1, //!< statistic is valid only for master
+ noUnits =
+ 1 << 2, //!< statistic doesn't need units printed next to it in output
+ notInMaster = 1 << 3, //!< statistic is valid only for non-master threads
+ logEvent = 1 << 4 //!< statistic can be logged on the event timeline when
+ //! KMP_STATS_EVENTS is on (valid only for timers)
};
/*!
@@ -58,123 +61,143 @@ enum stats_flags_e {
*
*/
enum stats_state_e {
- IDLE,
- SERIAL_REGION,
- FORK_JOIN_BARRIER,
- PLAIN_BARRIER,
- TASKWAIT,
- TASKYIELD,
- TASKGROUP,
- IMPLICIT_TASK,
- EXPLICIT_TASK
+ IDLE,
+ SERIAL_REGION,
+ FORK_JOIN_BARRIER,
+ PLAIN_BARRIER,
+ TASKWAIT,
+ TASKYIELD,
+ TASKGROUP,
+ IMPLICIT_TASK,
+ EXPLICIT_TASK
};
/*!
* \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
*
- * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
+ * @param macro a user defined macro that takes three arguments -
+ * macro(COUNTER_NAME, flags, arg)
* @param arg a user defined argument to send to the user defined macro
*
- * \details A counter counts the occurrence of some event.
- * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread
- * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
- * The min,mean,max are therefore the values for the threads.
- * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do.
- * All of the tables and printing is generated from this macro.
+ * \details A counter counts the occurrence of some event. Each thread
+ * accumulates its own count, at the end of execution the counts are aggregated
+ * treating each thread as a separate measurement. (Unless onlyInMaster is set,
+ * in which case there's only a single measurement). The min,mean,max are
+ * therefore the values for the threads. Adding the counter here and then
+ * putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you
+ * need to do. All of the tables and printing is generated from this macro.
* Format is "macro(name, flags, arg)"
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_COUNTER(macro, arg) \
- macro (OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
- macro (OMP_NESTED_PARALLEL, 0, arg) \
- macro (OMP_FOR_static, 0, arg) \
- macro (OMP_FOR_static_steal, 0, arg) \
- macro (OMP_FOR_dynamic, 0, arg) \
- macro (OMP_DISTRIBUTE, 0, arg) \
- macro (OMP_BARRIER, 0, arg) \
- macro (OMP_CRITICAL,0, arg) \
- macro (OMP_SINGLE, 0, arg) \
- macro (OMP_MASTER, 0, arg) \
- macro (OMP_TEAMS, 0, arg) \
- macro (OMP_set_lock, 0, arg) \
- macro (OMP_test_lock, 0, arg) \
- macro (REDUCE_wait, 0, arg) \
- macro (REDUCE_nowait, 0, arg) \
- macro (OMP_TASKYIELD, 0, arg) \
- macro (OMP_TASKLOOP, 0, arg) \
- macro (TASK_executed, 0, arg) \
- macro (TASK_cancelled, 0, arg) \
- macro (TASK_stolen, 0, arg)
+// clang-format off
+#define KMP_FOREACH_COUNTER(macro, arg) \
+ macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, \
+ arg) macro(OMP_NESTED_PARALLEL, 0, arg) macro(OMP_FOR_static, 0, arg) \
+ macro(OMP_FOR_static_steal, 0, arg) macro(OMP_FOR_dynamic, 0, arg) \
+ macro(OMP_DISTRIBUTE, 0, arg) macro(OMP_BARRIER, 0, arg) \
+ macro(OMP_CRITICAL, 0, arg) macro(OMP_SINGLE, 0, arg) \
+ macro(OMP_MASTER, 0, arg) macro(OMP_TEAMS, 0, arg) \
+ macro(OMP_set_lock, 0, arg) macro(OMP_test_lock, 0, arg) \
+ macro(REDUCE_wait, 0, arg) \
+ macro(REDUCE_nowait, 0, arg) \
+ macro(OMP_TASKYIELD, 0, arg) \
+ macro(OMP_TASKLOOP, 0, arg) \
+ macro(TASK_executed, 0, arg) \
+ macro(TASK_cancelled, 0, arg) \
+ macro(TASK_stolen, 0, arg)
+// clang-format on
/*!
* \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
*
- * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
+ * @param macro a user defined macro that takes three arguments -
+ * macro(TIMER_NAME, flags, arg)
* @param arg a user defined argument to send to the user defined macro
*
- * \details A timer collects multiple samples of some count in each thread and then finally aggregates all of the samples from all of the threads.
- * For most timers the printing code also provides an aggregation over the thread totals. These are printed as TOTAL_foo.
- * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
- * as well).
- * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
- * Format is "macro(name, flags, arg)"
+ * \details A timer collects multiple samples of some count in each thread and
+ * then finally aggregates all of the samples from all of the threads. For most
+ * timers the printing code also provides an aggregation over the thread totals.
+ * These are printed as TOTAL_foo. The count is normally a time (in ticks),
+ * hence the name "timer". (But can be any value, so we use this for "number of
+ * arguments passed to fork" as well). For timers the threads are not
+ * significant, it's the individual observations that count, so the statistics
+ * are at that level. Format is "macro(name, flags, arg)"
*
* @ingroup STATS_GATHERING2
*/
-#define KMP_FOREACH_TIMER(macro, arg) \
- macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
- macro (FOR_static_scheduling, 0, arg) \
- macro (FOR_dynamic_scheduling, 0, arg) \
- macro (OMP_critical, 0, arg) \
- macro (OMP_critical_wait, 0, arg) \
- macro (OMP_single, 0, arg) \
- macro (OMP_master, 0, arg) \
- macro (OMP_idle, stats_flags_e::logEvent, arg) \
- macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \
- macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \
- macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \
- macro (OMP_parallel, stats_flags_e::logEvent, arg) \
- macro (OMP_task_immediate, 0, arg) \
- macro (OMP_task_taskwait, 0, arg) \
- macro (OMP_task_taskyield, 0, arg) \
- macro (OMP_task_taskgroup, 0, arg) \
- macro (OMP_task_join_bar, 0, arg) \
- macro (OMP_task_plain_bar, 0, arg) \
- macro (OMP_serial, stats_flags_e::logEvent, arg) \
- macro (OMP_taskloop_scheduling, 0, arg) \
- macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_static_steal_stolen,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
- macro (FOR_static_steal_chunks,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+// clang-format off
+#define KMP_FOREACH_TIMER(macro, arg) \
+ macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \
+ macro (FOR_static_scheduling, 0, arg) \
+ macro (FOR_dynamic_scheduling, 0, arg) \
+ macro (OMP_critical, 0, arg) \
+ macro (OMP_critical_wait, 0, arg) \
+ macro (OMP_single, 0, arg) \
+ macro (OMP_master, 0, arg) \
+ macro (OMP_idle, stats_flags_e::logEvent, arg) \
+ macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \
+ macro (OMP_parallel, stats_flags_e::logEvent, arg) \
+ macro (OMP_task_immediate, 0, arg) \
+ macro (OMP_task_taskwait, 0, arg) \
+ macro (OMP_task_taskyield, 0, arg) \
+ macro (OMP_task_taskgroup, 0, arg) \
+ macro (OMP_task_join_bar, 0, arg) \
+ macro (OMP_task_plain_bar, 0, arg) \
+ macro (OMP_serial, stats_flags_e::logEvent, arg) \
+ macro (OMP_taskloop_scheduling, 0, arg) \
+ macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,\
+ arg) \
+ macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \
+ arg) \
+ macro (FOR_static_iterations, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+ macro (FOR_dynamic_iterations, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+ macro (FOR_static_steal_stolen, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+ macro (FOR_static_steal_chunks, \
+ stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+// clang-format on
-
-// OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit
+// OMP_start_end -- Time from when OpenMP is initialized until the
+// stats are printed at exit
// OMP_serial -- Thread zero time executing serial code
-// OMP_work -- Elapsed time in code dispatched by a fork (measured in the thread)
+// OMP_work -- Elapsed time in code dispatched by a fork (measured
+// in the thread)
// OMP_barrier -- Time at "real" barriers (includes task time)
// FOR_static_scheduling -- Time spent doing scheduling for a static "for"
// FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
-// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region
+// OMP_idle -- Worker threads time spent waiting for inclusion in
+// a parallel region
// OMP_plain_barrier -- Time spent in a barrier construct
-// OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a parallel region
+// OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a
+// parallel region
// OMP_parallel -- Time spent inside a parallel construct
// OMP_task_immediate -- Time spent executing non-deferred tasks
-// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct
-// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct
-// OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup construct
+// OMP_task_taskwait -- Time spent executing tasks inside a taskwait
+// construct
+// OMP_task_taskyield -- Time spent executing tasks inside a taskyield
+// construct
+// OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup
+// construct
// OMP_task_join_bar -- Time spent executing tasks inside a join barrier
-// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct
+// OMP_task_plain_bar -- Time spent executing tasks inside a barrier
+// construct
// OMP_single -- Time spent executing a "single" region
// OMP_master -- Time spent executing a "master" region
// OMP_set_numthreads -- Values passed to omp_set_num_threads
// OMP_PARALLEL_args -- Number of arguments passed to a parallel region
-// FOR_static_iterations -- Number of available parallel chunks of work in a static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
-// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
+// FOR_static_iterations -- Number of available parallel chunks of work in a
+// static for
+// FOR_dynamic_iterations -- Number of available parallel chunks of work in a
+// dynamic for
+// Both adjust for any chunking, so if there were an
+// iteration count of 20 but a chunk size of 10, we'd
+// record 2.
#if (KMP_DEVELOPER_STATS)
// Timers which are of interest to runtime library developers, not end users.
@@ -192,227 +215,239 @@ enum stats_state_e {
// KMP_tree_release -- time in __kmp_tree_barrier_release
// KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
// KMP_hyper_release -- time in __kmp_hyper_barrier_release
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
- macro (KMP_fork_call, 0, arg) \
- macro (KMP_join_call, 0, arg) \
- macro (KMP_end_split_barrier, 0, arg) \
- macro (KMP_hier_gather, 0, arg) \
- macro (KMP_hier_release, 0, arg) \
- macro (KMP_hyper_gather, 0, arg) \
- macro (KMP_hyper_release, 0, arg) \
- macro (KMP_linear_gather, 0, arg) \
- macro (KMP_linear_release, 0, arg) \
- macro (KMP_tree_gather, 0, arg) \
- macro (KMP_tree_release, 0, arg) \
- macro (USER_resume, 0, arg) \
- macro (USER_suspend, 0, arg) \
- macro (KMP_allocate_team, 0, arg) \
- macro (KMP_setup_icv_copy, 0, arg) \
- macro (USER_icv_copy, 0, arg)
+#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
+ macro(KMP_fork_call, 0, arg) macro(KMP_join_call, 0, arg) macro( \
+ KMP_end_split_barrier, 0, arg) macro(KMP_hier_gather, 0, arg) \
+ macro(KMP_hier_release, 0, arg) macro(KMP_hyper_gather, 0, arg) \
+ macro(KMP_hyper_release, 0, arg) macro(KMP_linear_gather, 0, arg) \
+ macro(KMP_linear_release, 0, arg) macro(KMP_tree_gather, 0, arg) \
+ macro(KMP_tree_release, 0, arg) macro(USER_resume, 0, arg) \
+ macro(USER_suspend, 0, arg) \
+ macro(KMP_allocate_team, 0, arg) \
+ macro(KMP_setup_icv_copy, 0, arg) \
+ macro(USER_icv_copy, 0, arg)
#else
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
#endif
/*!
* \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
*
- * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
+ * @param macro a user defined macro that takes three arguments -
+ * macro(TIMER_NAME, flags, arg)
* @param arg a user defined argument to send to the user defined macro
*
- * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN!
+ * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE
+ * BAD THINGS WILL HAPPEN!
*
- * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics).
- * We allocate these on a per-thread basis, and explicitly start and stop them.
- * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't
- * need to be defined here.
- * The name here should be the same as that of a timer above.
+ * \details Explicit timers are ones where we need to allocate a timer itself
+ * (as well as the accumulated timing statistics). We allocate these on a
+ * per-thread basis, and explicitly start and stop them. Block timers just
+ * allocate the timer itself on the stack, and use the destructor to notice
+ * block exit; they don't need to be defined here. The name here should be the
+ * same as that of a timer above.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
- KMP_FOREACH_TIMER(macro, arg)
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
-#define ENUMERATE(name,ignore,prefix) prefix##name,
-enum timer_e {
- KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
- TIMER_LAST
-};
+#define ENUMERATE(name, ignore, prefix) prefix##name,
+enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
enum explicit_timer_e {
- KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
- EXPLICIT_TIMER_LAST
+ KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
};
-enum counter_e {
- KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
- COUNTER_LAST
-};
+enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
#undef ENUMERATE
class timerPair {
- explicit_timer_e timer_index;
- timer_e timer;
- public:
- timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
- inline explicit_timer_e get_index() const { return timer_index; }
- inline timer_e get_timer() const { return timer; }
- bool operator==(const timerPair & rhs) {
- return this->get_index() == rhs.get_index();
- }
- bool operator!=(const timerPair & rhs) {
- return !(*this == rhs);
- }
-};
+ explicit_timer_e timer_index;
+ timer_e timer;
-class statistic
-{
- double minVal;
- double maxVal;
- double meanVal;
- double m2;
- uint64_t sampleCount;
-
- public:
- statistic() { reset(); }
- statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}
-
- double getMin() const { return minVal; }
- double getMean() const { return meanVal; }
- double getMax() const { return maxVal; }
- uint64_t getCount() const { return sampleCount; }
- double getSD() const { return sqrt(m2/sampleCount); }
- double getTotal() const { return sampleCount*meanVal; }
-
- void reset()
- {
- minVal = std::numeric_limits<double>::max();
- maxVal = -std::numeric_limits<double>::max();
- meanVal= 0.0;
- m2 = 0.0;
- sampleCount = 0;
- }
- void addSample(double sample);
- void scale (double factor);
- void scaleDown(double f) { scale (1./f); }
- statistic & operator+= (statistic const & other);
-
- std::string format(char unit, bool total=false) const;
-};
-
-struct statInfo
-{
- const char * name;
- uint32_t flags;
-};
-
-class timeStat : public statistic
-{
- static statInfo timerInfo[];
-
- public:
- timeStat() : statistic() {}
- static const char * name(timer_e e) { return timerInfo[e].name; }
- static bool noTotal (timer_e e) { return timerInfo[e].flags & stats_flags_e::noTotal; }
- static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; }
- static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; }
- static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; }
- static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; }
- static void clearEventFlags() {
- for(int i=0;i<TIMER_LAST;i++) {
- timerInfo[i].flags &= (~(stats_flags_e::logEvent));
- }
+public:
+ timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
+ inline explicit_timer_e get_index() const { return timer_index; }
+ inline timer_e get_timer() const { return timer; }
+ bool operator==(const timerPair &rhs) {
+ return this->get_index() == rhs.get_index();
+ }
+ bool operator!=(const timerPair &rhs) { return !(*this == rhs); }
+};
+
+class statistic {
+ double minVal;
+ double maxVal;
+ double meanVal;
+ double m2;
+ uint64_t sampleCount;
+
+public:
+ statistic() { reset(); }
+ statistic(statistic const &o)
+ : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
+ sampleCount(o.sampleCount) {}
+
+ double getMin() const { return minVal; }
+ double getMean() const { return meanVal; }
+ double getMax() const { return maxVal; }
+ uint64_t getCount() const { return sampleCount; }
+ double getSD() const { return sqrt(m2 / sampleCount); }
+ double getTotal() const { return sampleCount * meanVal; }
+
+ void reset() {
+ minVal = std::numeric_limits<double>::max();
+ maxVal = -std::numeric_limits<double>::max();
+ meanVal = 0.0;
+ m2 = 0.0;
+ sampleCount = 0;
+ }
+ void addSample(double sample);
+ void scale(double factor);
+ void scaleDown(double f) { scale(1. / f); }
+ statistic &operator+=(statistic const &other);
+
+ std::string format(char unit, bool total = false) const;
+};
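For context, and purely as an illustration rather than the runtime's actual code: addSample() is only declared here and is defined in kmp_stats.cpp. An online (Welford-style) update such as the sketch below would be consistent with getMean(), getSD(), and reset() above; field and function names are hypothetical.

#include <algorithm> // std::min, std::max
#include <cstdint>
#include <limits>

struct running_stat_sketch {
  double minVal, maxVal, meanVal, m2;
  uint64_t sampleCount;
  running_stat_sketch()
      : minVal(std::numeric_limits<double>::max()),
        maxVal(-std::numeric_limits<double>::max()), meanVal(0.0), m2(0.0),
        sampleCount(0) {}
  void addSample(double sample) {
    minVal = std::min(minVal, sample);
    maxVal = std::max(maxVal, sample);
    sampleCount++;
    double delta = sample - meanVal;
    meanVal += delta / sampleCount;   // running mean
    m2 += delta * (sample - meanVal); // running sum of squared deviations
    // sqrt(m2 / sampleCount) then matches getSD() in the class above.
  }
};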
+
+struct statInfo {
+ const char *name;
+ uint32_t flags;
+};
+
+class timeStat : public statistic {
+ static statInfo timerInfo[];
+
+public:
+ timeStat() : statistic() {}
+ static const char *name(timer_e e) { return timerInfo[e].name; }
+ static bool noTotal(timer_e e) {
+ return timerInfo[e].flags & stats_flags_e::noTotal;
+ }
+ static bool masterOnly(timer_e e) {
+ return timerInfo[e].flags & stats_flags_e::onlyInMaster;
+ }
+ static bool workerOnly(timer_e e) {
+ return timerInfo[e].flags & stats_flags_e::notInMaster;
+ }
+ static bool noUnits(timer_e e) {
+ return timerInfo[e].flags & stats_flags_e::noUnits;
+ }
+ static bool logEvent(timer_e e) {
+ return timerInfo[e].flags & stats_flags_e::logEvent;
+ }
+ static void clearEventFlags() {
+ for (int i = 0; i < TIMER_LAST; i++) {
+ timerInfo[i].flags &= (~(stats_flags_e::logEvent));
}
+ }
};
// Where we need explicitly to start and end the timer, this version can be used
-// Since these timers normally aren't nicely scoped, so don't have a good place to live
-// on the stack of the thread, they're more work to use.
-class explicitTimer
-{
- timeStat * stat;
- tsc_tick_count startTime;
- tsc_tick_count pauseStartTime;
- tsc_tick_count::tsc_interval_t totalPauseTime;
-
- public:
- explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
- explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
-
- void setStat (timeStat *s) { stat = s; }
- void start(timer_e timerEnumValue);
- void pause() { pauseStartTime = tsc_tick_count::now(); }
- void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
- void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr);
- void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
+// Since these timers normally aren't nicely scoped and don't have a good place
+// to live on the stack of the thread, they're more work to use.
+class explicitTimer {
+ timeStat *stat;
+ tsc_tick_count startTime;
+ tsc_tick_count pauseStartTime;
+ tsc_tick_count::tsc_interval_t totalPauseTime;
+
+public:
+ explicitTimer()
+ : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
+ explicitTimer(timeStat *s)
+ : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
+
+ void setStat(timeStat *s) { stat = s; }
+ void start(timer_e timerEnumValue);
+ void pause() { pauseStartTime = tsc_tick_count::now(); }
+ void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
+ void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr = nullptr);
+ void reset() {
+ startTime = 0;
+ pauseStartTime = 0;
+ totalPauseTime = 0;
+ }
};
// Where all you need is to time a block, this is enough.
// (It avoids the need to have an explicit end, leaving the scope suffices.)
-class blockTimer : public explicitTimer
-{
- timer_e timerEnumValue;
- public:
- blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
- ~blockTimer() { stop(timerEnumValue); }
+class blockTimer : public explicitTimer {
+ timer_e timerEnumValue;
+
+public:
+ blockTimer(timeStat *s, timer_e newTimerEnumValue)
+ : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
+ start(timerEnumValue);
+ }
+ ~blockTimer() { stop(timerEnumValue); }
};
// Where you need to partition a thread's clock ticks into separate states
// e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
-// DOING_NOTHING would render these conditions:
-// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
-// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
-class partitionedTimers
-{
- private:
- explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
- std::vector<timerPair> timer_stack;
- public:
- partitionedTimers();
- void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
- void init(timerPair timer_index);
- void push(timerPair timer_index);
- void pop();
- void windup();
+// DOING_NOTHING would render these conditions:
+// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
+// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
+// versa
+class partitionedTimers {
+private:
+ explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
+ std::vector<timerPair> timer_stack;
+
+public:
+ partitionedTimers();
+ void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
+ void init(timerPair timer_index);
+ void push(timerPair timer_index);
+ void pop();
+ void windup();
};
// Special wrapper around the partitioned timers to aid timing code blocks
// It avoids the need to have an explicit end, leaving the scope suffices.
-class blockPartitionedTimer
-{
- partitionedTimers* part_timers;
- timerPair timer_pair;
- public:
- blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
- ~blockPartitionedTimer() { part_timers->pop(); }
-};
-
-// Special wrapper around the thread state to aid in keeping state in code blocks
-// It avoids the need to have an explicit end, leaving the scope suffices.
-class blockThreadState
-{
- stats_state_e* state_pointer;
- stats_state_e old_state;
- public:
- blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
- *state_pointer = new_state;
- }
- ~blockThreadState() { *state_pointer = old_state; }
+class blockPartitionedTimer {
+ partitionedTimers *part_timers;
+ timerPair timer_pair;
+
+public:
+ blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
+ : part_timers(pt), timer_pair(tp) {
+ part_timers->push(timer_pair);
+ }
+ ~blockPartitionedTimer() { part_timers->pop(); }
+};
+
+// Special wrapper around the thread state to aid in keeping state in code
+// blocks. It avoids the need to have an explicit end, leaving the scope
+// suffices.
+class blockThreadState {
+ stats_state_e *state_pointer;
+ stats_state_e old_state;
+
+public:
+ blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
+ : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
+ *state_pointer = new_state;
+ }
+ ~blockThreadState() { *state_pointer = old_state; }
};
// If all you want is a count, then you can use this...
-// The individual per-thread counts will be aggregated into a statistic at program exit.
-class counter
-{
- uint64_t value;
- static const statInfo counterInfo[];
-
- public:
- counter() : value(0) {}
- void increment() { value++; }
- uint64_t getValue() const { return value; }
- void reset() { value = 0; }
- static const char * name(counter_e e) { return counterInfo[e].name; }
- static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; }
+// The individual per-thread counts will be aggregated into a statistic at
+// program exit.
+class counter {
+ uint64_t value;
+ static const statInfo counterInfo[];
+
+public:
+ counter() : value(0) {}
+ void increment() { value++; }
+ uint64_t getValue() const { return value; }
+ void reset() { value = 0; }
+ static const char *name(counter_e e) { return counterInfo[e].name; }
+ static bool masterOnly(counter_e e) {
+ return counterInfo[e].flags & stats_flags_e::onlyInMaster;
+ }
};
/* ****************************************************************
@@ -449,17 +484,20 @@ Begin ----------------------------------
**************************************************************** */
class kmp_stats_event {
- uint64_t start;
- uint64_t stop;
- int nest_level;
- timer_e timer_name;
- public:
- kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
- kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
- inline uint64_t getStart() const { return start; }
- inline uint64_t getStop() const { return stop; }
- inline int getNestLevel() const { return nest_level; }
- inline timer_e getTimerName() const { return timer_name; }
+ uint64_t start;
+ uint64_t stop;
+ int nest_level;
+ timer_e timer_name;
+
+public:
+ kmp_stats_event()
+ : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
+ kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
+ : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
+ inline uint64_t getStart() const { return start; }
+ inline uint64_t getStop() const { return stop; }
+ inline int getNestLevel() const { return nest_level; }
+ inline timer_e getTimerName() const { return timer_name; }
};
/* ****************************************************************
@@ -479,48 +517,54 @@ class kmp_stats_event {
to avoid reallocations, then set INIT_SIZE to a large value.
the interface to this class is through six operations:
- 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory
+ 1) reset() -- sets the internal_size back to 0 but does not deallocate any
+ memory
2) size() -- returns the number of valid elements in the vector
3) push_back(start, stop, nest, timer_name) -- pushes an event onto
- the back of the array
+ the back of the array
4) deallocate() -- frees all memory associated with the vector
5) sort() -- sorts the vector by start time
6) operator[index] or at(index) -- returns event reference at that index
-
**************************************************************** */
class kmp_stats_event_vector {
- kmp_stats_event* events;
- int internal_size;
- int allocated_size;
- static const int INIT_SIZE = 1024;
- public:
- kmp_stats_event_vector() {
- events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE);
- internal_size = 0;
- allocated_size = INIT_SIZE;
- }
- ~kmp_stats_event_vector() {}
- inline void reset() { internal_size = 0; }
- inline int size() const { return internal_size; }
- void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
- int i;
- if(internal_size == allocated_size) {
- kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2);
- for(i=0;i<internal_size;i++) tmp[i] = events[i];
- __kmp_free(events);
- events = tmp;
- allocated_size*=2;
- }
- events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
- internal_size++;
- return;
+ kmp_stats_event *events;
+ int internal_size;
+ int allocated_size;
+ static const int INIT_SIZE = 1024;
+
+public:
+ kmp_stats_event_vector() {
+ events =
+ (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
+ internal_size = 0;
+ allocated_size = INIT_SIZE;
+ }
+ ~kmp_stats_event_vector() {}
+ inline void reset() { internal_size = 0; }
+ inline int size() const { return internal_size; }
+ void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
+ timer_e name) {
+ int i;
+ if (internal_size == allocated_size) {
+ kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
+ sizeof(kmp_stats_event) * allocated_size * 2);
+ for (i = 0; i < internal_size; i++)
+ tmp[i] = events[i];
+ __kmp_free(events);
+ events = tmp;
+ allocated_size *= 2;
}
- void deallocate();
- void sort();
- const kmp_stats_event & operator[](int index) const { return events[index]; }
- kmp_stats_event & operator[](int index) { return events[index]; }
- const kmp_stats_event & at(int index) const { return events[index]; }
- kmp_stats_event & at(int index) { return events[index]; }
+ events[internal_size] =
+ kmp_stats_event(start_time, stop_time, nest_level, name);
+ internal_size++;
+ return;
+ }
+ void deallocate();
+ void sort();
+ const kmp_stats_event &operator[](int index) const { return events[index]; }
+ kmp_stats_event &operator[](int index) { return events[index]; }
+ const kmp_stats_event &at(int index) const { return events[index]; }
+ kmp_stats_event &at(int index) { return events[index]; }
};
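A hedged usage sketch for the event vector (timer names taken from the list above, start/stop values made up; this only makes sense inside the runtime because the constructor uses __kmp_allocate):

kmp_stats_event_vector ev;
ev.push_back(/*start*/ 100, /*stop*/ 250, /*nest_level*/ 0, TIMER_OMP_parallel);
ev.push_back(/*start*/ 120, /*stop*/ 200, /*nest_level*/ 1, TIMER_OMP_work);
ev.sort(); // order events by start time before plotting
for (int i = 0; i < ev.size(); i++) {
  uint64_t span = ev[i].getStop() - ev[i].getStart();
  (void)span; // e.g., emit one timeline bar per event
}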
/* ****************************************************************
@@ -536,13 +580,12 @@ class kmp_stats_event_vector {
The first node corresponds to thread 0's statistics.
The second node corresponds to thread 1's statistics and so on...
- Each node has a _timers, _counters, and _explicitTimers array to
- hold that thread's statistics. The _explicitTimers
- point to the correct _timer and update its statistics at every stop() call.
- The explicitTimers' pointers are set up in the constructor.
- Each node also has an event vector to hold that thread's timing events.
- The event vector expands as necessary and records the start-stop times
- for each timer.
+ Each node has a _timers, _counters, and _explicitTimers array to hold that
+ thread's statistics. The _explicitTimers point to the correct _timer and
+ update its statistics at every stop() call. The explicitTimers' pointers are
+ set up in the constructor. Each node also has an event vector to hold that
+ thread's timing events. The event vector expands as necessary and records
+ the start-stop times for each timer.
The nestLevel variable is for plotting events and is related
to the bar width in the timeline graph.
@@ -550,138 +593,148 @@ class kmp_stats_event_vector {
Every thread will have a __thread local pointer to its node in
the list. The sentinel node is used by the master thread to
store "dummy" statistics before __kmp_create_worker() is called.
-
**************************************************************** */
class kmp_stats_list {
- int gtid;
- timeStat _timers[TIMER_LAST+1];
- counter _counters[COUNTER_LAST+1];
- explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
- partitionedTimers _partitionedTimers;
- int _nestLevel; // one per thread
- kmp_stats_event_vector _event_vector;
- kmp_stats_list* next;
- kmp_stats_list* prev;
- stats_state_e state;
- int thread_is_idle_flag;
- public:
- kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
- state(IDLE), thread_is_idle_flag(0) {
-#define doInit(name,ignore1,ignore2) \
- getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
- _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
- KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
+ int gtid;
+ timeStat _timers[TIMER_LAST + 1];
+ counter _counters[COUNTER_LAST + 1];
+ explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
+ partitionedTimers _partitionedTimers;
+ int _nestLevel; // one per thread
+ kmp_stats_event_vector _event_vector;
+ kmp_stats_list *next;
+ kmp_stats_list *prev;
+ stats_state_e state;
+ int thread_is_idle_flag;
+
+public:
+ kmp_stats_list()
+ : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
+ thread_is_idle_flag(0) {
+#define doInit(name, ignore1, ignore2) \
+ getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
+ _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \
+ getExplicitTimer(EXPLICIT_TIMER_##name));
+ KMP_FOREACH_EXPLICIT_TIMER(doInit, 0);
#undef doInit
- }
- ~kmp_stats_list() { }
- inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; }
- inline counter * getCounter(counter_e idx) { return &_counters[idx]; }
- inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
- inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; }
- inline timeStat * getTimers() { return _timers; }
- inline counter * getCounters() { return _counters; }
- inline explicitTimer * getExplicitTimers() { return _explicitTimers; }
- inline kmp_stats_event_vector & getEventVector() { return _event_vector; }
- inline void resetEventVector() { _event_vector.reset(); }
- inline void incrementNestValue() { _nestLevel++; }
- inline int getNestValue() { return _nestLevel; }
- inline void decrementNestValue() { _nestLevel--; }
- inline int getGtid() const { return gtid; }
- inline void setGtid(int newgtid) { gtid = newgtid; }
- inline void setState(stats_state_e newstate) { state = newstate; }
- inline stats_state_e getState() const { return state; }
- inline stats_state_e * getStatePointer() { return &state; }
- inline bool isIdle() { return thread_is_idle_flag==1; }
- inline void setIdleFlag() { thread_is_idle_flag = 1; }
- inline void resetIdleFlag() { thread_is_idle_flag = 0; }
- kmp_stats_list* push_back(int gtid); // returns newly created list node
- inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
- _event_vector.push_back(start_time, stop_time, nest_level, name);
- }
- void deallocate();
- class iterator;
- kmp_stats_list::iterator begin();
- kmp_stats_list::iterator end();
- int size();
- class iterator {
- kmp_stats_list* ptr;
- friend kmp_stats_list::iterator kmp_stats_list::begin();
- friend kmp_stats_list::iterator kmp_stats_list::end();
- public:
- iterator();
- ~iterator();
- iterator operator++();
- iterator operator++(int dummy);
- iterator operator--();
- iterator operator--(int dummy);
- bool operator!=(const iterator & rhs);
- bool operator==(const iterator & rhs);
- kmp_stats_list* operator*() const; // dereference operator
- };
+ }
+ ~kmp_stats_list() {}
+ inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
+ inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
+ inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
+ return &_explicitTimers[idx];
+ }
+ inline partitionedTimers *getPartitionedTimers() {
+ return &_partitionedTimers;
+ }
+ inline timeStat *getTimers() { return _timers; }
+ inline counter *getCounters() { return _counters; }
+ inline explicitTimer *getExplicitTimers() { return _explicitTimers; }
+ inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
+ inline void resetEventVector() { _event_vector.reset(); }
+ inline void incrementNestValue() { _nestLevel++; }
+ inline int getNestValue() { return _nestLevel; }
+ inline void decrementNestValue() { _nestLevel--; }
+ inline int getGtid() const { return gtid; }
+ inline void setGtid(int newgtid) { gtid = newgtid; }
+ inline void setState(stats_state_e newstate) { state = newstate; }
+ inline stats_state_e getState() const { return state; }
+ inline stats_state_e *getStatePointer() { return &state; }
+ inline bool isIdle() { return thread_is_idle_flag == 1; }
+ inline void setIdleFlag() { thread_is_idle_flag = 1; }
+ inline void resetIdleFlag() { thread_is_idle_flag = 0; }
+ kmp_stats_list *push_back(int gtid); // returns newly created list node
+ inline void push_event(uint64_t start_time, uint64_t stop_time,
+ int nest_level, timer_e name) {
+ _event_vector.push_back(start_time, stop_time, nest_level, name);
+ }
+ void deallocate();
+ class iterator;
+ kmp_stats_list::iterator begin();
+ kmp_stats_list::iterator end();
+ int size();
+ class iterator {
+ kmp_stats_list *ptr;
+ friend kmp_stats_list::iterator kmp_stats_list::begin();
+ friend kmp_stats_list::iterator kmp_stats_list::end();
+
+ public:
+ iterator();
+ ~iterator();
+ iterator operator++();
+ iterator operator++(int dummy);
+ iterator operator--();
+ iterator operator--(int dummy);
+ bool operator!=(const iterator &rhs);
+ bool operator==(const iterator &rhs);
+ kmp_stats_list *operator*() const; // dereference operator
+ };
};
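For orientation only, not part of the patch: a hypothetical walk over the per-thread list using the iterator above, roughly what the output module does at exit. __kmp_stats_list is the extern list head declared further below.

for (kmp_stats_list::iterator it = __kmp_stats_list->begin();
     it != __kmp_stats_list->end(); ++it) {
  kmp_stats_list *node = *it;            // one node per thread
  timeStat *timers = node->getTimers();  // indexed by timer_e
  counter *counts = node->getCounters(); // indexed by counter_e
  (void)timers;
  (void)counts; // aggregate or print per-thread data here
}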
/* ****************************************************************
Class to encapsulate all output functions and the environment variables
- This module holds filenames for various outputs (normal stats, events, plot file),
- as well as coloring information for the plot file.
+ This module holds filenames for various outputs (normal stats, events, plot
+ file), as well as coloring information for the plot file.
The filenames and flags variables are read from environment variables.
- These are read once by the constructor of the global variable __kmp_stats_output
- which calls init().
+ These are read once by the constructor of the global variable
+ __kmp_stats_output which calls init().
- During this init() call, event flags for the timeStat::timerInfo[] global array
- are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
+ During this init() call, event flags for the timeStat::timerInfo[] global
+ array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
- The only interface function that is public is outputStats(heading). This function
- should print out everything it needs to, either to files or stderr,
+ The only interface function that is public is outputStats(heading). This
+ function should print out everything it needs to, either to files or stderr,
depending on the environment variables described below
ENVIRONMENT VARIABLES:
- KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file,
- otherwise, print to stderr
- KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either
- KMP_STATS_FILE or stderr
+ KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
+ file, otherwise, print to stderr
+ KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
+ either KMP_STATS_FILE or stderr
KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
otherwise, the plot file is sent to "events.plt"
- KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events
+ KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
+ events
KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
otherwise, output is sent to "events.dat"
-
**************************************************************** */
class kmp_stats_output_module {
- public:
- struct rgb_color {
- float r;
- float g;
- float b;
- };
-
- private:
- std::string outputFileName;
- static const char* eventsFileName;
- static const char* plotFileName;
- static int printPerThreadFlag;
- static int printPerThreadEventsFlag;
- static const rgb_color globalColorArray[];
- static rgb_color timerColorInfo[];
-
- void init();
- static void setupEventColors();
- static void printPloticusFile();
- static void printHeaderInfo(FILE *statsOut);
- static void printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats);
- static void printCounterStats(FILE *statsOut, statistic const * theStats);
- static void printCounters(FILE * statsOut, counter const * theCounters);
- static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid);
- static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
- static void windupExplicitTimers();
- bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
-
- public:
- kmp_stats_output_module() { init(); }
- void outputStats(const char* heading);
+public:
+ struct rgb_color {
+ float r;
+ float g;
+ float b;
+ };
+
+private:
+ std::string outputFileName;
+ static const char *eventsFileName;
+ static const char *plotFileName;
+ static int printPerThreadFlag;
+ static int printPerThreadEventsFlag;
+ static const rgb_color globalColorArray[];
+ static rgb_color timerColorInfo[];
+
+ void init();
+ static void setupEventColors();
+ static void printPloticusFile();
+ static void printHeaderInfo(FILE *statsOut);
+ static void printTimerStats(FILE *statsOut, statistic const *theStats,
+ statistic const *totalStats);
+ static void printCounterStats(FILE *statsOut, statistic const *theStats);
+ static void printCounters(FILE *statsOut, counter const *theCounters);
+ static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
+ int gtid);
+ static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
+ static void windupExplicitTimers();
+ bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
+
+public:
+ kmp_stats_output_module() { init(); }
+ void outputStats(const char *heading);
};
#ifdef __cplusplus
@@ -693,11 +746,11 @@ void __kmp_reset_stats();
void __kmp_output_stats(const char *);
void __kmp_accumulate_stats_at_exit(void);
// thread local pointer to stats node within list
-extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
+extern __thread kmp_stats_list *__kmp_stats_thread_ptr;
// head to stats list.
-extern kmp_stats_list* __kmp_stats_list;
+extern kmp_stats_list *__kmp_stats_list;
// lock for __kmp_stats_list
-extern kmp_tas_lock_t __kmp_stats_lock;
+extern kmp_tas_lock_t __kmp_stats_lock;
// reference start time
extern tsc_tick_count __kmp_stats_start_time;
// interface to output
@@ -709,21 +762,21 @@ extern kmp_stats_output_module __kmp_sta
// Simple, standard interfaces that drop out completely if stats aren't enabled
-
/*!
* \brief Uses specified timer (name) to time code block.
*
* @param name timer name as specified under the KMP_FOREACH_TIMER() macro
*
- * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block
- * and use the destructor to stop the timer. Convenient!
- * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block.
- * I don't think that's a problem.
+ * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will
+ * record the time taken in the block and use the destructor to stop the timer.
+ * Convenient! With this definition you can't have more than one KMP_TIME_BLOCK
+ * in the same code block. I don't think that's a problem.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_TIME_BLOCK(name) \
- blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)
+#define KMP_TIME_BLOCK(name) \
+ blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \
+ TIMER_##name)
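A hypothetical usage sketch (assumes KMP_STATS_ENABLED and the OMP_work timer listed earlier):

void runtime_function_sketch() {
  KMP_TIME_BLOCK(OMP_work); // declares a blockTimer named __BLOCKTIME__
  // ... work to be timed ...
} // the blockTimer destructor stops the timer at scope exit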
/*!
* \brief Adds value to specified timer (name).
@@ -731,69 +784,83 @@ extern kmp_stats_output_module __kmp_sta
* @param name timer name as specified under the KMP_FOREACH_TIMER() macro
* @param value double precision sample value to add to statistics for the timer
*
- * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics.
+ * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to
+ * a timer's statistics.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_COUNT_VALUE(name, value) \
- __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
+#define KMP_COUNT_VALUE(name, value) \
+ __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
/*!
* \brief Increments specified counter (name).
*
* @param name counter name as specified under the KMP_FOREACH_COUNTER() macro
*
- * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread.
+ * \details Use KMP_COUNT_BLOCK(name) macro to increment a statistics
+ * counter for the executing thread.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_COUNT_BLOCK(name) \
- __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
+#define KMP_COUNT_BLOCK(name) \
+ __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
/*!
- * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro.
+ * \brief "Starts" an explicit timer which will need a corresponding
+ * KMP_STOP_EXPLICIT_TIMER() macro.
*
- * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
+ * @param name explicit timer name as specified under the
+ * KMP_FOREACH_EXPLICIT_TIMER() macro
*
- * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER()
- * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end
- * of the code block. All explicit timers are stopped at library exit time before the final statistics are outputted.
+ * \details Use to start a timer. This will need a corresponding
+ * KMP_STOP_EXPLICIT_TIMER() macro to stop the timer unlike the
+ * KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end
+ * of the code block. All explicit timers are stopped at library exit time
+ * before the final statistics are outputted.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_START_EXPLICIT_TIMER(name) \
- __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)
+#define KMP_START_EXPLICIT_TIMER(name) \
+ __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \
+ ->start(TIMER_##name)
/*!
* \brief "Stops" an explicit timer.
*
- * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
+ * @param name explicit timer name as specified under the
+ * KMP_FOREACH_EXPLICIT_TIMER() macro
*
- * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name)
- * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset.
- * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again.
+ * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is
+ * done, the time between the last KMP_START_EXPLICIT_TIMER(name) and this
+ * KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The
+ * timer will then be reset. After the KMP_STOP_EXPLICIT_TIMER(name) macro is
+ * called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer
+ * once again.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_STOP_EXPLICIT_TIMER(name) \
- __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)
+#define KMP_STOP_EXPLICIT_TIMER(name) \
+ __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \
+ ->stop(TIMER_##name)
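A hedged sketch pairing the two macros; OMP_serial is one of the timers listed earlier, and per the warning above it must also appear under KMP_FOREACH_TIMER():

KMP_START_EXPLICIT_TIMER(OMP_serial);
// ... code that is not conveniently enclosed in a single scope ...
KMP_STOP_EXPLICIT_TIMER(OMP_serial); // elapsed time is added to the stat and
                                     // the timer is reset for a later start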
/*!
* \brief Outputs the current thread statistics and reset them.
*
* @param heading_string heading put above the final stats output
*
- * \details Explicitly stops all timers and outputs all stats.
- * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr
- * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats
- * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread
- * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed
- * It should be noted that all statistics are reset when this macro is called.
+ * \details Explicitly stops all timers and outputs all stats. Environment
+ * variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a
+ * filename instead of stderr. Environment variable,
+ * `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific
+ * stats. For now the `OMPTB_STATSTHREADS` environment variable can either be
+ * defined with any value, which will print out thread specific stats, or it can
+ * be undefined (not specified in the environment) and thread specific stats
+ * won't be printed. It should be noted that all statistics are reset when this
+ * macro is called.
*
* @ingroup STATS_GATHERING
*/
-#define KMP_OUTPUT_STATS(heading_string) \
- __kmp_output_stats(heading_string)
+#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
/*!
 * \brief Initializes the partitioned timers to begin with name.
@@ -802,27 +869,30 @@ extern kmp_stats_output_module __kmp_sta
*
* @ingroup STATS_GATHERING
*/
-#define KMP_INIT_PARTITIONED_TIMERS(name) \
- __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
-
-#define KMP_TIME_PARTITIONED_BLOCK(name) \
- blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \
- timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
-
-#define KMP_PUSH_PARTITIONED_TIMER(name) \
- __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
-
-#define KMP_POP_PARTITIONED_TIMER() \
- __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
-
-#define KMP_SET_THREAD_STATE(state_name) \
- __kmp_stats_thread_ptr->setState(state_name)
-
-#define KMP_GET_THREAD_STATE() \
- __kmp_stats_thread_ptr->getState()
-
-#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
- blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)
+#define KMP_INIT_PARTITIONED_TIMERS(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->init( \
+ timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_TIME_PARTITIONED_BLOCK(name) \
+ blockPartitionedTimer __PBLOCKTIME__( \
+ __kmp_stats_thread_ptr->getPartitionedTimers(), \
+ timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_PUSH_PARTITIONED_TIMER(name) \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->push( \
+ timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_POP_PARTITIONED_TIMER() \
+ __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
+
+#define KMP_SET_THREAD_STATE(state_name) \
+ __kmp_stats_thread_ptr->setState(state_name)
+
+#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()
+
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
+ blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \
+ state_name)
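A hypothetical sketch of the partitioned-timer and thread-state macros; the timer and state names come from this header, and it assumes KMP_INIT_PARTITIONED_TIMERS was already called on this thread:

{
  KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); // reassign this thread's ticks
  // ... execute the task ...
  KMP_POP_PARTITIONED_TIMER(); // return the ticks to the previous timer
}
{
  KMP_SET_THREAD_STATE_BLOCK(IDLE); // state restored when the block ends
  // ... wait for work ...
}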
/*!
* \brief resets all stats (counters to 0, timers to 0 elapsed ticks)
@@ -831,50 +901,50 @@ extern kmp_stats_output_module __kmp_sta
*
* @ingroup STATS_GATHERING
*/
-#define KMP_RESET_STATS() __kmp_reset_stats()
+#define KMP_RESET_STATS() __kmp_reset_stats()
#if (KMP_DEVELOPER_STATS)
-# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
-# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v)
-# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
-# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
-# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
-# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
+#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
+#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
+#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#else
// Null definitions
-# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
-# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
-# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
-# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#endif
#else // KMP_STATS_ENABLED
// Null definitions
-#define KMP_TIME_BLOCK(n) ((void)0)
-#define KMP_COUNT_VALUE(n,v) ((void)0)
-#define KMP_COUNT_BLOCK(n) ((void)0)
-#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
-#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_TIME_BLOCK(n) ((void)0)
+#define KMP_COUNT_VALUE(n, v) ((void)0)
+#define KMP_COUNT_BLOCK(n) ((void)0)
+#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
-#define KMP_RESET_STATS() ((void)0)
+#define KMP_RESET_STATS() ((void)0)
-#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
-#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0)
-#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
-#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
-#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
-#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
+#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
-#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
-#define KMP_POP_PARTITIONED_TIMER() ((void)0)
-#define KMP_SET_THREAD_STATE(state_name) ((void)0)
-#define KMP_GET_THREAD_STATE() ((void)0)
-#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
-#endif // KMP_STATS_ENABLED
+#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
+#define KMP_POP_PARTITIONED_TIMER() ((void)0)
+#define KMP_SET_THREAD_STATE(state_name) ((void)0)
+#define KMP_GET_THREAD_STATE() ((void)0)
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
+#endif // KMP_STATS_ENABLED
#endif // KMP_STATS_H
Modified: openmp/trunk/runtime/src/kmp_stats_timing.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats_timing.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats_timing.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats_timing.cpp Fri May 12 13:01:32 2017
@@ -16,8 +16,8 @@
#include <stdlib.h>
#include <unistd.h>
-#include <iostream>
#include <iomanip>
+#include <iostream>
#include <sstream>
#include "kmp.h"
@@ -26,119 +26,107 @@
using namespace std;
#if KMP_HAVE_TICK_TIME
-# if KMP_MIC
-double tsc_tick_count::tick_time()
-{
- // pretty bad assumption of 1GHz clock for MIC
- return 1/((double)1000*1.e6);
+#if KMP_MIC
+double tsc_tick_count::tick_time() {
+ // pretty bad assumption of 1GHz clock for MIC
+ return 1 / ((double)1000 * 1.e6);
}
-# elif KMP_ARCH_X86 || KMP_ARCH_X86_64
-# include <string.h>
+#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
+#include <string.h>
// Extract the value from the CPUID information
-double tsc_tick_count::tick_time()
-{
- static double result = 0.0;
-
- if (result == 0.0)
- {
- kmp_cpuid_t cpuinfo;
- char brand[256];
-
- __kmp_x86_cpuid(0x80000000, 0, &cpuinfo);
- memset(brand, 0, sizeof(brand));
- int ids = cpuinfo.eax;
-
- for (unsigned int i=2; i<(ids^0x80000000)+2; i++)
- __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t)));
-
- char * start = &brand[0];
- for (;*start == ' '; start++)
- ;
-
- char * end = brand + KMP_STRLEN(brand) - 3;
- uint64_t multiplier;
-
- if (*end == 'M') multiplier = 1000LL*1000LL;
- else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL;
- else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL;
- else
- {
- cout << "Error determining multiplier '" << *end << "'\n";
- exit (-1);
- }
- *end = 0;
- while (*end != ' ') end--;
- end++;
-
- double freq = strtod(end, &start);
- if (freq == 0.0)
- {
- cout << "Error calculating frequency " << end << "\n";
- exit (-1);
- }
+double tsc_tick_count::tick_time() {
+ static double result = 0.0;
- result = ((double)1.0)/(freq * multiplier);
+ if (result == 0.0) {
+ kmp_cpuid_t cpuinfo;
+ char brand[256];
+
+ __kmp_x86_cpuid(0x80000000, 0, &cpuinfo);
+ memset(brand, 0, sizeof(brand));
+ int ids = cpuinfo.eax;
+
+ for (unsigned int i = 2; i < (ids ^ 0x80000000) + 2; i++)
+ __kmp_x86_cpuid(i | 0x80000000, 0,
+ (kmp_cpuid_t *)(brand + (i - 2) * sizeof(kmp_cpuid_t)));
+
+ char *start = &brand[0];
+ for (; *start == ' '; start++)
+ ;
+
+ char *end = brand + KMP_STRLEN(brand) - 3;
+ uint64_t multiplier;
+
+ if (*end == 'M')
+ multiplier = 1000LL * 1000LL;
+ else if (*end == 'G')
+ multiplier = 1000LL * 1000LL * 1000LL;
+ else if (*end == 'T')
+ multiplier = 1000LL * 1000LL * 1000LL * 1000LL;
+ else {
+ cout << "Error determining multiplier '" << *end << "'\n";
+ exit(-1);
+ }
+ *end = 0;
+ while (*end != ' ')
+ end--;
+ end++;
+
+ double freq = strtod(end, &start);
+ if (freq == 0.0) {
+ cout << "Error calculating frequency " << end << "\n";
+ exit(-1);
}
- return result;
+
+ result = ((double)1.0) / (freq * multiplier);
+ }
+ return result;
}
-# endif
+#endif
#endif
static bool useSI = true;
// Return a formatted string after normalising the value into
// engineering style and using a suitable unit prefix (e.g. ms, us, ns).
-std::string formatSI(double interval, int width, char unit)
-{
- std::stringstream os;
-
- if (useSI)
- {
- // Preserve accuracy for small numbers, since we only multiply and the positive powers
- // of ten are precisely representable.
- static struct { double scale; char prefix; } ranges[] = {
- {1.e12,'f'},
- {1.e9, 'p'},
- {1.e6, 'n'},
- {1.e3, 'u'},
- {1.0, 'm'},
- {1.e-3,' '},
- {1.e-6,'k'},
- {1.e-9,'M'},
- {1.e-12,'G'},
- {1.e-15,'T'},
- {1.e-18,'P'},
- {1.e-21,'E'},
- {1.e-24,'Z'},
- {1.e-27,'Y'}
- };
-
- if (interval == 0.0)
- {
- os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit;
- return os.str();
- }
-
- bool negative = false;
- if (interval < 0.0)
- {
- negative = true;
- interval = -interval;
- }
-
- for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++)
- {
- if (interval*ranges[i].scale < 1.e0)
- {
- interval = interval * 1000.e0 * ranges[i].scale;
- os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right <<
- (negative ? -interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit;
-
- return os.str();
- }
- }
+std::string formatSI(double interval, int width, char unit) {
+ std::stringstream os;
+
+ if (useSI) {
+ // Preserve accuracy for small numbers, since we only multiply and the
+ // positive powers of ten are precisely representable.
+ static struct {
+ double scale;
+ char prefix;
+ } ranges[] = {{1.e12, 'f'}, {1.e9, 'p'}, {1.e6, 'n'}, {1.e3, 'u'},
+ {1.0, 'm'}, {1.e-3, ' '}, {1.e-6, 'k'}, {1.e-9, 'M'},
+ {1.e-12, 'G'}, {1.e-15, 'T'}, {1.e-18, 'P'}, {1.e-21, 'E'},
+ {1.e-24, 'Z'}, {1.e-27, 'Y'}};
+
+ if (interval == 0.0) {
+ os << std::setw(width - 3) << std::right << "0.00" << std::setw(3)
+ << unit;
+ return os.str();
+ }
+
+ bool negative = false;
+ if (interval < 0.0) {
+ negative = true;
+ interval = -interval;
+ }
+
+ for (int i = 0; i < (int)(sizeof(ranges) / sizeof(ranges[0])); i++) {
+ if (interval * ranges[i].scale < 1.e0) {
+ interval = interval * 1000.e0 * ranges[i].scale;
+ os << std::fixed << std::setprecision(2) << std::setw(width - 3)
+ << std::right << (negative ? -interval : interval) << std::setw(2)
+ << ranges[i].prefix << std::setw(1) << unit;
+
+ return os.str();
+ }
}
- os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit;
+ }
+ os << std::setprecision(2) << std::fixed << std::right << std::setw(width - 3)
+ << interval << std::setw(3) << unit;
- return os.str();
+ return os.str();
}
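A brief, hypothetical example of the formatting (the exact padding depends on the width argument, and using it standalone would require linking this file):

// 1.23e-4 with unit 'S' falls into the micro range of the table above,
// so it prints as roughly "123.00 uS", right-justified in the given width.
std::string pretty = formatSI(1.23e-4, 10, 'S');
std::cout << pretty << std::endl; // needs <iostream>; formatSI is declared
                                  // in kmp_stats_timing.h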
Modified: openmp/trunk/runtime/src/kmp_stats_timing.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats_timing.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats_timing.h (original)
+++ openmp/trunk/runtime/src/kmp_stats_timing.h Fri May 12 13:01:32 2017
@@ -16,97 +16,103 @@
//===----------------------------------------------------------------------===//
-
+#include "kmp_os.h"
+#include <limits>
#include <stdint.h>
#include <string>
-#include <limits>
-#include "kmp_os.h"
#if KMP_HAVE_X86INTRIN_H
-# include <x86intrin.h>
+#include <x86intrin.h>
#endif
class tsc_tick_count {
- private:
- int64_t my_count;
+private:
+ int64_t my_count;
+
+public:
+ class tsc_interval_t {
+ int64_t value;
+ explicit tsc_interval_t(int64_t _value) : value(_value) {}
public:
- class tsc_interval_t {
- int64_t value;
- explicit tsc_interval_t(int64_t _value) : value(_value) {}
- public:
- tsc_interval_t() : value(0) {}; // Construct 0 time duration
+ tsc_interval_t() : value(0){}; // Construct 0 time duration
#if KMP_HAVE_TICK_TIME
- double seconds() const; // Return the length of a time interval in seconds
+ double seconds() const; // Return the length of a time interval in seconds
#endif
- double ticks() const { return double(value); }
- int64_t getValue() const { return value; }
- tsc_interval_t& operator=(int64_t nvalue) { value = nvalue; return *this; }
-
- friend class tsc_tick_count;
-
- friend tsc_interval_t operator-(const tsc_tick_count& t1,
- const tsc_tick_count& t0);
- friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1,
- const tsc_tick_count::tsc_interval_t& i0);
- friend tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1,
- const tsc_tick_count::tsc_interval_t& i0);
- };
+ double ticks() const { return double(value); }
+ int64_t getValue() const { return value; }
+ tsc_interval_t &operator=(int64_t nvalue) {
+ value = nvalue;
+ return *this;
+ }
+
+ friend class tsc_tick_count;
+
+ friend tsc_interval_t operator-(const tsc_tick_count &t1,
+ const tsc_tick_count &t0);
+ friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t &i1,
+ const tsc_tick_count::tsc_interval_t &i0);
+ friend tsc_interval_t &operator+=(tsc_tick_count::tsc_interval_t &i1,
+ const tsc_tick_count::tsc_interval_t &i0);
+ };
#if KMP_HAVE___BUILTIN_READCYCLECOUNTER
- tsc_tick_count() : my_count(static_cast<int64_t>(__builtin_readcyclecounter())) {}
+ tsc_tick_count()
+ : my_count(static_cast<int64_t>(__builtin_readcyclecounter())) {}
#elif KMP_HAVE___RDTSC
- tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())) {};
+ tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())){};
#else
-# error Must have high resolution timer defined
+#error Must have high resolution timer defined
#endif
- tsc_tick_count(int64_t value) : my_count(value) {};
- int64_t getValue() const { return my_count; }
- tsc_tick_count later (tsc_tick_count const other) const {
- return my_count > other.my_count ? (*this) : other;
- }
- tsc_tick_count earlier(tsc_tick_count const other) const {
- return my_count < other.my_count ? (*this) : other;
- }
+ tsc_tick_count(int64_t value) : my_count(value){};
+ int64_t getValue() const { return my_count; }
+ tsc_tick_count later(tsc_tick_count const other) const {
+ return my_count > other.my_count ? (*this) : other;
+ }
+ tsc_tick_count earlier(tsc_tick_count const other) const {
+ return my_count < other.my_count ? (*this) : other;
+ }
#if KMP_HAVE_TICK_TIME
- static double tick_time(); // returns seconds per cycle (period) of clock
+ static double tick_time(); // returns seconds per cycle (period) of clock
#endif
- static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value
- friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0);
+ static tsc_tick_count now() {
+ return tsc_tick_count();
+ } // returns the rdtsc register value
+ friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1,
+ const tsc_tick_count &t0);
};
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0)
-{
- return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count );
-}
-
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
-{
- return tsc_tick_count::tsc_interval_t( i1.value-i0.value );
-}
-
-inline tsc_tick_count::tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
-{
- i1.value += i0.value;
- return i1;
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1,
+ const tsc_tick_count &t0) {
+ return tsc_tick_count::tsc_interval_t(t1.my_count - t0.my_count);
+}
+
+inline tsc_tick_count::tsc_interval_t
+operator-(const tsc_tick_count::tsc_interval_t &i1,
+ const tsc_tick_count::tsc_interval_t &i0) {
+ return tsc_tick_count::tsc_interval_t(i1.value - i0.value);
+}
+
+inline tsc_tick_count::tsc_interval_t &
+operator+=(tsc_tick_count::tsc_interval_t &i1,
+ const tsc_tick_count::tsc_interval_t &i0) {
+ i1.value += i0.value;
+ return i1;
}
#if KMP_HAVE_TICK_TIME
-inline double tsc_tick_count::tsc_interval_t::seconds() const
-{
- return value*tick_time();
+inline double tsc_tick_count::tsc_interval_t::seconds() const {
+ return value * tick_time();
}
#endif
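A hypothetical end-to-end sketch of measuring an interval with these types (assumes KMP_HAVE_TICK_TIME so seconds() is available):

tsc_tick_count t0 = tsc_tick_count::now(); // read the cycle counter
// ... region to measure ...
tsc_tick_count t1 = tsc_tick_count::now();
tsc_tick_count::tsc_interval_t elapsed = t1 - t0; // friend operator- above
double ticks = elapsed.ticks();  // raw cycle count as a double
double secs = elapsed.seconds(); // ticks scaled by tick_time()
(void)ticks;
(void)secs;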
extern std::string formatSI(double interval, int width, char unit);
-inline std::string formatSeconds(double interval, int width)
-{
- return formatSI(interval, width, 'S');
+inline std::string formatSeconds(double interval, int width) {
+ return formatSI(interval, width, 'S');
}
-inline std::string formatTicks(double interval, int width)
-{
- return formatSI(interval, width, 'T');
+inline std::string formatTicks(double interval, int width) {
+ return formatSI(interval, width, 'T');
}
#endif // KMP_STATS_TIMING_H
Modified: openmp/trunk/runtime/src/kmp_str.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_str.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_str.cpp (original)
+++ openmp/trunk/runtime/src/kmp_str.cpp Fri May 12 13:01:32 2017
@@ -15,866 +15,721 @@
#include "kmp_str.h"
-#include <stdarg.h> // va_*
-#include <stdio.h> // vsnprintf()
-#include <stdlib.h> // malloc(), realloc()
+#include <stdarg.h> // va_*
+#include <stdio.h> // vsnprintf()
+#include <stdlib.h> // malloc(), realloc()
#include "kmp.h"
#include "kmp_i18n.h"
-/*
- ------------------------------------------------------------------------------------------------
- String buffer.
- ------------------------------------------------------------------------------------------------
+/* String buffer.
- Usage:
+ Usage:
- // Declare buffer and initialize it.
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
-
- // Print to buffer.
- __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 );
- __kmp_str_buf_print( & buffer, " <%s>\n", line );
-
- // Use buffer contents. buffer.str is a pointer to data, buffer.used is a number of printed
- // characters (not including terminating zero).
- write( fd, buffer.str, buffer.used );
-
- // Free buffer.
- __kmp_str_buf_free( & buffer );
-
- // Alternatively, you can detach allocated memory from buffer:
- __kmp_str_buf_detach( & buffer );
- return buffer.str; // That memory should be freed eventually.
-
-
- Notes:
-
- * Buffer users may use buffer.str and buffer.used. Users should not change any fields of
- buffer directly.
-
- * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string ("").
-
- * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is
- exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc()
- as amount of used memory grows.
-
- * Buffer doubles amount of allocated memory each time it is exhausted.
-
- ------------------------------------------------------------------------------------------------
+ // Declare buffer and initialize it.
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init( & buffer );
+
+ // Print to buffer.
+ __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12);
+ __kmp_str_buf_print(& buffer, " <%s>\n", line);
+
+ // Use buffer contents. buffer.str is a pointer to data, buffer.used is a
+ // number of printed characters (not including terminating zero).
+ write( fd, buffer.str, buffer.used );
+
+ // Free buffer.
+ __kmp_str_buf_free( & buffer );
+
+ // Alternatively, you can detach allocated memory from buffer:
+ __kmp_str_buf_detach( & buffer );
+ return buffer.str; // That memory should be freed eventually.
+
+ Notes:
+
+ * Buffer users may use buffer.str and buffer.used. Users should not change
+ any fields of buffer directly.
+   * buffer.str is never NULL. If the buffer is empty, buffer.str points to an
+     empty string ("").
+   * For performance reasons, the buffer uses stack memory (buffer.bulk)
+     first. If the stack memory is exhausted, the buffer allocates memory on
+     the heap with malloc() and reallocates it with realloc() as the amount of
+     used memory grows.
+   * The buffer doubles the amount of allocated memory each time it is
+     exhausted.
*/
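
To make the growth strategy described in the notes above concrete, here is a
minimal standalone sketch of the same idea (not part of this commit; the type
and function names are invented for illustration): a buffer that starts in a
fixed on-stack array and doubles a heap allocation once that array is
exhausted. The real kmp_str_buf_t additionally fatals on allocation failure
instead of returning.

  #include <stdlib.h>
  #include <string.h>

  typedef struct tiny_buf {
    char *str;     // current storage, either bulk or heap memory
    size_t size;   // current capacity in bytes
    size_t used;   // characters stored, excluding the trailing '\0'
    char bulk[64]; // on-stack storage tried first
  } tiny_buf_t;

  static void tiny_buf_init(tiny_buf_t *b) {
    b->str = b->bulk;
    b->size = sizeof(b->bulk);
    b->used = 0;
    b->bulk[0] = 0;
  }

  // Append a string, doubling the capacity until the request fits.
  // Error handling is omitted; the runtime calls KMP_FATAL on failure.
  static void tiny_buf_cat(tiny_buf_t *b, const char *s) {
    size_t len = strlen(s);
    if (b->used + len + 1 > b->size) {
      do {
        b->size *= 2;
      } while (b->used + len + 1 > b->size);
      if (b->str == b->bulk) { // first spill from the stack to the heap
        b->str = (char *)malloc(b->size);
        memcpy(b->str, b->bulk, b->used + 1);
      } else {
        b->str = (char *)realloc(b->str, b->size);
      }
    }
    memcpy(b->str + b->used, s, len + 1);
    b->used += len;
  }

  static void tiny_buf_free(tiny_buf_t *b) {
    if (b->str != b->bulk)
      free(b->str);
    tiny_buf_init(b);
  }
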
// TODO: __kmp_str_buf_print() can use thread local memory allocator.
-#define KMP_STR_BUF_INVARIANT( b ) \
- { \
- KMP_DEBUG_ASSERT( (b)->str != NULL ); \
- KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) ); \
- KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 ); \
- KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size ); \
- KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 ); \
- KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 ); \
- }
-
-void
- __kmp_str_buf_clear(
- kmp_str_buf_t * buffer
-) {
- KMP_STR_BUF_INVARIANT( buffer );
- if ( buffer->used > 0 ) {
- buffer->used = 0;
- buffer->str[ 0 ] = 0;
- }; // if
- KMP_STR_BUF_INVARIANT( buffer );
+#define KMP_STR_BUF_INVARIANT(b) \
+ { \
+ KMP_DEBUG_ASSERT((b)->str != NULL); \
+ KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk)); \
+ KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0); \
+ KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size); \
+ KMP_DEBUG_ASSERT( \
+ (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1); \
+ KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \
+ : 1); \
+ }
+
+void __kmp_str_buf_clear(kmp_str_buf_t *buffer) {
+ KMP_STR_BUF_INVARIANT(buffer);
+ if (buffer->used > 0) {
+ buffer->used = 0;
+ buffer->str[0] = 0;
+ }; // if
+ KMP_STR_BUF_INVARIANT(buffer);
} // __kmp_str_buf_clear
+void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size) {
+ KMP_STR_BUF_INVARIANT(buffer);
+ KMP_DEBUG_ASSERT(size >= 0);
-void
-__kmp_str_buf_reserve(
- kmp_str_buf_t * buffer,
- int size
-) {
-
- KMP_STR_BUF_INVARIANT( buffer );
- KMP_DEBUG_ASSERT( size >= 0 );
-
- if ( buffer->size < (unsigned int)size ) {
-
- // Calculate buffer size.
- do {
- buffer->size *= 2;
- } while ( buffer->size < (unsigned int)size );
-
- // Enlarge buffer.
- if ( buffer->str == & buffer->bulk[ 0 ] ) {
- buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
- if ( buffer->str == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
- } else {
- buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size );
- if ( buffer->str == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- }; // if
-
- }; // if
-
- KMP_DEBUG_ASSERT( buffer->size > 0 );
- KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size );
- KMP_STR_BUF_INVARIANT( buffer );
+ if (buffer->size < (unsigned int)size) {
+ // Calculate buffer size.
+ do {
+ buffer->size *= 2;
+ } while (buffer->size < (unsigned int)size);
+ // Enlarge buffer.
+ if (buffer->str == &buffer->bulk[0]) {
+ buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
+ if (buffer->str == NULL) {
+ KMP_FATAL(MemoryAllocFailed);
+ }; // if
+ KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
+ } else {
+ buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size);
+ if (buffer->str == NULL) {
+ KMP_FATAL(MemoryAllocFailed);
+ }; // if
+ }; // if
+
+ }; // if
+
+ KMP_DEBUG_ASSERT(buffer->size > 0);
+ KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size);
+ KMP_STR_BUF_INVARIANT(buffer);
} // __kmp_str_buf_reserve
+void __kmp_str_buf_detach(kmp_str_buf_t *buffer) {
+ KMP_STR_BUF_INVARIANT(buffer);
-void
-__kmp_str_buf_detach(
- kmp_str_buf_t * buffer
-) {
-
- KMP_STR_BUF_INVARIANT( buffer );
-
- // If internal bulk is used, allocate memory and copy it.
- if ( buffer->size <= sizeof( buffer->bulk ) ) {
- buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
- if ( buffer->str == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
- KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
+ // If internal bulk is used, allocate memory and copy it.
+ if (buffer->size <= sizeof(buffer->bulk)) {
+ buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
+ if (buffer->str == NULL) {
+ KMP_FATAL(MemoryAllocFailed);
}; // if
-
+ KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
+ }; // if
} // __kmp_str_buf_detach
-
-void
-__kmp_str_buf_free(
- kmp_str_buf_t * buffer
-) {
- KMP_STR_BUF_INVARIANT( buffer );
- if ( buffer->size > sizeof( buffer->bulk ) ) {
- KMP_INTERNAL_FREE( buffer->str );
- }; // if
- buffer->str = buffer->bulk;
- buffer->size = sizeof( buffer->bulk );
- buffer->used = 0;
- KMP_STR_BUF_INVARIANT( buffer );
+void __kmp_str_buf_free(kmp_str_buf_t *buffer) {
+ KMP_STR_BUF_INVARIANT(buffer);
+ if (buffer->size > sizeof(buffer->bulk)) {
+ KMP_INTERNAL_FREE(buffer->str);
+ }; // if
+ buffer->str = buffer->bulk;
+ buffer->size = sizeof(buffer->bulk);
+ buffer->used = 0;
+ KMP_STR_BUF_INVARIANT(buffer);
} // __kmp_str_buf_free
-
-void
-__kmp_str_buf_cat(
- kmp_str_buf_t * buffer,
- char const * str,
- int len
-) {
- KMP_STR_BUF_INVARIANT( buffer );
- KMP_DEBUG_ASSERT( str != NULL );
- KMP_DEBUG_ASSERT( len >= 0 );
- __kmp_str_buf_reserve( buffer, buffer->used + len + 1 );
- KMP_MEMCPY( buffer->str + buffer->used, str, len );
- buffer->str[ buffer->used + len ] = 0;
- buffer->used += len;
- KMP_STR_BUF_INVARIANT( buffer );
+void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) {
+ KMP_STR_BUF_INVARIANT(buffer);
+ KMP_DEBUG_ASSERT(str != NULL);
+ KMP_DEBUG_ASSERT(len >= 0);
+ __kmp_str_buf_reserve(buffer, buffer->used + len + 1);
+ KMP_MEMCPY(buffer->str + buffer->used, str, len);
+ buffer->str[buffer->used + len] = 0;
+ buffer->used += len;
+ KMP_STR_BUF_INVARIANT(buffer);
} // __kmp_str_buf_cat
+void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+ va_list args) {
+ KMP_STR_BUF_INVARIANT(buffer);
+
+ for (;;) {
+ int const free = buffer->size - buffer->used;
+ int rc;
+ int size;
+
+ // Try to format string.
+ {
+/* On Linux* OS Intel(R) 64, vsnprintf() modifies the args argument, so
+   vsnprintf() crashes if it is called a second time with the same args. To
+   prevent the crash, we have to pass a fresh intact copy of args to
+   vsnprintf() on each iteration.
-void
-__kmp_str_buf_vprint(
- kmp_str_buf_t * buffer,
- char const * format,
- va_list args
-) {
-
- KMP_STR_BUF_INVARIANT( buffer );
-
- for ( ; ; ) {
-
- int const free = buffer->size - buffer->used;
- int rc;
- int size;
-
- // Try to format string.
- {
- /*
- On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it
- is called for the second time with the same args. To prevent the crash, we have to
- pass a fresh intact copy of args to vsnprintf() on each iteration.
-
- Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it
- seems vsnprintf() does not modify args argument on Windows* OS.
- */
-
- #if ! KMP_OS_WINDOWS
- va_list _args;
- __va_copy( _args, args ); // Make copy of args.
- #define args _args // Substitute args with its copy, _args.
- #endif // KMP_OS_WINDOWS
- rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args );
- #if ! KMP_OS_WINDOWS
- #undef args // Remove substitution.
- va_end( _args );
- #endif // KMP_OS_WINDOWS
- }
-
- // No errors, string has been formatted.
- if ( rc >= 0 && rc < free ) {
- buffer->used += rc;
- break;
- }; // if
-
- // Error occurred, buffer is too small.
- if ( rc >= 0 ) {
- // C99-conforming implementation of vsnprintf returns required buffer size.
- size = buffer->used + rc + 1;
- } else {
- // Older implementations just return -1. Double buffer size.
- size = buffer->size * 2;
- }; // if
-
- // Enlarge buffer.
- __kmp_str_buf_reserve( buffer, size );
+ Unfortunately, standard va_copy() macro is not available on Windows* OS.
+ However, it seems vsnprintf() does not modify args argument on Windows* OS.
+*/
- // And try again.
+#if !KMP_OS_WINDOWS
+ va_list _args;
+ __va_copy(_args, args); // Make copy of args.
+#define args _args // Substitute args with its copy, _args.
+#endif // KMP_OS_WINDOWS
+ rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args);
+#if !KMP_OS_WINDOWS
+#undef args // Remove substitution.
+ va_end(_args);
+#endif // KMP_OS_WINDOWS
+ }
- }; // forever
+ // No errors, string has been formatted.
+ if (rc >= 0 && rc < free) {
+ buffer->used += rc;
+ break;
+ }; // if
- KMP_DEBUG_ASSERT( buffer->size > 0 );
- KMP_STR_BUF_INVARIANT( buffer );
+ // Error occurred, buffer is too small.
+ if (rc >= 0) {
+ // C99-conforming implementation of vsnprintf returns required buffer size
+ size = buffer->used + rc + 1;
+ } else {
+ // Older implementations just return -1. Double buffer size.
+ size = buffer->size * 2;
+ }; // if
-} // __kmp_str_buf_vprint
+ // Enlarge buffer.
+ __kmp_str_buf_reserve(buffer, size);
+ // And try again.
+ }; // forever
-void
-__kmp_str_buf_print(
- kmp_str_buf_t * buffer,
- char const * format,
- ...
-) {
-
- va_list args;
- va_start( args, format );
- __kmp_str_buf_vprint( buffer, format, args );
- va_end( args );
+ KMP_DEBUG_ASSERT(buffer->size > 0);
+ KMP_STR_BUF_INVARIANT(buffer);
+} // __kmp_str_buf_vprint
+void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
+ va_list args;
+ va_start(args, format);
+ __kmp_str_buf_vprint(buffer, format, args);
+ va_end(args);
} // __kmp_str_buf_print
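
The grow-and-retry loop in __kmp_str_buf_vprint above can be summarized in a
small standalone sketch (names are illustrative only; the runtime uses
__va_copy guarded by KMP_OS_WINDOWS, while this sketch uses the standard
va_copy): each attempt formats from a fresh copy of the va_list, and the C99
return value of vsnprintf is used to size the next attempt.

  #include <stdarg.h>
  #include <stdio.h>
  #include <stdlib.h>

  // Grow-and-retry formatting from a va_list. Each attempt formats from a
  // fresh va_copy so the original list stays usable on the next iteration.
  static char *vformat_alloc(const char *fmt, va_list args) {
    int size = 64;
    char *buf = (char *)malloc(size);
    if (buf == NULL)
      return NULL;
    for (;;) {
      va_list copy;
      va_copy(copy, args);
      int rc = vsnprintf(buf, size, fmt, copy);
      va_end(copy);
      if (rc >= 0 && rc < size)
        return buf;                 // formatted completely, done
      size = (rc >= 0) ? rc + 1     // C99 vsnprintf reports the needed size
                       : size * 2;  // older implementations just return -1
      char *tmp = (char *)realloc(buf, size);
      if (tmp == NULL) {
        free(buf);
        return NULL;
      }
      buf = tmp;
    }
  }

  // Variadic wrapper showing how the helper would be called.
  static char *format_alloc(const char *fmt, ...) {
    va_list args;
    va_start(args, fmt);
    char *s = vformat_alloc(fmt, args);
    va_end(args);
    return s;
  }
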
+/* The function prints the specified size to the buffer. The size is expressed
+   using the biggest possible unit, for example 1024 is printed as "1k". */
+void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) {
+ char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"};
+ int const units = sizeof(names) / sizeof(char const *);
+ int u = 0;
+ if (size > 0) {
+ while ((size % 1024 == 0) && (u + 1 < units)) {
+ size = size / 1024;
+ ++u;
+ }; // while
+ }; // if
-/*
- The function prints specified size to buffer. Size is expressed using biggest possible unit, for
- example 1024 is printed as "1k".
-*/
-
-void
-__kmp_str_buf_print_size(
- kmp_str_buf_t * buf,
- size_t size
-) {
-
- char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" };
- int const units = sizeof( names ) / sizeof( char const * );
- int u = 0;
- if ( size > 0 ) {
- while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) {
- size = size / 1024;
- ++ u;
- }; // while
- }; // if
-
- __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] );
-
+ __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]);
} // __kmp_str_buf_print_size
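
A standalone restatement of the unit reduction done above (the helper name is
hypothetical): the value is divided by 1024 as long as it stays exact, so only
sizes that are whole multiples of a unit get the short form.

  #include <stddef.h>
  #include <stdio.h>

  // Divide by 1024 while the value stays exact, then print the value with the
  // largest unit that still gives a whole number.
  static void print_size(char *out, size_t cap, size_t size) {
    static const char *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"};
    int u = 0;
    if (size > 0) {
      while ((size % 1024 == 0) && (u + 1 < 9)) {
        size /= 1024;
        ++u;
      }
    }
    snprintf(out, cap, "%zu%s", size, names[u]);
  }

  // print_size(buf, sizeof(buf), 1024)    -> "1k"
  // print_size(buf, sizeof(buf), 2097152) -> "2M"
  // print_size(buf, sizeof(buf), 1536)    -> "1536" (not an exact multiple)
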
-
-void
-__kmp_str_fname_init(
- kmp_str_fname_t * fname,
- char const * path
-) {
-
- fname->path = NULL;
- fname->dir = NULL;
- fname->base = NULL;
-
- if ( path != NULL ) {
- char * slash = NULL; // Pointer to the last character of dir.
- char * base = NULL; // Pointer to the beginning of basename.
- fname->path = __kmp_str_format( "%s", path );
- // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it
- // causes assertioon id debug heap, so I had to replace strdup with __kmp_str_format().
- if ( KMP_OS_WINDOWS ) {
- __kmp_str_replace( fname->path, '\\', '/' );
- }; // if
- fname->dir = __kmp_str_format( "%s", fname->path );
- slash = strrchr( fname->dir, '/' );
- if ( KMP_OS_WINDOWS && slash == NULL ) { // On Windows* OS, if slash not found,
- char first = TOLOWER( fname->dir[ 0 ] ); // look for drive.
- if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) {
- slash = & fname->dir[ 1 ];
- }; // if
- }; // if
- base = ( slash == NULL ? fname->dir : slash + 1 );
- fname->base = __kmp_str_format( "%s", base ); // Copy basename
- * base = 0; // and truncate dir.
- }; // if
+void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) {
+ fname->path = NULL;
+ fname->dir = NULL;
+ fname->base = NULL;
+
+ if (path != NULL) {
+ char *slash = NULL; // Pointer to the last character of dir.
+ char *base = NULL; // Pointer to the beginning of basename.
+ fname->path = __kmp_str_format("%s", path);
+ // Original code used strdup() function to copy a string, but on Windows* OS
+    // Intel(R) 64 it causes an assertion in the debug heap, so I had to
+    // replace strdup with __kmp_str_format().
+ if (KMP_OS_WINDOWS) {
+ __kmp_str_replace(fname->path, '\\', '/');
+ }; // if
+ fname->dir = __kmp_str_format("%s", fname->path);
+ slash = strrchr(fname->dir, '/');
+ if (KMP_OS_WINDOWS &&
+ slash == NULL) { // On Windows* OS, if slash not found,
+ char first = TOLOWER(fname->dir[0]); // look for drive.
+ if ('a' <= first && first <= 'z' && fname->dir[1] == ':') {
+ slash = &fname->dir[1];
+ }; // if
+ }; // if
+ base = (slash == NULL ? fname->dir : slash + 1);
+ fname->base = __kmp_str_format("%s", base); // Copy basename
+ *base = 0; // and truncate dir.
+ }; // if
} // kmp_str_fname_init
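
For example, given the Windows-style path "C:\lib\src\kmp_str.cpp", the code
above normalizes the separators and splits it into dir "C:/lib/src/" and base
"kmp_str.cpp". A minimal standalone sketch of the same split (illustrative
only, error handling omitted):

  #include <stdio.h>
  #include <string.h>

  int main(void) {
    char path[] = "C:\\lib\\src\\kmp_str.cpp";
    // Normalize backslashes to forward slashes, as done for KMP_OS_WINDOWS.
    for (char *p = strchr(path, '\\'); p; p = strchr(p + 1, '\\'))
      *p = '/';                      // path is now "C:/lib/src/kmp_str.cpp"
    char *slash = strrchr(path, '/');
    const char *base = slash ? slash + 1 : path;
    printf("base = %s\n", base);     // prints "kmp_str.cpp"
    if (slash)
      slash[1] = 0;                  // truncate: path now holds "C:/lib/src/"
    printf("dir  = %s\n", path);
    return 0;
  }
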
-
-void
-__kmp_str_fname_free(
- kmp_str_fname_t * fname
-) {
- __kmp_str_free( (char const **)( & fname->path ) );
- __kmp_str_free( (char const **)( & fname->dir ) );
- __kmp_str_free( (char const **)( & fname->base ) );
+void __kmp_str_fname_free(kmp_str_fname_t *fname) {
+ __kmp_str_free((char const **)(&fname->path));
+ __kmp_str_free((char const **)(&fname->dir));
+ __kmp_str_free((char const **)(&fname->base));
} // kmp_str_fname_free
+int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) {
+ int dir_match = 1;
+ int base_match = 1;
+
+ if (pattern != NULL) {
+ kmp_str_fname_t ptrn;
+ __kmp_str_fname_init(&ptrn, pattern);
+ dir_match = strcmp(ptrn.dir, "*/") == 0 ||
+ (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir));
+ base_match = strcmp(ptrn.base, "*") == 0 ||
+ (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base));
+ __kmp_str_fname_free(&ptrn);
+ }; // if
-int
-__kmp_str_fname_match(
- kmp_str_fname_t const * fname,
- char const * pattern
-) {
-
- int dir_match = 1;
- int base_match = 1;
-
- if ( pattern != NULL ) {
- kmp_str_fname_t ptrn;
- __kmp_str_fname_init( & ptrn, pattern );
- dir_match =
- strcmp( ptrn.dir, "*/" ) == 0
- ||
- ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) );
- base_match =
- strcmp( ptrn.base, "*" ) == 0
- ||
- ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) );
- __kmp_str_fname_free( & ptrn );
- }; // if
-
- return dir_match && base_match;
-
+ return dir_match && base_match;
} // __kmp_str_fname_match
+kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname) {
+ kmp_str_loc_t loc;
-kmp_str_loc_t
-__kmp_str_loc_init(
- char const * psource,
- int init_fname
-) {
-
- kmp_str_loc_t loc;
-
- loc._bulk = NULL;
- loc.file = NULL;
- loc.func = NULL;
- loc.line = 0;
- loc.col = 0;
-
- if ( psource != NULL ) {
-
- char * str = NULL;
- char * dummy = NULL;
- char * line = NULL;
- char * col = NULL;
-
- // Copy psource to keep it intact.
- loc._bulk = __kmp_str_format( "%s", psource );
-
- // Parse psource string: ";file;func;line;col;;"
- str = loc._bulk;
- __kmp_str_split( str, ';', & dummy, & str );
- __kmp_str_split( str, ';', & loc.file, & str );
- __kmp_str_split( str, ';', & loc.func, & str );
- __kmp_str_split( str, ';', & line, & str );
- __kmp_str_split( str, ';', & col, & str );
-
- // Convert line and col into numberic values.
- if ( line != NULL ) {
- loc.line = atoi( line );
- if ( loc.line < 0 ) {
- loc.line = 0;
- }; // if
- }; // if
- if ( col != NULL ) {
- loc.col = atoi( col );
- if ( loc.col < 0 ) {
- loc.col = 0;
- }; // if
- }; // if
-
+ loc._bulk = NULL;
+ loc.file = NULL;
+ loc.func = NULL;
+ loc.line = 0;
+ loc.col = 0;
+
+ if (psource != NULL) {
+ char *str = NULL;
+ char *dummy = NULL;
+ char *line = NULL;
+ char *col = NULL;
+
+ // Copy psource to keep it intact.
+ loc._bulk = __kmp_str_format("%s", psource);
+
+ // Parse psource string: ";file;func;line;col;;"
+ str = loc._bulk;
+ __kmp_str_split(str, ';', &dummy, &str);
+ __kmp_str_split(str, ';', &loc.file, &str);
+ __kmp_str_split(str, ';', &loc.func, &str);
+ __kmp_str_split(str, ';', &line, &str);
+ __kmp_str_split(str, ';', &col, &str);
+
+    // Convert line and col into numeric values.
+ if (line != NULL) {
+ loc.line = atoi(line);
+ if (loc.line < 0) {
+ loc.line = 0;
+ }; // if
+ }; // if
+ if (col != NULL) {
+ loc.col = atoi(col);
+ if (loc.col < 0) {
+ loc.col = 0;
+ }; // if
}; // if
- __kmp_str_fname_init( & loc.fname, init_fname ? loc.file : NULL );
+ }; // if
- return loc;
+ __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL);
+ return loc;
} // kmp_str_loc_init
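
For a psource string such as ";file.c;foo;42;7;;" the parse above yields
loc.file == "file.c", loc.func == "foo", loc.line == 42 and loc.col == 7. A
minimal standalone stand-in for the same split on ';' (illustrative only):

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  int main(void) {
    char bulk[] = ";file.c;foo;42;7;;";
    char *fields[5] = {0};
    char *p = bulk;
    for (int n = 0; n < 5 && p; ++n) {
      char *next = strchr(p, ';');
      if (next)
        *next++ = 0;    // terminate this field, move past the delimiter
      fields[n] = p;    // fields[0] is the empty leading token ("dummy")
      p = next;
    }
    printf("file=%s func=%s line=%d col=%d\n", fields[1], fields[2],
           atoi(fields[3]), atoi(fields[4]));
    return 0;
  }
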
-
-void
-__kmp_str_loc_free(
- kmp_str_loc_t * loc
-) {
- __kmp_str_fname_free( & loc->fname );
- __kmp_str_free((const char **) &(loc->_bulk));
- loc->file = NULL;
- loc->func = NULL;
+void __kmp_str_loc_free(kmp_str_loc_t *loc) {
+ __kmp_str_fname_free(&loc->fname);
+ __kmp_str_free((const char **)&(loc->_bulk));
+ loc->file = NULL;
+ loc->func = NULL;
} // kmp_str_loc_free
-
-
-/*
- This function is intended to compare file names. On Windows* OS file names are case-insensitive,
- so functions performs case-insensitive comparison. On Linux* OS it performs case-sensitive
- comparison.
- Note: The function returns *true* if strings are *equal*.
-*/
-
-int
-__kmp_str_eqf( // True, if strings are equal, false otherwise.
- char const * lhs, // First string.
- char const * rhs // Second string.
-) {
- int result;
- #if KMP_OS_WINDOWS
- result = ( _stricmp( lhs, rhs ) == 0 );
- #else
- result = ( strcmp( lhs, rhs ) == 0 );
- #endif
- return result;
+/* This function is intended to compare file names. On Windows* OS file names
+   are case-insensitive, so the function performs a case-insensitive
+   comparison. On Linux* OS it performs a case-sensitive comparison.
+   Note: The function returns *true* if strings are *equal*. */
+int __kmp_str_eqf( // True, if strings are equal, false otherwise.
+ char const *lhs, // First string.
+ char const *rhs // Second string.
+ ) {
+ int result;
+#if KMP_OS_WINDOWS
+ result = (_stricmp(lhs, rhs) == 0);
+#else
+ result = (strcmp(lhs, rhs) == 0);
+#endif
+ return result;
} // __kmp_str_eqf
-
-/*
- This function is like sprintf, but it *allocates* new buffer, which must be freed eventually by
- __kmp_str_free(). The function is very convenient for constructing strings, it successfully
- replaces strdup(), strcat(), it frees programmer from buffer allocations and helps to avoid
- buffer overflows. Examples:
-
- str = __kmp_str_format( "%s", orig ); // strdup(), do not care about buffer size.
- __kmp_str_free( & str );
- str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), do not care about buffer size.
- __kmp_str_free( & str );
- str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
- __kmp_str_free( & str );
-
- Performance note:
- This function allocates memory with malloc() calls, so do not call it from
- performance-critical code. In performance-critical code consider using kmp_str_buf_t
- instead, since it uses stack-allocated buffer for short strings.
-
- Why does this function use malloc()?
- 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There are no
- reasons in using __kmp_allocate() for strings due to extra overhead while cache-aligned
- memory is not necessary.
- 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure.
- We need to perform string operations during library startup (for example, in
- __kmp_register_library_startup()) when no thread structures are allocated yet.
- So standard malloc() is the only available option.
+/* This function is like sprintf, but it *allocates* a new buffer, which must
+   be freed eventually by __kmp_str_free(). The function is very convenient
+   for constructing strings: it replaces strdup() and strcat(), frees the
+   programmer from buffer allocations, and helps to avoid buffer overflows.
+ Examples:
+
+ str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size
+ __kmp_str_free( & str );
+ str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care
+ // about buffer size.
+ __kmp_str_free( & str );
+ str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
+ __kmp_str_free( & str );
+
+ Performance note:
+ This function allocates memory with malloc() calls, so do not call it from
+ performance-critical code. In performance-critical code consider using
+ kmp_str_buf_t instead, since it uses stack-allocated buffer for short
+ strings.
+
+ Why does this function use malloc()?
+ 1. __kmp_allocate() returns cache-aligned memory allocated with malloc().
+      There is no reason to use __kmp_allocate() for strings: it adds extra
+      overhead, and cache-aligned memory is not necessary.
+   2. __kmp_thread_malloc() cannot be used because it requires a pointer to
+      the thread structure. We need to perform string operations during
+      library startup (for example, in __kmp_register_library_startup()) when
+      no thread structures are allocated yet.
+ So standard malloc() is the only available option.
*/
-char *
-__kmp_str_format( // Allocated string.
- char const * format, // Format string.
- ... // Other parameters.
-) {
-
- va_list args;
- int size = 512;
- char * buffer = NULL;
- int rc;
-
- // Allocate buffer.
- buffer = (char *) KMP_INTERNAL_MALLOC( size );
- if ( buffer == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
-
- for ( ; ; ) {
-
- // Try to format string.
- va_start( args, format );
- rc = KMP_VSNPRINTF( buffer, size, format, args );
- va_end( args );
-
- // No errors, string has been formatted.
- if ( rc >= 0 && rc < size ) {
- break;
- }; // if
-
- // Error occurred, buffer is too small.
- if ( rc >= 0 ) {
- // C99-conforming implementation of vsnprintf returns required buffer size.
- size = rc + 1;
- } else {
- // Older implementations just return -1.
- size = size * 2;
- }; // if
-
- // Enlarge buffer and try again.
- buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size );
- if ( buffer == NULL ) {
- KMP_FATAL( MemoryAllocFailed );
- }; // if
-
- }; // forever
-
- return buffer;
+char *__kmp_str_format( // Allocated string.
+ char const *format, // Format string.
+ ... // Other parameters.
+ ) {
+ va_list args;
+ int size = 512;
+ char *buffer = NULL;
+ int rc;
+
+ // Allocate buffer.
+ buffer = (char *)KMP_INTERNAL_MALLOC(size);
+ if (buffer == NULL) {
+ KMP_FATAL(MemoryAllocFailed);
+ }; // if
+
+ for (;;) {
+ // Try to format string.
+ va_start(args, format);
+ rc = KMP_VSNPRINTF(buffer, size, format, args);
+ va_end(args);
+
+ // No errors, string has been formatted.
+ if (rc >= 0 && rc < size) {
+ break;
+ }; // if
+
+ // Error occurred, buffer is too small.
+ if (rc >= 0) {
+ // C99-conforming implementation of vsnprintf returns required buffer
+ // size.
+ size = rc + 1;
+ } else {
+ // Older implementations just return -1.
+ size = size * 2;
+ }; // if
+
+ // Enlarge buffer and try again.
+ buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size);
+ if (buffer == NULL) {
+ KMP_FATAL(MemoryAllocFailed);
+ }; // if
+ }; // forever
+ return buffer;
} // func __kmp_str_format
-
-void
-__kmp_str_free(
- char const * * str
-) {
- KMP_DEBUG_ASSERT( str != NULL );
- KMP_INTERNAL_FREE( (void *) * str );
- * str = NULL;
+void __kmp_str_free(char const **str) {
+ KMP_DEBUG_ASSERT(str != NULL);
+ KMP_INTERNAL_FREE((void *)*str);
+ *str = NULL;
} // func __kmp_str_free
-
-/* If len is zero, returns true iff target and data have exact case-insensitive match.
- If len is negative, returns true iff target is a case-insensitive substring of data.
- If len is positive, returns true iff target is a case-insensitive substring of data or
- vice versa, and neither is shorter than len.
-*/
-int
-__kmp_str_match(
- char const * target,
- int len,
- char const * data
-) {
- int i;
- if ( target == NULL || data == NULL ) {
- return FALSE;
- }; // if
- for ( i = 0; target[i] && data[i]; ++ i ) {
- if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) {
- return FALSE;
- }; // if
- }; // for i
- return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! data[i] ) ) );
+/* If len is zero, returns true iff target and data have an exact
+   case-insensitive match. If len is negative, returns true iff target is a
+   case-insensitive prefix of data. If len is positive, returns true iff one
+   string is a case-insensitive prefix of the other and neither is shorter
+   than len. */
+int __kmp_str_match(char const *target, int len, char const *data) {
+ int i;
+ if (target == NULL || data == NULL) {
+ return FALSE;
+ }; // if
+ for (i = 0; target[i] && data[i]; ++i) {
+ if (TOLOWER(target[i]) != TOLOWER(data[i])) {
+ return FALSE;
+ }; // if
+ }; // for i
+ return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
} // __kmp_str_match
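
The effect of the len parameter is easiest to see with a few concrete calls.
The sketch below is a standalone copy of the matching rule above (the name
prefix_match is hypothetical), with the expected results asserted:

  #include <assert.h>
  #include <ctype.h>

  // Stand-alone copy of the matching rule above, for experimentation.
  static int prefix_match(const char *target, int len, const char *data) {
    int i;
    if (target == NULL || data == NULL)
      return 0;
    for (i = 0; target[i] && data[i]; ++i)
      if (tolower((unsigned char)target[i]) != tolower((unsigned char)data[i]))
        return 0;
    return (len > 0) ? i >= len : (!target[i] && (len || !data[i]));
  }

  int main(void) {
    assert(prefix_match("disabled", 0, "DISABLED") == 1); // exact, any case
    assert(prefix_match("disabled", 0, "disable") == 0);  // len 0: must be equal
    assert(prefix_match("false", 1, "F") == 1);  // 1-char abbreviation accepted
    assert(prefix_match("off", 2, "o") == 0);    // common prefix shorter than 2
    assert(prefix_match("off", 2, "off") == 1);
    assert(prefix_match("tasks", -1, "taskset") == 1); // target prefixes data
    assert(prefix_match("tasks", -1, "task") == 0);    // data is too short
    return 0;
  }
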
-
-int
-__kmp_str_match_false( char const * data ) {
- int result =
- __kmp_str_match( "false", 1, data ) ||
- __kmp_str_match( "off", 2, data ) ||
- __kmp_str_match( "0", 1, data ) ||
- __kmp_str_match( ".false.", 2, data ) ||
- __kmp_str_match( ".f.", 2, data ) ||
- __kmp_str_match( "no", 1, data );
- return result;
+int __kmp_str_match_false(char const *data) {
+ int result =
+ __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||
+ __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) ||
+ __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data);
+ return result;
} // __kmp_str_match_false
-
-int
-__kmp_str_match_true( char const * data ) {
- int result =
- __kmp_str_match( "true", 1, data ) ||
- __kmp_str_match( "on", 2, data ) ||
- __kmp_str_match( "1", 1, data ) ||
- __kmp_str_match( ".true.", 2, data ) ||
- __kmp_str_match( ".t.", 2, data ) ||
- __kmp_str_match( "yes", 1, data );
- return result;
+int __kmp_str_match_true(char const *data) {
+ int result =
+ __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) ||
+ __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) ||
+ __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 1, data);
+ return result;
} // __kmp_str_match_true
-void
-__kmp_str_replace(
- char * str,
- char search_for,
- char replace_with
-) {
-
- char * found = NULL;
-
- found = strchr( str, search_for );
- while ( found ) {
- * found = replace_with;
- found = strchr( found + 1, search_for );
- }; // while
+void __kmp_str_replace(char *str, char search_for, char replace_with) {
+ char *found = NULL;
+ found = strchr(str, search_for);
+ while (found) {
+ *found = replace_with;
+ found = strchr(found + 1, search_for);
+ }; // while
} // __kmp_str_replace
-
-void
-__kmp_str_split(
- char * str, // I: String to split.
- char delim, // I: Character to split on.
- char ** head, // O: Pointer to head (may be NULL).
- char ** tail // O: Pointer to tail (may be NULL).
-) {
- char * h = str;
- char * t = NULL;
- if ( str != NULL ) {
- char * ptr = strchr( str, delim );
- if ( ptr != NULL ) {
- * ptr = 0;
- t = ptr + 1;
- }; // if
- }; // if
- if ( head != NULL ) {
- * head = h;
- }; // if
- if ( tail != NULL ) {
- * tail = t;
- }; // if
+void __kmp_str_split(char *str, // I: String to split.
+ char delim, // I: Character to split on.
+ char **head, // O: Pointer to head (may be NULL).
+ char **tail // O: Pointer to tail (may be NULL).
+ ) {
+ char *h = str;
+ char *t = NULL;
+ if (str != NULL) {
+ char *ptr = strchr(str, delim);
+ if (ptr != NULL) {
+ *ptr = 0;
+ t = ptr + 1;
+ }; // if
+ }; // if
+ if (head != NULL) {
+ *head = h;
+ }; // if
+ if (tail != NULL) {
+ *tail = t;
+ }; // if
} // __kmp_str_split
-/*
- strtok_r() is not available on Windows* OS. This function reimplements strtok_r().
-*/
-char *
-__kmp_str_token(
- char * str, // String to split into tokens. Note: String *is* modified!
- char const * delim, // Delimiters.
- char ** buf // Internal buffer.
-) {
- char * token = NULL;
- #if KMP_OS_WINDOWS
- // On Windows* OS there is no strtok_r() function. Let us implement it.
- if ( str != NULL ) {
- * buf = str; // First call, initialize buf.
- }; // if
- * buf += strspn( * buf, delim ); // Skip leading delimiters.
- if ( ** buf != 0 ) { // Rest of the string is not yet empty.
- token = * buf; // Use it as result.
- * buf += strcspn( * buf, delim ); // Skip non-delimiters.
- if ( ** buf != 0 ) { // Rest of the string is not yet empty.
- ** buf = 0; // Terminate token here.
- * buf += 1; // Advance buf to start with the next token next time.
- }; // if
- }; // if
- #else
- // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
- token = strtok_r( str, delim, buf );
- #endif
- return token;
+/* strtok_r() is not available on Windows* OS. This function reimplements
+ strtok_r(). */
+char *__kmp_str_token(
+ char *str, // String to split into tokens. Note: String *is* modified!
+ char const *delim, // Delimiters.
+ char **buf // Internal buffer.
+ ) {
+ char *token = NULL;
+#if KMP_OS_WINDOWS
+ // On Windows* OS there is no strtok_r() function. Let us implement it.
+ if (str != NULL) {
+ *buf = str; // First call, initialize buf.
+ }; // if
+ *buf += strspn(*buf, delim); // Skip leading delimiters.
+ if (**buf != 0) { // Rest of the string is not yet empty.
+ token = *buf; // Use it as result.
+ *buf += strcspn(*buf, delim); // Skip non-delimiters.
+ if (**buf != 0) { // Rest of the string is not yet empty.
+ **buf = 0; // Terminate token here.
+ *buf += 1; // Advance buf to start with the next token next time.
+ }; // if
+ }; // if
+#else
+ // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
+ token = strtok_r(str, delim, buf);
+#endif
+ return token;
}; // __kmp_str_token
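
Typical use of a strtok_r-style tokenizer, for reference (this sketch uses the
POSIX strtok_r directly; on Windows __kmp_str_token above provides the
equivalent behaviour): the first call passes the string, later calls pass
NULL, and the third argument carries the position between calls.

  #include <stdio.h>
  #include <string.h>

  int main(void) {
    char line[] = "4,8 , 16";
    char *saveptr = NULL;
    for (char *tok = strtok_r(line, ", ", &saveptr); tok != NULL;
         tok = strtok_r(NULL, ", ", &saveptr))
      printf("token: %s\n", tok);  // prints 4, 8 and 16 on separate lines
    return 0;
  }
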
+int __kmp_str_to_int(char const *str, char sentinel) {
+ int result, factor;
+ char const *t;
+
+ result = 0;
+
+ for (t = str; *t != '\0'; ++t) {
+ if (*t < '0' || *t > '9')
+ break;
+ result = (result * 10) + (*t - '0');
+ }
+
+ switch (*t) {
+ case '\0': /* the current default for no suffix is bytes */
+ factor = 1;
+ break;
+ case 'b':
+ case 'B': /* bytes */
+ ++t;
+ factor = 1;
+ break;
+ case 'k':
+ case 'K': /* kilo-bytes */
+ ++t;
+ factor = 1024;
+ break;
+ case 'm':
+ case 'M': /* mega-bytes */
+ ++t;
+ factor = (1024 * 1024);
+ break;
+ default:
+ if (*t != sentinel)
+ return (-1);
+ t = "";
+ factor = 1;
+ }
+
+ if (result > (INT_MAX / factor))
+ result = INT_MAX;
+ else
+ result *= factor;
-int
-__kmp_str_to_int(
- char const * str,
- char sentinel
-) {
- int result, factor;
- char const * t;
-
- result = 0;
-
- for (t = str; *t != '\0'; ++t) {
- if (*t < '0' || *t > '9')
- break;
- result = (result * 10) + (*t - '0');
- }
-
- switch (*t) {
- case '\0': /* the current default for no suffix is bytes */
- factor = 1;
- break;
- case 'b': case 'B': /* bytes */
- ++t;
- factor = 1;
- break;
- case 'k': case 'K': /* kilo-bytes */
- ++t;
- factor = 1024;
- break;
- case 'm': case 'M': /* mega-bytes */
- ++t;
- factor = (1024 * 1024);
- break;
- default:
- if(*t != sentinel)
- return (-1);
- t = "";
- factor = 1;
- }
-
- if (result > (INT_MAX / factor))
- result = INT_MAX;
- else
- result *= factor;
-
- return (*t != 0 ? 0 : result);
-
+ return (*t != 0 ? 0 : result);
} // __kmp_str_to_int
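
A standalone restatement of the suffix handling above (the name to_int is
hypothetical), with the expected results asserted. Note that trailing
characters after a recognized unit make the function return 0, while an
unrecognized character that is not the sentinel makes it return -1:

  #include <assert.h>
  #include <limits.h>

  // Plain digits, optional b/B, k/K or m/M suffix, or a caller-supplied
  // sentinel character terminating the number.
  static int to_int(const char *s, char sentinel) {
    int result = 0, factor;
    const char *t;
    for (t = s; *t >= '0' && *t <= '9'; ++t)
      result = result * 10 + (*t - '0');
    switch (*t) {
    case '\0':          factor = 1; break;
    case 'b': case 'B': ++t; factor = 1; break;
    case 'k': case 'K': ++t; factor = 1024; break;
    case 'm': case 'M': ++t; factor = 1024 * 1024; break;
    default:
      if (*t != sentinel)
        return -1;      // unknown suffix character
      t = "";
      factor = 1;
    }
    result = (result > INT_MAX / factor) ? INT_MAX : result * factor;
    return (*t != 0 ? 0 : result);  // trailing characters give 0
  }

  int main(void) {
    assert(to_int("512", ',') == 512);
    assert(to_int("4k", ',') == 4096);
    assert(to_int("2M", ',') == 2 * 1024 * 1024);
    assert(to_int("8x", ',') == -1);  // 'x' is neither a unit nor the sentinel
    assert(to_int("16,", ',') == 16); // the sentinel terminates the number
    assert(to_int("4kb", ',') == 0);  // extra characters after the unit
    return 0;
  }
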
-
-/*
- The routine parses input string. It is expected it is a unsigned integer with optional unit.
- Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ..., "yb"
- or "y" for yottabytes. :-) Unit name is case-insensitive. The routine returns 0 if everything is
- ok, or error code: -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed
- value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown unit *size is set
- to zero.
-*/
-void
-__kmp_str_to_size( // R: Error code.
- char const * str, // I: String of characters, unsigned number and unit ("b", "kb", etc).
- size_t * out, // O: Parsed number.
- size_t dfactor, // I: The factor if none of the letters specified.
- char const * * error // O: Null if everything is ok, error message otherwise.
-) {
-
- size_t value = 0;
- size_t factor = 0;
- int overflow = 0;
- int i = 0;
- int digit;
-
-
- KMP_DEBUG_ASSERT( str != NULL );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t') {
- ++ i;
- }; // while
-
- // Parse number.
- if ( str[ i ] < '0' || str[ i ] > '9' ) {
- * error = KMP_I18N_STR( NotANumber );
- return;
- }; // if
- do {
- digit = str[ i ] - '0';
- overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
- value = ( value * 10 ) + digit;
- ++ i;
- } while ( str[ i ] >= '0' && str[ i ] <= '9' );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- // Parse unit.
- #define _case( ch, exp ) \
- case ch : \
- case ch - ( 'a' - 'A' ) : { \
- size_t shift = (exp) * 10; \
- ++ i; \
- if ( shift < sizeof( size_t ) * 8 ) { \
- factor = (size_t)( 1 ) << shift; \
- } else { \
- overflow = 1; \
- }; \
- } break;
- switch ( str[ i ] ) {
- _case( 'k', 1 ); // Kilo
- _case( 'm', 2 ); // Mega
- _case( 'g', 3 ); // Giga
- _case( 't', 4 ); // Tera
- _case( 'p', 5 ); // Peta
- _case( 'e', 6 ); // Exa
- _case( 'z', 7 ); // Zetta
- _case( 'y', 8 ); // Yotta
- // Oops. No more units...
- }; // switch
- #undef _case
- if ( str[ i ] == 'b' || str[ i ] == 'B' ) { // Skip optional "b".
- if ( factor == 0 ) {
- factor = 1;
- }
- ++ i;
- }; // if
- if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit
- * error = KMP_I18N_STR( BadUnit );
- return;
- }; // if
-
- if ( factor == 0 ) {
- factor = dfactor;
+/* The routine parses the input string. It is expected to be an unsigned
+   integer with an optional unit. Units are: "b" for bytes, "kb" or just "k"
+   for kilobytes, "mb" or "m" for megabytes, ..., "yb" or "y" for yottabytes.
+   :-) Unit names are case-insensitive. On success *error is set to NULL and
+   *out to the parsed value; on failure *error points to an error message. In
+   case of overflow *out is set to KMP_SIZE_T_MAX. */
+void __kmp_str_to_size( // R: Error code.
+ char const *str, // I: String of characters, unsigned number and unit ("b",
+ // "kb", etc).
+ size_t *out, // O: Parsed number.
+ size_t dfactor, // I: The factor if none of the letters specified.
+ char const **error // O: Null if everything is ok, error message otherwise.
+ ) {
+
+ size_t value = 0;
+ size_t factor = 0;
+ int overflow = 0;
+ int i = 0;
+ int digit;
+
+ KMP_DEBUG_ASSERT(str != NULL);
+
+ // Skip spaces.
+ while (str[i] == ' ' || str[i] == '\t') {
+ ++i;
+ }; // while
+
+ // Parse number.
+ if (str[i] < '0' || str[i] > '9') {
+ *error = KMP_I18N_STR(NotANumber);
+ return;
+ }; // if
+ do {
+ digit = str[i] - '0';
+ overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
+ value = (value * 10) + digit;
+ ++i;
+ } while (str[i] >= '0' && str[i] <= '9');
+
+ // Skip spaces.
+ while (str[i] == ' ' || str[i] == '\t') {
+ ++i;
+ }; // while
+
+// Parse unit.
+#define _case(ch, exp) \
+ case ch: \
+ case ch - ('a' - 'A'): { \
+ size_t shift = (exp)*10; \
+ ++i; \
+ if (shift < sizeof(size_t) * 8) { \
+ factor = (size_t)(1) << shift; \
+ } else { \
+ overflow = 1; \
+ }; \
+ } break;
+ switch (str[i]) {
+ _case('k', 1); // Kilo
+ _case('m', 2); // Mega
+ _case('g', 3); // Giga
+ _case('t', 4); // Tera
+ _case('p', 5); // Peta
+ _case('e', 6); // Exa
+ _case('z', 7); // Zetta
+ _case('y', 8); // Yotta
+ // Oops. No more units...
+ }; // switch
+#undef _case
+ if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b".
+ if (factor == 0) {
+ factor = 1;
}
+ ++i;
+ }; // if
+ if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit
+ *error = KMP_I18N_STR(BadUnit);
+ return;
+ }; // if
+
+ if (factor == 0) {
+ factor = dfactor;
+ }
+
+ // Apply factor.
+ overflow = overflow || (value > (KMP_SIZE_T_MAX / factor));
+ value *= factor;
+
+ // Skip spaces.
+ while (str[i] == ' ' || str[i] == '\t') {
+ ++i;
+ }; // while
+
+ if (str[i] != 0) {
+ *error = KMP_I18N_STR(IllegalCharacters);
+ return;
+ }; // if
+
+ if (overflow) {
+ *error = KMP_I18N_STR(ValueTooLarge);
+ *out = KMP_SIZE_T_MAX;
+ return;
+ }; // if
- // Apply factor.
- overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) );
- value *= factor;
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- if ( str[ i ] != 0 ) {
- * error = KMP_I18N_STR( IllegalCharacters );
- return;
- }; // if
-
- if ( overflow ) {
- * error = KMP_I18N_STR( ValueTooLarge );
- * out = KMP_SIZE_T_MAX;
- return;
- }; // if
-
- * error = NULL;
- * out = value;
-
+ *error = NULL;
+ *out = value;
} // __kmp_str_to_size
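
The _case macro above derives each unit's factor from its exponent: shift =
exp * 10 and factor = 1 << shift, and any shift that does not fit in size_t is
treated as overflow. A standalone restatement (the helper name is
hypothetical):

  #include <assert.h>
  #include <stddef.h>

  // Map a unit letter ('k' .. 'y', either case) to its factor, or flag
  // overflow if the corresponding power of two does not fit in size_t.
  static size_t unit_factor(char unit, int *overflow) {
    const char *units = "kmgtpezy";  // kilo ... yotta
    for (int exp = 1; exp <= 8; ++exp) {
      if (unit == units[exp - 1] || unit == units[exp - 1] - ('a' - 'A')) {
        size_t shift = (size_t)exp * 10;
        if (shift < sizeof(size_t) * 8)
          return (size_t)1 << shift;
        *overflow = 1;
        return 0;
      }
    }
    return 0;  // no unit letter present
  }

  int main(void) {
    int overflow = 0;
    assert(unit_factor('k', &overflow) == 1024);
    assert(unit_factor('M', &overflow) == 1024 * 1024);
    assert(unit_factor('g', &overflow) == (size_t)1 << 30);
    // "z" (2^70) and "y" (2^80) do not fit in size_t and count as overflow:
    assert(unit_factor('z', &overflow) == 0 && overflow == 1);
    return 0;
  }
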
+void __kmp_str_to_uint( // R: Error code.
+ char const *str, // I: String of characters, unsigned number.
+ kmp_uint64 *out, // O: Parsed number.
+ char const **error // O: Null if everything is ok, error message otherwise.
+ ) {
+ size_t value = 0;
+ int overflow = 0;
+ int i = 0;
+ int digit;
+
+ KMP_DEBUG_ASSERT(str != NULL);
+
+ // Skip spaces.
+ while (str[i] == ' ' || str[i] == '\t') {
+ ++i;
+ }; // while
+
+ // Parse number.
+ if (str[i] < '0' || str[i] > '9') {
+ *error = KMP_I18N_STR(NotANumber);
+ return;
+ }; // if
+ do {
+ digit = str[i] - '0';
+ overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
+ value = (value * 10) + digit;
+ ++i;
+ } while (str[i] >= '0' && str[i] <= '9');
+
+ // Skip spaces.
+ while (str[i] == ' ' || str[i] == '\t') {
+ ++i;
+ }; // while
+
+ if (str[i] != 0) {
+ *error = KMP_I18N_STR(IllegalCharacters);
+ return;
+ }; // if
+
+ if (overflow) {
+ *error = KMP_I18N_STR(ValueTooLarge);
+ *out = (kmp_uint64)-1;
+ return;
+ }; // if
-void
-__kmp_str_to_uint( // R: Error code.
- char const * str, // I: String of characters, unsigned number.
- kmp_uint64 * out, // O: Parsed number.
- char const * * error // O: Null if everything is ok, error message otherwise.
-) {
-
- size_t value = 0;
- int overflow = 0;
- int i = 0;
- int digit;
-
-
- KMP_DEBUG_ASSERT( str != NULL );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- // Parse number.
- if ( str[ i ] < '0' || str[ i ] > '9' ) {
- * error = KMP_I18N_STR( NotANumber );
- return;
- }; // if
- do {
- digit = str[ i ] - '0';
- overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
- value = ( value * 10 ) + digit;
- ++ i;
- } while ( str[ i ] >= '0' && str[ i ] <= '9' );
-
- // Skip spaces.
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
- ++ i;
- }; // while
-
- if ( str[ i ] != 0 ) {
- * error = KMP_I18N_STR( IllegalCharacters );
- return;
- }; // if
-
- if ( overflow ) {
- * error = KMP_I18N_STR( ValueTooLarge );
- * out = (kmp_uint64) -1;
- return;
- }; // if
-
- * error = NULL;
- * out = value;
-
+ *error = NULL;
+ *out = value;
} // __kmp_str_to_unit
-
-
// end of file //
Modified: openmp/trunk/runtime/src/kmp_str.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_str.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_str.h (original)
+++ openmp/trunk/runtime/src/kmp_str.h Fri May 12 13:01:32 2017
@@ -16,104 +16,112 @@
#ifndef KMP_STR_H
#define KMP_STR_H
-#include <string.h>
#include <stdarg.h>
+#include <string.h>
#include "kmp_os.h"
#ifdef __cplusplus
- extern "C" {
+extern "C" {
#endif // __cplusplus
#if KMP_OS_WINDOWS
-# define strdup _strdup
+#define strdup _strdup
#endif
/* some macros to replace ctype.h functions */
-#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
+#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
struct kmp_str_buf {
- char * str; // Pointer to buffer content, read only.
- unsigned int size; // Do not change this field!
- int used; // Number of characters printed to buffer, read only.
- char bulk[ 512 ]; // Do not use this field!
+ char *str; // Pointer to buffer content, read only.
+ unsigned int size; // Do not change this field!
+ int used; // Number of characters printed to buffer, read only.
+ char bulk[512]; // Do not use this field!
}; // struct kmp_str_buf
-typedef struct kmp_str_buf kmp_str_buf_t;
-
-#define __kmp_str_buf_init( b ) { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; }
-
-void __kmp_str_buf_clear( kmp_str_buf_t * buffer );
-void __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size );
-void __kmp_str_buf_detach( kmp_str_buf_t * buffer );
-void __kmp_str_buf_free( kmp_str_buf_t * buffer );
-void __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len );
-void __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args );
-void __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... );
-void __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size );
-
-/*
- File name parser. Usage:
-
- kmp_str_fname_t fname = __kmp_str_fname_init( path );
- // Use fname.path (copy of original path ), fname.dir, fname.base.
- // Note fname.dir concatenated with fname.base gives exact copy of path.
- __kmp_str_fname_free( & fname );
+typedef struct kmp_str_buf kmp_str_buf_t;
+#define __kmp_str_buf_init(b) \
+ { \
+ (b)->str = (b)->bulk; \
+ (b)->size = sizeof((b)->bulk); \
+ (b)->used = 0; \
+ (b)->bulk[0] = 0; \
+ }
+
+void __kmp_str_buf_clear(kmp_str_buf_t *buffer);
+void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size);
+void __kmp_str_buf_detach(kmp_str_buf_t *buffer);
+void __kmp_str_buf_free(kmp_str_buf_t *buffer);
+void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len);
+void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+ va_list args);
+void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
+void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size);
+
+/* File name parser.
+ Usage:
+
+ kmp_str_fname_t fname = __kmp_str_fname_init( path );
+ // Use fname.path (copy of original path ), fname.dir, fname.base.
+ // Note fname.dir concatenated with fname.base gives exact copy of path.
+ __kmp_str_fname_free( & fname );
*/
struct kmp_str_fname {
- char * path;
- char * dir;
- char * base;
+ char *path;
+ char *dir;
+ char *base;
}; // struct kmp_str_fname
typedef struct kmp_str_fname kmp_str_fname_t;
-void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path );
-void __kmp_str_fname_free( kmp_str_fname_t * fname );
-// Compares file name with specified patern. If pattern is NULL, any fname matched.
-int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern );
-
-/*
- The compiler provides source locations in string form ";file;func;line;col;;". It not not
- convenient for manupulation. These structure keeps source location in more convenient form.
- Usage:
-
- kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
- // use loc.file, loc.func, loc.line, loc.col.
- // loc.fname is available if the second argument of __kmp_str_loc_init is true.
- __kmp_str_loc_free( & loc );
+void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path);
+void __kmp_str_fname_free(kmp_str_fname_t *fname);
+// Compares file name with the specified pattern. If pattern is NULL, any
+// fname matches.
+int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern);
+
+/* The compiler provides source locations in string form
+ ";file;func;line;col;;". It is not convenient for manupulation. This
+ structure keeps source location in more convenient form.
+ Usage:
+
+ kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
+ // use loc.file, loc.func, loc.line, loc.col.
+ // loc.fname is available if second argument of __kmp_str_loc_init is true.
+ __kmp_str_loc_free( & loc );
- If psource is NULL or does not follow format above, file and/or func may be NULL pointers.
+ If psource is NULL or does not follow format above, file and/or func may be
+ NULL pointers.
*/
struct kmp_str_loc {
- char * _bulk; // Do not use thid field.
- kmp_str_fname_t fname; // Will be initialized if init_fname is true.
- char * file;
- char * func;
- int line;
- int col;
+  char *_bulk; // Do not use this field.
+ kmp_str_fname_t fname; // Will be initialized if init_fname is true.
+ char *file;
+ char *func;
+ int line;
+ int col;
}; // struct kmp_str_loc
typedef struct kmp_str_loc kmp_str_loc_t;
-kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname );
-void __kmp_str_loc_free( kmp_str_loc_t * loc );
-
-int __kmp_str_eqf( char const * lhs, char const * rhs );
-char * __kmp_str_format( char const * format, ... );
-void __kmp_str_free( char const * * str );
-int __kmp_str_match( char const * target, int len, char const * data );
-int __kmp_str_match_false( char const * data );
-int __kmp_str_match_true( char const * data );
-void __kmp_str_replace( char * str, char search_for, char replace_with );
-void __kmp_str_split( char * str, char delim, char ** head, char ** tail );
-char * __kmp_str_token( char * str, char const * delim, char ** buf );
-int __kmp_str_to_int( char const * str, char sentinel );
+kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname);
+void __kmp_str_loc_free(kmp_str_loc_t *loc);
-void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error );
-void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error );
+int __kmp_str_eqf(char const *lhs, char const *rhs);
+char *__kmp_str_format(char const *format, ...);
+void __kmp_str_free(char const **str);
+int __kmp_str_match(char const *target, int len, char const *data);
+int __kmp_str_match_false(char const *data);
+int __kmp_str_match_true(char const *data);
+void __kmp_str_replace(char *str, char search_for, char replace_with);
+void __kmp_str_split(char *str, char delim, char **head, char **tail);
+char *__kmp_str_token(char *str, char const *delim, char **buf);
+int __kmp_str_to_int(char const *str, char sentinel);
+
+void __kmp_str_to_size(char const *str, size_t *out, size_t dfactor,
+ char const **error);
+void __kmp_str_to_uint(char const *str, kmp_uint64 *out, char const **error);
#ifdef __cplusplus
- } // extern "C"
+} // extern "C"
#endif // __cplusplus
#endif // KMP_STR_H
// end of file //
-
Modified: openmp/trunk/runtime/src/kmp_stub.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stub.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stub.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stub.cpp Fri May 12 13:01:32 2017
@@ -13,258 +13,304 @@
//===----------------------------------------------------------------------===//
-#include <stdlib.h>
-#include <limits.h>
#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
-#include "omp.h" // Function renamings.
-#include "kmp.h" // KMP_DEFAULT_STKSIZE
+#include "kmp.h" // KMP_DEFAULT_STKSIZE
#include "kmp_stub.h"
+#include "omp.h" // Function renamings.
#if KMP_OS_WINDOWS
- #include <windows.h>
+#include <windows.h>
#else
- #include <sys/time.h>
+#include <sys/time.h>
#endif
// Moved from omp.h
-#define omp_set_max_active_levels ompc_set_max_active_levels
-#define omp_set_schedule ompc_set_schedule
-#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
-#define omp_get_team_size ompc_get_team_size
-
-#define omp_set_num_threads ompc_set_num_threads
-#define omp_set_dynamic ompc_set_dynamic
-#define omp_set_nested ompc_set_nested
-#define kmp_set_stacksize kmpc_set_stacksize
-#define kmp_set_stacksize_s kmpc_set_stacksize_s
-#define kmp_set_blocktime kmpc_set_blocktime
-#define kmp_set_library kmpc_set_library
-#define kmp_set_defaults kmpc_set_defaults
-#define kmp_set_disp_num_buffers kmpc_set_disp_num_buffers
-#define kmp_malloc kmpc_malloc
-#define kmp_aligned_malloc kmpc_aligned_malloc
-#define kmp_calloc kmpc_calloc
-#define kmp_realloc kmpc_realloc
-#define kmp_free kmpc_free
+#define omp_set_max_active_levels ompc_set_max_active_levels
+#define omp_set_schedule ompc_set_schedule
+#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
+#define omp_get_team_size ompc_get_team_size
+
+#define omp_set_num_threads ompc_set_num_threads
+#define omp_set_dynamic ompc_set_dynamic
+#define omp_set_nested ompc_set_nested
+#define kmp_set_stacksize kmpc_set_stacksize
+#define kmp_set_stacksize_s kmpc_set_stacksize_s
+#define kmp_set_blocktime kmpc_set_blocktime
+#define kmp_set_library kmpc_set_library
+#define kmp_set_defaults kmpc_set_defaults
+#define kmp_set_disp_num_buffers kmpc_set_disp_num_buffers
+#define kmp_malloc kmpc_malloc
+#define kmp_aligned_malloc kmpc_aligned_malloc
+#define kmp_calloc kmpc_calloc
+#define kmp_realloc kmpc_realloc
+#define kmp_free kmpc_free
static double frequency = 0.0;
// Helper functions.
static size_t __kmps_init() {
- static int initialized = 0;
- static size_t dummy = 0;
- if ( ! initialized ) {
-
- // TODO: Analyze KMP_VERSION environment variable, print
- // __kmp_version_copyright and __kmp_version_build_time.
- // WARNING: Do not use "fprintf( stderr, ... )" because it will cause
- // unresolved "__iob" symbol (see C70080). We need to extract
- // __kmp_printf() stuff from kmp_runtime.cpp and use it.
-
- // Trick with dummy variable forces linker to keep __kmp_version_copyright
- // and __kmp_version_build_time strings in executable file (in case of
- // static linkage). When KMP_VERSION analysis is implemented, dummy
- // variable should be deleted, function should return void.
- dummy = __kmp_version_copyright - __kmp_version_build_time;
-
- #if KMP_OS_WINDOWS
- LARGE_INTEGER freq;
- BOOL status = QueryPerformanceFrequency( & freq );
- if ( status ) {
- frequency = double( freq.QuadPart );
- }; // if
- #endif
+ static int initialized = 0;
+ static size_t dummy = 0;
+ if (!initialized) {
+ // TODO: Analyze KMP_VERSION environment variable, print
+ // __kmp_version_copyright and __kmp_version_build_time.
+ // WARNING: Do not use "fprintf(stderr, ...)" because it will cause
+ // unresolved "__iob" symbol (see C70080). We need to extract __kmp_printf()
+ // stuff from kmp_runtime.cpp and use it.
+
+ // Trick with dummy variable forces linker to keep __kmp_version_copyright
+ // and __kmp_version_build_time strings in executable file (in case of
+    // static linkage). When KMP_VERSION analysis is implemented, the dummy
+    // variable should be deleted and the function should return void.
+ dummy = __kmp_version_copyright - __kmp_version_build_time;
- initialized = 1;
+#if KMP_OS_WINDOWS
+ LARGE_INTEGER freq;
+ BOOL status = QueryPerformanceFrequency(&freq);
+ if (status) {
+ frequency = double(freq.QuadPart);
}; // if
- return dummy;
+#endif
+
+ initialized = 1;
+ }; // if
+ return dummy;
}; // __kmps_init
#define i __kmps_init();
/* set API functions */
-void omp_set_num_threads( omp_int_t num_threads ) { i; }
-void omp_set_dynamic( omp_int_t dynamic ) { i; __kmps_set_dynamic( dynamic ); }
-void omp_set_nested( omp_int_t nested ) { i; __kmps_set_nested( nested ); }
-void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; }
-void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); }
-int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); }
-int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); }
-int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
+void omp_set_num_threads(omp_int_t num_threads) { i; }
+void omp_set_dynamic(omp_int_t dynamic) {
+ i;
+ __kmps_set_dynamic(dynamic);
+}
+void omp_set_nested(omp_int_t nested) {
+ i;
+ __kmps_set_nested(nested);
+}
+void omp_set_max_active_levels(omp_int_t max_active_levels) { i; }
+void omp_set_schedule(omp_sched_t kind, omp_int_t modifier) {
+ i;
+ __kmps_set_schedule((kmp_sched_t)kind, modifier);
+}
+int omp_get_ancestor_thread_num(omp_int_t level) {
+ i;
+ return (level) ? (-1) : (0);
+}
+int omp_get_team_size(omp_int_t level) {
+ i;
+ return (level) ? (-1) : (1);
+}
+int kmpc_set_affinity_mask_proc(int proc, void **mask) {
+ i;
+ return -1;
+}
+int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
+ i;
+ return -1;
+}
+int kmpc_get_affinity_mask_proc(int proc, void **mask) {
+ i;
+ return -1;
+}
/* kmp API functions */
-void kmp_set_stacksize( omp_int_t arg ) { i; __kmps_set_stacksize( arg ); }
-void kmp_set_stacksize_s( size_t arg ) { i; __kmps_set_stacksize( arg ); }
-void kmp_set_blocktime( omp_int_t arg ) { i; __kmps_set_blocktime( arg ); }
-void kmp_set_library( omp_int_t arg ) { i; __kmps_set_library( arg ); }
-void kmp_set_defaults( char const * str ) { i; }
-void kmp_set_disp_num_buffers( omp_int_t arg ) { i; }
+void kmp_set_stacksize(omp_int_t arg) {
+ i;
+ __kmps_set_stacksize(arg);
+}
+void kmp_set_stacksize_s(size_t arg) {
+ i;
+ __kmps_set_stacksize(arg);
+}
+void kmp_set_blocktime(omp_int_t arg) {
+ i;
+ __kmps_set_blocktime(arg);
+}
+void kmp_set_library(omp_int_t arg) {
+ i;
+ __kmps_set_library(arg);
+}
+void kmp_set_defaults(char const *str) { i; }
+void kmp_set_disp_num_buffers(omp_int_t arg) { i; }
/* KMP memory management functions. */
-void * kmp_malloc( size_t size ) { i; return malloc( size ); }
-void * kmp_aligned_malloc( size_t sz, size_t a ) {
- i;
+void *kmp_malloc(size_t size) {
+ i;
+ return malloc(size);
+}
+void *kmp_aligned_malloc(size_t sz, size_t a) {
+ i;
#if KMP_OS_WINDOWS
- errno = ENOSYS; // not supported
- return NULL; // no standard aligned allocator on Windows (pre - C11)
+ errno = ENOSYS; // not supported
+  return NULL; // no standard aligned allocator on Windows (pre-C11)
#else
- void *res;
- int err;
- if( err = posix_memalign( &res, a, sz ) ) {
- errno = err; // can be EINVAL or ENOMEM
- return NULL;
- }
- return res;
+ void *res;
+ int err;
+ if (err = posix_memalign(&res, a, sz)) {
+ errno = err; // can be EINVAL or ENOMEM
+ return NULL;
+ }
+ return res;
#endif
}
-void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); }
-void * kmp_realloc( void *ptr, size_t size ) { i; return realloc( ptr, size ); }
-void kmp_free( void * ptr ) { i; free( ptr ); }
+void *kmp_calloc(size_t nelem, size_t elsize) {
+ i;
+ return calloc(nelem, elsize);
+}
+void *kmp_realloc(void *ptr, size_t size) {
+ i;
+ return realloc(ptr, size);
+}
+void kmp_free(void *ptr) {
+ i;
+ free(ptr);
+}
static int __kmps_blocktime = INT_MAX;
-void __kmps_set_blocktime( int arg ) {
- i;
- __kmps_blocktime = arg;
+void __kmps_set_blocktime(int arg) {
+ i;
+ __kmps_blocktime = arg;
} // __kmps_set_blocktime
-int __kmps_get_blocktime( void ) {
- i;
- return __kmps_blocktime;
+int __kmps_get_blocktime(void) {
+ i;
+ return __kmps_blocktime;
} // __kmps_get_blocktime
static int __kmps_dynamic = 0;
-void __kmps_set_dynamic( int arg ) {
- i;
- __kmps_dynamic = arg;
+void __kmps_set_dynamic(int arg) {
+ i;
+ __kmps_dynamic = arg;
} // __kmps_set_dynamic
-int __kmps_get_dynamic( void ) {
- i;
- return __kmps_dynamic;
+int __kmps_get_dynamic(void) {
+ i;
+ return __kmps_dynamic;
} // __kmps_get_dynamic
static int __kmps_library = 1000;
-void __kmps_set_library( int arg ) {
- i;
- __kmps_library = arg;
+void __kmps_set_library(int arg) {
+ i;
+ __kmps_library = arg;
} // __kmps_set_library
-int __kmps_get_library( void ) {
- i;
- return __kmps_library;
+int __kmps_get_library(void) {
+ i;
+ return __kmps_library;
} // __kmps_get_library
static int __kmps_nested = 0;
-void __kmps_set_nested( int arg ) {
- i;
- __kmps_nested = arg;
+void __kmps_set_nested(int arg) {
+ i;
+ __kmps_nested = arg;
} // __kmps_set_nested
-int __kmps_get_nested( void ) {
- i;
- return __kmps_nested;
+int __kmps_get_nested(void) {
+ i;
+ return __kmps_nested;
} // __kmps_get_nested
static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE;
-void __kmps_set_stacksize( int arg ) {
- i;
- __kmps_stacksize = arg;
+void __kmps_set_stacksize(int arg) {
+ i;
+ __kmps_stacksize = arg;
} // __kmps_set_stacksize
-int __kmps_get_stacksize( void ) {
- i;
- return __kmps_stacksize;
+int __kmps_get_stacksize(void) {
+ i;
+ return __kmps_stacksize;
} // __kmps_get_stacksize
-static kmp_sched_t __kmps_sched_kind = kmp_sched_default;
-static int __kmps_sched_modifier = 0;
+static kmp_sched_t __kmps_sched_kind = kmp_sched_default;
+static int __kmps_sched_modifier = 0;
- void __kmps_set_schedule( kmp_sched_t kind, int modifier ) {
- i;
- __kmps_sched_kind = kind;
- __kmps_sched_modifier = modifier;
- } // __kmps_set_schedule
-
- void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) {
- i;
- *kind = __kmps_sched_kind;
- *modifier = __kmps_sched_modifier;
- } // __kmps_get_schedule
+void __kmps_set_schedule(kmp_sched_t kind, int modifier) {
+ i;
+ __kmps_sched_kind = kind;
+ __kmps_sched_modifier = modifier;
+} // __kmps_set_schedule
+
+void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) {
+ i;
+ *kind = __kmps_sched_kind;
+ *modifier = __kmps_sched_modifier;
+} // __kmps_get_schedule
#if OMP_40_ENABLED
static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false;
-void __kmps_set_proc_bind( kmp_proc_bind_t arg ) {
- i;
- __kmps_proc_bind = arg;
+void __kmps_set_proc_bind(kmp_proc_bind_t arg) {
+ i;
+ __kmps_proc_bind = arg;
} // __kmps_set_proc_bind
-kmp_proc_bind_t __kmps_get_proc_bind( void ) {
- i;
- return __kmps_proc_bind;
+kmp_proc_bind_t __kmps_get_proc_bind(void) {
+ i;
+ return __kmps_proc_bind;
} // __kmps_get_proc_bind
#endif /* OMP_40_ENABLED */
-double __kmps_get_wtime( void ) {
- // Elapsed wall clock time (in seconds) from "sometime in the past".
- double wtime = 0.0;
- i;
- #if KMP_OS_WINDOWS
- if ( frequency > 0.0 ) {
- LARGE_INTEGER now;
- BOOL status = QueryPerformanceCounter( & now );
- if ( status ) {
- wtime = double( now.QuadPart ) / frequency;
- }; // if
- }; // if
- #else
- // gettimeofday() returns seconds and microseconds since the Epoch.
- struct timeval tval;
- int rc;
- rc = gettimeofday( & tval, NULL );
- if ( rc == 0 ) {
- wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec );
- } else {
- // TODO: Assert or abort here.
- }; // if
- #endif
- return wtime;
+double __kmps_get_wtime(void) {
+ // Elapsed wall clock time (in seconds) from "sometime in the past".
+ double wtime = 0.0;
+ i;
+#if KMP_OS_WINDOWS
+ if (frequency > 0.0) {
+ LARGE_INTEGER now;
+ BOOL status = QueryPerformanceCounter(&now);
+ if (status) {
+ wtime = double(now.QuadPart) / frequency;
+ }; // if
+ }; // if
+#else
+ // gettimeofday() returns seconds and microseconds since the Epoch.
+ struct timeval tval;
+ int rc;
+ rc = gettimeofday(&tval, NULL);
+ if (rc == 0) {
+ wtime = (double)(tval.tv_sec) + 1.0E-06 * (double)(tval.tv_usec);
+ } else {
+ // TODO: Assert or abort here.
+ }; // if
+#endif
+ return wtime;
}; // __kmps_get_wtime
-double __kmps_get_wtick( void ) {
- // Number of seconds between successive clock ticks.
- double wtick = 0.0;
- i;
- #if KMP_OS_WINDOWS
- {
- DWORD increment;
- DWORD adjustment;
- BOOL disabled;
- BOOL rc;
- rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled );
- if ( rc ) {
- wtick = 1.0E-07 * (double)( disabled ? increment : adjustment );
- } else {
- // TODO: Assert or abort here.
- wtick = 1.0E-03;
- }; // if
- }
- #else
- // TODO: gettimeofday() returns in microseconds, but what is the precision?
- wtick = 1.0E-06;
- #endif
- return wtick;
+double __kmps_get_wtick(void) {
+ // Number of seconds between successive clock ticks.
+ double wtick = 0.0;
+ i;
+#if KMP_OS_WINDOWS
+ {
+ DWORD increment;
+ DWORD adjustment;
+ BOOL disabled;
+ BOOL rc;
+ rc = GetSystemTimeAdjustment(&adjustment, &increment, &disabled);
+ if (rc) {
+ wtick = 1.0E-07 * (double)(disabled ? increment : adjustment);
+ } else {
+ // TODO: Assert or abort here.
+ wtick = 1.0E-03;
+ }; // if
+ }
+#else
+ // TODO: gettimeofday() returns in microseconds, but what is the precision?
+ wtick = 1.0E-06;
+#endif
+ return wtick;
}; // __kmps_get_wtick
// end of file //
-
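For readers skimming the reformatted stub above: kmp_aligned_malloc keeps its original behavior, forwarding to posix_memalign on POSIX systems and converting a non-zero return code into errno plus a NULL result. Below is a minimal standalone sketch of that same pattern; the helper name is illustrative and not part of the runtime.

#include <errno.h>
#include <stdlib.h>

/* Sketch only: mirrors the stub's aligned-allocation pattern.
   posix_memalign reports failure through its return value rather than
   errno, so the wrapper copies the code into errno and returns NULL. */
static void *aligned_alloc_sketch(size_t alignment, size_t size) {
  void *ptr = NULL;
  int err = posix_memalign(&ptr, alignment, size);
  if (err != 0) { /* EINVAL (bad alignment) or ENOMEM */
    errno = err;
    return NULL;
  }
  return ptr; /* caller releases with free() */
}

As with the stub, the alignment must be a power of two and a multiple of sizeof(void *), and the returned memory is released with plain free().
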
Modified: openmp/trunk/runtime/src/kmp_stub.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stub.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stub.h (original)
+++ openmp/trunk/runtime/src/kmp_stub.h Fri May 12 13:01:32 2017
@@ -17,43 +17,43 @@
#define KMP_STUB_H
#ifdef __cplusplus
- extern "C" {
+extern "C" {
#endif // __cplusplus
-void __kmps_set_blocktime( int arg );
-int __kmps_get_blocktime( void );
-void __kmps_set_dynamic( int arg );
-int __kmps_get_dynamic( void );
-void __kmps_set_library( int arg );
-int __kmps_get_library( void );
-void __kmps_set_nested( int arg );
-int __kmps_get_nested( void );
-void __kmps_set_stacksize( int arg );
-int __kmps_get_stacksize();
+void __kmps_set_blocktime(int arg);
+int __kmps_get_blocktime(void);
+void __kmps_set_dynamic(int arg);
+int __kmps_get_dynamic(void);
+void __kmps_set_library(int arg);
+int __kmps_get_library(void);
+void __kmps_set_nested(int arg);
+int __kmps_get_nested(void);
+void __kmps_set_stacksize(int arg);
+int __kmps_get_stacksize();
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
- kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
- kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
- kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
- kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
- kmp_sched_default = kmp_sched_static // default scheduling
+ kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)
+ kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)
+ kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)
+ kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)
+ kmp_sched_default = kmp_sched_static // default scheduling
} kmp_sched_t;
#endif
-void __kmps_set_schedule( kmp_sched_t kind, int modifier );
-void __kmps_get_schedule( kmp_sched_t *kind, int *modifier );
+void __kmps_set_schedule(kmp_sched_t kind, int modifier);
+void __kmps_get_schedule(kmp_sched_t *kind, int *modifier);
#if OMP_40_ENABLED
-void __kmps_set_proc_bind( kmp_proc_bind_t arg );
-kmp_proc_bind_t __kmps_get_proc_bind( void );
+void __kmps_set_proc_bind(kmp_proc_bind_t arg);
+kmp_proc_bind_t __kmps_get_proc_bind(void);
#endif /* OMP_40_ENABLED */
double __kmps_get_wtime();
double __kmps_get_wtick();
#ifdef __cplusplus
- } // extern "C"
+} // extern "C"
#endif // __cplusplus
#endif // KMP_STUB_H
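
For context, the schedule stubs declared above simply record the last (kind, modifier) pair in static variables (see the kmp_stub.cpp hunk earlier in this patch). A short usage sketch against these declarations follows; it assumes the header above is in scope, and the function name is illustrative only.

#include <cstdio>

/* Sketch only: exercises the stub schedule setter/getter declared above. */
static void schedule_stub_demo() {
  __kmps_set_schedule(kmp_sched_dynamic, 4); /* kind = 2, chunk modifier = 4 */

  kmp_sched_t kind;
  int modifier;
  __kmps_get_schedule(&kind, &modifier);
  std::printf("kind=%d modifier=%d\n", (int)kind, modifier); /* kind=2 modifier=4 */
}
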
Modified: openmp/trunk/runtime/src/kmp_taskdeps.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_taskdeps.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_taskdeps.cpp (original)
+++ openmp/trunk/runtime/src/kmp_taskdeps.cpp Fri May 12 13:01:32 2017
@@ -21,511 +21,543 @@
#if OMP_40_ENABLED
-//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use finished list entries?
-//TODO: don't use atomic ref counters for stack-allocated nodes.
-//TODO: find an alternate to atomic refs for heap-allocated nodes?
-//TODO: Finish graph output support
-//TODO: kmp_lock_t seems a tad too big (and heavy weight) for this. Check other runtime locks
-//TODO: Any ITT support needed?
+// TODO: Improve memory allocation? keep a list of pre-allocated structures?
+// allocate in blocks? re-use finished list entries?
+// TODO: don't use atomic ref counters for stack-allocated nodes.
+// TODO: find an alternate to atomic refs for heap-allocated nodes?
+// TODO: Finish graph output support
+// TODO: kmp_lock_t seems a tad too big (and heavy weight) for this. Check other
+// runtime locks
+// TODO: Any ITT support needed?
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static kmp_int32 kmp_node_id_seed = 0;
#endif
-static void
-__kmp_init_node ( kmp_depnode_t *node )
-{
- node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed
- node->dn.successors = NULL;
- __kmp_init_lock(&node->dn.lock);
- node->dn.nrefs = 1; // init creates the first reference to the node
+static void __kmp_init_node(kmp_depnode_t *node) {
+ node->dn.task = NULL; // set to null initially, it will point to the right
+ // task once dependences have been processed
+ node->dn.successors = NULL;
+ __kmp_init_lock(&node->dn.lock);
+ node->dn.nrefs = 1; // init creates the first reference to the node
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
- node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
+ node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
#endif
}
-static inline kmp_depnode_t *
-__kmp_node_ref ( kmp_depnode_t *node )
-{
- KMP_TEST_THEN_INC32(&node->dn.nrefs);
- return node;
+static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
+ KMP_TEST_THEN_INC32(&node->dn.nrefs);
+ return node;
}
-static inline void
-__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node )
-{
- if (!node) return;
+static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
+ if (!node)
+ return;
- kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
- if ( n == 0 ) {
- KMP_ASSERT(node->dn.nrefs == 0);
+ kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
+ if (n == 0) {
+ KMP_ASSERT(node->dn.nrefs == 0);
#if USE_FAST_MEMORY
- __kmp_fast_free(thread,node);
+ __kmp_fast_free(thread, node);
#else
- __kmp_thread_free(thread,node);
+ __kmp_thread_free(thread, node);
#endif
- }
+ }
}
-#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid))
-#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid))
+#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid))
+#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid))
-static void
-__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );
+static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list);
-enum {
- KMP_DEPHASH_OTHER_SIZE = 97,
- KMP_DEPHASH_MASTER_SIZE = 997
-};
+enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
-static inline kmp_int32
-__kmp_dephash_hash ( kmp_intptr_t addr, size_t hsize )
-{
- //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets );
- return ((addr >> 6) ^ (addr >> 2)) % hsize;
-}
-
-static kmp_dephash_t *
-__kmp_dephash_create ( kmp_info_t *thread, kmp_taskdata_t *current_task )
-{
- kmp_dephash_t *h;
-
- size_t h_size;
-
- if ( current_task->td_flags.tasktype == TASK_IMPLICIT )
- h_size = KMP_DEPHASH_MASTER_SIZE;
- else
- h_size = KMP_DEPHASH_OTHER_SIZE;
+static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
+ // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
+ // m_num_sets );
+ return ((addr >> 6) ^ (addr >> 2)) % hsize;
+}
+
+static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
+ kmp_taskdata_t *current_task) {
+ kmp_dephash_t *h;
- kmp_int32 size =
- h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
+ size_t h_size;
+
+ if (current_task->td_flags.tasktype == TASK_IMPLICIT)
+ h_size = KMP_DEPHASH_MASTER_SIZE;
+ else
+ h_size = KMP_DEPHASH_OTHER_SIZE;
+
+ kmp_int32 size =
+ h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
#if USE_FAST_MEMORY
- h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
+ h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
- h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
+ h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
- h->size = h_size;
+ h->size = h_size;
#ifdef KMP_DEBUG
- h->nelements = 0;
- h->nconflicts = 0;
+ h->nelements = 0;
+ h->nconflicts = 0;
#endif
- h->buckets = (kmp_dephash_entry **)(h+1);
+ h->buckets = (kmp_dephash_entry **)(h + 1);
- for ( size_t i = 0; i < h_size; i++ )
- h->buckets[i] = 0;
+ for (size_t i = 0; i < h_size; i++)
+ h->buckets[i] = 0;
- return h;
-}
-
-void
-__kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h)
-{
- for (size_t i = 0; i < h->size; i++) {
- if (h->buckets[i]) {
- kmp_dephash_entry_t *next;
- for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
- next = entry->next_in_bucket;
- __kmp_depnode_list_free(thread,entry->last_ins);
- __kmp_node_deref(thread,entry->last_out);
+ return h;
+}
+
+void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) {
+ for (size_t i = 0; i < h->size; i++) {
+ if (h->buckets[i]) {
+ kmp_dephash_entry_t *next;
+ for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
+ next = entry->next_in_bucket;
+ __kmp_depnode_list_free(thread, entry->last_ins);
+ __kmp_node_deref(thread, entry->last_out);
#if USE_FAST_MEMORY
- __kmp_fast_free(thread,entry);
+ __kmp_fast_free(thread, entry);
#else
- __kmp_thread_free(thread,entry);
+ __kmp_thread_free(thread, entry);
#endif
- }
- h->buckets[i] = 0;
- }
+ }
+ h->buckets[i] = 0;
}
+ }
}
-void
-__kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h)
-{
- __kmp_dephash_free_entries(thread, h);
+void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
+ __kmp_dephash_free_entries(thread, h);
#if USE_FAST_MEMORY
- __kmp_fast_free(thread,h);
+ __kmp_fast_free(thread, h);
#else
- __kmp_thread_free(thread,h);
+ __kmp_thread_free(thread, h);
#endif
}
static kmp_dephash_entry *
-__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
-{
- kmp_int32 bucket = __kmp_dephash_hash(addr,h->size);
-
- kmp_dephash_entry_t *entry;
- for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
- if ( entry->addr == addr ) break;
+__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
+ kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
+
+ kmp_dephash_entry_t *entry;
+ for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
+ if (entry->addr == addr)
+ break;
- if ( entry == NULL ) {
- // create entry. This is only done by one thread so no locking required
+ if (entry == NULL) {
+// create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
- entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) );
+ entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_dephash_entry_t));
#else
- entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) );
+ entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
+ thread, sizeof(kmp_dephash_entry_t));
#endif
- entry->addr = addr;
- entry->last_out = NULL;
- entry->last_ins = NULL;
- entry->next_in_bucket = h->buckets[bucket];
- h->buckets[bucket] = entry;
+ entry->addr = addr;
+ entry->last_out = NULL;
+ entry->last_ins = NULL;
+ entry->next_in_bucket = h->buckets[bucket];
+ h->buckets[bucket] = entry;
#ifdef KMP_DEBUG
- h->nelements++;
- if ( entry->next_in_bucket ) h->nconflicts++;
-#endif
- }
- return entry;
+ h->nelements++;
+ if (entry->next_in_bucket)
+ h->nconflicts++;
+#endif
+ }
+ return entry;
}
-static kmp_depnode_list_t *
-__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node )
-{
- kmp_depnode_list_t *new_head;
+static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
+ kmp_depnode_list_t *list,
+ kmp_depnode_t *node) {
+ kmp_depnode_list_t *new_head;
#if USE_FAST_MEMORY
- new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
+ new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
+ thread, sizeof(kmp_depnode_list_t));
#else
- new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
+ new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
+ thread, sizeof(kmp_depnode_list_t));
#endif
- new_head->node = __kmp_node_ref(node);
- new_head->next = list;
+ new_head->node = __kmp_node_ref(node);
+ new_head->next = list;
- return new_head;
+ return new_head;
}
-static void
-__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list )
-{
- kmp_depnode_list *next;
+static void __kmp_depnode_list_free(kmp_info_t *thread,
+ kmp_depnode_list *list) {
+ kmp_depnode_list *next;
- for ( ; list ; list = next ) {
- next = list->next;
+ for (; list; list = next) {
+ next = list->next;
- __kmp_node_deref(thread,list->node);
+ __kmp_node_deref(thread, list->node);
#if USE_FAST_MEMORY
- __kmp_fast_free(thread,list);
+ __kmp_fast_free(thread, list);
#else
- __kmp_thread_free(thread,list);
+ __kmp_thread_free(thread, list);
#endif
- }
+ }
}
-static inline void
-__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink,
- kmp_task_t *sink_task )
-{
+static inline void __kmp_track_dependence(kmp_depnode_t *source,
+ kmp_depnode_t *sink,
+ kmp_task_t *sink_task) {
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
- kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
- // do not use sink->dn.task as that is only filled after the dependencies
- // are already processed!
- kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task);
-
- __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource);
+ kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
+ // do not use sink->dn.task as that is only filled after the dependencies
+ // are already processed!
+ kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
+
+ __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
+ task_source->td_ident->psource, sink->dn.id,
+ task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_TRACE
- /* OMPT tracks dependences between task (a=source, b=sink) in which
- task a blocks the execution of b through the ompt_new_dependence_callback */
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair))
- {
- kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
- kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task);
-
- ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)(
- task_source->ompt_task_info.task_id,
- task_sink->ompt_task_info.task_id);
- }
+ // OMPT tracks dependences between task (a=source, b=sink) in which
+ // task a blocks the execution of b through the ompt_new_dependence_callback
+ if (ompt_enabled &&
+ ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) {
+ kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
+ kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
+
+ ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)(
+ task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id);
+ }
#endif /* OMPT_SUPPORT && OMPT_TRACE */
}
-template< bool filter >
+template <bool filter>
static inline kmp_int32
-__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
- bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_task_t *task )
-{
- KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) );
-
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_int32 npredecessors=0;
- for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
- const kmp_depend_info_t * dep = &dep_list[i];
-
- KMP_DEBUG_ASSERT(dep->flags.in);
-
- if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries
-
- kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
- kmp_depnode_t *last_out = info->last_out;
-
- if ( dep->flags.out && info->last_ins ) {
- for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
- kmp_depnode_t * indep = p->node;
- if ( indep->dn.task ) {
- KMP_ACQUIRE_DEPNODE(gtid,indep);
- if ( indep->dn.task ) {
- __kmp_track_dependence(indep,node,task);
- indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node);
- KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
- filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(task)));
- npredecessors++;
- }
- KMP_RELEASE_DEPNODE(gtid,indep);
- }
- }
-
- __kmp_depnode_list_free(thread,info->last_ins);
- info->last_ins = NULL;
-
- } else if ( last_out && last_out->dn.task ) {
- KMP_ACQUIRE_DEPNODE(gtid,last_out);
- if ( last_out->dn.task ) {
- __kmp_track_dependence(last_out,node,task);
- last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node);
- KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
- filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(task)));
-
- npredecessors++;
- }
- KMP_RELEASE_DEPNODE(gtid,last_out);
+__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
+ bool dep_barrier, kmp_int32 ndeps,
+ kmp_depend_info_t *dep_list, kmp_task_t *task) {
+ KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
+ "dep_barrier = %d\n",
+ filter, gtid, ndeps, dep_barrier));
+
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_int32 npredecessors = 0;
+ for (kmp_int32 i = 0; i < ndeps; i++) {
+ const kmp_depend_info_t *dep = &dep_list[i];
+
+ KMP_DEBUG_ASSERT(dep->flags.in);
+
+ if (filter && dep->base_addr == 0)
+ continue; // skip filtered entries
+
+ kmp_dephash_entry_t *info =
+ __kmp_dephash_find(thread, hash, dep->base_addr);
+ kmp_depnode_t *last_out = info->last_out;
+
+ if (dep->flags.out && info->last_ins) {
+ for (kmp_depnode_list_t *p = info->last_ins; p; p = p->next) {
+ kmp_depnode_t *indep = p->node;
+ if (indep->dn.task) {
+ KMP_ACQUIRE_DEPNODE(gtid, indep);
+ if (indep->dn.task) {
+ __kmp_track_dependence(indep, node, task);
+ indep->dn.successors =
+ __kmp_add_node(thread, indep->dn.successors, node);
+ KA_TRACE(40, ("__kmp_process_deps<%d>: T#%d adding dependence from "
+ "%p to %p\n",
+ filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task),
+ KMP_TASK_TO_TASKDATA(task)));
+ npredecessors++;
+ }
+ KMP_RELEASE_DEPNODE(gtid, indep);
}
+ }
- if ( dep_barrier ) {
- // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
- // the execution of this task so the previous output nodes can be cleared.
- __kmp_node_deref(thread,last_out);
- info->last_out = NULL;
- } else {
- if ( dep->flags.out ) {
- __kmp_node_deref(thread,last_out);
- info->last_out = __kmp_node_ref(node);
- } else
- info->last_ins = __kmp_add_node(thread, info->last_ins, node);
- }
+ __kmp_depnode_list_free(thread, info->last_ins);
+ info->last_ins = NULL;
+ } else if (last_out && last_out->dn.task) {
+ KMP_ACQUIRE_DEPNODE(gtid, last_out);
+ if (last_out->dn.task) {
+ __kmp_track_dependence(last_out, node, task);
+ last_out->dn.successors =
+ __kmp_add_node(thread, last_out->dn.successors, node);
+ KA_TRACE(
+ 40,
+ ("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
+ filter, gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task),
+ KMP_TASK_TO_TASKDATA(task)));
+
+ npredecessors++;
+ }
+ KMP_RELEASE_DEPNODE(gtid, last_out);
+ }
+
+ if (dep_barrier) {
+ // if this is a sync point in the serial sequence, then the previous
+ // outputs are guaranteed to be completed after
+ // the execution of this task so the previous output nodes can be cleared.
+ __kmp_node_deref(thread, last_out);
+ info->last_out = NULL;
+ } else {
+ if (dep->flags.out) {
+ __kmp_node_deref(thread, last_out);
+ info->last_out = __kmp_node_ref(node);
+ } else
+ info->last_ins = __kmp_add_node(thread, info->last_ins, node);
}
+ }
- KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
+ KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
+ gtid, npredecessors));
- return npredecessors;
+ return npredecessors;
}
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)
// returns true if the task has any outstanding dependence
-static bool
-__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier,
- kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
- int i;
+static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
+ kmp_task_t *task, kmp_dephash_t *hash,
+ bool dep_barrier, kmp_int32 ndeps,
+ kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias,
+ kmp_depend_info_t *noalias_dep_list) {
+ int i;
#if KMP_DEBUG
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
- KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased depedencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) );
-
- // Filter deps in dep_list
- // TODO: Different algorithm for large dep_list ( > 10 ? )
- for ( i = 0; i < ndeps; i ++ ) {
- if ( dep_list[i].base_addr != 0 )
- for ( int j = i+1; j < ndeps; j++ )
- if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
- dep_list[i].flags.in |= dep_list[j].flags.in;
- dep_list[i].flags.out |= dep_list[j].flags.out;
- dep_list[j].base_addr = 0; // Mark j element as void
- }
- }
-
- // doesn't need to be atomic as no other thread is going to be accessing this node just yet
- // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
- node->dn.npredecessors = -1;
-
- // used to pack all npredecessors additions into a single atomic operation at the end
- int npredecessors;
-
- npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
- ndeps, dep_list, task);
- npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier,
- ndeps_noalias, noalias_dep_list, task);
-
- node->dn.task = task;
- KMP_MB();
-
- // Account for our initial fake value
- npredecessors++;
-
- // Update predecessors and obtain current value to check if there are still any outstanding dependences (some tasks may have finished while we processed the dependences)
- npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors;
-
- KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) );
-
- // beyond this point the task could be queued (and executed) by a releasing task...
- return npredecessors > 0 ? true : false;
-}
-
-void
-__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
-{
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_depnode_t *node = task->td_depnode;
+ KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d "
+ "possibly aliased dependencies, %d non-aliased depedencies : "
+ "dep_barrier=%d .\n",
+ gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
+
+ // Filter deps in dep_list
+ // TODO: Different algorithm for large dep_list ( > 10 ? )
+ for (i = 0; i < ndeps; i++) {
+ if (dep_list[i].base_addr != 0)
+ for (int j = i + 1; j < ndeps; j++)
+ if (dep_list[i].base_addr == dep_list[j].base_addr) {
+ dep_list[i].flags.in |= dep_list[j].flags.in;
+ dep_list[i].flags.out |= dep_list[j].flags.out;
+ dep_list[j].base_addr = 0; // Mark j element as void
+ }
+ }
- if ( task->td_dephash ) {
- KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
- __kmp_dephash_free(thread,task->td_dephash);
- task->td_dephash = NULL;
+ // doesn't need to be atomic as no other thread is going to be accessing this
+ // node just yet.
+ // npredecessors is set -1 to ensure that none of the releasing tasks queues
+ // this task before we have finished processing all the dependencies
+ node->dn.npredecessors = -1;
+
+ // used to pack all npredecessors additions into a single atomic operation at
+ // the end
+ int npredecessors;
+
+ npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
+ dep_list, task);
+ npredecessors += __kmp_process_deps<false>(
+ gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+
+ node->dn.task = task;
+ KMP_MB();
+
+ // Account for our initial fake value
+ npredecessors++;
+
+ // Update predecessors and obtain current value to check if there are still
+ // any outstanding dependences (some tasks may have finished while we processed
+ // the dependences)
+ npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) +
+ npredecessors;
+
+ KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
+ gtid, npredecessors, taskdata));
+
+ // beyond this point the task could be queued (and executed) by a releasing
+ // task...
+ return npredecessors > 0 ? true : false;
+}
+
+void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_depnode_t *node = task->td_depnode;
+
+ if (task->td_dephash) {
+ KA_TRACE(
+ 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n",
+ gtid, task));
+ __kmp_dephash_free(thread, task->td_dephash);
+ task->td_dephash = NULL;
+ }
+
+ if (!node)
+ return;
+
+ KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n",
+ gtid, task));
+
+ KMP_ACQUIRE_DEPNODE(gtid, node);
+ node->dn.task =
+ NULL; // mark this task as finished, so no new dependencies are generated
+ KMP_RELEASE_DEPNODE(gtid, node);
+
+ kmp_depnode_list_t *next;
+ for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
+ kmp_depnode_t *successor = p->node;
+ kmp_int32 npredecessors =
+ KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;
+
+ // successor task can be NULL for wait_depends or because deps are still
+ // being processed
+ if (npredecessors == 0) {
+ KMP_MB();
+ if (successor->dn.task) {
+ KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
+ "for execution.\n",
+ gtid, successor->dn.task, task));
+ __kmp_omp_task(gtid, successor->dn.task, false);
+ }
}
- if ( !node ) return;
-
- KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) );
-
- KMP_ACQUIRE_DEPNODE(gtid,node);
- node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
- KMP_RELEASE_DEPNODE(gtid,node);
-
- kmp_depnode_list_t *next;
- for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
- kmp_depnode_t *successor = p->node;
- kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;
-
- // successor task can be NULL for wait_depends or because deps are still being processed
- if ( npredecessors == 0 ) {
- KMP_MB();
- if ( successor->dn.task ) {
- KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
- __kmp_omp_task(gtid,successor->dn.task,false);
- }
- }
-
- next = p->next;
- __kmp_node_deref(thread,p->node);
+ next = p->next;
+ __kmp_node_deref(thread, p->node);
#if USE_FAST_MEMORY
- __kmp_fast_free(thread,p);
+ __kmp_fast_free(thread, p);
#else
- __kmp_thread_free(thread,p);
+ __kmp_thread_free(thread, p);
#endif
- }
+ }
- __kmp_node_deref(thread,node);
+ __kmp_node_deref(thread, node);
- KA_TRACE(20, ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", gtid, task ) );
+ KA_TRACE(
+ 20,
+ ("__kmp_release_deps: T#%d all successors of %p notified of completion\n",
+ gtid, task));
}
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
-@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
+@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
+task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing
-@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
+@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
+suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
Schedule a non-thread-switchable task with dependences for execution
*/
-kmp_int32
-__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
- kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
-
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task, kmp_int32 ndeps,
+ kmp_depend_info_t *dep_list,
+ kmp_int32 ndeps_noalias,
+ kmp_depend_info_t *noalias_dep_list) {
+
+ kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
+ loc_ref, new_taskdata));
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * current_task = thread->th.th_current_task;
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPT_SUPPORT && OMPT_TRACE
- /* OMPT grab all dependences if requested by the tool */
- if (ompt_enabled && ndeps+ndeps_noalias > 0 &&
- ompt_callbacks.ompt_callback(ompt_event_task_dependences))
- {
- kmp_int32 i;
-
- new_taskdata->ompt_task_info.ndeps = ndeps+ndeps_noalias;
- new_taskdata->ompt_task_info.deps = (ompt_task_dependence_t *)
- KMP_OMPT_DEPS_ALLOC(thread,
- (ndeps+ndeps_noalias)*sizeof(ompt_task_dependence_t));
-
- KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);
-
- for (i = 0; i < ndeps; i++)
- {
- new_taskdata->ompt_task_info.deps[i].variable_addr =
- (void*) dep_list[i].base_addr;
- if (dep_list[i].flags.in && dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[i].dependence_flags =
- ompt_task_dependence_type_inout;
- else if (dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[i].dependence_flags =
- ompt_task_dependence_type_out;
- else if (dep_list[i].flags.in)
- new_taskdata->ompt_task_info.deps[i].dependence_flags =
- ompt_task_dependence_type_in;
- }
- for (i = 0; i < ndeps_noalias; i++)
- {
- new_taskdata->ompt_task_info.deps[ndeps+i].variable_addr =
- (void*) noalias_dep_list[i].base_addr;
- if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
- ompt_task_dependence_type_inout;
- else if (noalias_dep_list[i].flags.out)
- new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
- ompt_task_dependence_type_out;
- else if (noalias_dep_list[i].flags.in)
- new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
- ompt_task_dependence_type_in;
- }
+ /* OMPT grab all dependences if requested by the tool */
+ if (ompt_enabled && ndeps + ndeps_noalias > 0 &&
+ ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
+ kmp_int32 i;
+
+ new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
+ new_taskdata->ompt_task_info.deps =
+ (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC(
+ thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t));
+
+ KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);
+
+ for (i = 0; i < ndeps; i++) {
+ new_taskdata->ompt_task_info.deps[i].variable_addr =
+ (void *)dep_list[i].base_addr;
+ if (dep_list[i].flags.in && dep_list[i].flags.out)
+ new_taskdata->ompt_task_info.deps[i].dependence_flags =
+ ompt_task_dependence_type_inout;
+ else if (dep_list[i].flags.out)
+ new_taskdata->ompt_task_info.deps[i].dependence_flags =
+ ompt_task_dependence_type_out;
+ else if (dep_list[i].flags.in)
+ new_taskdata->ompt_task_info.deps[i].dependence_flags =
+ ompt_task_dependence_type_in;
+ }
+ for (i = 0; i < ndeps_noalias; i++) {
+ new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr =
+ (void *)noalias_dep_list[i].base_addr;
+ if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
+ new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
+ ompt_task_dependence_type_inout;
+ else if (noalias_dep_list[i].flags.out)
+ new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
+ ompt_task_dependence_type_out;
+ else if (noalias_dep_list[i].flags.in)
+ new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
+ ompt_task_dependence_type_in;
}
+ }
#endif /* OMPT_SUPPORT && OMPT_TRACE */
- bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
+ bool serial = current_task->td_flags.team_serial ||
+ current_task->td_flags.tasking_ser ||
+ current_task->td_flags.final;
#if OMP_45_ENABLED
- kmp_task_team_t * task_team = thread->th.th_task_team;
- serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);
+ kmp_task_team_t *task_team = thread->th.th_task_team;
+ serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);
#endif
- if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
- /* if no dependencies have been tracked yet, create the dependence hash */
- if ( current_task->td_dephash == NULL )
- current_task->td_dephash = __kmp_dephash_create(thread, current_task);
+ if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
+ /* if no dependencies have been tracked yet, create the dependence hash */
+ if (current_task->td_dephash == NULL)
+ current_task->td_dephash = __kmp_dephash_create(thread, current_task);
#if USE_FAST_MEMORY
- kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
+ kmp_depnode_t *node =
+ (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
- kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t));
+ kmp_depnode_t *node =
+ (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif
- __kmp_init_node(node);
- new_taskdata->td_depnode = node;
+ __kmp_init_node(node);
+ new_taskdata->td_depnode = node;
- if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER,
- ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) {
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: "
- "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
- new_taskdata ) );
- return TASK_CURRENT_NOT_QUEUED;
- }
- } else {
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)"
- "loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) );
- }
-
- KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : "
- "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref,
- new_taskdata ) );
+ if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
+ NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
+ noalias_dep_list)) {
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
+ "dependencies: "
+ "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
+ gtid, loc_ref, new_taskdata));
+ return TASK_CURRENT_NOT_QUEUED;
+ }
+ } else {
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies "
+ "for task (serialized)"
+ "loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata));
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
+ "dependencies : "
+ "loc=%p task=%p, transferring to __kmpc_omp_task\n",
+ gtid, loc_ref, new_taskdata));
- return __kmpc_omp_task(loc_ref,gtid,new_task);
+ return __kmpc_omp_task(loc_ref, gtid, new_task);
}
/*!
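The hunk above also reflows the duplicate-dependence filter in __kmp_check_deps: entries in dep_list with the same base address are merged by OR-ing their in/out flags into the first occurrence, and later occurrences are voided by zeroing base_addr so __kmp_process_deps skips them. Below is a standalone restatement of that idea as a sketch, using a simplified struct rather than kmp_depend_info_t.

#include <cstdint>

/* Sketch only: simplified stand-in for kmp_depend_info_t. */
struct dep_sketch {
  std::intptr_t base_addr;
  bool in;
  bool out;
};

/* O(n^2) merge of duplicate addresses, as done before dependence
   processing: flags are folded into the first entry, later entries are
   voided (base_addr = 0) so the processing loop can skip them. */
static void filter_duplicate_deps(dep_sketch *deps, int ndeps) {
  for (int i = 0; i < ndeps; i++) {
    if (deps[i].base_addr == 0)
      continue; /* already voided */
    for (int j = i + 1; j < ndeps; j++) {
      if (deps[j].base_addr == deps[i].base_addr) {
        deps[i].in |= deps[j].in;
        deps[i].out |= deps[j].out;
        deps[j].base_addr = 0;
      }
    }
  }
}
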
@@ -539,55 +571,64 @@ __kmpc_omp_task_with_deps( ident_t *loc_
Blocks the current task until all specified dependencies have been fulfilled.
*/
-void
-__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
- kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
- KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) );
-
- if ( ndeps == 0 && ndeps_noalias == 0 ) {
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) );
- return;
- }
-
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * current_task = thread->th.th_current_task;
-
- // We can return immediately as:
- // - dependences are not computed in serial teams (except if we have proxy tasks)
- // - if the dephash is not yet created it means we have nothing to wait for
- bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
+void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+ kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+ kmp_depend_info_t *noalias_dep_list) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));
+
+ if (ndeps == 0 && ndeps_noalias == 0) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to "
+ "wait upon : loc=%p\n",
+ gtid, loc_ref));
+ return;
+ }
+
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *current_task = thread->th.th_current_task;
+
+ // We can return immediately as:
+ // - dependences are not computed in serial teams (except with proxy tasks)
+ // - if the dephash is not yet created it means we have nothing to wait for
+ bool ignore = current_task->td_flags.team_serial ||
+ current_task->td_flags.tasking_ser ||
+ current_task->td_flags.final;
#if OMP_45_ENABLED
- ignore = ignore && thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
+ ignore = ignore && thread->th.th_task_team != NULL &&
+ thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
#endif
- ignore = ignore || current_task->td_dephash == NULL;
-
- if ( ignore ) {
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
- return;
- }
-
- kmp_depnode_t node;
- __kmp_init_node(&node);
-
- if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER,
- ndeps, dep_list, ndeps_noalias, noalias_dep_list )) {
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
- return;
- }
+ ignore = ignore || current_task->td_dephash == NULL;
- int thread_finished = FALSE;
- kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
- while ( node.dn.npredecessors > 0 ) {
- flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
+ if (ignore) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
+ "dependencies : loc=%p\n",
+ gtid, loc_ref));
+ return;
+ }
+
+ kmp_depnode_t node;
+ __kmp_init_node(&node);
+
+ if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
+ DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
+ noalias_dep_list)) {
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
+ "dependencies : loc=%p\n",
+ gtid, loc_ref));
+ return;
+ }
+
+ int thread_finished = FALSE;
+ kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
+ while (node.dn.npredecessors > 0) {
+ flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
#if USE_ITT_BUILD
- NULL,
+ NULL,
#endif
- __kmp_task_stealing_constraint );
- }
+ __kmp_task_stealing_constraint);
+ }
- KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) );
+ KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
+ gtid, loc_ref));
}
#endif /* OMP_40_ENABLED */
-
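One more note on the dependence hash reformatted above: addresses are bucketed with ((addr >> 6) ^ (addr >> 2)) % size, and implicit (master) tasks get the larger table (997 buckets vs. 97), presumably because they tend to accumulate more distinct addresses. A self-contained sketch of that bucket computation follows; the unsigned copy of the address is only there to keep the modulo result non-negative.

#include <cstddef>
#include <cstdint>

enum { SKETCH_DEPHASH_OTHER_SIZE = 97, SKETCH_DEPHASH_MASTER_SIZE = 997 };

/* Sketch only: the same mixing as __kmp_dephash_hash, computed on an
   unsigned copy of the address. */
static std::size_t dephash_bucket_sketch(std::intptr_t addr, std::size_t hsize) {
  std::uintptr_t a = (std::uintptr_t)addr;
  return ((a >> 6) ^ (a >> 2)) % hsize;
}

Collisions within a bucket are chained through next_in_bucket, as seen in __kmp_dephash_find above.
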
Modified: openmp/trunk/runtime/src/kmp_tasking.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.cpp (original)
+++ openmp/trunk/runtime/src/kmp_tasking.cpp Fri May 12 13:01:32 2017
@@ -16,8 +16,8 @@
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
-#include "kmp_wait_release.h"
#include "kmp_stats.h"
+#include "kmp_wait_release.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
@@ -25,1608 +25,1625 @@
#include "tsan_annotations.h"
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
/* forward declaration */
-static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
-static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
-static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
+static void __kmp_enable_tasking(kmp_task_team_t *task_team,
+ kmp_info_t *this_thr);
+static void __kmp_alloc_task_deque(kmp_info_t *thread,
+ kmp_thread_data_t *thread_data);
+static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
+ kmp_task_team_t *task_team);
#ifdef OMP_45_ENABLED
-static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
+static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#endif
#ifdef BUILD_TIED_TASK_STACK
-//---------------------------------------------------------------------------
// __kmp_trace_task_stack: print the tied tasks from the task stack in order
-// from top to bottom
+// from top to bottom
//
// gtid: global thread identifier for thread containing stack
// thread_data: thread data for task team thread containing stack
// threshold: value above which the trace statement triggers
// location: string identifying call site of this function (for trace)
+static void __kmp_trace_task_stack(kmp_int32 gtid,
+ kmp_thread_data_t *thread_data,
+ int threshold, char *location) {
+ kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+ kmp_taskdata_t **stack_top = task_stack->ts_top;
+ kmp_int32 entries = task_stack->ts_entries;
+ kmp_taskdata_t *tied_task;
+
+ KA_TRACE(
+ threshold,
+ ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
+ "first_block = %p, stack_top = %p \n",
+ location, gtid, entries, task_stack->ts_first_block, stack_top));
-static void
-__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
-{
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
- kmp_taskdata_t **stack_top = task_stack -> ts_top;
- kmp_int32 entries = task_stack -> ts_entries;
- kmp_taskdata_t *tied_task;
-
- KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
- "first_block = %p, stack_top = %p \n",
- location, gtid, entries, task_stack->ts_first_block, stack_top ) );
-
- KMP_DEBUG_ASSERT( stack_top != NULL );
- KMP_DEBUG_ASSERT( entries > 0 );
-
- while ( entries != 0 )
- {
- KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
- // fix up ts_top if we need to pop from previous block
- if ( entries & TASK_STACK_INDEX_MASK == 0 )
- {
- kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
+ KMP_DEBUG_ASSERT(stack_top != NULL);
+ KMP_DEBUG_ASSERT(entries > 0);
- stack_block = stack_block -> sb_prev;
- stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
- }
+ while (entries != 0) {
+ KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
+ // fix up ts_top if we need to pop from previous block
+ if (entries & TASK_STACK_INDEX_MASK == 0) {
+ kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
- // finish bookkeeping
- stack_top--;
- entries--;
+ stack_block = stack_block->sb_prev;
+ stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
+ }
- tied_task = * stack_top;
+ // finish bookkeeping
+ stack_top--;
+ entries--;
- KMP_DEBUG_ASSERT( tied_task != NULL );
- KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
+ tied_task = *stack_top;
- KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
- "stack_top=%p, tied_task=%p\n",
- location, gtid, entries, stack_top, tied_task ) );
- }
- KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
+ KMP_DEBUG_ASSERT(tied_task != NULL);
+ KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
- KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
- location, gtid ) );
+ KA_TRACE(threshold,
+ ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
+ "stack_top=%p, tied_task=%p\n",
+ location, gtid, entries, stack_top, tied_task));
+ }
+ KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
+
+ KA_TRACE(threshold,
+ ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
+ location, gtid));
}
-//---------------------------------------------------------------------------
// __kmp_init_task_stack: initialize the task stack for the first time
-// after a thread_data structure is created.
-// It should not be necessary to do this again (assuming the stack works).
+// after a thread_data structure is created.
+// It should not be necessary to do this again (assuming the stack works).
//
// gtid: global thread identifier of calling thread
// thread_data: thread data for task team thread containing stack
-
-static void
-__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
-{
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
- kmp_stack_block_t *first_block;
-
- // set up the first block of the stack
- first_block = & task_stack -> ts_first_block;
- task_stack -> ts_top = (kmp_taskdata_t **) first_block;
- memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
-
- // initialize the stack to be empty
- task_stack -> ts_entries = TASK_STACK_EMPTY;
- first_block -> sb_next = NULL;
- first_block -> sb_prev = NULL;
+static void __kmp_init_task_stack(kmp_int32 gtid,
+ kmp_thread_data_t *thread_data) {
+ kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+ kmp_stack_block_t *first_block;
+
+ // set up the first block of the stack
+ first_block = &task_stack->ts_first_block;
+ task_stack->ts_top = (kmp_taskdata_t **)first_block;
+ memset((void *)first_block, '\0',
+ TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
+
+ // initialize the stack to be empty
+ task_stack->ts_entries = TASK_STACK_EMPTY;
+ first_block->sb_next = NULL;
+ first_block->sb_prev = NULL;
}
-
-//---------------------------------------------------------------------------
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
//
// gtid: global thread identifier for calling thread
// thread_data: thread info for thread containing stack
-
-static void
-__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
-{
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
- kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
-
- KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
- // free from the second block of the stack
- while ( stack_block != NULL ) {
- kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
-
- stack_block -> sb_next = NULL;
- stack_block -> sb_prev = NULL;
- if (stack_block != & task_stack -> ts_first_block) {
- __kmp_thread_free( thread, stack_block ); // free the block, if not the first
- }
- stack_block = next_block;
- }
- // initialize the stack to be empty
- task_stack -> ts_entries = 0;
- task_stack -> ts_top = NULL;
+static void __kmp_free_task_stack(kmp_int32 gtid,
+ kmp_thread_data_t *thread_data) {
+ kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+ kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
+
+ KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
+ // free from the second block of the stack
+ while (stack_block != NULL) {
+ kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
+
+ stack_block->sb_next = NULL;
+ stack_block->sb_prev = NULL;
+ if (stack_block != &task_stack->ts_first_block) {
+ __kmp_thread_free(thread,
+ stack_block); // free the block, if not the first
+ }
+ stack_block = next_block;
+ }
+ // initialize the stack to be empty
+ task_stack->ts_entries = 0;
+ task_stack->ts_top = NULL;
}
-
-//---------------------------------------------------------------------------
// __kmp_push_task_stack: Push the tied task onto the task stack.
// Grow the stack if necessary by allocating another block.
//
// gtid: global thread identifier for calling thread
// thread: thread info for thread containing stack
// tied_task: the task to push on the stack
-
-static void
-__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
-{
- // GEH - need to consider what to do if tt_threads_data not allocated yet
- kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
- tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
-
- if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
- return; // Don't push anything on stack if team or team tasks are serialized
- }
-
- KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
- KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
-
- KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
- gtid, thread, tied_task ) );
- // Store entry
- * (task_stack -> ts_top) = tied_task;
-
- // Do bookkeeping for next push
- task_stack -> ts_top++;
- task_stack -> ts_entries++;
-
- if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
- {
- // Find beginning of this task block
- kmp_stack_block_t *stack_block =
- (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
-
- // Check if we already have a block
- if ( stack_block -> sb_next != NULL )
- { // reset ts_top to beginning of next block
- task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
- }
- else
- { // Alloc new block and link it up
- kmp_stack_block_t *new_block = (kmp_stack_block_t *)
- __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
-
- task_stack -> ts_top = & new_block -> sb_block[0];
- stack_block -> sb_next = new_block;
- new_block -> sb_prev = stack_block;
- new_block -> sb_next = NULL;
-
- KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
- gtid, tied_task, new_block ) );
- }
- }
- KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
+static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskdata_t *tied_task) {
+ // GEH - need to consider what to do if tt_threads_data not allocated yet
+ kmp_thread_data_t *thread_data =
+ &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
+ kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+
+ if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
+ return; // Don't push anything on stack if team or team tasks are serialized
+ }
+
+ KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
+ KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
+
+ KA_TRACE(20,
+ ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
+ gtid, thread, tied_task));
+ // Store entry
+ *(task_stack->ts_top) = tied_task;
+
+ // Do bookkeeping for next push
+ task_stack->ts_top++;
+ task_stack->ts_entries++;
+
+ if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
+ // Find beginning of this task block
+ kmp_stack_block_t *stack_block =
+ (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
+
+ // Check if we already have a block
+ if (stack_block->sb_next !=
+ NULL) { // reset ts_top to beginning of next block
+ task_stack->ts_top = &stack_block->sb_next->sb_block[0];
+ } else { // Alloc new block and link it up
+ kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
+ thread, sizeof(kmp_stack_block_t));
+
+ task_stack->ts_top = &new_block->sb_block[0];
+ stack_block->sb_next = new_block;
+ new_block->sb_prev = stack_block;
+ new_block->sb_next = NULL;
+
+ KA_TRACE(
+ 30,
+ ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
+ gtid, tied_task, new_block));
+ }
+ }
+ KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
+ tied_task));
}
-//---------------------------------------------------------------------------
// __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
-// the task, just check to make sure it matches the ending task passed in.
+// the task, just check to make sure it matches the ending task passed in.
//
// gtid: global thread identifier for the calling thread
// thread: thread info structure containing stack
// tied_task: the task popped off the stack
// ending_task: the task that is ending (should match popped task)
+static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
+ kmp_taskdata_t *ending_task) {
+ // GEH - need to consider what to do if tt_threads_data not allocated yet
+ kmp_thread_data_t *thread_data =
+      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
+ kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+ kmp_taskdata_t *tied_task;
-static void
-__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
-{
- // GEH - need to consider what to do if tt_threads_data not allocated yet
- kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
- kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
- kmp_taskdata_t *tied_task;
-
- if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
- return; // Don't pop anything from stack if team or team tasks are serialized
- }
+ if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
+ // Don't pop anything from stack if team or team tasks are serialized
+ return;
+ }
- KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
- KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
+ KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
+ KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
- KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
+ KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
+ thread));
- // fix up ts_top if we need to pop from previous block
- if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
- {
- kmp_stack_block_t *stack_block =
- (kmp_stack_block_t *) (task_stack -> ts_top) ;
+ // fix up ts_top if we need to pop from previous block
+  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
+ kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
- stack_block = stack_block -> sb_prev;
- task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
- }
+ stack_block = stack_block->sb_prev;
+ task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
+ }
- // finish bookkeeping
- task_stack -> ts_top--;
- task_stack -> ts_entries--;
+ // finish bookkeeping
+ task_stack->ts_top--;
+ task_stack->ts_entries--;
- tied_task = * (task_stack -> ts_top );
+ tied_task = *(task_stack->ts_top);
- KMP_DEBUG_ASSERT( tied_task != NULL );
- KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
- KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
+ KMP_DEBUG_ASSERT(tied_task != NULL);
+ KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
+ KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
- KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
- return;
+ KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
+ tied_task));
+ return;
}
#endif /* BUILD_TIED_TASK_STACK */
-//---------------------------------------------------
// __kmp_push_task: Add a task to the thread's deque
-
-static kmp_int32
-__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
-{
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_task_team_t * task_team = thread->th.th_task_team;
- kmp_int32 tid = __kmp_tid_from_gtid( gtid );
- kmp_thread_data_t * thread_data;
-
- KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
-
- if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
- // untied task needs to increment counter so that the task structure is not freed prematurely
- kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
- KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
- gtid, counter, taskdata ) );
- }
-
- // The first check avoids building task_team thread data if serialized
- if ( taskdata->td_flags.task_serial ) {
- KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
- gtid, taskdata ) );
- return TASK_NOT_PUSHED;
- }
-
- // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- if ( ! KMP_TASKING_ENABLED(task_team) ) {
- __kmp_enable_tasking( task_team, thread );
- }
- KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
- KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
-
- // Find tasking deque specific to encountering thread
- thread_data = & task_team -> tt.tt_threads_data[ tid ];
-
- // No lock needed since only owner can allocate
- if (thread_data -> td.td_deque == NULL ) {
- __kmp_alloc_task_deque( thread, thread_data );
- }
-
- // Check if deque is full
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
- {
- KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
- gtid, taskdata ) );
- return TASK_NOT_PUSHED;
- }
-
- // Lock the deque for the task push operation
- __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
-#if OMP_45_ENABLED
- // Need to recheck as we can get a proxy task from a thread outside of OpenMP
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
- {
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
- gtid, taskdata ) );
- return TASK_NOT_PUSHED;
- }
+static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_task_team_t *task_team = thread->th.th_task_team;
+ kmp_int32 tid = __kmp_tid_from_gtid(gtid);
+ kmp_thread_data_t *thread_data;
+
+ KA_TRACE(20,
+ ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));
+
+ if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+ // untied task needs to increment counter so that the task structure is not
+ // freed prematurely
+ kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
+ KA_TRACE(
+ 20,
+ ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
+ gtid, counter, taskdata));
+ }
+
+ // The first check avoids building task_team thread data if serialized
+ if (taskdata->td_flags.task_serial) {
+ KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
+ "TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata));
+ return TASK_NOT_PUSHED;
+ }
+
+ // Now that serialized tasks have returned, we can assume that we are not in
+ // immediate exec mode
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+ if (!KMP_TASKING_ENABLED(task_team)) {
+ __kmp_enable_tasking(task_team, thread);
+ }
+ KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
+ KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
+
+ // Find tasking deque specific to encountering thread
+ thread_data = &task_team->tt.tt_threads_data[tid];
+
+ // No lock needed since only owner can allocate
+ if (thread_data->td.td_deque == NULL) {
+ __kmp_alloc_task_deque(thread, thread_data);
+ }
+
+ // Check if deque is full
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
+ "TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata));
+ return TASK_NOT_PUSHED;
+ }
+
+ // Lock the deque for the task push operation
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+#if OMP_45_ENABLED
+ // Need to recheck as we can get a proxy task from a thread outside of OpenMP
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
+ "TASK_NOT_PUSHED for task %p\n",
+ gtid, taskdata));
+ return TASK_NOT_PUSHED;
+ }
#else
- // Must have room since no thread can add tasks but calling thread
- KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
+  // Must have room since no thread other than the caller can add tasks here
+ KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
+ TASK_DEQUE_SIZE(thread_data->td));
#endif
- thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
- // Wrap index.
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
- TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
+ thread_data->td.td_deque[thread_data->td.td_deque_tail] =
+ taskdata; // Push taskdata
+ // Wrap index.
+ thread_data->td.td_deque_tail =
+ (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
+ TCW_4(thread_data->td.td_deque_ntasks,
+ TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
- KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
- "task=%p ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, thread_data->td.td_deque_ntasks,
- thread_data->td.td_deque_head, thread_data->td.td_deque_tail) );
+ KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
+ "task=%p ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
- __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
- return TASK_SUCCESSFULLY_PUSHED;
+ return TASK_SUCCESSFULLY_PUSHED;
}
-
-//-----------------------------------------------------------------------------------------
-// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
+// __kmp_pop_current_task_from_thread: set up current task from called thread
+// when team ends
+//
// this_thr: thread structure to set current_task in.
-
-void
-__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
-{
- KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
- "curtask_parent=%p\n",
- 0, this_thr, this_thr -> th.th_current_task,
- this_thr -> th.th_current_task -> td_parent ) );
-
- this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
-
- KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
- "curtask_parent=%p\n",
- 0, this_thr, this_thr -> th.th_current_task,
- this_thr -> th.th_current_task -> td_parent ) );
+void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
+ KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
+ "this_thread=%p, curtask=%p, "
+ "curtask_parent=%p\n",
+ 0, this_thr, this_thr->th.th_current_task,
+ this_thr->th.th_current_task->td_parent));
+
+ this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
+
+ KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
+ "this_thread=%p, curtask=%p, "
+ "curtask_parent=%p\n",
+ 0, this_thr, this_thr->th.th_current_task,
+ this_thr->th.th_current_task->td_parent));
}
-
-//---------------------------------------------------------------------------------------
-// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
+// __kmp_push_current_task_to_thread: set up current task in called thread for a
+// new team
+//
// this_thr: thread structure to set up
// team: team for implicit task data
// tid: thread within team to set up
-
-void
-__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
-{
- // current task of the thread is a parent of the new just created implicit tasks of new team
- KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
- "parent_task=%p\n",
- tid, this_thr, this_thr->th.th_current_task,
- team->t.t_implicit_task_taskdata[tid].td_parent ) );
-
- KMP_DEBUG_ASSERT (this_thr != NULL);
-
- if( tid == 0 ) {
- if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
- team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
- this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
- }
- } else {
- team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
- this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
- }
-
- KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
- "parent_task=%p\n",
- tid, this_thr, this_thr->th.th_current_task,
- team->t.t_implicit_task_taskdata[tid].td_parent ) );
+void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
+ int tid) {
+ // current task of the thread is a parent of the new just created implicit
+ // tasks of new team
+ KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
+ "curtask=%p "
+ "parent_task=%p\n",
+ tid, this_thr, this_thr->th.th_current_task,
+ team->t.t_implicit_task_taskdata[tid].td_parent));
+
+ KMP_DEBUG_ASSERT(this_thr != NULL);
+
+ if (tid == 0) {
+ if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
+ team->t.t_implicit_task_taskdata[0].td_parent =
+ this_thr->th.th_current_task;
+ this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
+ }
+ } else {
+ team->t.t_implicit_task_taskdata[tid].td_parent =
+ team->t.t_implicit_task_taskdata[0].td_parent;
+ this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
+ }
+
+ KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
+ "curtask=%p "
+ "parent_task=%p\n",
+ tid, this_thr, this_thr->th.th_current_task,
+ team->t.t_implicit_task_taskdata[tid].td_parent));
}
-
-//----------------------------------------------------------------------
// __kmp_task_start: bookkeeping for a task starting execution
+//
// GTID: global thread id of calling thread
// task: task starting execution
// current_task: task suspending
+static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
+ kmp_taskdata_t *current_task) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_info_t *thread = __kmp_threads[gtid];
+
+ KA_TRACE(10,
+ ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
+ gtid, taskdata, current_task));
+
+ KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+
+ // mark currently executing task as suspended
+ // TODO: GEH - make sure root team implicit task is initialized properly.
+ // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
+ current_task->td_flags.executing = 0;
-static void
-__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_info_t * thread = __kmp_threads[ gtid ];
-
- KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
- gtid, taskdata, current_task) );
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
-
- // mark currently executing task as suspended
- // TODO: GEH - make sure root team implicit task is initialized properly.
- // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
- current_task -> td_flags.executing = 0;
-
- // Add task to stack if tied
+// Add task to stack if tied
#ifdef BUILD_TIED_TASK_STACK
- if ( taskdata -> td_flags.tiedness == TASK_TIED )
- {
- __kmp_push_task_stack( gtid, thread, taskdata );
- }
+ if (taskdata->td_flags.tiedness == TASK_TIED) {
+ __kmp_push_task_stack(gtid, thread, taskdata);
+ }
#endif /* BUILD_TIED_TASK_STACK */
- // mark starting task as executing and as current task
- thread -> th.th_current_task = taskdata;
+ // mark starting task as executing and as current task
+ thread->th.th_current_task = taskdata;
- KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
- KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
- taskdata -> td_flags.started = 1;
- taskdata -> td_flags.executing = 1;
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
- // GEH TODO: shouldn't we pass some sort of location identifier here?
- // APT: yes, we will pass location here.
- // need to store current thread state (in a thread or taskdata structure)
- // before setting work_state, otherwise wrong state is set after end of task
+ KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
+ taskdata->td_flags.tiedness == TASK_UNTIED);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
+ taskdata->td_flags.tiedness == TASK_UNTIED);
+ taskdata->td_flags.started = 1;
+ taskdata->td_flags.executing = 1;
+ KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
+ // GEH TODO: shouldn't we pass some sort of location identifier here?
+ // APT: yes, we will pass location here.
+ // need to store current thread state (in a thread or taskdata structure)
+ // before setting work_state, otherwise wrong state is set after end of task
- KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
- gtid, taskdata ) );
+ KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
- kmp_taskdata_t *parent = taskdata->td_parent;
- ompt_callbacks.ompt_callback(ompt_event_task_begin)(
- parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
- parent ? &(parent->ompt_task_info.frame) : NULL,
- taskdata->ompt_task_info.task_id,
- taskdata->ompt_task_info.function);
- }
+ if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ ompt_callbacks.ompt_callback(ompt_event_task_begin)(
+ parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
+ parent ? &(parent->ompt_task_info.frame) : NULL,
+ taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
+ }
#endif
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
- /* OMPT emit all dependences if requested by the tool */
- if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
- ompt_callbacks.ompt_callback(ompt_event_task_dependences))
- {
- ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
- taskdata->ompt_task_info.task_id,
- taskdata->ompt_task_info.deps,
- taskdata->ompt_task_info.ndeps
- );
- /* We can now free the allocated memory for the dependencies */
- KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
- taskdata->ompt_task_info.deps = NULL;
- taskdata->ompt_task_info.ndeps = 0;
- }
+ /* OMPT emit all dependences if requested by the tool */
+ if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
+ ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
+ ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
+ taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
+ taskdata->ompt_task_info.ndeps);
+ /* We can now free the allocated memory for the dependencies */
+ KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
+ taskdata->ompt_task_info.deps = NULL;
+ taskdata->ompt_task_info.ndeps = 0;
+ }
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
- return;
+ return;
}
-
-//----------------------------------------------------------------------
-// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
+// __kmpc_omp_task_begin_if0: report that a given serialized task has started
+// execution
+//
// loc_ref: source location information; points to beginning of task block.
// gtid: global thread number.
// task: task thunk for the started task.
+void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *task) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
+
+ KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
+ "current_task=%p\n",
+ gtid, loc_ref, taskdata, current_task));
+
+ if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+ // untied task needs to increment counter so that the task structure is not
+ // freed prematurely
+ kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
+ KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
+ "incremented for task %p\n",
+ gtid, counter, taskdata));
+ }
+
+ taskdata->td_flags.task_serial =
+ 1; // Execute this task immediately, not deferred.
+ __kmp_task_start(gtid, task, current_task);
-void
-__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-
- KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
- gtid, loc_ref, taskdata, current_task ) );
+ KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
+ loc_ref, taskdata));
- if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
- // untied task needs to increment counter so that the task structure is not freed prematurely
- kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
- KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
- gtid, counter, taskdata ) );
- }
-
- taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
- __kmp_task_start( gtid, task, current_task );
-
- KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
- gtid, loc_ref, taskdata ) );
-
- return;
+ return;
}
#ifdef TASK_UNUSED
-//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
+void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
+ kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
-void
-__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
-{
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-
- KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
-
- __kmp_task_start( gtid, task, current_task );
-
- KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- return;
+ KA_TRACE(
+ 10,
+ ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
+
+ __kmp_task_start(gtid, task, current_task);
+
+ KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
+ loc_ref, KMP_TASK_TO_TASKDATA(task)));
+ return;
}
#endif // TASK_UNUSED
-
-//-------------------------------------------------------------------------------------
// __kmp_free_task: free the current task space and the space for shareds
+//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
+static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
+ kmp_info_t *thread) {
+ KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
+ taskdata));
+
+ // Check to make sure all flags and counters have the correct values
+ KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+ KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 ||
+ taskdata->td_flags.task_serial == 1);
+ KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0);
+
+ taskdata->td_flags.freed = 1;
+ ANNOTATE_HAPPENS_BEFORE(taskdata);
+// deallocate the taskdata and shared variable blocks associated with this task
+#if USE_FAST_MEMORY
+ __kmp_fast_free(thread, taskdata);
+#else /* ! USE_FAST_MEMORY */
+ __kmp_thread_free(thread, taskdata);
+#endif
-static void
-__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
-{
- KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
- gtid, taskdata) );
-
- // Check to make sure all flags and counters have the correct values
- KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
- KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
- KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
- KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
- KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
- KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
-
- taskdata->td_flags.freed = 1;
- ANNOTATE_HAPPENS_BEFORE(taskdata);
- // deallocate the taskdata and shared variable blocks associated with this task
- #if USE_FAST_MEMORY
- __kmp_fast_free( thread, taskdata );
- #else /* ! USE_FAST_MEMORY */
- __kmp_thread_free( thread, taskdata );
- #endif
-
- KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
- gtid, taskdata) );
+ KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
-//-------------------------------------------------------------------------------------
-// __kmp_free_task_and_ancestors: free the current task and ancestors without children
+// __kmp_free_task_and_ancestors: free the current task and ancestors without
+// children
//
// gtid: Global thread ID of calling thread
// taskdata: task to free
// thread: thread data structure of caller
-
-static void
-__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
-{
-#if OMP_45_ENABLED
- // Proxy tasks must always be allowed to free their parents
- // because they can be run in background even in serial mode.
- kmp_int32 team_serial = ( taskdata->td_flags.team_serial ||
- taskdata->td_flags.tasking_ser ) && !taskdata->td_flags.proxy;
+static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
+ kmp_taskdata_t *taskdata,
+ kmp_info_t *thread) {
+#if OMP_45_ENABLED
+ // Proxy tasks must always be allowed to free their parents
+ // because they can be run in background even in serial mode.
+ kmp_int32 team_serial =
+ (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
+ !taskdata->td_flags.proxy;
#else
- kmp_int32 team_serial = taskdata->td_flags.team_serial ||
- taskdata->td_flags.tasking_ser;
+ kmp_int32 team_serial =
+ taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
#endif
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+ KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
- kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
+ kmp_int32 children =
+ KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_allocated_child_tasks)) -
+ 1;
+ KMP_DEBUG_ASSERT(children >= 0);
- // Now, go up the ancestor tree to see if any ancestors can now be freed.
- while ( children == 0 )
- {
- kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
-
- KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
- "and freeing itself\n", gtid, taskdata) );
+ // Now, go up the ancestor tree to see if any ancestors can now be freed.
+ while (children == 0) {
+ kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
- // --- Deallocate my ancestor task ---
- __kmp_free_task( gtid, taskdata, thread );
+ KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
+ "and freeing itself\n",
+ gtid, taskdata));
- taskdata = parent_taskdata;
+ // --- Deallocate my ancestor task ---
+ __kmp_free_task(gtid, taskdata, thread);
- // Stop checking ancestors at implicit task
- // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
- if ( team_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
- return;
+ taskdata = parent_taskdata;
- // Predecrement simulated by "- 1" calculation
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
- }
+ // Stop checking ancestors at implicit task instead of walking up ancestor
+ // tree to avoid premature deallocation of ancestors.
+ if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
+ return;
- KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
- "not freeing it yet\n", gtid, taskdata, children) );
+ // Predecrement simulated by "- 1" calculation
+ children = KMP_TEST_THEN_DEC32(
+ (kmp_int32 *)(&taskdata->td_allocated_child_tasks)) -
+ 1;
+ KMP_DEBUG_ASSERT(children >= 0);
+ }
+
+ KA_TRACE(
+ 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
+ "not freeing it yet\n",
+ gtid, taskdata, children));
}
-//---------------------------------------------------------------------
// __kmp_task_finish: bookkeeping to do when a task finishes execution
+//
// gtid: global thread ID for calling thread
// task: task to be finished
// resumed_task: task to be resumed. (may be NULL if task is serialized)
-
-static void
-__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_task_team_t * task_team = thread->th.th_task_team; // might be NULL for serial teams...
- kmp_int32 children = 0;
+static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
+ kmp_taskdata_t *resumed_task) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_task_team_t *task_team =
+ thread->th.th_task_team; // might be NULL for serial teams...
+ kmp_int32 children = 0;
#if OMPT_SUPPORT
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_end)) {
- kmp_taskdata_t *parent = taskdata->td_parent;
- ompt_callbacks.ompt_callback(ompt_event_task_end)(
- taskdata->ompt_task_info.task_id);
- }
+ if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ ompt_callbacks.ompt_callback(ompt_event_task_end)(
+ taskdata->ompt_task_info.task_id);
+ }
#endif
- KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
- gtid, taskdata, resumed_task) );
+ KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
+ "task %p\n",
+ gtid, taskdata, resumed_task));
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+ KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
- // Pop task from stack if tied
+// Pop task from stack if tied
#ifdef BUILD_TIED_TASK_STACK
- if ( taskdata -> td_flags.tiedness == TASK_TIED )
- {
- __kmp_pop_task_stack( gtid, thread, taskdata );
- }
+ if (taskdata->td_flags.tiedness == TASK_TIED) {
+ __kmp_pop_task_stack(gtid, thread, taskdata);
+ }
#endif /* BUILD_TIED_TASK_STACK */
- if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
- // untied task needs to check the counter so that the task structure is not freed prematurely
- kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
- KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
- gtid, counter, taskdata ) );
- if ( counter > 0 ) {
- // untied task is not done, to be continued possibly by other thread, do not free it now
- if (resumed_task == NULL) {
- KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
- resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
- }
- thread->th.th_current_task = resumed_task; // restore current_task
- resumed_task->td_flags.executing = 1; // resume previous task
- KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
- gtid, taskdata, resumed_task) );
- return;
- }
- }
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
- taskdata -> td_flags.complete = 1; // mark the task as completed
- KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
- // Only need to keep track of count if team parallel and tasking not serialized
- if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
- // Predecrement simulated by "- 1" calculation
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
+ if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+ // untied task needs to check the counter so that the task structure is not
+ // freed prematurely
+ kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
+ KA_TRACE(
+ 20,
+ ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
+ gtid, counter, taskdata));
+ if (counter > 0) {
+ // untied task is not done, to be continued possibly by other thread, do
+ // not free it now
+ if (resumed_task == NULL) {
+ KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
+ resumed_task = taskdata->td_parent; // In a serialized task, the resumed
+ // task is the parent
+ }
+ thread->th.th_current_task = resumed_task; // restore current_task
+ resumed_task->td_flags.executing = 1; // resume previous task
+ KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
+ "resuming task %p\n",
+ gtid, taskdata, resumed_task));
+ return;
+ }
+ }
+
+ KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+ taskdata->td_flags.complete = 1; // mark the task as completed
+ KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
+ // Only need to keep track of count if team parallel and tasking not
+ // serialized
+ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+ // Predecrement simulated by "- 1" calculation
+ children =
+ KMP_TEST_THEN_DEC32(
+ (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) -
+ 1;
+ KMP_DEBUG_ASSERT(children >= 0);
#if OMP_40_ENABLED
- if ( taskdata->td_taskgroup )
- KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
+ if (taskdata->td_taskgroup)
+ KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
#if OMP_45_ENABLED
- }
- // if we found proxy tasks there could exist a dependency chain
- // with the proxy task as origin
- if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) {
-#endif
- __kmp_release_deps(gtid,taskdata);
-#endif
- }
-
- // td_flags.executing must be marked as 0 after __kmp_release_deps has been called
- // Othertwise, if a task is executed immediately from the release_deps code
- // the flag will be reset to 1 again by this same function
- KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
- taskdata -> td_flags.executing = 0; // suspend the finishing task
-
- KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
- gtid, taskdata, children) );
+ }
+ // if we found proxy tasks there could exist a dependency chain
+ // with the proxy task as origin
+ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
+ (task_team && task_team->tt.tt_found_proxy_tasks)) {
+#endif
+ __kmp_release_deps(gtid, taskdata);
+#endif
+ }
+
+ // td_flags.executing must be marked as 0 after __kmp_release_deps has been
+  // called. Otherwise, if a task is executed immediately from the release_deps
+ // code, the flag will be reset to 1 again by this same function
+ KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
+ taskdata->td_flags.executing = 0; // suspend the finishing task
+
+ KA_TRACE(
+ 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
+ gtid, taskdata, children));
#if OMP_40_ENABLED
- /* If the tasks' destructor thunk flag has been set, we need to invoke the
- destructor thunk that has been generated by the compiler.
- The code is placed here, since at this point other tasks might have been released
- hence overlapping the destructor invokations with some other work in the
- released tasks. The OpenMP spec is not specific on when the destructors are
- invoked, so we should be free to choose.
- */
- if (taskdata->td_flags.destructors_thunk) {
- kmp_routine_entry_t destr_thunk = task->data1.destructors;
- KMP_ASSERT(destr_thunk);
- destr_thunk(gtid, task);
- }
+  /* If the task's destructor thunk flag has been set, we need to invoke the
+     destructor thunk that has been generated by the compiler. The code is
+     placed here, since at this point other tasks might have been released,
+     hence overlapping the destructor invocations with some other work in the
+     released tasks. The OpenMP spec is not specific on when the destructors
+     are invoked, so we should be free to choose. */
+ if (taskdata->td_flags.destructors_thunk) {
+ kmp_routine_entry_t destr_thunk = task->data1.destructors;
+ KMP_ASSERT(destr_thunk);
+ destr_thunk(gtid, task);
+ }
#endif // OMP_40_ENABLED
- // bookkeeping for resuming task:
- // GEH - note tasking_ser => task_serial
- KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
- taskdata->td_flags.task_serial);
- if ( taskdata->td_flags.task_serial )
- {
- if (resumed_task == NULL) {
- resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
- }
- else
+ // bookkeeping for resuming task:
+ // GEH - note tasking_ser => task_serial
+ KMP_DEBUG_ASSERT(
+ (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
+ taskdata->td_flags.task_serial);
+ if (taskdata->td_flags.task_serial) {
+ if (resumed_task == NULL) {
+ resumed_task = taskdata->td_parent; // In a serialized task, the resumed
+ // task is the parent
+ } else
#if OMP_45_ENABLED
- if ( !(task_team && task_team->tt.tt_found_proxy_tasks) )
+ if (!(task_team && task_team->tt.tt_found_proxy_tasks))
#endif
- {
- // verify resumed task passed in points to parent
- KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
- }
- }
- else {
- KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as arguemnt
+ {
+ // verify resumed task passed in points to parent
+ KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent);
}
+ } else {
+ KMP_DEBUG_ASSERT(resumed_task !=
+                     NULL); // verify that resumed task is passed as argument
+ }
+
+ // Free this task and then ancestor tasks if they have no children.
+ // Restore th_current_task first as suggested by John:
+ // johnmc: if an asynchronous inquiry peers into the runtime system
+ // it doesn't see the freed task as the current task.
+ thread->th.th_current_task = resumed_task;
+ __kmp_free_task_and_ancestors(gtid, taskdata, thread);
+
+ // TODO: GEH - make sure root team implicit task is initialized properly.
+ // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
+ resumed_task->td_flags.executing = 1; // resume previous task
+
+ KA_TRACE(
+ 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
+ gtid, taskdata, resumed_task));
- // Free this task and then ancestor tasks if they have no children.
- // Restore th_current_task first as suggested by John:
- // johnmc: if an asynchronous inquiry peers into the runtime system
- // it doesn't see the freed task as the current task.
- thread->th.th_current_task = resumed_task;
- __kmp_free_task_and_ancestors(gtid, taskdata, thread);
-
- // TODO: GEH - make sure root team implicit task is initialized properly.
- // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
- resumed_task->td_flags.executing = 1; // resume previous task
-
- KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
- gtid, taskdata, resumed_task) );
-
- return;
+ return;
}
-//---------------------------------------------------------------------
// __kmpc_omp_task_complete_if0: report that a task has completed execution
+//
// loc_ref: source location information; points to end of task block.
// gtid: global thread number.
// task: task thunk for the completed task.
-
-void
-__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
-{
- KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
-
- KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- return;
+void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *task) {
+ KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
+ // this routine will provide task to resume
+ __kmp_task_finish(gtid, task, NULL);
+
+ KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
+ gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
+ return;
}
#ifdef TASK_UNUSED
-//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
-
-void
-__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
-{
- KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
- __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume
-
- KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
- gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
- return;
+void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *task) {
+ KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
+ loc_ref, KMP_TASK_TO_TASKDATA(task)));
+
+ __kmp_task_finish(gtid, task, NULL); // Not sure how to find task to resume
+
+ KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
+ loc_ref, KMP_TASK_TO_TASKDATA(task)));
+ return;
}
#endif // TASK_UNUSED
-
#if OMPT_SUPPORT
-//----------------------------------------------------------------------------------------------------
-// __kmp_task_init_ompt:
-// Initialize OMPT fields maintained by a task. This will only be called after
-// ompt_tool, so we already know whether ompt is enabled or not.
-
-static inline void
-__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
-{
- if (ompt_enabled) {
- task->ompt_task_info.task_id = __ompt_task_id_new(tid);
- task->ompt_task_info.function = function;
- task->ompt_task_info.frame.exit_runtime_frame = NULL;
- task->ompt_task_info.frame.reenter_runtime_frame = NULL;
+// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will
+// only be called after ompt_tool, so we already know whether ompt is enabled
+// or not.
+static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
+ void *function) {
+ if (ompt_enabled) {
+ task->ompt_task_info.task_id = __ompt_task_id_new(tid);
+ task->ompt_task_info.function = function;
+ task->ompt_task_info.frame.exit_runtime_frame = NULL;
+ task->ompt_task_info.frame.reenter_runtime_frame = NULL;
#if OMP_40_ENABLED
- task->ompt_task_info.ndeps = 0;
- task->ompt_task_info.deps = NULL;
+ task->ompt_task_info.ndeps = 0;
+ task->ompt_task_info.deps = NULL;
#endif /* OMP_40_ENABLED */
- }
+ }
}
#endif
-
-//----------------------------------------------------------------------------------------------------
-// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
+// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
+// task for a given thread
//
// loc_ref: reference to source location of parallel region
// this_thr: thread data structure corresponding to implicit task
// team: team for this_thr
// tid: thread id of given thread within team
// set_curr_task: TRUE if need to push current task to thread
-// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
+// NOTE: Routine does not set up the implicit task ICVs. This is assumed to
+// have already been done elsewhere.
// TODO: Get better loc_ref. Value passed in may be NULL
-
-void
-__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
-{
- kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
-
- KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
- tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
-
- task->td_task_id = KMP_GEN_TASK_ID();
- task->td_team = team;
-// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
- task->td_ident = loc_ref;
- task->td_taskwait_ident = NULL;
- task->td_taskwait_counter = 0;
- task->td_taskwait_thread = 0;
-
- task->td_flags.tiedness = TASK_TIED;
- task->td_flags.tasktype = TASK_IMPLICIT;
-#if OMP_45_ENABLED
- task->td_flags.proxy = TASK_FULL;
-#endif
-
- // All implicit tasks are executed immediately, not deferred
- task->td_flags.task_serial = 1;
- task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
- task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
-
- task->td_flags.started = 1;
- task->td_flags.executing = 1;
- task->td_flags.complete = 0;
- task->td_flags.freed = 0;
+void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
+ kmp_team_t *team, int tid, int set_curr_task) {
+ kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
+
+ KF_TRACE(
+ 10,
+ ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
+ tid, team, task, set_curr_task ? "TRUE" : "FALSE"));
+
+ task->td_task_id = KMP_GEN_TASK_ID();
+ task->td_team = team;
+ // task->td_parent = NULL; // fix for CQ230101 (broken parent task info
+ // in debugger)
+ task->td_ident = loc_ref;
+ task->td_taskwait_ident = NULL;
+ task->td_taskwait_counter = 0;
+ task->td_taskwait_thread = 0;
+
+ task->td_flags.tiedness = TASK_TIED;
+ task->td_flags.tasktype = TASK_IMPLICIT;
+#if OMP_45_ENABLED
+ task->td_flags.proxy = TASK_FULL;
+#endif
+
+ // All implicit tasks are executed immediately, not deferred
+ task->td_flags.task_serial = 1;
+ task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
+ task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
+
+ task->td_flags.started = 1;
+ task->td_flags.executing = 1;
+ task->td_flags.complete = 0;
+ task->td_flags.freed = 0;
#if OMP_40_ENABLED
- task->td_depnode = NULL;
+ task->td_depnode = NULL;
#endif
- if (set_curr_task) { // only do this initialization the first time a thread is created
- task->td_incomplete_child_tasks = 0;
- task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
+ if (set_curr_task) { // only do this init first time thread is created
+ task->td_incomplete_child_tasks = 0;
+    task->td_allocated_child_tasks = 0; // Not used: don't need to
+    // deallocate implicit task
#if OMP_40_ENABLED
- task->td_taskgroup = NULL; // An implicit task does not have taskgroup
- task->td_dephash = NULL;
+ task->td_taskgroup = NULL; // An implicit task does not have taskgroup
+ task->td_dephash = NULL;
#endif
- __kmp_push_current_task_to_thread( this_thr, team, tid );
- } else {
- KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
- KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
- }
+ __kmp_push_current_task_to_thread(this_thr, team, tid);
+ } else {
+ KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
+ KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
+ }
#if OMPT_SUPPORT
- __kmp_task_init_ompt(task, tid, NULL);
+ __kmp_task_init_ompt(task, tid, NULL);
#endif
- KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
- tid, team, task ) );
+ KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
+ team, task));
}
-
-//-----------------------------------------------------------------------------
-//// __kmp_finish_implicit_task: Release resources associated to implicit tasks
-//// at the end of parallel regions. Some resources are kept for reuse in the
-//// next parallel region.
-////
-//// thread: thread data structure corresponding to implicit task
+// __kmp_finish_implicit_task: Release resources associated with implicit tasks
+// at the end of parallel regions. Some resources are kept for reuse in the next
+// parallel region.
//
-void
-__kmp_finish_implicit_task(kmp_info_t *thread)
-{
- kmp_taskdata_t *task = thread->th.th_current_task;
- if (task->td_dephash)
- __kmp_dephash_free_entries(thread, task->td_dephash);
+// thread: thread data structure corresponding to implicit task
+void __kmp_finish_implicit_task(kmp_info_t *thread) {
+ kmp_taskdata_t *task = thread->th.th_current_task;
+ if (task->td_dephash)
+ __kmp_dephash_free_entries(thread, task->td_dephash);
}
-
-//-----------------------------------------------------------------------------
-//// __kmp_free_implicit_task: Release resources associated to implicit tasks
-//// when these are destroyed regions
-////
-//// thread: thread data structure corresponding to implicit task
+// __kmp_free_implicit_task: Release resources associated with implicit tasks
+// when these tasks are destroyed
//
-void
-__kmp_free_implicit_task(kmp_info_t *thread)
-{
- kmp_taskdata_t *task = thread->th.th_current_task;
- if (task->td_dephash)
- __kmp_dephash_free(thread, task->td_dephash);
- task->td_dephash = NULL;
+// thread: thread data structure corresponding to implicit task
+void __kmp_free_implicit_task(kmp_info_t *thread) {
+ kmp_taskdata_t *task = thread->th.th_current_task;
+ if (task->td_dephash)
+ __kmp_dephash_free(thread, task->td_dephash);
+ task->td_dephash = NULL;
}
-
-// Round up a size to a power of two specified by val
-// Used to insert padding between structures co-allocated using a single malloc() call
-static size_t
-__kmp_round_up_to_val( size_t size, size_t val ) {
- if ( size & ( val - 1 ) ) {
- size &= ~ ( val - 1 );
- if ( size <= KMP_SIZE_T_MAX - val ) {
- size += val; // Round up if there is no overflow.
- }; // if
+// Round up a size to a multiple of val, where val is a power of two: used to
+// insert padding between structures co-allocated using a single malloc() call
+static size_t __kmp_round_up_to_val(size_t size, size_t val) {
+ if (size & (val - 1)) {
+ size &= ~(val - 1);
+ if (size <= KMP_SIZE_T_MAX - val) {
+ size += val; // Round up if there is no overflow.
}; // if
- return size;
+ }; // if
+ return size;
} // __kmp_round_up_to_val
-
-//---------------------------------------------------------------------------------
// __kmp_task_alloc: Allocate the taskdata and task data structures for a task
//
// loc_ref: source location information
// gtid: global thread number.
-// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
-// Converted from kmp_int32 to kmp_tasking_flags_t in routine.
-// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
-// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
+// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
+// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
+// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including
+// private vars accessed in task.
+// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed
+// in task.
// task_entry: Pointer to task code entry point generated by compiler.
// returns: a pointer to the allocated kmp_task_t structure (task).
-
-kmp_task_t *
-__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry )
-{
- kmp_task_t *task;
- kmp_taskdata_t *taskdata;
- kmp_info_t *thread = __kmp_threads[ gtid ];
- kmp_team_t *team = thread->th.th_team;
- kmp_taskdata_t *parent_task = thread->th.th_current_task;
- size_t shareds_offset;
-
- KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
- "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
- gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
- sizeof_shareds, task_entry) );
-
- if ( parent_task->td_flags.final ) {
- if (flags->merged_if0) {
- }
- flags->final = 1;
- }
-
-#if OMP_45_ENABLED
- if ( flags->proxy == TASK_PROXY ) {
- flags->tiedness = TASK_UNTIED;
- flags->merged_if0 = 1;
-
- /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
- if ( (thread->th.th_task_team) == NULL ) {
- /* This should only happen if the team is serialized
- setup a task team and propagate it to the thread
- */
- KMP_DEBUG_ASSERT(team->t.t_serialized);
- KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
- __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
- thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
- }
- kmp_task_team_t * task_team = thread->th.th_task_team;
-
- /* tasking must be enabled now as the task might not be pushed */
- if ( !KMP_TASKING_ENABLED( task_team ) ) {
- KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
- __kmp_enable_tasking( task_team, thread );
- kmp_int32 tid = thread->th.th_info.ds.ds_tid;
- kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
- // No lock needed since only owner can allocate
- if (thread_data -> td.td_deque == NULL ) {
- __kmp_alloc_task_deque( thread, thread_data );
- }
- }
-
- if ( task_team->tt.tt_found_proxy_tasks == FALSE )
- TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
+kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_tasking_flags_t *flags,
+ size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry) {
+ kmp_task_t *task;
+ kmp_taskdata_t *taskdata;
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_team_t *team = thread->th.th_team;
+ kmp_taskdata_t *parent_task = thread->th.th_current_task;
+ size_t shareds_offset;
+
+ KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
+ "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+ gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
+ sizeof_shareds, task_entry));
+
+ if (parent_task->td_flags.final) {
+ if (flags->merged_if0) {
+ }
+ flags->final = 1;
+ }
+
+#if OMP_45_ENABLED
+ if (flags->proxy == TASK_PROXY) {
+ flags->tiedness = TASK_UNTIED;
+ flags->merged_if0 = 1;
+
+    /* are we running in a serialized parallel region or in tskm_immediate_exec
+       mode... we need tasking support enabled */
+ if ((thread->th.th_task_team) == NULL) {
+ /* This should only happen if the team is serialized
+ setup a task team and propagate it to the thread */
+ KMP_DEBUG_ASSERT(team->t.t_serialized);
+ KA_TRACE(30,
+ ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
+ gtid));
+ __kmp_task_team_setup(
+ thread, team,
+ 1); // 1 indicates setup the current team regardless of nthreads
+ thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
+ }
+ kmp_task_team_t *task_team = thread->th.th_task_team;
+
+ /* tasking must be enabled now as the task might not be pushed */
+ if (!KMP_TASKING_ENABLED(task_team)) {
+ KA_TRACE(
+ 30,
+ ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
+ __kmp_enable_tasking(task_team, thread);
+ kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+ kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
+ // No lock needed since only owner can allocate
+ if (thread_data->td.td_deque == NULL) {
+ __kmp_alloc_task_deque(thread, thread_data);
+ }
}
-#endif
-
- // Calculate shared structure offset including padding after kmp_task_t struct
- // to align pointers in shared struct
- shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
- shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
- // Allocate a kmp_taskdata_t block and a kmp_task_t block.
- KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
- gtid, shareds_offset) );
- KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
- gtid, sizeof_shareds) );
+ if (task_team->tt.tt_found_proxy_tasks == FALSE)
+ TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
+ }
+#endif
+
+ // Calculate shared structure offset including padding after kmp_task_t struct
+ // to align pointers in shared struct
+ shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
+ shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
+
+ // Allocate a kmp_taskdata_t block and a kmp_task_t block.
+ KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
+ shareds_offset));
+ KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
+ sizeof_shareds));
+
+// Avoid double allocation here by combining shareds with taskdata
+#if USE_FAST_MEMORY
+ taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
+ sizeof_shareds);
+#else /* ! USE_FAST_MEMORY */
+ taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
+ sizeof_shareds);
+#endif /* USE_FAST_MEMORY */
+ ANNOTATE_HAPPENS_AFTER(taskdata);
- // Avoid double allocation here by combining shareds with taskdata
- #if USE_FAST_MEMORY
- taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
- #else /* ! USE_FAST_MEMORY */
- taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
- #endif /* USE_FAST_MEMORY */
- ANNOTATE_HAPPENS_AFTER(taskdata);
+ task = KMP_TASKDATA_TO_TASK(taskdata);
- task = KMP_TASKDATA_TO_TASK(taskdata);
-
- // Make sure task & taskdata are aligned appropriately
+// Make sure task & taskdata are aligned appropriately
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
+ KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
+ KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
+ KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
+ KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
- if (sizeof_shareds > 0) {
- // Avoid double allocation here by combining shareds with taskdata
- task->shareds = & ((char *) taskdata)[ shareds_offset ];
- // Make sure shareds struct is aligned to pointer size
- KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
- } else {
- task->shareds = NULL;
- }
- task->routine = task_entry;
- task->part_id = 0; // AC: Always start with 0 part id
-
- taskdata->td_task_id = KMP_GEN_TASK_ID();
- taskdata->td_team = team;
- taskdata->td_alloc_thread = thread;
- taskdata->td_parent = parent_task;
- taskdata->td_level = parent_task->td_level + 1; // increment nesting level
- taskdata->td_untied_count = 0;
- taskdata->td_ident = loc_ref;
- taskdata->td_taskwait_ident = NULL;
- taskdata->td_taskwait_counter = 0;
- taskdata->td_taskwait_thread = 0;
- KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
-#if OMP_45_ENABLED
- // avoid copying icvs for proxy tasks
- if ( flags->proxy == TASK_FULL )
-#endif
- copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
-
- taskdata->td_flags.tiedness = flags->tiedness;
- taskdata->td_flags.final = flags->final;
- taskdata->td_flags.merged_if0 = flags->merged_if0;
+ if (sizeof_shareds > 0) {
+ // Avoid double allocation here by combining shareds with taskdata
+ task->shareds = &((char *)taskdata)[shareds_offset];
+ // Make sure shareds struct is aligned to pointer size
+ KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
+ 0);
+ } else {
+ task->shareds = NULL;
+ }
+ task->routine = task_entry;
+ task->part_id = 0; // AC: Always start with 0 part id
+
+ taskdata->td_task_id = KMP_GEN_TASK_ID();
+ taskdata->td_team = team;
+ taskdata->td_alloc_thread = thread;
+ taskdata->td_parent = parent_task;
+ taskdata->td_level = parent_task->td_level + 1; // increment nesting level
+ taskdata->td_untied_count = 0;
+ taskdata->td_ident = loc_ref;
+ taskdata->td_taskwait_ident = NULL;
+ taskdata->td_taskwait_counter = 0;
+ taskdata->td_taskwait_thread = 0;
+ KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
+#if OMP_45_ENABLED
+ // avoid copying icvs for proxy tasks
+ if (flags->proxy == TASK_FULL)
+#endif
+ copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
+
+ taskdata->td_flags.tiedness = flags->tiedness;
+ taskdata->td_flags.final = flags->final;
+ taskdata->td_flags.merged_if0 = flags->merged_if0;
#if OMP_40_ENABLED
- taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
+ taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
#endif // OMP_40_ENABLED
#if OMP_45_ENABLED
- taskdata->td_flags.proxy = flags->proxy;
- taskdata->td_task_team = thread->th.th_task_team;
- taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
+ taskdata->td_flags.proxy = flags->proxy;
+ taskdata->td_task_team = thread->th.th_task_team;
+ taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
#endif
- taskdata->td_flags.tasktype = TASK_EXPLICIT;
+ taskdata->td_flags.tasktype = TASK_EXPLICIT;
+
+ // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
+ taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
- // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
- taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
+ // GEH - TODO: fix this to copy parent task's value of team_serial flag
+ taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
- // GEH - TODO: fix this to copy parent task's value of team_serial flag
- taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
+ // GEH - Note we serialize the task if the team is serialized to make sure
+ // implicit parallel region tasks are not left until program termination to
+ // execute. Also, it helps locality to execute immediately.
- // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
- // tasks are not left until program termination to execute. Also, it helps locality to execute
- // immediately.
- taskdata->td_flags.task_serial = ( parent_task->td_flags.final
- || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
+ taskdata->td_flags.task_serial =
+ (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
+ taskdata->td_flags.tasking_ser);
- taskdata->td_flags.started = 0;
- taskdata->td_flags.executing = 0;
- taskdata->td_flags.complete = 0;
- taskdata->td_flags.freed = 0;
+ taskdata->td_flags.started = 0;
+ taskdata->td_flags.executing = 0;
+ taskdata->td_flags.complete = 0;
+ taskdata->td_flags.freed = 0;
- taskdata->td_flags.native = flags->native;
+ taskdata->td_flags.native = flags->native;
- taskdata->td_incomplete_child_tasks = 0;
- taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
+ taskdata->td_incomplete_child_tasks = 0;
+ taskdata->td_allocated_child_tasks = 1; // start at one because counts current
+// task and children
#if OMP_40_ENABLED
- taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
- taskdata->td_dephash = NULL;
- taskdata->td_depnode = NULL;
+ taskdata->td_taskgroup =
+ parent_task->td_taskgroup; // task inherits taskgroup from the parent task
+ taskdata->td_dephash = NULL;
+ taskdata->td_depnode = NULL;
#endif
- // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
+// Only need to keep track of child task counts if team parallel and tasking not
+// serialized or if it is a proxy task
#if OMP_45_ENABLED
- if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
+ if (flags->proxy == TASK_PROXY ||
+ !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#else
- if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
+ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
#endif
- {
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
+ {
+ KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks));
#if OMP_40_ENABLED
- if ( parent_task->td_taskgroup )
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
+ if (parent_task->td_taskgroup)
+ KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
#endif
- // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
- if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
- }
- }
-
- KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
- gtid, taskdata, taskdata->td_parent) );
- ANNOTATE_HAPPENS_BEFORE(task);
+ // Only need to keep track of allocated child tasks for explicit tasks since
+ // implicit not deallocated
+ if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
+ KMP_TEST_THEN_INC32(
+ (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks));
+ }
+ }
+
+ KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
+ gtid, taskdata, taskdata->td_parent));
+ ANNOTATE_HAPPENS_BEFORE(task);
#if OMPT_SUPPORT
- __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
+ __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);
#endif
- return task;
+ return task;
}
+kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_int32 flags, size_t sizeof_kmp_task_t,
+ size_t sizeof_shareds,
+ kmp_routine_entry_t task_entry) {
+ kmp_task_t *retval;
+ kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
-kmp_task_t *
-__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
- size_t sizeof_kmp_task_t, size_t sizeof_shareds,
- kmp_routine_entry_t task_entry )
-{
- kmp_task_t *retval;
- kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
-
- input_flags->native = FALSE;
- // __kmp_task_alloc() sets up all other runtime flags
+ input_flags->native = FALSE;
+// __kmp_task_alloc() sets up all other runtime flags
#if OMP_45_ENABLED
- KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
- "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
- gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
- input_flags->proxy ? "proxy" : "",
- sizeof_kmp_task_t, sizeof_shareds, task_entry) );
+ KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
+ "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+ gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
+ input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
+ sizeof_shareds, task_entry));
#else
- KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
- "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
- gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
- sizeof_kmp_task_t, sizeof_shareds, task_entry) );
+ KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
+ "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+ gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
+ sizeof_kmp_task_t, sizeof_shareds, task_entry));
#endif
- retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
- sizeof_shareds, task_entry );
+ retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
+ sizeof_shareds, task_entry);
- KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
+ KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
- return retval;
+ return retval;
}
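
For context, here is a minimal sketch (not part of this patch; names and the capture layout are illustrative, and it assumes the kmp.h types) of how a compiler typically lowers #pragma omp task onto the two allocators above plus __kmpc_omp_task further down. The (gtid, task) signature is assumed to match the (*task->routine)(gtid, task) call made by __kmp_invoke_task.

struct my_shareds { int *counter; }; // hypothetical capture block

// Hypothetical outlined task body; assumed signature kmp_int32 (kmp_int32, void *)
static kmp_int32 my_task_entry(kmp_int32 gtid, void *task) {
  kmp_task_t *t = (kmp_task_t *)task;
  struct my_shareds *sh = (struct my_shareds *)t->shareds;
  (*sh->counter)++; // body of the task region
  return 0;
}

static void lower_omp_task(ident_t *loc, kmp_int32 gtid, int *counter) {
  // flags = 1 is assumed here to request a tied task; __kmp_task_alloc fills
  // in the remaining td_flags fields.
  kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, /*flags=*/1,
                                        sizeof(kmp_task_t),
                                        sizeof(struct my_shareds),
                                        (kmp_routine_entry_t)my_task_entry);
  ((struct my_shareds *)t->shareds)->counter = counter; // capture
  __kmpc_omp_task(loc, gtid, t); // defer if possible, else run immediately
}

The shareds block lives at the tail of the same allocation as the kmp_taskdata_t/kmp_task_t pair, which is why sizeof_shareds is passed separately above.
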
-//-----------------------------------------------------------
// __kmp_invoke_task: invoke the specified task
//
// gtid: global thread ID of caller
// task: the task to invoke
// current_task: the task to resume after task invocation
-
-static void
-__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_uint64 cur_time;
+static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
+ kmp_taskdata_t *current_task) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_uint64 cur_time;
#if OMP_40_ENABLED
- int discard = 0 /* false */;
+ int discard = 0 /* false */;
#endif
- KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
- gtid, taskdata, current_task) );
- KMP_DEBUG_ASSERT(task);
-#if OMP_45_ENABLED
- if ( taskdata->td_flags.proxy == TASK_PROXY &&
- taskdata->td_flags.complete == 1)
- {
- // This is a proxy task that was already completed but it needs to run
- // its bottom-half finish
- KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
- gtid, taskdata) );
+ KA_TRACE(
+ 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
+ gtid, taskdata, current_task));
+ KMP_DEBUG_ASSERT(task);
+#if OMP_45_ENABLED
+ if (taskdata->td_flags.proxy == TASK_PROXY &&
+ taskdata->td_flags.complete == 1) {
+ // This is a proxy task that was already completed but it needs to run
+ // its bottom-half finish
+ KA_TRACE(
+ 30,
+ ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
+ gtid, taskdata));
+
+ __kmp_bottom_half_finish_proxy(gtid, task);
+
+ KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
+ "proxy task %p, resuming task %p\n",
+ gtid, taskdata, current_task));
- __kmp_bottom_half_finish_proxy(gtid,task);
-
- KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
-
- return;
- }
+ return;
+ }
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if(__kmp_forkjoin_frames_mode == 3) {
- // Get the current time stamp to measure task execution time to correct barrier imbalance time
- cur_time = __itt_get_timestamp();
- }
+ if (__kmp_forkjoin_frames_mode == 3) {
+ // Get the current time stamp to measure task execution time to correct
+ // barrier imbalance time
+ cur_time = __itt_get_timestamp();
+ }
#endif
#if OMP_45_ENABLED
- // Proxy tasks are not handled by the runtime
- if ( taskdata->td_flags.proxy != TASK_PROXY ) {
+ // Proxy tasks are not handled by the runtime
+ if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
- ANNOTATE_HAPPENS_AFTER(task);
- __kmp_task_start( gtid, task, current_task );
+ ANNOTATE_HAPPENS_AFTER(task);
+ __kmp_task_start(gtid, task, current_task);
#if OMP_45_ENABLED
- }
+ }
#endif
#if OMPT_SUPPORT
- ompt_thread_info_t oldInfo;
- kmp_info_t * thread;
- if (ompt_enabled) {
- // Store the threads states and restore them after the task
- thread = __kmp_threads[ gtid ];
- oldInfo = thread->th.ompt_thread_info;
- thread->th.ompt_thread_info.wait_id = 0;
- thread->th.ompt_thread_info.state = ompt_state_work_parallel;
- taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
- }
+ ompt_thread_info_t oldInfo;
+ kmp_info_t *thread;
+ if (ompt_enabled) {
+ // Store the threads states and restore them after the task
+ thread = __kmp_threads[gtid];
+ oldInfo = thread->th.ompt_thread_info;
+ thread->th.ompt_thread_info.wait_id = 0;
+ thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+ taskdata->ompt_task_info.frame.exit_runtime_frame =
+ __builtin_frame_address(0);
+ }
#endif
#if OMP_40_ENABLED
- // TODO: cancel tasks if the parallel region has also been cancelled
- // TODO: check if this sequence can be hoisted above __kmp_task_start
- // if cancellation has been enabled for this run ...
- if (__kmp_omp_cancellation) {
- kmp_info_t *this_thr = __kmp_threads [ gtid ];
- kmp_team_t * this_team = this_thr->th.th_team;
- kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
- if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
- KMP_COUNT_BLOCK(TASK_cancelled);
- // this task belongs to a task group and we need to cancel it
- discard = 1 /* true */;
- }
- }
-
- //
- // Invoke the task routine and pass in relevant data.
- // Thunks generated by gcc take a different argument list.
- //
- if (!discard) {
+ // TODO: cancel tasks if the parallel region has also been cancelled
+ // TODO: check if this sequence can be hoisted above __kmp_task_start
+ // if cancellation has been enabled for this run ...
+ if (__kmp_omp_cancellation) {
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+ kmp_team_t *this_team = this_thr->th.th_team;
+ kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
+ if ((taskgroup && taskgroup->cancel_request) ||
+ (this_team->t.t_cancel_request == cancel_parallel)) {
+ KMP_COUNT_BLOCK(TASK_cancelled);
+ // this task belongs to a task group and we need to cancel it
+ discard = 1 /* true */;
+ }
+ }
+
+ // Invoke the task routine and pass in relevant data.
+ // Thunks generated by gcc take a different argument list.
+ if (!discard) {
#if KMP_STATS_ENABLED
- KMP_COUNT_BLOCK(TASK_executed);
- switch(KMP_GET_THREAD_STATE()) {
- case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
- case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
- case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
- case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
- case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
- default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
- }
+ KMP_COUNT_BLOCK(TASK_executed);
+ switch (KMP_GET_THREAD_STATE()) {
+ case FORK_JOIN_BARRIER:
+ KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
+ break;
+ case PLAIN_BARRIER:
+ KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
+ break;
+ case TASKYIELD:
+ KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
+ break;
+ case TASKWAIT:
+ KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
+ break;
+ case TASKGROUP:
+ KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
+ break;
+ default:
+ KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
+ break;
+ }
#endif // KMP_STATS_ENABLED
#endif // OMP_40_ENABLED
#if OMPT_SUPPORT && OMPT_TRACE
- /* let OMPT know that we're about to run this task */
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_switch))
- {
- ompt_callbacks.ompt_callback(ompt_event_task_switch)(
- current_task->ompt_task_info.task_id,
- taskdata->ompt_task_info.task_id);
- }
+ /* let OMPT know that we're about to run this task */
+ if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
+ ompt_callbacks.ompt_callback(ompt_event_task_switch)(
+ current_task->ompt_task_info.task_id,
+ taskdata->ompt_task_info.task_id);
+ }
#endif
#ifdef KMP_GOMP_COMPAT
- if (taskdata->td_flags.native) {
- ((void (*)(void *))(*(task->routine)))(task->shareds);
- }
- else
+ if (taskdata->td_flags.native) {
+ ((void (*)(void *))(*(task->routine)))(task->shareds);
+ } else
#endif /* KMP_GOMP_COMPAT */
- {
- (*(task->routine))(gtid, task);
- }
- KMP_POP_PARTITIONED_TIMER();
+ {
+ (*(task->routine))(gtid, task);
+ }
+ KMP_POP_PARTITIONED_TIMER();
#if OMPT_SUPPORT && OMPT_TRACE
- /* let OMPT know that we're returning to the callee task */
- if (ompt_enabled &&
- ompt_callbacks.ompt_callback(ompt_event_task_switch))
- {
- ompt_callbacks.ompt_callback(ompt_event_task_switch)(
- taskdata->ompt_task_info.task_id,
- current_task->ompt_task_info.task_id);
- }
+ /* let OMPT know that we're returning to the callee task */
+ if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
+ ompt_callbacks.ompt_callback(ompt_event_task_switch)(
+ taskdata->ompt_task_info.task_id,
+ current_task->ompt_task_info.task_id);
+ }
#endif
#if OMP_40_ENABLED
- }
+ }
#endif // OMP_40_ENABLED
-
#if OMPT_SUPPORT
- if (ompt_enabled) {
- thread->th.ompt_thread_info = oldInfo;
- taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
- }
+ if (ompt_enabled) {
+ thread->th.ompt_thread_info = oldInfo;
+ taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
+ }
#endif
#if OMP_45_ENABLED
- // Proxy tasks are not handled by the runtime
- if ( taskdata->td_flags.proxy != TASK_PROXY ) {
+ // Proxy tasks are not handled by the runtime
+ if (taskdata->td_flags.proxy != TASK_PROXY) {
#endif
- ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
- __kmp_task_finish( gtid, task, current_task );
+ ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
+ __kmp_task_finish(gtid, task, current_task);
#if OMP_45_ENABLED
- }
+ }
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
- // Barrier imbalance - correct arrive time after the task finished
- if(__kmp_forkjoin_frames_mode == 3) {
- kmp_info_t *this_thr = __kmp_threads [ gtid ];
- if(this_thr->th.th_bar_arrive_time) {
- this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
- }
- }
-#endif
- KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
- gtid, taskdata, current_task) );
- return;
+ // Barrier imbalance - correct arrive time after the task finished
+ if (__kmp_forkjoin_frames_mode == 3) {
+ kmp_info_t *this_thr = __kmp_threads[gtid];
+ if (this_thr->th.th_bar_arrive_time) {
+ this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
+ }
+ }
+#endif
+ KA_TRACE(
+ 30,
+ ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
+ gtid, taskdata, current_task));
+ return;
}
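
One detail worth calling out from the body above: the KMP_GOMP_COMPAT branch exists because native (GOMP-style) thunks and regular KMP thunks use different calling conventions. A hedged side-by-side, with illustrative bodies only:

// Regular KMP task routine: receives the global tid and the kmp_task_t,
// invoked as (*(task->routine))(gtid, task).
static kmp_int32 kmp_style_body(kmp_int32 gtid, void *task) { return 0; }

// Native/GOMP-compat routine (td_flags.native set): receives only the shareds
// pointer, invoked as ((void (*)(void *))(*(task->routine)))(task->shareds).
static void gomp_style_body(void *shareds) { (void)shareds; }
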
-//-----------------------------------------------------------------------
// __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
// new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
// Returns:
-// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-
-kmp_int32
-__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
-{
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
-
- KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
-
- /* Should we execute the new task or queue it? For now, let's just always try to
- queue it. If the queue fills up, then we'll execute it. */
-
- if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
- { // Execute this task immediately
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
- new_taskdata->td_flags.task_serial = 1;
- __kmp_invoke_task( gtid, new_task, current_task );
- }
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+// be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+// resumed later.
+kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task) {
+ kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+
+ KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
+ loc_ref, new_taskdata));
+
+ /* Should we execute the new task or queue it? For now, let's just always try
+ to queue it. If the queue fills up, then we'll execute it. */
+
+ if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
+ { // Execute this task immediately
+ kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
+ new_taskdata->td_flags.task_serial = 1;
+ __kmp_invoke_task(gtid, new_task, current_task);
+ }
+
+ KA_TRACE(
+ 10,
+ ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
+ "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
+ gtid, loc_ref, new_taskdata));
- KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
- "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
- new_taskdata ) );
-
- ANNOTATE_HAPPENS_BEFORE(new_task);
- return TASK_CURRENT_NOT_QUEUED;
+ ANNOTATE_HAPPENS_BEFORE(new_task);
+ return TASK_CURRENT_NOT_QUEUED;
}
-//---------------------------------------------------------------------
// __kmp_omp_task: Schedule a non-thread-switchable task for execution
+//
// gtid: Global Thread ID of encountering thread
-// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
-// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
-// returns:
-//
-// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-kmp_int32
-__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
-{
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+// new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
+// serialize_immediate: if TRUE then if the task is executed immediately its
+// execution will be serialized
+// Returns:
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+// be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+// resumed later.
+kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
+ bool serialize_immediate) {
+ kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#if OMPT_SUPPORT
- if (ompt_enabled) {
- new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
- __builtin_frame_address(1);
- }
+ if (ompt_enabled) {
+ new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
+ __builtin_frame_address(1);
+ }
#endif
- /* Should we execute the new task or queue it? For now, let's just always try to
- queue it. If the queue fills up, then we'll execute it. */
+/* Should we execute the new task or queue it? For now, let's just always try to
+ queue it. If the queue fills up, then we'll execute it. */
#if OMP_45_ENABLED
- if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+ if (new_taskdata->td_flags.proxy == TASK_PROXY ||
+ __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#else
- if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+ if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
#endif
- { // Execute this task immediately
- kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
- if ( serialize_immediate )
- new_taskdata -> td_flags.task_serial = 1;
- __kmp_invoke_task( gtid, new_task, current_task );
- }
+ { // Execute this task immediately
+ kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
+ if (serialize_immediate)
+ new_taskdata->td_flags.task_serial = 1;
+ __kmp_invoke_task(gtid, new_task, current_task);
+ }
#if OMPT_SUPPORT
- if (ompt_enabled) {
- new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
- }
+ if (ompt_enabled) {
+ new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
+ }
#endif
- ANNOTATE_HAPPENS_BEFORE(new_task);
- return TASK_CURRENT_NOT_QUEUED;
+ ANNOTATE_HAPPENS_BEFORE(new_task);
+ return TASK_CURRENT_NOT_QUEUED;
}
-//---------------------------------------------------------------------
-// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
-// the parent thread only!
+// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
+// non-thread-switchable task from the parent thread only!
+//
// loc_ref: location of original task pragma (ignored)
// gtid: Global Thread ID of encountering thread
-// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
-// returns:
-//
-// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-
-kmp_int32
-__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
-{
- kmp_int32 res;
- KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
+// new_task: non-thread-switchable task thunk allocated by
+// __kmp_omp_task_alloc()
+// Returns:
+// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+// be resumed later.
+// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+// resumed later.
+kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task) {
+ kmp_int32 res;
+ KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
#if KMP_DEBUG
- kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+ kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
- KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
+ KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
+ new_taskdata));
- res = __kmp_omp_task(gtid,new_task,true);
+ res = __kmp_omp_task(gtid, new_task, true);
- KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
- gtid, loc_ref, new_taskdata ) );
- return res;
+ KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
+ "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
+ gtid, loc_ref, new_taskdata));
+ return res;
}
-//-------------------------------------------------------------------------------------
-// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
+// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
+// complete
+kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
+ kmp_taskdata_t *taskdata;
+ kmp_info_t *thread;
+ int thread_finished = FALSE;
+ KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
-kmp_int32
-__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
-{
- kmp_taskdata_t * taskdata;
- kmp_info_t * thread;
- int thread_finished = FALSE;
- KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
-
- KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
+ KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+ // begin wait?
- thread = __kmp_threads[ gtid ];
- taskdata = thread -> th.th_current_task;
+ thread = __kmp_threads[gtid];
+ taskdata = thread->th.th_current_task;
#if OMPT_SUPPORT && OMPT_TRACE
- ompt_task_id_t my_task_id;
- ompt_parallel_id_t my_parallel_id;
+ ompt_task_id_t my_task_id;
+ ompt_parallel_id_t my_parallel_id;
- if (ompt_enabled) {
- kmp_team_t *team = thread->th.th_team;
- my_task_id = taskdata->ompt_task_info.task_id;
- my_parallel_id = team->t.ompt_team_info.parallel_id;
-
- taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
- if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
- my_parallel_id, my_task_id);
- }
- }
+ if (ompt_enabled) {
+ kmp_team_t *team = thread->th.th_team;
+ my_task_id = taskdata->ompt_task_info.task_id;
+ my_parallel_id = team->t.ompt_team_info.parallel_id;
+
+ taskdata->ompt_task_info.frame.reenter_runtime_frame =
+ __builtin_frame_address(1);
+ if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
+ my_task_id);
+ }
+ }
#endif
- // Debugger: The taskwait is active. Store location and thread encountered the taskwait.
+// Debugger: The taskwait is active. Store location and thread encountered the
+// taskwait.
#if USE_ITT_BUILD
- // Note: These values are used by ITT events as well.
+// Note: These values are used by ITT events as well.
#endif /* USE_ITT_BUILD */
- taskdata->td_taskwait_counter += 1;
- taskdata->td_taskwait_ident = loc_ref;
- taskdata->td_taskwait_thread = gtid + 1;
+ taskdata->td_taskwait_counter += 1;
+ taskdata->td_taskwait_ident = loc_ref;
+ taskdata->td_taskwait_thread = gtid + 1;
#if USE_ITT_BUILD
- void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+ void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
+ if (itt_sync_obj != NULL)
+ __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
- bool must_wait = ! taskdata->td_flags.team_serial && ! taskdata->td_flags.final;
+ bool must_wait =
+ !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
#if OMP_45_ENABLED
- must_wait = must_wait || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks);
+ must_wait = must_wait || (thread->th.th_task_team != NULL &&
+ thread->th.th_task_team->tt.tt_found_proxy_tasks);
#endif
- if (must_wait)
- {
- kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
- while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
- flag.execute_tasks(thread, gtid, FALSE, &thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
- }
- }
+ if (must_wait) {
+ kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
+ while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) {
+ flag.execute_tasks(thread, gtid, FALSE,
+ &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
+ __kmp_task_stealing_constraint);
+ }
+ }
#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+ if (itt_sync_obj != NULL)
+ __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
- // Debugger: The taskwait is completed. Location remains, but thread is negated.
- taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+ // end of wait?
+ // Debugger: The taskwait is completed. Location remains, but thread is
+ // negated.
+ taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
#if OMPT_SUPPORT && OMPT_TRACE
- if (ompt_enabled) {
- if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
- ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
- my_parallel_id, my_task_id);
- }
- taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
- }
-#endif
- ANNOTATE_HAPPENS_AFTER(taskdata);
+ if (ompt_enabled) {
+ if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
+ my_task_id);
+ }
+ taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
}
+#endif
+ ANNOTATE_HAPPENS_AFTER(taskdata);
+ }
- KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
- "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
+ KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
+ "returning TASK_CURRENT_NOT_QUEUED\n",
+ gtid, taskdata));
- return TASK_CURRENT_NOT_QUEUED;
+ return TASK_CURRENT_NOT_QUEUED;
}
-
-//-------------------------------------------------
// __kmpc_omp_taskyield: switch to a different task
-
-kmp_int32
-__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
-{
- kmp_taskdata_t * taskdata;
- kmp_info_t * thread;
- int thread_finished = FALSE;
-
- KMP_COUNT_BLOCK(OMP_TASKYIELD);
- KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
-
- KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
- gtid, loc_ref, end_part) );
-
- if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
-
- thread = __kmp_threads[ gtid ];
- taskdata = thread -> th.th_current_task;
- // Should we model this as a task wait or not?
- // Debugger: The taskwait is active. Store location and thread encountered the taskwait.
+kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
+ kmp_taskdata_t *taskdata;
+ kmp_info_t *thread;
+ int thread_finished = FALSE;
+
+ KMP_COUNT_BLOCK(OMP_TASKYIELD);
+ KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
+
+ KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
+ gtid, loc_ref, end_part));
+
+ if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+ // begin wait?
+
+ thread = __kmp_threads[gtid];
+ taskdata = thread->th.th_current_task;
+// Should we model this as a task wait or not?
+// Debugger: The taskwait is active. Store location and thread encountered the
+// taskwait.
#if USE_ITT_BUILD
- // Note: These values are used by ITT events as well.
+// Note: These values are used by ITT events as well.
#endif /* USE_ITT_BUILD */
- taskdata->td_taskwait_counter += 1;
- taskdata->td_taskwait_ident = loc_ref;
- taskdata->td_taskwait_thread = gtid + 1;
+ taskdata->td_taskwait_counter += 1;
+ taskdata->td_taskwait_ident = loc_ref;
+ taskdata->td_taskwait_thread = gtid + 1;
#if USE_ITT_BUILD
- void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+ void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
+ if (itt_sync_obj != NULL)
+ __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
- if ( ! taskdata->td_flags.team_serial ) {
- kmp_task_team_t * task_team = thread->th.th_task_team;
- if (task_team != NULL) {
- if (KMP_TASKING_ENABLED(task_team)) {
- __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
- }
- }
+ if (!taskdata->td_flags.team_serial) {
+ kmp_task_team_t *task_team = thread->th.th_task_team;
+ if (task_team != NULL) {
+ if (KMP_TASKING_ENABLED(task_team)) {
+ __kmp_execute_tasks_32(
+ thread, gtid, NULL, FALSE,
+ &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
+ __kmp_task_stealing_constraint);
}
+ }
+ }
#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+ if (itt_sync_obj != NULL)
+ __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
- // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
- // Debugger: The taskwait is completed. Location remains, but thread is negated.
- taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
- }
+ // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+ // end of wait?
+ // Debugger: The taskwait is completed. Location remains, but thread is
+ // negated.
+ taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
+ }
+
+ KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
+ "returning TASK_CURRENT_NOT_QUEUED\n",
+ gtid, taskdata));
- KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
- "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
-
- return TASK_CURRENT_NOT_QUEUED;
+ return TASK_CURRENT_NOT_QUEUED;
}
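
For context, a hedged sketch (not part of this patch) of the calls a compiler emits for the two synchronization constructs above; loc and gtid come from the enclosing outlined function.

static void lower_task_sync(ident_t *loc, kmp_int32 gtid) {
  // #pragma omp taskwait: execute/steal other tasks until all children of the
  // current task are complete (td_incomplete_child_tasks reaches zero).
  __kmpc_omp_taskwait(loc, gtid);

  // #pragma omp taskyield: give the runtime a chance to run queued tasks
  // without waiting on anything; end_part is only used for tracing here.
  __kmpc_omp_taskyield(loc, gtid, /*end_part=*/0);
}
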
// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
-//
// Task Reduction implementation
-//
typedef struct kmp_task_red_flags {
- unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
- unsigned reserved31 : 31;
+ unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
+ unsigned reserved31 : 31;
} kmp_task_red_flags_t;
// internal structure for reduction data item related info
typedef struct kmp_task_red_data {
- void *reduce_shar; // shared reduction item
- size_t reduce_size; // size of data item
- void *reduce_priv; // thread specific data
- void *reduce_pend; // end of private data for comparison op
- void *reduce_init; // data initialization routine
- void *reduce_fini; // data finalization routine
- void *reduce_comb; // data combiner routine
- kmp_task_red_flags_t flags; // flags for additional info from compiler
+ void *reduce_shar; // shared reduction item
+ size_t reduce_size; // size of data item
+ void *reduce_priv; // thread specific data
+ void *reduce_pend; // end of private data for comparison op
+ void *reduce_init; // data initialization routine
+ void *reduce_fini; // data finalization routine
+ void *reduce_comb; // data combiner routine
+ kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_data_t;
// structure sent us by compiler - one per reduction item
typedef struct kmp_task_red_input {
- void *reduce_shar; // shared reduction item
- size_t reduce_size; // size of data item
- void *reduce_init; // data initialization routine
- void *reduce_fini; // data finalization routine
- void *reduce_comb; // data combiner routine
- kmp_task_red_flags_t flags; // flags for additional info from compiler
+ void *reduce_shar; // shared reduction item
+ size_t reduce_size; // size of data item
+ void *reduce_init; // data initialization routine
+ void *reduce_fini; // data finalization routine
+ void *reduce_comb; // data combiner routine
+ kmp_task_red_flags_t flags; // flags for additional info from compiler
} kmp_task_red_input_t;
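
To show how the three structures above fit together, here is a hedged sketch (not part of this patch) of the per-item descriptor a compiler could pass for #pragma omp taskgroup task_reduction(+ : sum); red_init and red_comb are illustrative helpers.

static void red_init(void *priv) { *(long *)priv = 0; }
static void red_comb(void *shar, void *priv) { *(long *)shar += *(long *)priv; }

// Must run inside an active taskgroup (after __kmpc_taskgroup), since the
// runtime stores the descriptor array on the current td_taskgroup.
static void *start_sum_reduction(int gtid, long *sum) {
  kmp_task_red_input_t in;
  in.reduce_shar = sum;              // the shared reduction variable
  in.reduce_size = sizeof(long);     // rounded up to a cache line internally
  in.reduce_init = (void *)red_init; // zero each thread-specific copy
  in.reduce_fini = NULL;             // nothing to finalize for a plain long
  in.reduce_comb = (void *)red_comb; // combiner is mandatory
  in.flags.lazy_priv = 0;            // eager per-thread allocation
  in.flags.reserved31 = 0;
  return __kmpc_task_reduction_init(gtid, /*num=*/1, &in);
}
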
/*!
@@ -1638,58 +1655,57 @@ typedef struct kmp_task_red_input {
Initialize task reduction for the taskgroup.
*/
-void*
-__kmpc_task_reduction_init(int gtid, int num, void *data)
-{
- kmp_info_t * thread = __kmp_threads[gtid];
- kmp_taskgroup_t * tg = thread->th.th_current_task->td_taskgroup;
- kmp_int32 nth = thread->th.th_team_nproc;
- kmp_task_red_input_t *input = (kmp_task_red_input_t*)data;
- kmp_task_red_data_t *arr;
-
- // check input data just in case
- KMP_ASSERT(tg != NULL);
- KMP_ASSERT(data != NULL);
- KMP_ASSERT(num > 0);
- if (nth == 1) {
- KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
- gtid, tg));
- return (void*)tg;
- }
- KA_TRACE(10,("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
- gtid, tg, num));
- arr = (kmp_task_red_data_t*)__kmp_thread_malloc(thread, num * sizeof(kmp_task_red_data_t));
- for (int i = 0; i < num; ++i) {
- void(*f_init)(void*) = (void(*)(void*))(input[i].reduce_init);
- size_t size = input[i].reduce_size - 1;
- // round the size up to cache line per thread-specific item
- size += CACHE_LINE - size % CACHE_LINE;
- KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
- arr[i].reduce_shar = input[i].reduce_shar;
- arr[i].reduce_size = size;
- arr[i].reduce_init = input[i].reduce_init;
- arr[i].reduce_fini = input[i].reduce_fini;
- arr[i].reduce_comb = input[i].reduce_comb;
- arr[i].flags = input[i].flags;
- if (!input[i].flags.lazy_priv) {
- // allocate cache-line aligned block and fill it with zeros
- arr[i].reduce_priv = __kmp_allocate(nth * size);
- arr[i].reduce_pend = (char*)(arr[i].reduce_priv) + nth * size;
- if (f_init != NULL) {
- // initialize thread-specific items
- for (int j = 0; j < nth; ++j) {
- f_init((char*)(arr[i].reduce_priv) + j * size);
- }
- }
- } else {
- // only allocate space for pointers now,
- // objects will be lazily allocated/initialized once requested
- arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void*));
+void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
+ kmp_int32 nth = thread->th.th_team_nproc;
+ kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
+ kmp_task_red_data_t *arr;
+
+ // check input data just in case
+ KMP_ASSERT(tg != NULL);
+ KMP_ASSERT(data != NULL);
+ KMP_ASSERT(num > 0);
+ if (nth == 1) {
+ KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
+ gtid, tg));
+ return (void *)tg;
+ }
+ KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
+ gtid, tg, num));
+ arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
+ thread, num * sizeof(kmp_task_red_data_t));
+ for (int i = 0; i < num; ++i) {
+ void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
+ size_t size = input[i].reduce_size - 1;
+ // round the size up to cache line per thread-specific item
+ size += CACHE_LINE - size % CACHE_LINE;
+ KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
+ arr[i].reduce_shar = input[i].reduce_shar;
+ arr[i].reduce_size = size;
+ arr[i].reduce_init = input[i].reduce_init;
+ arr[i].reduce_fini = input[i].reduce_fini;
+ arr[i].reduce_comb = input[i].reduce_comb;
+ arr[i].flags = input[i].flags;
+ if (!input[i].flags.lazy_priv) {
+ // allocate cache-line aligned block and fill it with zeros
+ arr[i].reduce_priv = __kmp_allocate(nth * size);
+ arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
+ if (f_init != NULL) {
+ // initialize thread-specific items
+ for (int j = 0; j < nth; ++j) {
+ f_init((char *)(arr[i].reduce_priv) + j * size);
}
- }
- tg->reduce_data = (void*)arr;
- tg->reduce_num_data = num;
- return (void*)tg;
+ }
+ } else {
+ // only allocate space for pointers now,
+ // objects will be lazily allocated/initialized once requested
+ arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
+ }
+ }
+ tg->reduce_data = (void *)arr;
+ tg->reduce_num_data = num;
+ return (void *)tg;
}
/*!
@@ -1701,370 +1717,386 @@ __kmpc_task_reduction_init(int gtid, int
Get thread-specific location of data item
*/
-void*
-__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data)
-{
- kmp_info_t * thread = __kmp_threads[gtid];
- kmp_int32 nth = thread->th.th_team_nproc;
- if (nth == 1)
- return data; // nothing to do
-
- kmp_taskgroup_t *tg = (kmp_taskgroup_t*)tskgrp;
- if (tg == NULL)
- tg = thread->th.th_current_task->td_taskgroup;
- KMP_ASSERT(tg != NULL);
- kmp_task_red_data_t *arr = (kmp_task_red_data_t*)(tg->reduce_data);
- kmp_int32 num = tg->reduce_num_data;
- kmp_int32 tid = thread->th.th_info.ds.ds_tid;
-
- KMP_ASSERT(data != NULL);
- while (tg != NULL) {
- for (int i = 0; i < num; ++i) {
- if (!arr[i].flags.lazy_priv) {
- if (data == arr[i].reduce_shar ||
- (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
- return (char*)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
- } else {
- // check shared location first
- void **p_priv = (void**)(arr[i].reduce_priv);
- if (data == arr[i].reduce_shar)
+void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_int32 nth = thread->th.th_team_nproc;
+ if (nth == 1)
+ return data; // nothing to do
+
+ kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
+ if (tg == NULL)
+ tg = thread->th.th_current_task->td_taskgroup;
+ KMP_ASSERT(tg != NULL);
+ kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
+ kmp_int32 num = tg->reduce_num_data;
+ kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+
+ KMP_ASSERT(data != NULL);
+ while (tg != NULL) {
+ for (int i = 0; i < num; ++i) {
+ if (!arr[i].flags.lazy_priv) {
+ if (data == arr[i].reduce_shar ||
+ (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
+ return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
+ } else {
+ // check shared location first
+ void **p_priv = (void **)(arr[i].reduce_priv);
+ if (data == arr[i].reduce_shar)
+ goto found;
+ // check if we get some thread specific location as parameter
+ for (int j = 0; j < nth; ++j)
+ if (data == p_priv[j])
goto found;
- // check if we get some thread specific location as parameter
- for (int j = 0; j < nth; ++j)
- if (data == p_priv[j])
- goto found;
- continue; // not found, continue search
- found:
- if (p_priv[tid] == NULL) {
- // allocate thread specific object lazily
- void(*f_init)(void*) = (void(*)(void*))(arr[i].reduce_init);
- p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
- if (f_init != NULL) {
- f_init(p_priv[tid]);
- }
+ continue; // not found, continue search
+ found:
+ if (p_priv[tid] == NULL) {
+ // allocate thread specific object lazily
+ void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init);
+ p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
+ if (f_init != NULL) {
+ f_init(p_priv[tid]);
}
- return p_priv[tid];
}
+ return p_priv[tid];
}
- tg = tg->parent;
- arr = (kmp_task_red_data_t*)(tg->reduce_data);
- num = tg->reduce_num_data;
}
- KMP_ASSERT2(0, "Unknown task reduction item");
- return NULL; // ERROR, this line never executed
+ tg = tg->parent;
+ arr = (kmp_task_red_data_t *)(tg->reduce_data);
+ num = tg->reduce_num_data;
+ }
+ KMP_ASSERT2(0, "Unknown task reduction item");
+ return NULL; // ERROR, this line never executed
}
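
Inside the outlined body of a participating task, the compiler redirects accesses to the reduction variable through the lookup above. A hedged sketch, with illustrative names and an assumed shareds layout:

static kmp_int32 red_task_entry(kmp_int32 gtid, void *task) {
  kmp_task_t *t = (kmp_task_t *)task;
  long *sum = *(long **)t->shareds; // assumed capture: address of the shared var
  // tskgrp may be NULL; the runtime then falls back to the current taskgroup.
  long *priv = (long *)__kmpc_task_reduction_get_th_data(gtid, NULL, sum);
  *priv += 1; // this task's contribution
  return 0;
}

The private copies are only folded back into *sum later, when __kmpc_end_taskgroup calls __kmp_task_reduction_fini.
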
// Finalize task reduction.
// Called from __kmpc_end_taskgroup()
-static void
-__kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg)
-{
- kmp_int32 nth = th->th.th_team_nproc;
- KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
- kmp_task_red_data_t *arr = (kmp_task_red_data_t*)tg->reduce_data;
- kmp_int32 num = tg->reduce_num_data;
- for (int i = 0; i < num; ++i) {
- void *sh_data = arr[i].reduce_shar;
- void(*f_fini)(void*) = (void(*)(void*))(arr[i].reduce_fini);
- void(*f_comb)(void*,void*) = (void(*)(void*,void*))(arr[i].reduce_comb);
- if (!arr[i].flags.lazy_priv) {
- void *pr_data = arr[i].reduce_priv;
- size_t size = arr[i].reduce_size;
- for (int j = 0; j < nth; ++j) {
- void * priv_data = (char*)pr_data + j * size;
- f_comb(sh_data, priv_data); // combine results
- if (f_fini)
- f_fini(priv_data); // finalize if needed
- }
- } else {
- void **pr_data = (void**)(arr[i].reduce_priv);
- for (int j = 0; j < nth; ++j) {
- if (pr_data[j] != NULL) {
- f_comb(sh_data, pr_data[j]); // combine results
- if (f_fini)
- f_fini(pr_data[j]); // finalize if needed
- __kmp_free(pr_data[j]);
- }
- }
+static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
+ kmp_int32 nth = th->th.th_team_nproc;
+ KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
+ kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
+ kmp_int32 num = tg->reduce_num_data;
+ for (int i = 0; i < num; ++i) {
+ void *sh_data = arr[i].reduce_shar;
+ void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
+ void (*f_comb)(void *, void *) =
+ (void (*)(void *, void *))(arr[i].reduce_comb);
+ if (!arr[i].flags.lazy_priv) {
+ void *pr_data = arr[i].reduce_priv;
+ size_t size = arr[i].reduce_size;
+ for (int j = 0; j < nth; ++j) {
+ void *priv_data = (char *)pr_data + j * size;
+ f_comb(sh_data, priv_data); // combine results
+ if (f_fini)
+ f_fini(priv_data); // finalize if needed
+ }
+ } else {
+ void **pr_data = (void **)(arr[i].reduce_priv);
+ for (int j = 0; j < nth; ++j) {
+ if (pr_data[j] != NULL) {
+ f_comb(sh_data, pr_data[j]); // combine results
+ if (f_fini)
+ f_fini(pr_data[j]); // finalize if needed
+ __kmp_free(pr_data[j]);
}
- __kmp_free(arr[i].reduce_priv);
+ }
}
- __kmp_thread_free(th, arr);
- tg->reduce_data = NULL;
- tg->reduce_num_data = 0;
+ __kmp_free(arr[i].reduce_priv);
+ }
+ __kmp_thread_free(th, arr);
+ tg->reduce_data = NULL;
+ tg->reduce_num_data = 0;
}
#endif
#if OMP_40_ENABLED
-//-------------------------------------------------------------------------------------
// __kmpc_taskgroup: Start a new taskgroup
-
-void
-__kmpc_taskgroup( ident_t* loc, int gtid )
-{
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * taskdata = thread->th.th_current_task;
- kmp_taskgroup_t * tg_new =
- (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
- KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
- tg_new->count = 0;
- tg_new->cancel_request = cancel_noreq;
- tg_new->parent = taskdata->td_taskgroup;
+void __kmpc_taskgroup(ident_t *loc, int gtid) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *taskdata = thread->th.th_current_task;
+ kmp_taskgroup_t *tg_new =
+ (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
+ KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
+ tg_new->count = 0;
+ tg_new->cancel_request = cancel_noreq;
+ tg_new->parent = taskdata->td_taskgroup;
// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
- tg_new->reduce_data = NULL;
- tg_new->reduce_num_data = 0;
+ tg_new->reduce_data = NULL;
+ tg_new->reduce_num_data = 0;
#endif
- taskdata->td_taskgroup = tg_new;
+ taskdata->td_taskgroup = tg_new;
}
-
-//-------------------------------------------------------------------------------------
// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
// and its descendants are complete
+void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *taskdata = thread->th.th_current_task;
+ kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
+ int thread_finished = FALSE;
+
+ KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
+ KMP_DEBUG_ASSERT(taskgroup != NULL);
+ KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
-void
-__kmpc_end_taskgroup( ident_t* loc, int gtid )
-{
- kmp_info_t * thread = __kmp_threads[ gtid ];
- kmp_taskdata_t * taskdata = thread->th.th_current_task;
- kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
- int thread_finished = FALSE;
-
- KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
- KMP_DEBUG_ASSERT( taskgroup != NULL );
- KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
-
- if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
#if USE_ITT_BUILD
- // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
- void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+ // For ITT the taskgroup wait is similar to taskwait until we need to
+ // distinguish them
+ void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
+ if (itt_sync_obj != NULL)
+ __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
#if OMP_45_ENABLED
- if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
+ if (!taskdata->td_flags.team_serial ||
+ (thread->th.th_task_team != NULL &&
+ thread->th.th_task_team->tt.tt_found_proxy_tasks))
#else
- if ( ! taskdata->td_flags.team_serial )
+ if (!taskdata->td_flags.team_serial)
#endif
- {
- kmp_flag_32 flag(&(taskgroup->count), 0U);
- while ( TCR_4(taskgroup->count) != 0 ) {
- flag.execute_tasks(thread, gtid, FALSE, &thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
- }
- }
+ {
+ kmp_flag_32 flag(&(taskgroup->count), 0U);
+ while (TCR_4(taskgroup->count) != 0) {
+ flag.execute_tasks(thread, gtid, FALSE,
+ &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
+ __kmp_task_stealing_constraint);
+ }
+ }
#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL )
- __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+ if (itt_sync_obj != NULL)
+ __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
#endif /* USE_ITT_BUILD */
- }
- KMP_DEBUG_ASSERT( taskgroup->count == 0 );
+ }
+ KMP_DEBUG_ASSERT(taskgroup->count == 0);
// TODO: change to OMP_50_ENABLED, need to change build tools for this to work
#if OMP_45_ENABLED
- if( taskgroup->reduce_data != NULL ) // need to reduce?
- __kmp_task_reduction_fini(thread, taskgroup);
+ if (taskgroup->reduce_data != NULL) // need to reduce?
+ __kmp_task_reduction_fini(thread, taskgroup);
#endif
- // Restore parent taskgroup for the current task
- taskdata->td_taskgroup = taskgroup->parent;
- __kmp_thread_free( thread, taskgroup );
-
- KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
- ANNOTATE_HAPPENS_AFTER(taskdata);
+ // Restore parent taskgroup for the current task
+ taskdata->td_taskgroup = taskgroup->parent;
+ __kmp_thread_free(thread, taskgroup);
+
+ KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
+ gtid, taskdata));
+ ANNOTATE_HAPPENS_AFTER(taskdata);
}
#endif
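
Finally, a hedged sketch (not part of this patch) of the bracketing calls that a taskgroup region lowers to; the tasks created in between are the ones counted by taskgroup->count.

static void lower_taskgroup(ident_t *loc, kmp_int32 gtid) {
  __kmpc_taskgroup(loc, gtid);     // push a fresh kmp_taskgroup_t (count = 0)
  // ... __kmpc_omp_task() calls for the tasks inside the region ...
  __kmpc_end_taskgroup(loc, gtid); // execute/steal tasks until count == 0,
                                   // run the reduction fini if reduce_data is
                                   // set, then restore the parent taskgroup
}
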
-
-//------------------------------------------------------
// __kmp_remove_my_task: remove a task from my own deque
+static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
+ kmp_task_team_t *task_team,
+ kmp_int32 is_constrained) {
+ kmp_task_t *task;
+ kmp_taskdata_t *taskdata;
+ kmp_thread_data_t *thread_data;
+ kmp_uint32 tail;
+
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+ KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
+ NULL); // Caller should check this condition
+
+ thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
+
+ KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+
+ if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
+ KA_TRACE(10,
+ ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+ return NULL;
+ }
+
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+ if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(10,
+ ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+ return NULL;
+ }
+
+ tail = (thread_data->td.td_deque_tail - 1) &
+ TASK_DEQUE_MASK(thread_data->td); // Wrap index.
+ taskdata = thread_data->td.td_deque[tail];
+
+ if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
+    // we need to check if the candidate obeys the task scheduling
+    // constraint: only a child of the current task can be scheduled
+ kmp_taskdata_t *current = thread->th.th_current_task;
+ kmp_int32 level = current->td_level;
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ while (parent != current && parent->td_level > level) {
+ parent = parent->td_parent; // check generation up to the level of the
+ // current task
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if (parent != current) {
+ // If the tail task is not a child, then no other child can appear in the
+ // deque.
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KA_TRACE(10,
+ ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+ return NULL;
+ }
+ }
+
+ thread_data->td.td_deque_tail = tail;
+ TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
+
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+ KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, thread_data->td.td_deque_ntasks,
+ thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
-static kmp_task_t *
-__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
- kmp_int32 is_constrained )
-{
- kmp_task_t * task;
- kmp_taskdata_t * taskdata;
- kmp_thread_data_t *thread_data;
- kmp_uint32 tail;
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
-
- thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
-
- KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
-
- if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
- KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
- return NULL;
- }
-
- __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
- if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
- return NULL;
- }
-
- tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index.
- taskdata = thread_data -> td.td_deque[ tail ];
-
- if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
- // we need to check if the candidate obeys task scheduling constraint:
- // only child of current task can be scheduled
- kmp_taskdata_t * current = thread->th.th_current_task;
- kmp_int32 level = current->td_level;
- kmp_taskdata_t * parent = taskdata->td_parent;
- while ( parent != current && parent->td_level > level ) {
- parent = parent->td_parent; // check generation up to the level of the current task
- KMP_DEBUG_ASSERT(parent != NULL);
- }
- if ( parent != current ) {
- // If the tail task is not a child, then no other child can appear in the deque.
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
- gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
- return NULL;
- }
- }
-
- thread_data -> td.td_deque_tail = tail;
- TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
-
- __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
-
- KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
- thread_data->td.td_deque_tail) );
-
- task = KMP_TASKDATA_TO_TASK( taskdata );
- return task;
+ task = KMP_TASKDATA_TO_TASK(taskdata);
+ return task;
}
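For readers skimming the reformatted __kmp_remove_my_task above: the owner-side
pop works on a power-of-two circular deque, so the tail index is decremented
and wrapped with a mask instead of a modulo. A minimal standalone sketch of
that idiom follows (illustrative names only, not the runtime's types):

    /* Sketch of the tail-pop-with-mask idiom; DEQUE_SIZE must be a power of
       two so that (index - 1) & MASK wraps correctly even when index is 0. */
    #define DEQUE_SIZE 256u
    #define DEQUE_MASK (DEQUE_SIZE - 1u)

    typedef struct {
      void *slots[DEQUE_SIZE];
      unsigned head;   /* steal end (other threads) */
      unsigned tail;   /* owner end (this thread)   */
      unsigned ntasks; /* number of queued tasks    */
    } deque_t;

    static void *deque_pop_tail(deque_t *d) {
      if (d->ntasks == 0)
        return NULL;                              /* nothing to remove */
      unsigned t = (d->tail - 1u) & DEQUE_MASK;   /* wrap index */
      void *task = d->slots[t];
      d->tail = t;
      d->ntasks--;
      return task;
    }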
-
-//-----------------------------------------------------------
// __kmp_steal_task: remove a task from another thread's deque
// Assume that calling thread has already checked existence of
// task_team thread_data before calling this routine.
-
static kmp_task_t *
-__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
- volatile kmp_uint32 *unfinished_threads, int *thread_finished,
- kmp_int32 is_constrained )
-{
- kmp_task_t * task;
- kmp_taskdata_t * taskdata;
- kmp_thread_data_t *victim_td, *threads_data;
- kmp_int32 victim_tid;
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
- threads_data = task_team -> tt.tt_threads_data;
- KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
-
- victim_tid = victim->th.th_info.ds.ds_tid;
- victim_td = & threads_data[ victim_tid ];
-
- KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
- "head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
-
- if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
- (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
- {
- KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
- return NULL;
- }
-
- __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
-
- // Check again after we acquire the lock
- if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
- (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
- {
- __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
- return NULL;
- }
-
- KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
-
- taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
- if ( is_constrained ) {
- // we need to check if the candidate obeys task scheduling constraint:
- // only descendant of current task can be scheduled
- kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
- kmp_int32 level = current->td_level;
- kmp_taskdata_t * parent = taskdata->td_parent;
- while ( parent != current && parent->td_level > level ) {
- parent = parent->td_parent; // check generation up to the level of the current task
- KMP_DEBUG_ASSERT(parent != NULL);
- }
- if ( parent != current ) {
- // If the head task is not a descendant of the current task then do not
- // steal it. No other task in victim's deque can be a descendant of the
- // current task.
- __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
- KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
- "ntasks=%d head=%u tail=%u\n",
- gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
- task_team, victim_td->td.td_deque_ntasks,
- victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
- return NULL;
- }
- }
- // Bump head pointer and Wrap.
- victim_td->td.td_deque_head = (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
- if (*thread_finished) {
- // We need to un-mark this victim as a finished victim. This must be done before
- // releasing the lock, or else other threads (starting with the master victim)
- // might be prematurely released from the barrier!!!
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
-
- KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
- gtid, count + 1, task_team) );
-
- *thread_finished = FALSE;
- }
- TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
-
- __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
-
- KMP_COUNT_BLOCK(TASK_stolen);
- KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
+__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
+ volatile kmp_uint32 *unfinished_threads, int *thread_finished,
+ kmp_int32 is_constrained)
+{
+ kmp_task_t *task;
+ kmp_taskdata_t *taskdata;
+ kmp_thread_data_t *victim_td, *threads_data;
+ kmp_int32 victim_tid;
+
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+
+ threads_data = task_team->tt.tt_threads_data;
+ KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
+
+ victim_tid = victim->th.th_info.ds.ds_tid;
+ victim_td = &threads_data[victim_tid];
+
+ KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
+ "task_team=%p ntasks=%d "
+ "head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim), task_team,
+ victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+ victim_td->td.td_deque_tail));
+
+ if ((TCR_4(victim_td->td.td_deque_ntasks) ==
+ 0) || // Caller should not check this condition
+ (TCR_PTR(victim->th.th_task_team) !=
+ task_team)) // GEH: why would this happen?
+ {
+ KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
+ "task_team=%p "
"ntasks=%d head=%u tail=%u\n",
- gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
+ gtid, __kmp_gtid_from_thread(victim), task_team,
victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
- victim_td->td.td_deque_tail) );
+ victim_td->td.td_deque_tail));
+ return NULL;
+ }
+
+ __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
+
+ // Check again after we acquire the lock
+ if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) ||
+ (TCR_PTR(victim->th.th_task_team) !=
+ task_team)) // GEH: why would this happen?
+ {
+ __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+ KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
+ "task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, __kmp_gtid_from_thread(victim), task_team,
+ victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+ victim_td->td.td_deque_tail));
+ return NULL;
+ }
+
+ KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
+
+ taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
+ if (is_constrained) {
+    // we need to check if the candidate obeys the task scheduling
+    // constraint: only a descendant of the current task can be scheduled
+ kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task;
+ kmp_int32 level = current->td_level;
+ kmp_taskdata_t *parent = taskdata->td_parent;
+ while (parent != current && parent->td_level > level) {
+ parent = parent->td_parent; // check generation up to the level of the
+ // current task
+ KMP_DEBUG_ASSERT(parent != NULL);
+ }
+ if (parent != current) {
+ // If the head task is not a descendant of the current task then do not
+ // steal it. No other task in victim's deque can be a descendant of the
+ // current task.
+ __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+ KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from "
+ "T#%d: task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid,
+ __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr),
+ task_team, victim_td->td.td_deque_ntasks,
+ victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
+ return NULL;
+ }
+ }
+  // Bump head pointer and wrap.
+ victim_td->td.td_deque_head =
+ (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
+ if (*thread_finished) {
+ // We need to un-mark this victim as a finished victim. This must be done
+ // before releasing the lock, or else other threads (starting with the
+ // master victim) might be prematurely released from the barrier!!!
+ kmp_uint32 count;
+
+ count = KMP_TEST_THEN_INC32((kmp_int32 *)unfinished_threads);
+
+ KA_TRACE(
+ 20,
+ ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
+ gtid, count + 1, task_team));
+
+ *thread_finished = FALSE;
+ }
+ TCW_4(victim_td->td.td_deque_ntasks,
+ TCR_4(victim_td->td.td_deque_ntasks) - 1);
+
+ __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+
+ KMP_COUNT_BLOCK(TASK_stolen);
+ KA_TRACE(
+ 10,
+ ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
+ "ntasks=%d head=%u tail=%u\n",
+ gtid, taskdata, __kmp_gtid_from_thread(victim), task_team,
+ victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+ victim_td->td.td_deque_tail));
- task = KMP_TASKDATA_TO_TASK( taskdata );
- return task;
+ task = KMP_TASKDATA_TO_TASK(taskdata);
+ return task;
}
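Both __kmp_remove_my_task and __kmp_steal_task above enforce the task
scheduling constraint by walking the candidate's ancestors up to the stealer's
nesting level. A hedged, self-contained restatement of that check, with
simplified stand-in types rather than the runtime's kmp_taskdata_t:

    typedef struct task_node {
      struct task_node *parent; /* enclosing (generating) task */
      int level;                /* nesting depth               */
    } task_node;

    /* Returns nonzero if 'candidate' is a descendant of 'current', i.e. the
       removal or steal is allowed under the task scheduling constraint. */
    static int obeys_constraint(const task_node *candidate,
                                const task_node *current) {
      const task_node *p = candidate->parent;
      while (p != current && p->level > current->level)
        p = p->parent; /* climb one generation at a time */
      return p == current;
    }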
-//-----------------------------------------------------------------------------
-// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
-// is statisfied (return true) or there are none left (return false).
+// __kmp_execute_tasks_template: Choose and execute tasks until either the
+// condition is satisfied (return true) or there are none left (return false).
+//
// final_spin is TRUE if this is the spin at the release barrier.
// thread_finished indicates whether the thread is finished executing all
// the tasks it has on its deque, and is at the release barrier.
@@ -2072,289 +2104,318 @@ __kmp_steal_task( kmp_info_t *victim, km
// spinner == NULL means only execute a single task and return.
// checker is the value to check to terminate the spin.
template <class C>
-static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- kmp_task_team_t * task_team = thread->th.th_task_team;
- kmp_thread_data_t * threads_data;
- kmp_task_t * task;
- kmp_info_t * other_thread;
- kmp_taskdata_t * current_task = thread -> th.th_current_task;
- volatile kmp_uint32 * unfinished_threads;
- kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
-
- if (task_team == NULL) return FALSE;
-
- KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
- gtid, final_spin, *thread_finished) );
-
- thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
- threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
- KMP_DEBUG_ASSERT( threads_data != NULL );
+static inline int __kmp_execute_tasks_template(
+ kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ kmp_task_team_t *task_team = thread->th.th_task_team;
+ kmp_thread_data_t *threads_data;
+ kmp_task_t *task;
+ kmp_info_t *other_thread;
+ kmp_taskdata_t *current_task = thread->th.th_current_task;
+ volatile kmp_uint32 *unfinished_threads;
+ kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0,
+ tid = thread->th.th_info.ds.ds_tid;
+
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+ KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
+
+ if (task_team == NULL)
+ return FALSE;
+
+ KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
+ "*thread_finished=%d\n",
+ gtid, final_spin, *thread_finished));
+
+ thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
+ threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
+ KMP_DEBUG_ASSERT(threads_data != NULL);
- nthreads = task_team -> tt.tt_nproc;
- unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
+ nthreads = task_team->tt.tt_nproc;
+ unfinished_threads = &(task_team->tt.tt_unfinished_threads);
#if OMP_45_ENABLED
- KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
+ KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
#else
- KMP_DEBUG_ASSERT( nthreads > 1 );
+ KMP_DEBUG_ASSERT(nthreads > 1);
#endif
- KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
+ KMP_DEBUG_ASSERT((int)(TCR_4(*unfinished_threads)) >= 0);
- while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
- while (1) { // Inner loop to find a task and execute it
- task = NULL;
- if (use_own_tasks) { // check on own queue first
- task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
- }
- if ((task == NULL) && (nthreads > 1)) { // Steal a task
- int asleep = 1;
- use_own_tasks = 0;
- // Try to steal from the last place I stole from successfully.
- if (victim == -2) { // haven't stolen anything yet
- victim = threads_data[tid].td.td_deque_last_stolen;
- if (victim != -1) // if we have a last stolen from victim, get the thread
- other_thread = threads_data[victim].td.td_thr;
- }
- if (victim != -1) { // found last victim
- asleep = 0;
- }
- else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
- do { // Find a different thread to steal work from.
- // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
- // we tried to steal from every thread, and failed. Arch says that's not such a great idea.
- victim = __kmp_get_random(thread) % (nthreads - 1);
- if (victim >= tid) {
- ++victim; // Adjusts random distribution to exclude self
- }
- // Found a potential victim
- other_thread = threads_data[victim].td.td_thr;
- // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
- // waiting at the barrier. If victim is sleeping, then wake it up. Since we were going to
- // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
- // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
- // do not sleep at the separate tasking barrier, so this isn't a problem.
- asleep = 0;
- if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
- (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
- (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
- asleep = 1;
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
- // A sleeping thread should not have any tasks on it's queue. There is a slight
- // possibility that it resumes, steals a task from another thread, which spawns more
- // tasks, all in the time that it takes this thread to check => don't write an assertion
- // that the victim's queue is empty. Try stealing from a different thread.
- }
- } while (asleep);
- }
-
- if (!asleep) {
- // We have a victim to try to steal from
- task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
- }
- if (task != NULL) { // set last stolen to victim
- if (threads_data[tid].td.td_deque_last_stolen != victim) {
- threads_data[tid].td.td_deque_last_stolen = victim;
- // The pre-refactored code did not try more than 1 successful new vicitm,
- // unless the last one generated more local tasks; new_victim keeps track of this
- new_victim = 1;
- }
- }
- else { // No tasks found; unset last_stolen
- KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
- victim = -2; // no successful victim found
- }
- }
+ while (1) { // Outer loop keeps trying to find tasks in case of single thread
+ // getting tasks from target constructs
+ while (1) { // Inner loop to find a task and execute it
+ task = NULL;
+ if (use_own_tasks) { // check on own queue first
+ task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
+ }
+ if ((task == NULL) && (nthreads > 1)) { // Steal a task
+ int asleep = 1;
+ use_own_tasks = 0;
+ // Try to steal from the last place I stole from successfully.
+ if (victim == -2) { // haven't stolen anything yet
+ victim = threads_data[tid].td.td_deque_last_stolen;
+ if (victim !=
+ -1) // if we have a last stolen from victim, get the thread
+ other_thread = threads_data[victim].td.td_thr;
+ }
+ if (victim != -1) { // found last victim
+ asleep = 0;
+ } else if (!new_victim) { // no recent steals and we haven't already
+ // used a new victim; select a random thread
+ do { // Find a different thread to steal work from.
+ // Pick a random thread. Initial plan was to cycle through all the
+ // threads, and only return if we tried to steal from every thread,
+ // and failed. Arch says that's not such a great idea.
+ victim = __kmp_get_random(thread) % (nthreads - 1);
+ if (victim >= tid) {
+ ++victim; // Adjusts random distribution to exclude self
+ }
+ // Found a potential victim
+ other_thread = threads_data[victim].td.td_thr;
+ // There is a slight chance that __kmp_enable_tasking() did not wake
+ // up all threads waiting at the barrier. If victim is sleeping,
+ // then wake it up. Since we were going to pay the cache miss
+          // penalty for referencing another thread's kmp_info_t struct
+          // anyway, the check shouldn't cost too much performance at this
+          // point. In extra barrier mode, tasks do not sleep at the separate
+          // tasking barrier, so this isn't a problem.
+ asleep = 0;
+ if ((__kmp_tasking_mode == tskm_task_teams) &&
+ (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
+ (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
+ asleep = 1;
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
+ other_thread->th.th_sleep_loc);
+            // A sleeping thread should not have any tasks on its queue.
+ // There is a slight possibility that it resumes, steals a task
+ // from another thread, which spawns more tasks, all in the time
+ // that it takes this thread to check => don't write an assertion
+ // that the victim's queue is empty. Try stealing from a
+ // different thread.
+ }
+ } while (asleep);
+ }
+
+ if (!asleep) {
+ // We have a victim to try to steal from
+ task = __kmp_steal_task(other_thread, gtid, task_team,
+ unfinished_threads, thread_finished,
+ is_constrained);
+ }
+ if (task != NULL) { // set last stolen to victim
+ if (threads_data[tid].td.td_deque_last_stolen != victim) {
+ threads_data[tid].td.td_deque_last_stolen = victim;
+ // The pre-refactored code did not try more than 1 successful new
+          // victim, unless the last one generated more local tasks;
+ // new_victim keeps track of this
+ new_victim = 1;
+ }
+ } else { // No tasks found; unset last_stolen
+ KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
+ victim = -2; // no successful victim found
+ }
+ }
- if (task == NULL) // break out of tasking loop
- break;
+ if (task == NULL) // break out of tasking loop
+ break;
- // Found a task; execute it
+// Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
- if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
- if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
- itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
- }
- __kmp_itt_task_starting( itt_sync_obj );
- }
+ if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
+ if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
+ // get the object reliably
+ itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+ }
+ __kmp_itt_task_starting(itt_sync_obj);
+ }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
- __kmp_invoke_task( gtid, task, current_task );
+ __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
- if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
+ if (itt_sync_obj != NULL)
+ __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
- // If this thread is only partway through the barrier and the condition is met, then return now,
- // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
- // in the barrier, waiting to be released, we know that the termination condition will not be
- // satisified, so don't waste any cycles checking it.
- if (flag == NULL || (!final_spin && flag->done_check())) {
- KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- if (thread->th.th_task_team == NULL) {
- break;
- }
- KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
- // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
- if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
- use_own_tasks = 1;
- new_victim = 0;
- }
- }
+ // If this thread is only partway through the barrier and the condition is
+ // met, then return now, so that the barrier gather/release pattern can
+ // proceed. If this thread is in the last spin loop in the barrier,
+ // waiting to be released, we know that the termination condition will not
+    // be satisfied, so don't waste any cycles checking it.
+ if (flag == NULL || (!final_spin && flag->done_check())) {
+ KA_TRACE(
+ 15,
+ ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
+ gtid));
+ return TRUE;
+ }
+ if (thread->th.th_task_team == NULL) {
+ break;
+ }
+ // Yield before executing next task
+ KMP_YIELD(__kmp_library == library_throughput);
+ // If execution of a stolen task results in more tasks being placed on our
+ // run queue, reset use_own_tasks
+ if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
+ "other tasks, restart\n",
+ gtid));
+ use_own_tasks = 1;
+ new_victim = 0;
+ }
+ }
- // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
+// The task source has been exhausted. If in final spin loop of barrier, check
+// if termination condition is satisfied.
#if OMP_45_ENABLED
- // The work queue may be empty but there might be proxy tasks still executing
- if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
+ // The work queue may be empty but there might be proxy tasks still
+ // executing
+ if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
#else
- if (final_spin)
+ if (final_spin)
#endif
- {
- // First, decrement the #unfinished threads, if that has not already been done. This decrement
- // might be to the spin location, and result in the termination condition being satisfied.
- if (! *thread_finished) {
- kmp_uint32 count;
-
- count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
- KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
- gtid, count, task_team) );
- *thread_finished = TRUE;
- }
-
- // It is now unsafe to reference thread->th.th_team !!!
- // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
- // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
- // If we can steal more work, we know that this has not happened yet.
- if (flag != NULL && flag->done_check()) {
- KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
- return TRUE;
- }
- }
+ {
+ // First, decrement the #unfinished threads, if that has not already been
+ // done. This decrement might be to the spin location, and result in the
+ // termination condition being satisfied.
+ if (!*thread_finished) {
+ kmp_uint32 count;
- // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
- if (thread->th.th_task_team == NULL) {
- KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
- return FALSE;
- }
+ count = KMP_TEST_THEN_DEC32((kmp_int32 *)unfinished_threads) - 1;
+ KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
+ "unfinished_threads to %d task_team=%p\n",
+ gtid, count, task_team));
+ *thread_finished = TRUE;
+ }
-#if OMP_45_ENABLED
- // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
- // tasks from own queue
- if (nthreads == 1)
- use_own_tasks = 1;
- else
-#endif
- {
- KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
- return FALSE;
- }
+ // It is now unsafe to reference thread->th.th_team !!!
+ // Decrementing task_team->tt.tt_unfinished_threads can allow the master
+ // thread to pass through the barrier, where it might reset each thread's
+ // th.th_team field for the next parallel region. If we can steal more
+ // work, we know that this has not happened yet.
+ if (flag != NULL && flag->done_check()) {
+ KA_TRACE(
+ 15,
+ ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
+ gtid));
+ return TRUE;
+ }
}
-}
-
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
+ // If this thread's task team is NULL, master has recognized that there are
+ // no more tasks; bail out
+ if (thread->th.th_task_team == NULL) {
+ KA_TRACE(15,
+ ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
+ return FALSE;
+ }
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
- int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
- return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+#if OMP_45_ENABLED
+ // We could be getting tasks from target constructs; if this is the only
+ // thread, keep trying to execute tasks from own queue
+ if (nthreads == 1)
+ use_own_tasks = 1;
+ else
+#endif
+ {
+ KA_TRACE(15,
+ ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
+ return FALSE;
+ }
+ }
+}
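The victim-selection trick inside the stealing loop above draws a random value
in [0, nthreads-2] and shifts results at or above the caller's own tid up by
one, so every other thread is equally likely and the caller never picks its own
deque. A small sketch, assuming nthreads > 1 as the surrounding code asserts:

    static int pick_victim(unsigned rnd, int nthreads, int my_tid) {
      int victim = (int)(rnd % (unsigned)(nthreads - 1)); /* 0..nthreads-2 */
      if (victim >= my_tid)
        ++victim; /* skip self; result is in 0..nthreads-1 and != my_tid */
      return victim;
    }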
+
+int __kmp_execute_tasks_32(
+ kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_template(
+ thread, gtid, flag, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
+int __kmp_execute_tasks_64(
+ kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_template(
+ thread, gtid, flag, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
+int __kmp_execute_tasks_oncore(
+ kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_template(
+ thread, gtid, flag, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
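The three entry points above are thin non-template wrappers over the single
__kmp_execute_tasks_template, so the flag-type-specific loop is instantiated
once per flag type while callers see plain functions. A minimal sketch of that
pattern, with placeholder flag types standing in for kmp_flag_32/kmp_flag_64:

    struct flag32 { bool done_check() const { return true; } };
    struct flag64 { bool done_check() const { return true; } };

    template <class C>
    static inline int execute_tasks_template(C *flag, int final_spin) {
      (void)final_spin;
      // ... shared task-execution loop would go here ...
      return (flag != nullptr && flag->done_check()) ? 1 : 0;
    }

    int execute_tasks_32(flag32 *flag, int final_spin) {
      return execute_tasks_template(flag, final_spin);
    }
    int execute_tasks_64(flag64 *flag, int final_spin) {
      return execute_tasks_template(flag, final_spin);
    }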
-
-
-//-----------------------------------------------------------------------------
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// First thread in allocates the task team atomically.
+static void __kmp_enable_tasking(kmp_task_team_t *task_team,
+ kmp_info_t *this_thr) {
+ kmp_thread_data_t *threads_data;
+ int nthreads, i, is_init_thread;
+
+ KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
+ __kmp_gtid_from_thread(this_thr)));
+
+ KMP_DEBUG_ASSERT(task_team != NULL);
+ KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
+
+ nthreads = task_team->tt.tt_nproc;
+ KMP_DEBUG_ASSERT(nthreads > 0);
+ KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
+
+ // Allocate or increase the size of threads_data if necessary
+ is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
+
+ if (!is_init_thread) {
+ // Some other thread already set up the array.
+ KA_TRACE(
+ 20,
+ ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
+ __kmp_gtid_from_thread(this_thr)));
+ return;
+ }
+ threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
+ KMP_DEBUG_ASSERT(threads_data != NULL);
+
+ if ((__kmp_tasking_mode == tskm_task_teams) &&
+ (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
+ // Release any threads sleeping at the barrier, so that they can steal
+ // tasks and execute them. In extra barrier mode, tasks do not sleep
+ // at the separate tasking barrier, so this isn't a problem.
+ for (i = 0; i < nthreads; i++) {
+ volatile void *sleep_loc;
+ kmp_info_t *thread = threads_data[i].td.td_thr;
-static void
-__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
-{
- kmp_thread_data_t *threads_data;
- int nthreads, i, is_init_thread;
-
- KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
- __kmp_gtid_from_thread( this_thr ) ) );
-
- KMP_DEBUG_ASSERT(task_team != NULL);
- KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
-
- nthreads = task_team->tt.tt_nproc;
- KMP_DEBUG_ASSERT(nthreads > 0);
- KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
-
- // Allocate or increase the size of threads_data if necessary
- is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
-
- if (!is_init_thread) {
- // Some other thread already set up the array.
- KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
- __kmp_gtid_from_thread( this_thr ) ) );
- return;
- }
- threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
- KMP_DEBUG_ASSERT( threads_data != NULL );
-
- if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
- ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
- {
- // Release any threads sleeping at the barrier, so that they can steal
- // tasks and execute them. In extra barrier mode, tasks do not sleep
- // at the separate tasking barrier, so this isn't a problem.
- for (i = 0; i < nthreads; i++) {
- volatile void *sleep_loc;
- kmp_info_t *thread = threads_data[i].td.td_thr;
-
- if (i == this_thr->th.th_info.ds.ds_tid) {
- continue;
- }
- // Since we haven't locked the thread's suspend mutex lock at this
- // point, there is a small window where a thread might be putting
- // itself to sleep, but hasn't set the th_sleep_loc field yet.
- // To work around this, __kmp_execute_tasks_template() periodically checks
- // see if other threads are sleeping (using the same random
- // mechanism that is used for task stealing) and awakens them if
- // they are.
- if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
- {
- KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
- __kmp_gtid_from_thread( this_thr ),
- __kmp_gtid_from_thread( thread ) ) );
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
- }
- else {
- KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
- __kmp_gtid_from_thread( this_thr ),
- __kmp_gtid_from_thread( thread ) ) );
- }
- }
+ if (i == this_thr->th.th_info.ds.ds_tid) {
+ continue;
+ }
+ // Since we haven't locked the thread's suspend mutex lock at this
+ // point, there is a small window where a thread might be putting
+ // itself to sleep, but hasn't set the th_sleep_loc field yet.
+    // To work around this, __kmp_execute_tasks_template() periodically checks
+    // to see if other threads are sleeping (using the same random mechanism
+    // that is used for task stealing) and awakens them if they are.
+ if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) {
+ KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
+ __kmp_gtid_from_thread(this_thr),
+ __kmp_gtid_from_thread(thread)));
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+ } else {
+ KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
+ __kmp_gtid_from_thread(this_thr),
+ __kmp_gtid_from_thread(thread)));
+ }
}
+ }
- KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
- __kmp_gtid_from_thread( this_thr ) ) );
+ KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
+ __kmp_gtid_from_thread(this_thr)));
}
-
-/* ------------------------------------------------------------------------ */
/* // TODO: Check the comment consistency
* Utility routines for "task teams". A task team (kmp_task_t) is kind of
* like a shadow of the kmp_team_t data struct, with a different lifetime.
@@ -2389,685 +2450,683 @@ __kmp_enable_tasking( kmp_task_team_t *t
* barriers, when no explicit tasks were spawned (pushed, actually).
*/
-
-static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
+static kmp_task_team_t *__kmp_free_task_teams =
+ NULL; // Free list for task_team data structures
// Lock for task team data structures
-static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
-
+static kmp_bootstrap_lock_t __kmp_task_team_lock =
+ KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
-//------------------------------------------------------------------------------
// __kmp_alloc_task_deque:
// Allocates a task deque for a particular thread, and initialize the necessary
// data structures relating to the deque. This only happens once per thread
-// per task team since task teams are recycled.
-// No lock is needed during allocation since each thread allocates its own
-// deque.
-
-static void
-__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
-{
- __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
- KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
-
- // Initialize last stolen task field to "none"
- thread_data -> td.td_deque_last_stolen = -1;
-
- KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
- KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
- KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
-
- KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
- __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
- // Allocate space for task deque, and zero the deque
- // Cannot use __kmp_thread_calloc() because threads not around for
- // kmp_reap_task_team( ).
- thread_data -> td.td_deque = (kmp_taskdata_t **)
- __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
- thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
+// per task team since task teams are recycled. No lock is needed during
+// allocation since each thread allocates its own deque.
+static void __kmp_alloc_task_deque(kmp_info_t *thread,
+ kmp_thread_data_t *thread_data) {
+ __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
+ KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
+
+ // Initialize last stolen task field to "none"
+ thread_data->td.td_deque_last_stolen = -1;
+
+ KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
+ KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
+ KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
+
+ KE_TRACE(
+ 10,
+ ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
+ __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
+ // Allocate space for task deque, and zero the deque
+ // Cannot use __kmp_thread_calloc() because threads not around for
+ // kmp_reap_task_team( ).
+ thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
+ INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
+ thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
-//------------------------------------------------------------------------------
// __kmp_realloc_task_deque:
-// Re-allocates a task deque for a particular thread, copies the content from the old deque
-// and adjusts the necessary data structures relating to the deque.
-// This operation must be done with a the deque_lock being held
-
-static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
-{
- kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
- kmp_int32 new_size = 2 * size;
-
- KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
- __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
-
- kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
-
- int i,j;
- for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
- new_deque[j] = thread_data->td.td_deque[i];
-
- __kmp_free(thread_data->td.td_deque);
-
- thread_data -> td.td_deque_head = 0;
- thread_data -> td.td_deque_tail = size;
- thread_data -> td.td_deque = new_deque;
- thread_data -> td.td_deque_size = new_size;
+// Re-allocates a task deque for a particular thread, copies the content from
+// the old deque and adjusts the necessary data structures relating to the
+// deque. This operation must be done with the deque_lock held
+static void __kmp_realloc_task_deque(kmp_info_t *thread,
+ kmp_thread_data_t *thread_data) {
+ kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
+ kmp_int32 new_size = 2 * size;
+
+ KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
+ "%d] for thread_data %p\n",
+ __kmp_gtid_from_thread(thread), size, new_size, thread_data));
+
+ kmp_taskdata_t **new_deque =
+ (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));
+
+ int i, j;
+ for (i = thread_data->td.td_deque_head, j = 0; j < size;
+ i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
+ new_deque[j] = thread_data->td.td_deque[i];
+
+ __kmp_free(thread_data->td.td_deque);
+
+ thread_data->td.td_deque_head = 0;
+ thread_data->td.td_deque_tail = size;
+ thread_data->td.td_deque = new_deque;
+ thread_data->td.td_deque_size = new_size;
}
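__kmp_realloc_task_deque above doubles the deque and repacks it starting at the
old head, so after the copy the entries are linear in the new buffer (head = 0,
tail = old size). A hedged sketch of that grow-and-repack step, using
malloc/free where the runtime uses __kmp_allocate/__kmp_free:

    #include <stdlib.h>

    /* Doubles a power-of-two circular buffer and copies entries in logical
       order; the caller then sets head = 0 and tail = old_size. */
    static void **grow_deque(void **old, unsigned old_size, unsigned head) {
      unsigned new_size = 2u * old_size; /* stays a power of two */
      void **fresh = (void **)malloc(new_size * sizeof(void *));
      for (unsigned i = head, j = 0; j < old_size;
           i = (i + 1u) & (old_size - 1u), j++) /* wrap with the old mask */
        fresh[j] = old[i];
      free(old);
      return fresh;
    }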
-//------------------------------------------------------------------------------
// __kmp_free_task_deque:
-// Deallocates a task deque for a particular thread.
-// Happens at library deallocation so don't need to reset all thread data fields.
-
-static void
-__kmp_free_task_deque( kmp_thread_data_t *thread_data )
-{
- __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
+// Deallocates a task deque for a particular thread. Happens at library
+// deallocation, so there is no need to reset all thread data fields.
+static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
- if ( thread_data -> td.td_deque != NULL ) {
- TCW_4(thread_data -> td.td_deque_ntasks, 0);
- __kmp_free( thread_data -> td.td_deque );
- thread_data -> td.td_deque = NULL;
- }
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+ if (thread_data->td.td_deque != NULL) {
+ TCW_4(thread_data->td.td_deque_ntasks, 0);
+ __kmp_free(thread_data->td.td_deque);
+ thread_data->td.td_deque = NULL;
+ }
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
#ifdef BUILD_TIED_TASK_STACK
- // GEH: Figure out what to do here for td_susp_tied_tasks
- if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
- __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
- }
+ // GEH: Figure out what to do here for td_susp_tied_tasks
+ if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
+ __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
+ }
#endif // BUILD_TIED_TASK_STACK
}
-
-//------------------------------------------------------------------------------
// __kmp_realloc_task_threads_data:
-// Allocates a threads_data array for a task team, either by allocating an initial
-// array or enlarging an existing array. Only the first thread to get the lock
-// allocs or enlarges the array and re-initializes the array eleemnts.
+// Allocates a threads_data array for a task team, either by allocating an
+// initial array or enlarging an existing array. Only the first thread to get
+// the lock allocs or enlarges the array and re-initializes the array elements.
// That thread returns "TRUE", the rest return "FALSE".
// Assumes that the new array size is given by task_team -> tt.tt_nproc.
// The current size is given by task_team -> tt.tt_max_threads.
+static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
+ kmp_task_team_t *task_team) {
+ kmp_thread_data_t **threads_data_p;
+ kmp_int32 nthreads, maxthreads;
+ int is_init_thread = FALSE;
+
+ if (TCR_4(task_team->tt.tt_found_tasks)) {
+ // Already reallocated and initialized.
+ return FALSE;
+ }
+
+ threads_data_p = &task_team->tt.tt_threads_data;
+ nthreads = task_team->tt.tt_nproc;
+ maxthreads = task_team->tt.tt_max_threads;
+
+ // All threads must lock when they encounter the first task of the implicit
+ // task region to make sure threads_data fields are (re)initialized before
+ // used.
+ __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
-static int
-__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
-{
- kmp_thread_data_t ** threads_data_p;
- kmp_int32 nthreads, maxthreads;
- int is_init_thread = FALSE;
-
- if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
- // Already reallocated and initialized.
- return FALSE;
- }
-
- threads_data_p = & task_team -> tt.tt_threads_data;
- nthreads = task_team -> tt.tt_nproc;
- maxthreads = task_team -> tt.tt_max_threads;
-
- // All threads must lock when they encounter the first task of the implicit task
- // region to make sure threads_data fields are (re)initialized before used.
- __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-
- if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
- // first thread to enable tasking
- kmp_team_t *team = thread -> th.th_team;
- int i;
-
- is_init_thread = TRUE;
- if ( maxthreads < nthreads ) {
-
- if ( *threads_data_p != NULL ) {
- kmp_thread_data_t *old_data = *threads_data_p;
- kmp_thread_data_t *new_data = NULL;
-
- KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
- "threads data for task_team %p, new_size = %d, old_size = %d\n",
- __kmp_gtid_from_thread( thread ), task_team,
- nthreads, maxthreads ) );
- // Reallocate threads_data to have more elements than current array
- // Cannot use __kmp_thread_realloc() because threads not around for
- // kmp_reap_task_team( ). Note all new array entries are initialized
- // to zero by __kmp_allocate().
- new_data = (kmp_thread_data_t *)
- __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
- // copy old data to new data
- KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
- (void *) old_data,
- maxthreads * sizeof(kmp_taskdata_t *) );
+ if (!TCR_4(task_team->tt.tt_found_tasks)) {
+ // first thread to enable tasking
+ kmp_team_t *team = thread->th.th_team;
+ int i;
+
+ is_init_thread = TRUE;
+ if (maxthreads < nthreads) {
+
+ if (*threads_data_p != NULL) {
+ kmp_thread_data_t *old_data = *threads_data_p;
+ kmp_thread_data_t *new_data = NULL;
+
+ KE_TRACE(
+ 10,
+ ("__kmp_realloc_task_threads_data: T#%d reallocating "
+ "threads data for task_team %p, new_size = %d, old_size = %d\n",
+ __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
+ // Reallocate threads_data to have more elements than current array
+ // Cannot use __kmp_thread_realloc() because threads not around for
+ // kmp_reap_task_team( ). Note all new array entries are initialized
+ // to zero by __kmp_allocate().
+ new_data = (kmp_thread_data_t *)__kmp_allocate(
+ nthreads * sizeof(kmp_thread_data_t));
+ // copy old data to new data
+ KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
+ (void *)old_data, maxthreads * sizeof(kmp_taskdata_t *));
#ifdef BUILD_TIED_TASK_STACK
- // GEH: Figure out if this is the right thing to do
- for (i = maxthreads; i < nthreads; i++) {
- kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
- __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
- }
+ // GEH: Figure out if this is the right thing to do
+ for (i = maxthreads; i < nthreads; i++) {
+ kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
+ __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
+ }
#endif // BUILD_TIED_TASK_STACK
- // Install the new data and free the old data
- (*threads_data_p) = new_data;
- __kmp_free( old_data );
- }
- else {
- KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
- "threads data for task_team %p, size = %d\n",
- __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
- // Make the initial allocate for threads_data array, and zero entries
- // Cannot use __kmp_thread_calloc() because threads not around for
- // kmp_reap_task_team( ).
- ANNOTATE_IGNORE_WRITES_BEGIN();
- *threads_data_p = (kmp_thread_data_t *)
- __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
- ANNOTATE_IGNORE_WRITES_END();
+ // Install the new data and free the old data
+ (*threads_data_p) = new_data;
+ __kmp_free(old_data);
+ } else {
+ KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
+ "threads data for task_team %p, size = %d\n",
+ __kmp_gtid_from_thread(thread), task_team, nthreads));
+ // Make the initial allocate for threads_data array, and zero entries
+ // Cannot use __kmp_thread_calloc() because threads not around for
+ // kmp_reap_task_team( ).
+ ANNOTATE_IGNORE_WRITES_BEGIN();
+ *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
+ nthreads * sizeof(kmp_thread_data_t));
+ ANNOTATE_IGNORE_WRITES_END();
#ifdef BUILD_TIED_TASK_STACK
- // GEH: Figure out if this is the right thing to do
- for (i = 0; i < nthreads; i++) {
- kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
- __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
- }
-#endif // BUILD_TIED_TASK_STACK
- }
- task_team -> tt.tt_max_threads = nthreads;
- }
- else {
- // If array has (more than) enough elements, go ahead and use it
- KMP_DEBUG_ASSERT( *threads_data_p != NULL );
- }
-
- // initialize threads_data pointers back to thread_info structures
+ // GEH: Figure out if this is the right thing to do
for (i = 0; i < nthreads; i++) {
- kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
- thread_data -> td.td_thr = team -> t.t_threads[i];
-
- if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
- // The last stolen field survives across teams / barrier, and the number
- // of threads may have changed. It's possible (likely?) that a new
- // parallel region will exhibit the same behavior as the previous region.
- thread_data -> td.td_deque_last_stolen = -1;
- }
+ kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
+ __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
}
+#endif // BUILD_TIED_TASK_STACK
+ }
+ task_team->tt.tt_max_threads = nthreads;
+ } else {
+ // If array has (more than) enough elements, go ahead and use it
+ KMP_DEBUG_ASSERT(*threads_data_p != NULL);
+ }
- KMP_MB();
- TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
+ // initialize threads_data pointers back to thread_info structures
+ for (i = 0; i < nthreads; i++) {
+ kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
+ thread_data->td.td_thr = team->t.t_threads[i];
+
+ if (thread_data->td.td_deque_last_stolen >= nthreads) {
+ // The last stolen field survives across teams / barrier, and the number
+ // of threads may have changed. It's possible (likely?) that a new
+      // parallel region will exhibit the same behavior as the previous one.
+ thread_data->td.td_deque_last_stolen = -1;
+ }
}
- __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
- return is_init_thread;
-}
+ KMP_MB();
+ TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
+ }
+ __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
+ return is_init_thread;
+}
-//------------------------------------------------------------------------------
// __kmp_free_task_threads_data:
// Deallocates a threads_data array for a task team, including any attached
// tasking deques. Only occurs at library shutdown.
-
-static void
-__kmp_free_task_threads_data( kmp_task_team_t *task_team )
-{
- __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
- if ( task_team -> tt.tt_threads_data != NULL ) {
- int i;
- for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
- __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
- }
- __kmp_free( task_team -> tt.tt_threads_data );
- task_team -> tt.tt_threads_data = NULL;
- }
- __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
+ __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
+ if (task_team->tt.tt_threads_data != NULL) {
+ int i;
+ for (i = 0; i < task_team->tt.tt_max_threads; i++) {
+ __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
+ }
+ __kmp_free(task_team->tt.tt_threads_data);
+ task_team->tt.tt_threads_data = NULL;
+ }
+ __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
-
-//------------------------------------------------------------------------------
// __kmp_allocate_task_team:
// Allocates a task team associated with a specific team, taking it from
-// the global task team free list if possible. Also initializes data structures.
-
-static kmp_task_team_t *
-__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
-{
- kmp_task_team_t *task_team = NULL;
- int nthreads;
-
- KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
- (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
-
- if (TCR_PTR(__kmp_free_task_teams) != NULL) {
- // Take a task team from the task team pool
- __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
- if (__kmp_free_task_teams != NULL) {
- task_team = __kmp_free_task_teams;
- TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
- task_team -> tt.tt_next = NULL;
- }
- __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
- }
-
- if (task_team == NULL) {
- KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
- "task team for team %p\n",
- __kmp_gtid_from_thread( thread ), team ) );
- // Allocate a new task team if one is not available.
- // Cannot use __kmp_thread_malloc() because threads not around for
- // kmp_reap_task_team( ).
- task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
- __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
- //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
- //task_team -> tt.tt_max_threads = 0;
- //task_team -> tt.tt_next = NULL;
- }
-
- TCW_4(task_team -> tt.tt_found_tasks, FALSE);
-#if OMP_45_ENABLED
- TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
-#endif
- task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
-
- TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
- TCW_4( task_team -> tt.tt_active, TRUE );
-
- KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
- (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
- return task_team;
+// the global task team free list if possible. Also initializes data
+// structures.
+static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
+ kmp_team_t *team) {
+ kmp_task_team_t *task_team = NULL;
+ int nthreads;
+
+ KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
+ (thread ? __kmp_gtid_from_thread(thread) : -1), team));
+
+ if (TCR_PTR(__kmp_free_task_teams) != NULL) {
+ // Take a task team from the task team pool
+ __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
+ if (__kmp_free_task_teams != NULL) {
+ task_team = __kmp_free_task_teams;
+ TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
+ task_team->tt.tt_next = NULL;
+ }
+ __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
+ }
+
+ if (task_team == NULL) {
+ KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
+ "task team for team %p\n",
+ __kmp_gtid_from_thread(thread), team));
+ // Allocate a new task team if one is not available.
+ // Cannot use __kmp_thread_malloc() because threads not around for
+ // kmp_reap_task_team( ).
+ task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
+ __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
+ // AC: __kmp_allocate zeroes returned memory
+ // task_team -> tt.tt_threads_data = NULL;
+ // task_team -> tt.tt_max_threads = 0;
+ // task_team -> tt.tt_next = NULL;
+ }
+
+ TCW_4(task_team->tt.tt_found_tasks, FALSE);
+#if OMP_45_ENABLED
+ TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+#endif
+ task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
+
+ TCW_4(task_team->tt.tt_unfinished_threads, nthreads);
+ TCW_4(task_team->tt.tt_active, TRUE);
+
+ KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
+ "unfinished_threads init'd to %d\n",
+ (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
+ task_team->tt.tt_unfinished_threads));
+ return task_team;
}
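The allocation path above first tries to pop a previously freed task team off the global free list under the bootstrap lock and only falls back to a fresh zeroed allocation when the list is empty. A minimal standalone sketch of that free-list reuse pattern, with hypothetical simplified types and a std::mutex standing in for the bootstrap lock (an illustration, not the runtime's actual code):

#include <mutex>

struct Node {
  Node *next = nullptr; // free-list link, analogous to tt_next
};

static Node *free_list = nullptr; // analogous to __kmp_free_task_teams
static std::mutex free_list_lock; // stands in for the bootstrap lock

Node *acquire_node() {
  {
    std::lock_guard<std::mutex> guard(free_list_lock);
    if (free_list != nullptr) { // reuse a node from the free list
      Node *n = free_list;
      free_list = n->next;
      n->next = nullptr;
      return n;
    }
  }
  return new Node(); // nothing to reuse, allocate a fresh one
}

void release_node(Node *n) {
  std::lock_guard<std::mutex> guard(free_list_lock);
  n->next = free_list; // push back onto the free list head
  free_list = n;
}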
-
-//------------------------------------------------------------------------------
// __kmp_free_task_team:
// Frees the task team associated with a specific thread, and adds it
// to the global task team free list.
+void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
+ KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
+ thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
+
+ // Put task team back on free list
+ __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
+
+ KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
+ task_team->tt.tt_next = __kmp_free_task_teams;
+ TCW_PTR(__kmp_free_task_teams, task_team);
-void
-__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
-{
- KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
- thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
-
- // Put task team back on free list
- __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
-
- KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
- task_team -> tt.tt_next = __kmp_free_task_teams;
- TCW_PTR(__kmp_free_task_teams, task_team);
-
- __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
+ __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
-
-//------------------------------------------------------------------------------
// __kmp_reap_task_teams:
// Free all the task teams on the task team free list.
// Should only be done during library shutdown.
-// Cannot do anything that needs a thread structure or gtid since they are already gone.
-
-void
-__kmp_reap_task_teams( void )
-{
- kmp_task_team_t *task_team;
-
- if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
- // Free all task_teams on the free list
- __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
- while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
- __kmp_free_task_teams = task_team -> tt.tt_next;
- task_team -> tt.tt_next = NULL;
-
- // Free threads_data if necessary
- if ( task_team -> tt.tt_threads_data != NULL ) {
- __kmp_free_task_threads_data( task_team );
- }
- __kmp_free( task_team );
- }
- __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
+// Cannot do anything that needs a thread structure or gtid since they are
+// already gone.
+void __kmp_reap_task_teams(void) {
+ kmp_task_team_t *task_team;
+
+ if (TCR_PTR(__kmp_free_task_teams) != NULL) {
+ // Free all task_teams on the free list
+ __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
+ while ((task_team = __kmp_free_task_teams) != NULL) {
+ __kmp_free_task_teams = task_team->tt.tt_next;
+ task_team->tt.tt_next = NULL;
+
+ // Free threads_data if necessary
+ if (task_team->tt.tt_threads_data != NULL) {
+ __kmp_free_task_threads_data(task_team);
+ }
+ __kmp_free(task_team);
}
+ __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
+ }
}
-//------------------------------------------------------------------------------
// __kmp_wait_to_unref_task_teams:
// Some threads could still be in the fork barrier release code, possibly
// trying to steal tasks. Wait for each thread to unreference its task team.
-//
-void
-__kmp_wait_to_unref_task_teams(void)
-{
- kmp_info_t *thread;
- kmp_uint32 spins;
- int done;
-
- KMP_INIT_YIELD( spins );
-
- for (;;) {
- done = TRUE;
-
- // TODO: GEH - this may be is wrong because some sync would be necessary
- // in case threads are added to the pool during the traversal.
- // Need to verify that lock for thread pool is held when calling
- // this routine.
- for (thread = (kmp_info_t *)__kmp_thread_pool;
- thread != NULL;
- thread = thread->th.th_next_pool)
- {
+void __kmp_wait_to_unref_task_teams(void) {
+ kmp_info_t *thread;
+ kmp_uint32 spins;
+ int done;
+
+ KMP_INIT_YIELD(spins);
+
+ for (;;) {
+ done = TRUE;
+
+    // TODO: GEH - this may be wrong because some sync would be necessary
+ // in case threads are added to the pool during the traversal. Need to
+ // verify that lock for thread pool is held when calling this routine.
+ for (thread = (kmp_info_t *)__kmp_thread_pool; thread != NULL;
+ thread = thread->th.th_next_pool) {
#if KMP_OS_WINDOWS
- DWORD exit_val;
+ DWORD exit_val;
#endif
- if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
- KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
- __kmp_gtid_from_thread( thread ) ) );
- continue;
- }
+ if (TCR_PTR(thread->th.th_task_team) == NULL) {
+ KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
+ __kmp_gtid_from_thread(thread)));
+ continue;
+ }
#if KMP_OS_WINDOWS
- // TODO: GEH - add this check for Linux* OS / OS X* as well?
- if (!__kmp_is_thread_alive(thread, &exit_val)) {
- thread->th.th_task_team = NULL;
- continue;
- }
+ // TODO: GEH - add this check for Linux* OS / OS X* as well?
+ if (!__kmp_is_thread_alive(thread, &exit_val)) {
+ thread->th.th_task_team = NULL;
+ continue;
+ }
#endif
- done = FALSE; // Because th_task_team pointer is not NULL for this thread
-
- KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
- __kmp_gtid_from_thread( thread ) ) );
+ done = FALSE; // Because th_task_team pointer is not NULL for this thread
- if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
- volatile void *sleep_loc;
- // If the thread is sleeping, awaken it.
- if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
- KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
- __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
- __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
- }
- }
+ KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
+ "unreference task_team\n",
+ __kmp_gtid_from_thread(thread)));
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ volatile void *sleep_loc;
+ // If the thread is sleeping, awaken it.
+ if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) {
+ KA_TRACE(
+ 10,
+ ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
+ __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
+ __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
}
- if (done) {
- break;
- }
-
- // If we are oversubscribed,
- // or have waited a bit (and library mode is throughput), yield.
- // Pause is in the following code.
- KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
- KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
+ }
+ }
+ if (done) {
+ break;
}
-}
+ // If we are oversubscribed, or have waited a bit (and library mode is
+ // throughput), yield. Pause is in the following code.
+ KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
+ KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
+ }
+}
-//------------------------------------------------------------------------------
// __kmp_task_team_setup: Create a task_team for the current team, but use
// an already created, unused one if it already exists.
-void
-__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
-{
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
- // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
- // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
- if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
- team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
- KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
- __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
- ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
- }
-
- // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
- // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
- // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
- // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
- // serialized teams.
- if (team->t.t_nproc > 1) {
- int other_team = 1 - this_thr->th.th_task_state;
- if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
- team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
- KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
- __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
- ((team != NULL) ? team->t.t_id : -1), other_team ));
- }
- else { // Leave the old task team struct in place for the upcoming region; adjust as needed
- kmp_task_team_t *task_team = team->t.t_task_team[other_team];
- if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
- TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
- TCW_4(task_team->tt.tt_found_tasks, FALSE);
+ // If this task_team hasn't been created yet, allocate it. It will be used in
+ // the region after the next.
+ // If it exists, it is the current task team and shouldn't be touched yet as
+ // it may still be in use.
+ if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
+ (always || team->t.t_nproc > 1)) {
+ team->t.t_task_team[this_thr->th.th_task_state] =
+ __kmp_allocate_task_team(this_thr, team);
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
+ "for team %d at parity=%d\n",
+ __kmp_gtid_from_thread(this_thr),
+ team->t.t_task_team[this_thr->th.th_task_state],
+ ((team != NULL) ? team->t.t_id : -1),
+ this_thr->th.th_task_state));
+ }
+
+ // After threads exit the release, they will call sync, and then point to this
+ // other task_team; make sure it is allocated and properly initialized. As
+ // threads spin in the barrier release phase, they will continue to use the
+ // previous task_team struct(above), until they receive the signal to stop
+ // checking for tasks (they can't safely reference the kmp_team_t struct,
+ // which could be reallocated by the master thread). No task teams are formed
+ // for serialized teams.
+ if (team->t.t_nproc > 1) {
+ int other_team = 1 - this_thr->th.th_task_state;
+ if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
+ team->t.t_task_team[other_team] =
+ __kmp_allocate_task_team(this_thr, team);
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
+ "task_team %p for team %d at parity=%d\n",
+ __kmp_gtid_from_thread(this_thr),
+ team->t.t_task_team[other_team],
+ ((team != NULL) ? team->t.t_id : -1), other_team));
+ } else { // Leave the old task team struct in place for the upcoming region;
+ // adjust as needed
+ kmp_task_team_t *task_team = team->t.t_task_team[other_team];
+ if (!task_team->tt.tt_active ||
+ team->t.t_nproc != task_team->tt.tt_nproc) {
+ TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
+ TCW_4(task_team->tt.tt_found_tasks, FALSE);
#if OMP_45_ENABLED
- TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+ TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#endif
- TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
- TCW_4(task_team->tt.tt_active, TRUE );
- }
- // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
- KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
- __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
- ((team != NULL) ? team->t.t_id : -1), other_team ));
- }
+ TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc);
+ TCW_4(task_team->tt.tt_active, TRUE);
+ }
+ // if team size has changed, the first thread to enable tasking will
+ // realloc threads_data if necessary
+ KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
+ "%p for team %d at parity=%d\n",
+ __kmp_gtid_from_thread(this_thr),
+ team->t.t_task_team[other_team],
+ ((team != NULL) ? team->t.t_id : -1), other_team));
}
+ }
}
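The comments above describe a double-buffering scheme: t_task_team holds two slots indexed by th_task_state, the master prepares the slot for the upcoming region while workers may still be draining the current one, and each thread later flips its parity to adopt the new slot. A toy model of that scheme, with hypothetical names and no synchronization (a sketch, not the runtime's implementation):

struct TaskTeam {
  bool active = false;
};

struct Team {
  TaskTeam *slots[2] = {nullptr, nullptr}; // analogous to t_task_team[2]
};

struct Thread {
  int parity = 0;              // analogous to th_task_state (0 or 1)
  TaskTeam *current = nullptr; // analogous to th_task_team
};

// The master prepares the slot the *next* region will use, leaving the
// current slot untouched because workers may still be draining it.
void setup_next_region(Team &team, Thread &master) {
  int other = 1 - master.parity;
  if (team.slots[other] == nullptr)
    team.slots[other] = new TaskTeam();
  team.slots[other]->active = true;
}

// After the barrier release each thread flips its parity and adopts the
// freshly prepared slot, which is the role played by __kmp_task_team_sync.
void sync_thread(Team &team, Thread &thr) {
  thr.parity = 1 - thr.parity;
  thr.current = team.slots[thr.parity];
}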
-
-//------------------------------------------------------------------------------
// __kmp_task_team_sync: Propagation of task team data from team to threads
// which happens just after the release phase of a team barrier. This may be
// called by any thread, but only for teams with # threads > 1.
+void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
-void
-__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
-{
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
- // Toggle the th_task_state field, to switch which task_team this thread refers to
- this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
- // It is now safe to propagate the task team pointer from the team struct to the current thread.
- TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
- KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
- __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
- ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
-}
-
-
-//--------------------------------------------------------------------------------------------
-// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
-// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created.
-// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
-// optionally as the last argument. When wait is zero, master thread does not wait for
-// unfinished_threads to reach 0.
-void
-__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
- USE_ITT_BUILD_ARG(void * itt_sync_obj)
- , int wait)
-{
- kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
-
- KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
- KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
-
- if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
- if (wait) {
- KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
- __kmp_gtid_from_thread(this_thr), task_team));
- // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
- // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
- kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
- flag.wait(this_thr, TRUE
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
- KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
- "setting active to false, setting local and team's pointer to NULL\n",
- __kmp_gtid_from_thread(this_thr), task_team));
-#if OMP_45_ENABLED
- KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
- TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
+ // Toggle the th_task_state field, to switch which task_team this thread
+ // refers to
+ this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
+ // It is now safe to propagate the task team pointer from the team struct to
+ // the current thread.
+ TCW_PTR(this_thr->th.th_task_team,
+ team->t.t_task_team[this_thr->th.th_task_state]);
+ KA_TRACE(20,
+ ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
+ "%p from Team #%d (parity=%d)\n",
+ __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
+ ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
+}
+
+// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
+// barrier gather phase. Only called by master thread if #threads in team > 1 or
+// if proxy tasks were created.
+//
+// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
+// by passing in 0 optionally as the last argument. When wait is zero, master
+// thread does not wait for unfinished_threads to reach 0.
+void __kmp_task_team_wait(
+ kmp_info_t *this_thr,
+ kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
+ kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
+
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+ KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
+
+ if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
+ if (wait) {
+ KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
+ "(for unfinished_threads to reach 0) on task_team = %p\n",
+ __kmp_gtid_from_thread(this_thr), task_team));
+ // Worker threads may have dropped through to release phase, but could
+ // still be executing tasks. Wait here for tasks to complete. To avoid
+ // memory contention, only master thread checks termination condition.
+ kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
+ flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ // Deactivate the old task team, so that the worker threads will stop
+ // referencing it while spinning.
+ KA_TRACE(
+ 20,
+ ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
+ "setting active to false, setting local and team's pointer to NULL\n",
+ __kmp_gtid_from_thread(this_thr), task_team));
+#if OMP_45_ENABLED
+ KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
+ task_team->tt.tt_found_proxy_tasks == TRUE);
+ TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
#else
- KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
+ KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
#endif
- TCW_SYNC_4( task_team->tt.tt_active, FALSE );
- KMP_MB();
+ TCW_SYNC_4(task_team->tt.tt_active, FALSE);
+ KMP_MB();
- TCW_PTR(this_thr->th.th_task_team, NULL);
- }
+ TCW_PTR(this_thr->th.th_task_team, NULL);
+ }
}
-
-//------------------------------------------------------------------------------
// __kmp_tasking_barrier:
// This routine may only called when __kmp_tasking_mode == tskm_extra_barrier.
-// Internal function to execute all tasks prior to a regular barrier or a
-// join barrier. It is a full barrier itself, which unfortunately turns
-// regular barriers into double barriers and join barriers into 1 1/2
-// barriers.
-void
-__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
-{
- volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
- int flag = FALSE;
- KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
+// Internal function to execute all tasks prior to a regular barrier or a join
+// barrier. It is a full barrier itself, which unfortunately turns regular
+// barriers into double barriers and join barriers into 1 1/2 barriers.
+void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
+ volatile kmp_uint32 *spin =
+ &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
+ int flag = FALSE;
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
#if USE_ITT_BUILD
- KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
+ KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL);
#endif /* USE_ITT_BUILD */
- kmp_flag_32 spin_flag(spin, 0U);
- while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
- USE_ITT_BUILD_ARG(NULL), 0 ) ) {
+ kmp_flag_32 spin_flag(spin, 0U);
+ while (!spin_flag.execute_tasks(thread, gtid, TRUE,
+ &flag USE_ITT_BUILD_ARG(NULL), 0)) {
#if USE_ITT_BUILD
- // TODO: What about itt_sync_obj??
- KMP_FSYNC_SPIN_PREPARE( spin );
+ // TODO: What about itt_sync_obj??
+ KMP_FSYNC_SPIN_PREPARE(spin);
#endif /* USE_ITT_BUILD */
- if( TCR_4(__kmp_global.g.g_done) ) {
- if( __kmp_global.g.g_abort )
- __kmp_abort_thread( );
- break;
- }
- KMP_YIELD( TRUE ); // GH: We always yield here
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
}
+ KMP_YIELD(TRUE); // GH: We always yield here
+ }
#if USE_ITT_BUILD
- KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
+ KMP_FSYNC_SPIN_ACQUIRED((void *)spin);
#endif /* USE_ITT_BUILD */
}
-
#if OMP_45_ENABLED
-/* __kmp_give_task puts a task into a given thread queue if:
- - the queue for that thread was created
- - there's space in that queue
-
- Because of this, __kmp_push_task needs to check if there's space after getting the lock
- */
-static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- kmp_task_team_t * task_team = taskdata->td_task_team;
-
- KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
-
- // If task_team is NULL something went really bad...
- KMP_DEBUG_ASSERT( task_team != NULL );
-
- bool result = false;
- kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
-
- if (thread_data -> td.td_deque == NULL ) {
- // There's no queue in this thread, go find another one
- // We're guaranteed that at least one thread has a queue
- KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
- return result;
- }
-
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
- {
- KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
-
- // if this deque is bigger than the pass ratio give a chance to another thread
- if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
-
- __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
- __kmp_realloc_task_deque(thread,thread_data);
-
- } else {
-
- __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
-
- if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
- {
- KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
-
- // if this deque is bigger than the pass ratio give a chance to another thread
- if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
- goto release_and_exit;
-
- __kmp_realloc_task_deque(thread,thread_data);
- }
- }
-
- // lock is held here, and there is space in the deque
-
- thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
- // Wrap index.
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
- TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
-
- result = true;
- KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
+// __kmp_give_task puts a task into a given thread queue if:
+// - the queue for that thread was created
+// - there's space in that queue
+// Because of this, __kmp_push_task needs to check if there's space after
+// getting the lock
+static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
+ kmp_int32 pass) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ kmp_task_team_t *task_team = taskdata->td_task_team;
+
+ KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
+ taskdata, tid));
+
+ // If task_team is NULL something went really bad...
+ KMP_DEBUG_ASSERT(task_team != NULL);
+
+ bool result = false;
+ kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
+
+ if (thread_data->td.td_deque == NULL) {
+ // There's no queue in this thread, go find another one
+ // We're guaranteed that at least one thread has a queue
+ KA_TRACE(30,
+ ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
+ tid, taskdata));
+ return result;
+ }
+
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ KA_TRACE(
+ 30,
+ ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
+ taskdata, tid));
+
+ // if this deque is bigger than the pass ratio give a chance to another
+ // thread
+ if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
+ return result;
+
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+ __kmp_realloc_task_deque(thread, thread_data);
+
+ } else {
+
+ __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+ if (TCR_4(thread_data->td.td_deque_ntasks) >=
+ TASK_DEQUE_SIZE(thread_data->td)) {
+ KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
+ "thread %d.\n",
+ taskdata, tid));
+
+ // if this deque is bigger than the pass ratio give a chance to another
+ // thread
+ if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
+ goto release_and_exit;
+
+ __kmp_realloc_task_deque(thread, thread_data);
+ }
+ }
+
+ // lock is held here, and there is space in the deque
+
+ thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
+ // Wrap index.
+ thread_data->td.td_deque_tail =
+ (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
+ TCW_4(thread_data->td.td_deque_ntasks,
+ TCR_4(thread_data->td.td_deque_ntasks) + 1);
+
+ result = true;
+ KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
+ taskdata, tid));
release_and_exit:
- __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
- return result;
+ return result;
}
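The per-thread deque used by __kmp_give_task is sized as a power of two, so the tail index wraps with TASK_DEQUE_MASK, and a full deque is either grown under its lock or skipped in favour of another thread depending on the pass ratio. A self-contained sketch of just the ring-buffer push, with a hypothetical fixed capacity and no locking or growth:

#include <cstddef>
#include <vector>

struct Deque {
  std::vector<void *> slots; // capacity assumed to be a power of two
  std::size_t tail = 0, ntasks = 0;
  explicit Deque(std::size_t capacity) : slots(capacity) {}
  std::size_t mask() const { return slots.size() - 1; }

  // Push at the tail; on 'full' the caller would grow the deque or try
  // a different thread, as the code above does.
  bool push_tail(void *task) {
    if (ntasks >= slots.size())
      return false;
    slots[tail] = task;
    tail = (tail + 1) & mask(); // wrap index, like TASK_DEQUE_MASK
    ++ntasks;
    return true;
  }
};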
-
-/* The finish of the a proxy tasks is divided in two pieces:
+/* The finish of the proxy tasks is divided in two pieces:
- the top half is the one that can be done from a thread outside the team
- the bottom half must be run from a thread within the team
- In order to run the bottom half the task gets queued back into one of the threads of the team.
- Once the td_incomplete_child_task counter of the parent is decremented the threads can leave the barriers.
- So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
+ In order to run the bottom half the task gets queued back into one of the
+ threads of the team. Once the td_incomplete_child_task counter of the parent
+ is decremented the threads can leave the barriers. So, the bottom half needs
+ to be queued before the counter is decremented. The top half is therefore
+ divided in two parts:
- things that can be run before queuing the bottom half
- things that must be run after queuing the bottom half
- This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
- we use the td_incomplete_child_task of the proxy task to synchronize the top and bottom half.
-*/
-
-static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
-{
- KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
- taskdata -> td_flags.complete = 1; // mark the task as completed
-
- if ( taskdata->td_taskgroup )
- KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
+ This creates a second race as the bottom half can free the task before the
+ second top half is executed. To avoid this we use the
+ td_incomplete_child_task of the proxy task to synchronize the top and bottom
+ half. */
+static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+ KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
+ taskdata->td_flags.complete = 1; // mark the task as completed
+
+ if (taskdata->td_taskgroup)
+ KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
+
+  // Create an imaginary child for this task so the bottom half cannot
+ // release the task before we have completed the second top half
+ TCI_4(taskdata->td_incomplete_child_tasks);
+}
+
+static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+ kmp_int32 children = 0;
+
+ // Predecrement simulated by "- 1" calculation
+ children =
+ KMP_TEST_THEN_DEC32(
+ (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) -
+ 1;
+ KMP_DEBUG_ASSERT(children >= 0);
+
+  // Remove the imaginary child
+ TCD_4(taskdata->td_incomplete_child_tasks);
+}
+
+static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
+ kmp_info_t *thread = __kmp_threads[gtid];
+
+ KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
+ KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
+ 1); // top half must run before bottom half
+
+ // We need to wait to make sure the top half is finished
+ // Spinning here should be ok as this should happen quickly
+ while (TCR_4(taskdata->td_incomplete_child_tasks) > 0)
+ ;
- // Create an imaginary children for this task so the bottom half cannot release the task before we have completed the second top half
- TCI_4(taskdata->td_incomplete_child_tasks);
-}
-
-static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
-{
- kmp_int32 children = 0;
-
- // Predecrement simulated by "- 1" calculation
- children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
- KMP_DEBUG_ASSERT( children >= 0 );
-
- // Remove the imaginary children
- TCD_4(taskdata->td_incomplete_child_tasks);
-}
-
-static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
- kmp_info_t * thread = __kmp_threads[ gtid ];
-
- KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
- KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
-
- // We need to wait to make sure the top half is finished
- // Spinning here should be ok as this should happen quickly
- while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
-
- __kmp_release_deps(gtid,taskdata);
- __kmp_free_task_and_ancestors(gtid, taskdata, thread);
+ __kmp_release_deps(gtid, taskdata);
+ __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
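The comment block before __kmp_first_top_half_finish_proxy describes the race being closed here: the bottom half, queued to a team thread, must not free the proxy task until the second top half is done with it, so the first top half parks an extra "imaginary" child count that the second top half drops and the bottom half spins on. A stripped-down model of that handshake, using std::atomic in place of the TCI_4/TCD_4 macros (an illustration, not the runtime code):

#include <atomic>

struct ProxyTask {
  std::atomic<int> incomplete_children{0};
  bool complete = false;
};

void first_top_half(ProxyTask &t) {
  t.complete = true;                  // mark the task as completed
  t.incomplete_children.fetch_add(1); // imaginary child keeps the task alive
}

void second_top_half(ProxyTask &t) {
  // (bookkeeping on the parent task would happen here)
  t.incomplete_children.fetch_sub(1); // drop the imaginary child
}

void bottom_half(ProxyTask &t) {
  // Spin until the second top half has dropped the imaginary child; only
  // then is it safe to release dependences and free the task.
  while (t.incomplete_children.load() > 0) {
  }
  // (release dependences, free the task and its ancestors)
}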
/*!
@@ -3075,132 +3134,153 @@ static void __kmp_bottom_half_finish_pro
@param gtid Global Thread ID of encountering thread
@param ptask Task whose execution is completed
-Execute the completation of a proxy task from a thread of that is part of the team. Run first and bottom halves directly.
+Execute the completion of a proxy task from a thread that is part of the
+team. Run first and bottom halves directly.
*/
-void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
-{
- KMP_DEBUG_ASSERT( ptask != NULL );
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
- KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
-
- KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
-
- __kmp_first_top_half_finish_proxy(taskdata);
- __kmp_second_top_half_finish_proxy(taskdata);
- __kmp_bottom_half_finish_proxy(gtid,ptask);
-
- KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
+void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
+ KMP_DEBUG_ASSERT(ptask != NULL);
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
+ KA_TRACE(
+ 10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
+ gtid, taskdata));
+
+ KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
+
+ __kmp_first_top_half_finish_proxy(taskdata);
+ __kmp_second_top_half_finish_proxy(taskdata);
+ __kmp_bottom_half_finish_proxy(gtid, ptask);
+
+ KA_TRACE(10,
+ ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
+ gtid, taskdata));
}
/*!
@ingroup TASKING
@param ptask Task whose execution is completed
-Execute the completation of a proxy task from a thread that could not belong to the team.
+Execute the completion of a proxy task from a thread that could not belong to
+the team.
*/
-void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
-{
- KMP_DEBUG_ASSERT( ptask != NULL );
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
+void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
+ KMP_DEBUG_ASSERT(ptask != NULL);
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
- KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
+ KA_TRACE(
+ 10,
+ ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
+ taskdata));
- KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
+ KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
- __kmp_first_top_half_finish_proxy(taskdata);
+ __kmp_first_top_half_finish_proxy(taskdata);
- // Enqueue task to complete bottom half completion from a thread within the corresponding team
- kmp_team_t * team = taskdata->td_team;
- kmp_int32 nthreads = team->t.t_nproc;
- kmp_info_t *thread;
+ // Enqueue task to complete bottom half completion from a thread within the
+ // corresponding team
+ kmp_team_t *team = taskdata->td_team;
+ kmp_int32 nthreads = team->t.t_nproc;
+ kmp_info_t *thread;
- //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
- kmp_int32 start_k = 0;
- kmp_int32 pass = 1;
- kmp_int32 k = start_k;
+ // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
+ // but we cannot use __kmp_get_random here
+ kmp_int32 start_k = 0;
+ kmp_int32 pass = 1;
+ kmp_int32 k = start_k;
- do {
- //For now we're just linearly trying to find a thread
- thread = team->t.t_threads[k];
- k = (k+1) % nthreads;
+ do {
+ // For now we're just linearly trying to find a thread
+ thread = team->t.t_threads[k];
+ k = (k + 1) % nthreads;
- // we did a full pass through all the threads
- if ( k == start_k ) pass = pass << 1;
+ // we did a full pass through all the threads
+ if (k == start_k)
+ pass = pass << 1;
- } while ( !__kmp_give_task( thread, k, ptask, pass ) );
+ } while (!__kmp_give_task(thread, k, ptask, pass));
- __kmp_second_top_half_finish_proxy(taskdata);
+ __kmp_second_top_half_finish_proxy(taskdata);
- KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
+ KA_TRACE(
+ 10,
+ ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
+ taskdata));
}
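The placement loop above walks the team's threads round-robin and, after each unsuccessful full sweep, doubles pass so that progressively larger deques become acceptable targets for the bottom-half task. The sketch below mimics that retry structure; the acceptance rule in try_give is a hypothetical depth threshold, simpler than the real deque-growth ratio checked by __kmp_give_task:

#include <cstddef>
#include <vector>

static std::vector<std::size_t> queue_depth; // toy per-thread queue depths
static const std::size_t kBaseCapacity = 4;  // hypothetical base capacity

// Accept the task only while the target queue is below pass * kBaseCapacity.
static bool try_give(int target, int pass) {
  if (queue_depth[target] >= kBaseCapacity * (std::size_t)pass)
    return false;
  ++queue_depth[target];
  return true;
}

// Round-robin placement with a doubling acceptance threshold.
static void place_task(int nthreads) {
  int start_k = 0; // the real code notes it would prefer a random start
  int pass = 1;
  int k = start_k;
  bool placed;
  do {
    int target = k;
    k = (k + 1) % nthreads;
    if (k == start_k)
      pass = pass << 1; // a full sweep failed, relax the threshold
    placed = try_give(target, pass);
  } while (!placed);
}

int main() {
  queue_depth.assign(4, 0); // four threads with empty queues
  place_task(4);            // lands the task on thread 0 immediately
  return 0;
}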
-//---------------------------------------------------------------------------------
-// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
+// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
+// for taskloop
//
// thread: allocating thread
// task_src: pointer to source task to be duplicated
// returns: a pointer to the allocated kmp_task_t structure (task).
-kmp_task_t *
-__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
-{
- kmp_task_t *task;
- kmp_taskdata_t *taskdata;
- kmp_taskdata_t *taskdata_src;
- kmp_taskdata_t *parent_task = thread->th.th_current_task;
- size_t shareds_offset;
- size_t task_size;
-
- KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
- taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
- KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
- KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
- task_size = taskdata_src->td_size_alloc;
-
- // Allocate a kmp_taskdata_t block and a kmp_task_t block.
- KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
- #if USE_FAST_MEMORY
- taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
- #else
- taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
- #endif /* USE_FAST_MEMORY */
- KMP_MEMCPY(taskdata, taskdata_src, task_size);
-
- task = KMP_TASKDATA_TO_TASK(taskdata);
-
- // Initialize new task (only specific fields not affected by memcpy)
- taskdata->td_task_id = KMP_GEN_TASK_ID();
- if( task->shareds != NULL ) { // need setup shareds pointer
- shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
- task->shareds = &((char*)taskdata)[shareds_offset];
- KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
- }
- taskdata->td_alloc_thread = thread;
- taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
-
- // Only need to keep track of child task counts if team parallel and tasking not serialized
- if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
- if ( parent_task->td_taskgroup )
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
- // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
- if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
- KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
- }
-
- KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
- thread, taskdata, taskdata->td_parent) );
+kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
+ kmp_task_t *task;
+ kmp_taskdata_t *taskdata;
+ kmp_taskdata_t *taskdata_src;
+ kmp_taskdata_t *parent_task = thread->th.th_current_task;
+ size_t shareds_offset;
+ size_t task_size;
+
+ KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
+ task_src));
+ taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
+ KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
+ TASK_FULL); // it should not be proxy task
+ KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
+ task_size = taskdata_src->td_size_alloc;
+
+ // Allocate a kmp_taskdata_t block and a kmp_task_t block.
+ KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
+ task_size));
+#if USE_FAST_MEMORY
+ taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
+#else
+ taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
+#endif /* USE_FAST_MEMORY */
+ KMP_MEMCPY(taskdata, taskdata_src, task_size);
+
+ task = KMP_TASKDATA_TO_TASK(taskdata);
+
+ // Initialize new task (only specific fields not affected by memcpy)
+ taskdata->td_task_id = KMP_GEN_TASK_ID();
+ if (task->shareds != NULL) { // need setup shareds pointer
+ shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
+ task->shareds = &((char *)taskdata)[shareds_offset];
+ KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
+ 0);
+ }
+ taskdata->td_alloc_thread = thread;
+ taskdata->td_taskgroup =
+ parent_task
+ ->td_taskgroup; // task inherits the taskgroup from the parent task
+
+ // Only need to keep track of child task counts if team parallel and tasking
+ // not serialized
+ if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+ KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks));
+ if (parent_task->td_taskgroup)
+ KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
+ // Only need to keep track of allocated child tasks for explicit tasks since
+ // implicit not deallocated
+ if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
+ KMP_TEST_THEN_INC32(
+ (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks));
+ }
+
+ KA_TRACE(20,
+ ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
+ thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
- __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
+ __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
+ (void *)task->routine);
#endif
- return task;
+ return task;
}
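Because __kmp_task_dup_alloc copies the whole task block with KMP_MEMCPY, the shareds pointer in the copy still refers into the source block and has to be re-based by applying its byte offset to the new allocation. A small illustration of that offset-based fix-up with plain buffers (hypothetical layout, none of the kmp types):

#include <cstddef>
#include <cstring>

struct Block {
  char payload[64];
  char *interior; // points somewhere inside payload
};

Block *clone_block(Block *src) {
  Block *dst = new Block();
  std::memcpy(dst, src, sizeof(Block));
  if (src->interior != nullptr) {
    // The copied pointer still refers into src; re-apply its byte offset
    // relative to the new block, as the shareds fix-up above does.
    std::ptrdiff_t offset = src->interior - src->payload;
    dst->interior = dst->payload + offset;
  }
  return dst;
}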
// Routine optionally generated by the compiler for setting the lastprivate flag
// and calling needed constructors for private/firstprivate objects
// (used to form taskloop tasks from pattern task)
-typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
+typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
-//---------------------------------------------------------------------------------
// __kmp_taskloop_linear: Start tasks of the taskloop linearly
//
// loc Source location information
@@ -3212,114 +3292,120 @@ typedef void(*p_task_dup_t)(kmp_task_t *
// sched Schedule specified 0/1/2 for none/grainsize/num_tasks
// grainsize Schedule value if specified
// task_dup Tasks duplication routine
-void
-__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
- kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
- int sched, kmp_uint64 grainsize, void *task_dup )
-{
- KMP_COUNT_BLOCK(OMP_TASKLOOP);
- KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
- p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
- kmp_uint64 tc;
- kmp_uint64 lower = *lb; // compiler provides global bounds here
- kmp_uint64 upper = *ub;
- kmp_uint64 i, num_tasks = 0, extras = 0;
- kmp_info_t *thread = __kmp_threads[gtid];
- kmp_taskdata_t *current_task = thread->th.th_current_task;
- kmp_task_t *next_task;
- kmp_int32 lastpriv = 0;
- size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
- size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
-
- // compute trip count
- if ( st == 1 ) { // most common case
- tc = upper - lower + 1;
- } else if ( st < 0 ) {
- tc = (lower - upper) / (-st) + 1;
- } else { // st > 0
- tc = (upper - lower) / st + 1;
- }
- if(tc == 0) {
- KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
- // free the pattern task and exit
- __kmp_task_start( gtid, task, current_task );
- // do not execute anything for zero-trip loop
- __kmp_task_finish( gtid, task, current_task );
- return;
- }
-
- // compute num_tasks/grainsize based on the input provided
- switch( sched ) {
- case 0: // no schedule clause specified, we can choose the default
- // let's try to schedule (team_size*10) tasks
- grainsize = thread->th.th_team_nproc * 10;
- case 2: // num_tasks provided
- if( grainsize > tc ) {
- num_tasks = tc; // too big num_tasks requested, adjust values
- grainsize = 1;
- extras = 0;
- } else {
- num_tasks = grainsize;
- grainsize = tc / num_tasks;
- extras = tc % num_tasks;
- }
- break;
- case 1: // grainsize provided
- if( grainsize > tc ) {
- num_tasks = 1; // too big grainsize requested, adjust values
- grainsize = tc;
- extras = 0;
- } else {
- num_tasks = tc / grainsize;
- grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
- extras = tc % num_tasks;
- }
- break;
- default:
- KMP_ASSERT2(0, "unknown scheduling of taskloop");
+void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+ int sched, kmp_uint64 grainsize, void *task_dup) {
+ KMP_COUNT_BLOCK(OMP_TASKLOOP);
+ KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
+ p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
+ kmp_uint64 tc;
+ kmp_uint64 lower = *lb; // compiler provides global bounds here
+ kmp_uint64 upper = *ub;
+ kmp_uint64 i, num_tasks = 0, extras = 0;
+ kmp_info_t *thread = __kmp_threads[gtid];
+ kmp_taskdata_t *current_task = thread->th.th_current_task;
+ kmp_task_t *next_task;
+ kmp_int32 lastpriv = 0;
+ size_t lower_offset =
+ (char *)lb - (char *)task; // remember offset of lb in the task structure
+ size_t upper_offset =
+ (char *)ub - (char *)task; // remember offset of ub in the task structure
+
+ // compute trip count
+ if (st == 1) { // most common case
+ tc = upper - lower + 1;
+ } else if (st < 0) {
+ tc = (lower - upper) / (-st) + 1;
+ } else { // st > 0
+ tc = (upper - lower) / st + 1;
+ }
+ if (tc == 0) {
+ KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
+ // free the pattern task and exit
+ __kmp_task_start(gtid, task, current_task);
+ // do not execute anything for zero-trip loop
+ __kmp_task_finish(gtid, task, current_task);
+ return;
+ }
+
+ // compute num_tasks/grainsize based on the input provided
+ switch (sched) {
+ case 0: // no schedule clause specified, we can choose the default
+ // let's try to schedule (team_size*10) tasks
+ grainsize = thread->th.th_team_nproc * 10;
+ case 2: // num_tasks provided
+ if (grainsize > tc) {
+ num_tasks = tc; // too big num_tasks requested, adjust values
+ grainsize = 1;
+ extras = 0;
+ } else {
+ num_tasks = grainsize;
+ grainsize = tc / num_tasks;
+ extras = tc % num_tasks;
+ }
+ break;
+ case 1: // grainsize provided
+ if (grainsize > tc) {
+ num_tasks = 1; // too big grainsize requested, adjust values
+ grainsize = tc;
+ extras = 0;
+ } else {
+ num_tasks = tc / grainsize;
+ grainsize =
+ tc /
+ num_tasks; // adjust grainsize for balanced distribution of iterations
+ extras = tc % num_tasks;
+ }
+ break;
+ default:
+ KMP_ASSERT2(0, "unknown scheduling of taskloop");
+ }
+ KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+ KMP_DEBUG_ASSERT(num_tasks > extras);
+ KMP_DEBUG_ASSERT(num_tasks > 0);
+ KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize "
+ "%lld, extras %lld\n",
+ gtid, num_tasks, grainsize, extras));
+
+ // Main loop, launch num_tasks tasks, assign grainsize iterations each task
+ for (i = 0; i < num_tasks; ++i) {
+ kmp_uint64 chunk_minus_1;
+ if (extras == 0) {
+ chunk_minus_1 = grainsize - 1;
+ } else {
+ chunk_minus_1 = grainsize;
+ --extras; // first extras iterations get bigger chunk (grainsize+1)
}
- KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
- KMP_DEBUG_ASSERT(num_tasks > extras);
- KMP_DEBUG_ASSERT(num_tasks > 0);
- KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
- gtid, num_tasks, grainsize, extras));
-
- // Main loop, launch num_tasks tasks, assign grainsize iterations each task
- for( i = 0; i < num_tasks; ++i ) {
- kmp_uint64 chunk_minus_1;
- if( extras == 0 ) {
- chunk_minus_1 = grainsize - 1;
- } else {
- chunk_minus_1 = grainsize;
- --extras; // first extras iterations get bigger chunk (grainsize+1)
- }
- upper = lower + st * chunk_minus_1;
- if( i == num_tasks - 1 ) {
- // schedule the last task, set lastprivate flag
- lastpriv = 1;
+ upper = lower + st * chunk_minus_1;
+ if (i == num_tasks - 1) {
+ // schedule the last task, set lastprivate flag
+ lastpriv = 1;
#if KMP_DEBUG
- if( st == 1 )
- KMP_DEBUG_ASSERT(upper == *ub);
- else if( st > 0 )
- KMP_DEBUG_ASSERT(upper+st > *ub);
- else
- KMP_DEBUG_ASSERT(upper+st < *ub);
-#endif
- }
- next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
- *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
- *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
- if( ptask_dup != NULL )
- ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct fistprivates, etc.
- KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
- gtid, next_task, lower, upper, lower_offset, upper_offset));
- __kmp_omp_task(gtid, next_task, true); // schedule new task
- lower = upper + st; // adjust lower bound for the next iteration
- }
- // free the pattern task and exit
- __kmp_task_start( gtid, task, current_task );
- // do not execute the pattern task, just do bookkeeping
- __kmp_task_finish( gtid, task, current_task );
+ if (st == 1)
+ KMP_DEBUG_ASSERT(upper == *ub);
+ else if (st > 0)
+ KMP_DEBUG_ASSERT(upper + st > *ub);
+ else
+ KMP_DEBUG_ASSERT(upper + st < *ub);
+#endif
+ }
+ next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
+ *(kmp_uint64 *)((char *)next_task + lower_offset) =
+ lower; // adjust task-specific bounds
+ *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
+ if (ptask_dup != NULL)
+ ptask_dup(next_task, task,
+                lastpriv); // set lastprivate flag, construct firstprivates, etc.
+ KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper "
+ "%lld (offsets %p %p)\n",
+ gtid, next_task, lower, upper, lower_offset, upper_offset));
+ __kmp_omp_task(gtid, next_task, true); // schedule new task
+ lower = upper + st; // adjust lower bound for the next iteration
+ }
+ // free the pattern task and exit
+ __kmp_task_start(gtid, task, current_task);
+ // do not execute the pattern task, just do bookkeeping
+ __kmp_task_finish(gtid, task, current_task);
}
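Every case of the scheduling switch above reduces to a (num_tasks, grainsize, extras) triple satisfying tc == num_tasks * grainsize + extras, after which the first extras tasks each take one additional iteration. A small re-derivation of the grainsize case with the same integer arithmetic (assumed semantics, simplified types), plus a worked example:

#include <cassert>
#include <cstdint>

struct Chunking {
  std::uint64_t num_tasks, grainsize, extras;
};

// grainsize-style split: aim for chunks of roughly 'grain' iterations,
// then rebalance so that tc == num_tasks * grainsize + extras holds.
Chunking split_by_grainsize(std::uint64_t tc, std::uint64_t grain) {
  Chunking c;
  if (grain > tc) {
    c = {1, tc, 0}; // grainsize exceeds the trip count: a single task
  } else {
    c.num_tasks = tc / grain;
    c.grainsize = tc / c.num_tasks; // rebalanced chunk size
    c.extras = tc % c.num_tasks;
  }
  assert(tc == c.num_tasks * c.grainsize + c.extras);
  return c;
}

// Worked example: tc = 100 iterations with grainsize(7)
//   num_tasks = 100 / 7  = 14
//   grainsize = 100 / 14 = 7
//   extras    = 100 % 14 = 2   -> 2 tasks run 8 iterations, 12 run 7.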
/*!
@@ -3338,34 +3424,34 @@ __kmp_taskloop_linear(ident_t *loc, int
Execute the taskloop construct.
*/
-void
-__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
- kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
- int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
-{
- kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
- KMP_DEBUG_ASSERT( task != NULL );
-
- KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
- gtid, taskdata, *lb, *ub, st, grainsize, sched));
-
- // check if clause value first
- if( if_val == 0 ) { // if(0) specified, mark task as serial
- taskdata->td_flags.task_serial = 1;
- taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
- }
- if( nogroup == 0 ) {
- __kmpc_taskgroup( loc, gtid );
- }
-
- if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
- __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
- }
-
- if( nogroup == 0 ) {
- __kmpc_end_taskgroup( loc, gtid );
- }
- KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+ kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+ int sched, kmp_uint64 grainsize, void *task_dup) {
+ kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+ KMP_DEBUG_ASSERT(task != NULL);
+
+ KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub "
+ "%lld st %lld, grain %llu(%d)\n",
+ gtid, taskdata, *lb, *ub, st, grainsize, sched));
+
+ // check if clause value first
+ if (if_val == 0) { // if(0) specified, mark task as serial
+ taskdata->td_flags.task_serial = 1;
+ taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
+ }
+ if (nogroup == 0) {
+ __kmpc_taskgroup(loc, gtid);
+ }
+
+ if (1 /* AC: use some heuristic here to choose task scheduling method */) {
+ __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, sched, grainsize,
+ task_dup);
+ }
+
+ if (nogroup == 0) {
+ __kmpc_end_taskgroup(loc, gtid);
+ }
+ KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
#endif
Modified: openmp/trunk/runtime/src/kmp_taskq.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_taskq.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_taskq.cpp (original)
+++ openmp/trunk/runtime/src/kmp_taskq.cpp Fri May 12 13:01:32 2017
@@ -14,762 +14,748 @@
#include "kmp.h"
+#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
-#include "kmp_error.h"
#define MAX_MESSAGE 512
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Taskq routines and global variables
- */
+/* Taskq routines and global variables */
-#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);
+#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);
#define THREAD_ALLOC_FOR_TASKQ
-static int
-in_parallel_context( kmp_team_t *team )
-{
- return ! team -> t.t_serialized;
-}
-
-static void
-__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- int gtid = *gtid_ref;
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_uint32 my_token;
- kmpc_task_queue_t *taskq;
- kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
+static int in_parallel_context(kmp_team_t *team) {
+ return !team->t.t_serialized;
+}
+
+static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
+ int gtid = *gtid_ref;
+ int tid = __kmp_tid_from_gtid(gtid);
+ kmp_uint32 my_token;
+ kmpc_task_queue_t *taskq;
+ kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;
- if ( __kmp_env_consistency_check )
+ if (__kmp_env_consistency_check)
#if KMP_USE_DYNAMIC_LOCK
- __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 );
+ __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0);
#else
- __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
+ __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL);
#endif
- if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
- KMP_MB(); /* Flush all pending memory write invalidates. */
+ if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
+ KMP_MB(); /* Flush all pending memory write invalidates. */
- /* GEH - need check here under stats to make sure */
- /* inside task (curr_thunk[*tid_ref] != NULL) */
+ /* GEH - need check here under stats to make sure */
+ /* inside task (curr_thunk[*tid_ref] != NULL) */
- my_token =tq->tq_curr_thunk[ tid ]-> th_tasknum;
+ my_token = tq->tq_curr_thunk[tid]->th_tasknum;
- taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;
+ taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
- KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
- KMP_MB();
- }
+ KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
+ KMP_MB();
+ }
}
-static void
-__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
- int gtid = *gtid_ref;
- int tid = __kmp_tid_from_gtid( gtid );
- kmp_uint32 my_token;
- kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
+static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
+ int gtid = *gtid_ref;
+ int tid = __kmp_tid_from_gtid(gtid);
+ kmp_uint32 my_token;
+ kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;
- if ( __kmp_env_consistency_check )
- __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );
+ if (__kmp_env_consistency_check)
+ __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref);
- if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
- KMP_MB(); /* Flush all pending memory write invalidates. */
+ if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
+ KMP_MB(); /* Flush all pending memory write invalidates. */
- /* GEH - need check here under stats to make sure */
- /* inside task (curr_thunk[tid] != NULL) */
+ /* GEH - need check here under stats to make sure */
+ /* inside task (curr_thunk[tid] != NULL) */
- my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;
+ my_token = tq->tq_curr_thunk[tid]->th_tasknum;
- KMP_MB(); /* Flush all pending memory write invalidates. */
+ KMP_MB(); /* Flush all pending memory write invalidates. */
- tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
+ tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving =
+ my_token + 1;
- KMP_MB(); /* Flush all pending memory write invalidates. */
- }
+ KMP_MB(); /* Flush all pending memory write invalidates. */
+ }
}
-static void
-__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
-{
- kmp_uint32 my_token;
- kmpc_task_queue_t *taskq;
+static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
+ kmp_uint32 my_token;
+ kmpc_task_queue_t *taskq;
- /* assume we are always called from an active parallel context */
+ /* assume we are always called from an active parallel context */
- KMP_MB(); /* Flush all pending memory write invalidates. */
+ KMP_MB(); /* Flush all pending memory write invalidates. */
- my_token = thunk -> th_tasknum;
+ my_token = thunk->th_tasknum;
- taskq = thunk -> th.th_shareds -> sv_queue;
+ taskq = thunk->th.th_shareds->sv_queue;
- if(taskq->tq_tasknum_serving <= my_token) {
- KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
- KMP_MB();
- taskq->tq_tasknum_serving = my_token +1;
- KMP_MB();
- }
+ if (taskq->tq_tasknum_serving <= my_token) {
+ KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
+ KMP_MB();
+ taskq->tq_tasknum_serving = my_token + 1;
+ KMP_MB();
+ }
}
#ifdef KMP_DEBUG
-static void
-__kmp_dump_TQF(kmp_int32 flags)
-{
- if (flags & TQF_IS_ORDERED)
- __kmp_printf("ORDERED ");
- if (flags & TQF_IS_LASTPRIVATE)
- __kmp_printf("LAST_PRIV ");
- if (flags & TQF_IS_NOWAIT)
- __kmp_printf("NOWAIT ");
- if (flags & TQF_HEURISTICS)
- __kmp_printf("HEURIST ");
- if (flags & TQF_INTERFACE_RESERVED1)
- __kmp_printf("RESERV1 ");
- if (flags & TQF_INTERFACE_RESERVED2)
- __kmp_printf("RESERV2 ");
- if (flags & TQF_INTERFACE_RESERVED3)
- __kmp_printf("RESERV3 ");
- if (flags & TQF_INTERFACE_RESERVED4)
- __kmp_printf("RESERV4 ");
- if (flags & TQF_IS_LAST_TASK)
- __kmp_printf("LAST_TASK ");
- if (flags & TQF_TASKQ_TASK)
- __kmp_printf("TASKQ_TASK ");
- if (flags & TQF_RELEASE_WORKERS)
- __kmp_printf("RELEASE ");
- if (flags & TQF_ALL_TASKS_QUEUED)
- __kmp_printf("ALL_QUEUED ");
- if (flags & TQF_PARALLEL_CONTEXT)
- __kmp_printf("PARALLEL ");
- if (flags & TQF_DEALLOCATED)
- __kmp_printf("DEALLOC ");
- if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
- __kmp_printf("(NONE)");
-}
-
-static void
-__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
-{
- int i;
- int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
+static void __kmp_dump_TQF(kmp_int32 flags) {
+ if (flags & TQF_IS_ORDERED)
+ __kmp_printf("ORDERED ");
+ if (flags & TQF_IS_LASTPRIVATE)
+ __kmp_printf("LAST_PRIV ");
+ if (flags & TQF_IS_NOWAIT)
+ __kmp_printf("NOWAIT ");
+ if (flags & TQF_HEURISTICS)
+ __kmp_printf("HEURIST ");
+ if (flags & TQF_INTERFACE_RESERVED1)
+ __kmp_printf("RESERV1 ");
+ if (flags & TQF_INTERFACE_RESERVED2)
+ __kmp_printf("RESERV2 ");
+ if (flags & TQF_INTERFACE_RESERVED3)
+ __kmp_printf("RESERV3 ");
+ if (flags & TQF_INTERFACE_RESERVED4)
+ __kmp_printf("RESERV4 ");
+ if (flags & TQF_IS_LAST_TASK)
+ __kmp_printf("LAST_TASK ");
+ if (flags & TQF_TASKQ_TASK)
+ __kmp_printf("TASKQ_TASK ");
+ if (flags & TQF_RELEASE_WORKERS)
+ __kmp_printf("RELEASE ");
+ if (flags & TQF_ALL_TASKS_QUEUED)
+ __kmp_printf("ALL_QUEUED ");
+ if (flags & TQF_PARALLEL_CONTEXT)
+ __kmp_printf("PARALLEL ");
+ if (flags & TQF_DEALLOCATED)
+ __kmp_printf("DEALLOC ");
+ if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS)))
+ __kmp_printf("(NONE)");
+}
+
+static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk,
+ kmp_int32 global_tid) {
+ int i;
+ int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
- __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);
+ __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid);
- if (thunk != NULL) {
- for (i = 0; i < nproc; i++) {
- if( tq->tq_curr_thunk[i] == thunk ) {
- __kmp_printf("[%i] ", i);
- }
- }
- __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
- __kmp_printf("th_task=%p, ", thunk->th_task);
- __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
- __kmp_printf("th_status=%d, ", thunk->th_status);
- __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
- __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);
+ if (thunk != NULL) {
+ for (i = 0; i < nproc; i++) {
+ if (tq->tq_curr_thunk[i] == thunk) {
+ __kmp_printf("[%i] ", i);
+ }
}
+ __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
+ __kmp_printf("th_task=%p, ", thunk->th_task);
+ __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
+ __kmp_printf("th_status=%d, ", thunk->th_status);
+ __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
+ __kmp_printf("th_flags=");
+ __kmp_dump_TQF(thunk->th_flags);
+ }
- __kmp_printf("\n");
+ __kmp_printf("\n");
}
-static void
-__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
-{
- kmpc_thunk_t *th;
+static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) {
+ kmpc_thunk_t *th;
- __kmp_printf(" Thunk stack for T#%d: ", thread_num);
+ __kmp_printf(" Thunk stack for T#%d: ", thread_num);
- for (th = thunk; th != NULL; th = th->th_encl_thunk )
- __kmp_printf("%p ", th);
+ for (th = thunk; th != NULL; th = th->th_encl_thunk)
+ __kmp_printf("%p ", th);
- __kmp_printf("\n");
+ __kmp_printf("\n");
}
-static void
-__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
-{
- int qs, count, i;
- kmpc_thunk_t *thunk;
- kmpc_task_queue_t *taskq;
+static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue,
+ kmp_int32 global_tid) {
+ int qs, count, i;
+ kmpc_thunk_t *thunk;
+ kmpc_task_queue_t *taskq;
- __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
+ __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
- if (queue != NULL) {
- int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
+ if (queue != NULL) {
+ int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
- if ( __kmp_env_consistency_check ) {
- __kmp_printf(" tq_loc : ");
+ if (__kmp_env_consistency_check) {
+ __kmp_printf(" tq_loc : ");
}
- if (in_parallel) {
+ if (in_parallel) {
- //if (queue->tq.tq_parent != 0)
- //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ // if (queue->tq.tq_parent != 0)
+ //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
+ //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
- __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
- __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
- __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
- __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);
+ __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent);
+ __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child);
+ __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child);
+ __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child);
+ __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count);
- //__kmp_release_lock(& queue->tq_link_lck, global_tid);
+ //__kmp_release_lock(& queue->tq_link_lck, global_tid);
- //if (queue->tq.tq_parent != 0)
- //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ // if (queue->tq.tq_parent != 0)
+ //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
- //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+ //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
+ //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+ }
- __kmp_printf(" tq_shareds : ");
- for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
- __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
- __kmp_printf("\n");
+ __kmp_printf(" tq_shareds : ");
+ for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
+ __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
+ __kmp_printf("\n");
- if (in_parallel) {
- __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
- __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
- }
+ if (in_parallel) {
+ __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
+ __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
+ }
- __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
- __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
- __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);
-
- __kmp_printf(" tq_free_thunks : ");
- for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
- __kmp_printf("%p ", thunk);
- __kmp_printf("\n");
+ __kmp_printf(" tq_queue : %p\n", queue->tq_queue);
+ __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space);
+ __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot);
+
+ __kmp_printf(" tq_free_thunks : ");
+ for (thunk = queue->tq_free_thunks; thunk != NULL;
+ thunk = thunk->th.th_next_free)
+ __kmp_printf("%p ", thunk);
+ __kmp_printf("\n");
- __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
- __kmp_printf(" tq_head : %d\n", queue->tq_head);
- __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
- __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
- __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
- __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags);
- __kmp_printf("\n");
+ __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots);
+ __kmp_printf(" tq_head : %d\n", queue->tq_head);
+ __kmp_printf(" tq_tail : %d\n", queue->tq_tail);
+ __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull);
+ __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat);
+ __kmp_printf(" tq_flags : ");
+ __kmp_dump_TQF(queue->tq_flags);
+ __kmp_printf("\n");
- if (in_parallel) {
- __kmp_printf(" tq_th_thunks : ");
- for (i = 0; i < queue->tq_nproc; i++) {
- __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
- }
- __kmp_printf("\n");
- }
+ if (in_parallel) {
+ __kmp_printf(" tq_th_thunks : ");
+ for (i = 0; i < queue->tq_nproc; i++) {
+ __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
+ }
+ __kmp_printf("\n");
+ }
- __kmp_printf("\n");
- __kmp_printf(" Queue slots:\n");
+ __kmp_printf("\n");
+ __kmp_printf(" Queue slots:\n");
+ qs = queue->tq_tail;
+ for (count = 0; count < queue->tq_nfull; ++count) {
+ __kmp_printf("(%d)", qs);
+ __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid);
+ qs = (qs + 1) % queue->tq_nslots;
+ }
- qs = queue->tq_tail;
- for ( count = 0; count < queue->tq_nfull; ++count ) {
- __kmp_printf("(%d)", qs);
- __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
- qs = (qs+1) % queue->tq_nslots;
- }
+ __kmp_printf("\n");
+ if (in_parallel) {
+ if (queue->tq_taskq_slot != NULL) {
+ __kmp_printf(" TaskQ slot:\n");
+ __kmp_dump_thunk(tq, (kmpc_thunk_t *)queue->tq_taskq_slot, global_tid);
__kmp_printf("\n");
-
- if (in_parallel) {
- if (queue->tq_taskq_slot != NULL) {
- __kmp_printf(" TaskQ slot:\n");
- __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
- __kmp_printf("\n");
- }
- //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
- //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
- }
+ }
+ //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
}
+ }
- __kmp_printf(" Taskq freelist: ");
+ __kmp_printf(" Taskq freelist: ");
- //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
+ //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
- __kmp_printf("%p ", taskq);
+ for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free)
+ __kmp_printf("%p ", taskq);
- //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+ //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
- __kmp_printf("\n\n");
+ __kmp_printf("\n\n");
}
-static void
-__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
-{
- int i, count, qs;
- int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
- kmpc_task_queue_t *queue = curr_queue;
+static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq,
+ kmpc_task_queue_t *curr_queue,
+ kmp_int32 level,
+ kmp_int32 global_tid) {
+ int i, count, qs;
+ int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
+ kmpc_task_queue_t *queue = curr_queue;
- if (curr_queue == NULL)
- return;
+ if (curr_queue == NULL)
+ return;
- __kmp_printf(" ");
+ __kmp_printf(" ");
- for (i=0; i<level; i++)
- __kmp_printf(" ");
+ for (i = 0; i < level; i++)
+ __kmp_printf(" ");
- __kmp_printf("%p", curr_queue);
+ __kmp_printf("%p", curr_queue);
- for (i = 0; i < nproc; i++) {
- if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
- __kmp_printf(" [%i]", i);
- }
+ for (i = 0; i < nproc; i++) {
+ if (tq->tq_curr_thunk[i] &&
+ tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) {
+ __kmp_printf(" [%i]", i);
}
+ }
- __kmp_printf(":");
+ __kmp_printf(":");
- //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
+ //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- qs = curr_queue->tq_tail;
+ qs = curr_queue->tq_tail;
- for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
- __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
- qs = (qs+1) % curr_queue->tq_nslots;
- }
+ for (count = 0; count < curr_queue->tq_nfull; ++count) {
+ __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
+ qs = (qs + 1) % curr_queue->tq_nslots;
+ }
- //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
+ //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
- __kmp_printf("\n");
-
- if (curr_queue->tq_first_child) {
- //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+ __kmp_printf("\n");
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ if (curr_queue->tq_first_child) {
+ //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
- if (curr_queue->tq_first_child) {
- for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
- queue != NULL;
- queue = queue->tq_next_child) {
- __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
- }
- }
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ if (curr_queue->tq_first_child) {
+ for (queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+ queue != NULL; queue = queue->tq_next_child) {
+ __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid);
+ }
}
+
+ //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ }
}
-static void
-__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
-{
- __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
+static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq,
+ kmpc_task_queue_t *tqroot,
+ kmp_int32 global_tid) {
+ __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
- __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
+ __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid);
- __kmp_printf("\n");
+ __kmp_printf("\n");
}
#endif
-/* --------------------------------------------------------------------------- */
-
-/*
- New taskq storage routines that try to minimize overhead of mallocs but
- still provide cache line alignment.
-*/
-
+/* New taskq storage routines that try to minimize overhead of mallocs but
+ still provide cache line alignment. */
+static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) {
+ void *addr, *orig_addr;
+ size_t bytes;
-static void *
-__kmp_taskq_allocate(size_t size, kmp_int32 global_tid)
-{
- void *addr, *orig_addr;
- size_t bytes;
+ KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,
+ global_tid));
- KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );
-
- bytes = sizeof(void *) + CACHE_LINE + size;
+ bytes = sizeof(void *) + CACHE_LINE + size;
#ifdef THREAD_ALLOC_FOR_TASKQ
- orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
+ orig_addr =
+ (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes);
#else
- KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
- orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
+ KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes));
+ orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes);
#endif /* THREAD_ALLOC_FOR_TASKQ */
- if (orig_addr == 0)
- KMP_FATAL( OutOfHeapMemory );
+ if (orig_addr == 0)
+ KMP_FATAL(OutOfHeapMemory);
- addr = orig_addr;
+ addr = orig_addr;
- if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
- KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
- addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
- }
+ if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) {
+ KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n"));
+ addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1));
+ }
- (* (void **) addr) = orig_addr;
+ (*(void **)addr) = orig_addr;
- KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
- orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
- (int) size, global_tid ));
+ KB_TRACE(10,
+ ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, "
+ "gtid: %d\n",
+ orig_addr, ((void **)addr) + 1,
+ ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid));
- return ( ((void **) addr) + 1 );
+ return (((void **)addr) + 1);
}
-static void
-__kmpc_taskq_free(void *p, kmp_int32 global_tid)
-{
- KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );
+static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) {
+ KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid));
- KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));
+ KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n",
+ (*(((void **)p) - 1)), global_tid));
#ifdef THREAD_ALLOC_FOR_TASKQ
- __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
+ __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1));
#else
- KMP_INTERNAL_FREE( *( ((void **) p)-1) );
+ KMP_INTERNAL_FREE(*(((void **)p) - 1));
#endif /* THREAD_ALLOC_FOR_TASKQ */
}
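
For reference, the alignment trick used by __kmp_taskq_allocate and
__kmpc_taskq_free is the classic "over-allocate and stash the original
pointer" pattern: allocate a cache line plus one pointer of slack, round the
address up to a cache-line boundary, store the pointer malloc really returned
just in front of the block handed to the caller, and recover it on free. A
small self-contained sketch (CACHE_LINE_SZ and the function names here are
illustrative, not the runtime's API):

#include <stdint.h>
#include <stdlib.h>

#define CACHE_LINE_SZ 64

static void *aligned_alloc_with_header(size_t size) {
  void *orig = malloc(sizeof(void *) + CACHE_LINE_SZ + size);
  if (orig == NULL)
    return NULL; /* the runtime calls KMP_FATAL(OutOfHeapMemory) instead */
  uintptr_t p = (uintptr_t)orig;
  if (p & (CACHE_LINE_SZ - 1)) /* round up to the next cache-line boundary */
    p = (p + CACHE_LINE_SZ) & ~(uintptr_t)(CACHE_LINE_SZ - 1);
  ((void **)p)[0] = orig; /* stash the pointer malloc actually returned */
  return (void **)p + 1;  /* caller's block starts one pointer past the stash */
}

static void aligned_free_with_header(void *user) {
  free(((void **)user)[-1]); /* free via the stashed original pointer */
}
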
-/* --------------------------------------------------------------------------- */
-
-/*
- * Keep freed kmpc_task_queue_t on an internal freelist and recycle since
- * they're of constant size.
- */
+/* Keep freed kmpc_task_queue_t on an internal freelist and recycle since
+ they're of constant size. */
static kmpc_task_queue_t *
-__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
- kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
- size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
-{
- kmp_int32 i;
- size_t bytes;
- kmpc_task_queue_t *new_queue;
- kmpc_aligned_shared_vars_t *shared_var_array;
- char *shared_var_storage;
- char *pt; /* for doing byte-adjusted address computations */
-
- __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- if( tq->tq_freelist ) {
- new_queue = tq -> tq_freelist;
- tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;
-
- KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
-
- new_queue->tq_flags = 0;
-
- __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
- }
- else {
- __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
-
- new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
- new_queue->tq_flags = 0;
- }
-
- /* space in the task queue for queue slots (allocate as one big chunk */
- /* of storage including new_taskq_task space) */
-
- sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
- pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
- new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
- *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
-
- /* chain the allocated thunks into a freelist for this queue */
-
- new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
-
- for (i = 0; i < (nthunks - 2); i++) {
- ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
+__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots,
+ kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc,
+ size_t sizeof_thunk, size_t sizeof_shareds,
+ kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) {
+ kmp_int32 i;
+ size_t bytes;
+ kmpc_task_queue_t *new_queue;
+ kmpc_aligned_shared_vars_t *shared_var_array;
+ char *shared_var_storage;
+ char *pt; /* for doing byte-adjusted address computations */
+
+ __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
+
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+
+ if (tq->tq_freelist) {
+ new_queue = tq->tq_freelist;
+ tq->tq_freelist = tq->tq_freelist->tq.tq_next_free;
+
+ KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
+
+ new_queue->tq_flags = 0;
+
+ __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
+ } else {
+ __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
+
+ new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate(
+ sizeof(kmpc_task_queue_t), global_tid);
+ new_queue->tq_flags = 0;
+ }
+
+ /* space in the task queue for queue slots (allocate as one big chunk */
+ /* of storage including new_taskq_task space) */
+
+ sizeof_thunk +=
+ (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
+ pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid);
+ new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
+ *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
+
+ /* chain the allocated thunks into a freelist for this queue */
+
+ new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
+
+ for (i = 0; i < (nthunks - 2); i++) {
+ ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free =
+ (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk);
#ifdef KMP_DEBUG
- ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
+ ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED;
#endif
- }
+ }
- ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
+ ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL;
#ifdef KMP_DEBUG
- ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
+ ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags =
+ TQF_DEALLOCATED;
#endif
- /* initialize the locks */
+ /* initialize the locks */
- if (in_parallel) {
- __kmp_init_lock( & new_queue->tq_link_lck );
- __kmp_init_lock( & new_queue->tq_free_thunks_lck );
- __kmp_init_lock( & new_queue->tq_queue_lck );
- }
+ if (in_parallel) {
+ __kmp_init_lock(&new_queue->tq_link_lck);
+ __kmp_init_lock(&new_queue->tq_free_thunks_lck);
+ __kmp_init_lock(&new_queue->tq_queue_lck);
+ }
- /* now allocate the slots */
+ /* now allocate the slots */
- bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
- new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );
+ bytes = nslots * sizeof(kmpc_aligned_queue_slot_t);
+ new_queue->tq_queue =
+ (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid);
- /* space for array of pointers to shared variable structures */
- sizeof_shareds += sizeof(kmpc_task_queue_t *);
- sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */
+ /* space for array of pointers to shared variable structures */
+ sizeof_shareds += sizeof(kmpc_task_queue_t *);
+ sizeof_shareds +=
+ (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */
- bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
- shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);
+ bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t);
+ shared_var_array =
+ (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid);
- bytes = nshareds * sizeof_shareds;
- shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);
-
- for (i=0; i<nshareds; i++) {
- shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
- shared_var_array[i].ai_data->sv_queue = new_queue;
- }
- new_queue->tq_shareds = shared_var_array;
+ bytes = nshareds * sizeof_shareds;
+ shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid);
+ for (i = 0; i < nshareds; i++) {
+ shared_var_array[i].ai_data =
+ (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds);
+ shared_var_array[i].ai_data->sv_queue = new_queue;
+ }
+ new_queue->tq_shareds = shared_var_array;
- /* array for number of outstanding thunks per thread */
+ /* array for number of outstanding thunks per thread */
- if (in_parallel) {
- bytes = nproc * sizeof(kmpc_aligned_int32_t);
- new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
- new_queue->tq_nproc = nproc;
+ if (in_parallel) {
+ bytes = nproc * sizeof(kmpc_aligned_int32_t);
+ new_queue->tq_th_thunks =
+ (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid);
+ new_queue->tq_nproc = nproc;
- for (i=0; i<nproc; i++)
- new_queue->tq_th_thunks[i].ai_data = 0;
- }
+ for (i = 0; i < nproc; i++)
+ new_queue->tq_th_thunks[i].ai_data = 0;
+ }
- return new_queue;
+ return new_queue;
}
-static void
-__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
-{
- __kmpc_taskq_free(p->tq_thunk_space, global_tid);
- __kmpc_taskq_free(p->tq_queue, global_tid);
+static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p,
+ int in_parallel, kmp_int32 global_tid) {
+ __kmpc_taskq_free(p->tq_thunk_space, global_tid);
+ __kmpc_taskq_free(p->tq_queue, global_tid);
- /* free shared var structure storage */
- __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);
+ /* free shared var structure storage */
+ __kmpc_taskq_free((void *)p->tq_shareds[0].ai_data, global_tid);
- /* free array of pointers to shared vars storage */
- __kmpc_taskq_free(p->tq_shareds, global_tid);
+ /* free array of pointers to shared vars storage */
+ __kmpc_taskq_free(p->tq_shareds, global_tid);
#ifdef KMP_DEBUG
- p->tq_first_child = NULL;
- p->tq_next_child = NULL;
- p->tq_prev_child = NULL;
- p->tq_ref_count = -10;
- p->tq_shareds = NULL;
- p->tq_tasknum_queuing = 0;
- p->tq_tasknum_serving = 0;
- p->tq_queue = NULL;
- p->tq_thunk_space = NULL;
- p->tq_taskq_slot = NULL;
- p->tq_free_thunks = NULL;
- p->tq_nslots = 0;
- p->tq_head = 0;
- p->tq_tail = 0;
- p->tq_nfull = 0;
- p->tq_hiwat = 0;
+ p->tq_first_child = NULL;
+ p->tq_next_child = NULL;
+ p->tq_prev_child = NULL;
+ p->tq_ref_count = -10;
+ p->tq_shareds = NULL;
+ p->tq_tasknum_queuing = 0;
+ p->tq_tasknum_serving = 0;
+ p->tq_queue = NULL;
+ p->tq_thunk_space = NULL;
+ p->tq_taskq_slot = NULL;
+ p->tq_free_thunks = NULL;
+ p->tq_nslots = 0;
+ p->tq_head = 0;
+ p->tq_tail = 0;
+ p->tq_nfull = 0;
+ p->tq_hiwat = 0;
- if (in_parallel) {
- int i;
+ if (in_parallel) {
+ int i;
- for (i=0; i<p->tq_nproc; i++)
- p->tq_th_thunks[i].ai_data = 0;
- }
- if ( __kmp_env_consistency_check )
- p->tq_loc = NULL;
- KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
- p->tq_flags = TQF_DEALLOCATED;
+ for (i = 0; i < p->tq_nproc; i++)
+ p->tq_th_thunks[i].ai_data = 0;
+ }
+ if (__kmp_env_consistency_check)
+ p->tq_loc = NULL;
+ KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED);
+ p->tq_flags = TQF_DEALLOCATED;
#endif /* KMP_DEBUG */
- if (in_parallel) {
- __kmpc_taskq_free(p->tq_th_thunks, global_tid);
- __kmp_destroy_lock(& p->tq_link_lck);
- __kmp_destroy_lock(& p->tq_queue_lck);
- __kmp_destroy_lock(& p->tq_free_thunks_lck);
- }
+ if (in_parallel) {
+ __kmpc_taskq_free(p->tq_th_thunks, global_tid);
+ __kmp_destroy_lock(&p->tq_link_lck);
+ __kmp_destroy_lock(&p->tq_queue_lck);
+ __kmp_destroy_lock(&p->tq_free_thunks_lck);
+ }
#ifdef KMP_DEBUG
- p->tq_th_thunks = NULL;
+ p->tq_th_thunks = NULL;
#endif /* KMP_DEBUG */
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
- p->tq.tq_next_free = tq->tq_freelist;
+ __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
+ p->tq.tq_next_free = tq->tq_freelist;
- tq->tq_freelist = p;
- __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+ tq->tq_freelist = p;
+ __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
}
-/*
- * Once a group of thunks has been allocated for use in a particular queue,
- * these are managed via a per-queue freelist.
- * We force a check that there's always a thunk free if we need one.
- */
+/* Once a group of thunks has been allocated for use in a particular queue,
+ these are managed via a per-queue freelist.
+ We force a check that there's always a thunk free if we need one. */
-static kmpc_thunk_t *
-__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
-{
- kmpc_thunk_t *fl;
+static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue,
+ int in_parallel, kmp_int32 global_tid) {
+ kmpc_thunk_t *fl;
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
+ if (in_parallel) {
+ __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+ }
- fl = queue->tq_free_thunks;
+ fl = queue->tq_free_thunks;
- KMP_DEBUG_ASSERT (fl != NULL);
+ KMP_DEBUG_ASSERT(fl != NULL);
- queue->tq_free_thunks = fl->th.th_next_free;
- fl->th_flags = 0;
+ queue->tq_free_thunks = fl->th.th_next_free;
+ fl->th_flags = 0;
- if (in_parallel)
- __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+ if (in_parallel)
+ __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
- return fl;
+ return fl;
}
-static void
-__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
-{
+static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p,
+ int in_parallel, kmp_int32 global_tid) {
#ifdef KMP_DEBUG
- p->th_task = 0;
- p->th_encl_thunk = 0;
- p->th_status = 0;
- p->th_tasknum = 0;
- /* Also could zero pointers to private vars */
+ p->th_task = 0;
+ p->th_encl_thunk = 0;
+ p->th_status = 0;
+ p->th_tasknum = 0;
+/* Also could zero pointers to private vars */
#endif
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
+ if (in_parallel) {
+ __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+ }
- p->th.th_next_free = queue->tq_free_thunks;
- queue->tq_free_thunks = p;
+ p->th.th_next_free = queue->tq_free_thunks;
+ queue->tq_free_thunks = p;
#ifdef KMP_DEBUG
- p->th_flags = TQF_DEALLOCATED;
+ p->th_flags = TQF_DEALLOCATED;
#endif
- if (in_parallel)
- __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+ if (in_parallel)
+ __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
}
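
Both freelists reformatted above follow the same pattern: the queue freelist in
__kmp_alloc_taskq/__kmp_free_taskq and the per-queue thunk freelist in
__kmp_alloc_thunk/__kmp_free_thunk are intrusive LIFO lists protected by a
lock, so freed objects are chained through a "next free" field and handed out
again instead of going back to the allocator. A rough sketch with hypothetical
names (node_t, freelist_push/pop, a pthread mutex standing in for kmp_lock_t):

#include <pthread.h>
#include <stddef.h>

typedef struct node {
  struct node *next_free; /* intrusive link, like th.th_next_free/tq_next_free */
  /* ... payload ... */
} node_t;

static node_t *freelist;
static pthread_mutex_t freelist_lck = PTHREAD_MUTEX_INITIALIZER;

static void freelist_push(node_t *p) {
  pthread_mutex_lock(&freelist_lck);
  p->next_free = freelist; /* chain the freed object at the head */
  freelist = p;
  pthread_mutex_unlock(&freelist_lck);
}

static node_t *freelist_pop(void) {
  pthread_mutex_lock(&freelist_lck);
  node_t *p = freelist; /* may be NULL: caller then falls back to malloc */
  if (p != NULL)
    freelist = p->next_free;
  pthread_mutex_unlock(&freelist_lck);
  return p;
}
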
-/* --------------------------------------------------------------------------- */
-
/* returns nonzero if the queue just became full after the enqueue */
+static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid,
+ kmpc_task_queue_t *queue,
+ kmpc_thunk_t *thunk, int in_parallel) {
+ kmp_int32 ret;
+
+ /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the
+ * master is executing then) */
+ if (in_parallel) {
+ __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+ }
-static kmp_int32
-__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
-{
- kmp_int32 ret;
-
- /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then) */
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots); /* check queue not full */
-
- queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
-
- if (queue->tq_head >= queue->tq_nslots)
- queue->tq_head = 0;
+ KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots); // check queue not full
- (queue->tq_nfull)++;
+ queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
- KMP_MB(); /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */
+ if (queue->tq_head >= queue->tq_nslots)
+ queue->tq_head = 0;
- ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
+ (queue->tq_nfull)++;
- if (in_parallel) {
- /* don't need to wait until workers are released before unlocking */
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ KMP_MB(); /* to assure that nfull is seen to increase before
+ TQF_ALL_TASKS_QUEUED is set */
- if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
- /* If just creating the root queue, the worker threads are waiting at */
- /* a join barrier until now, when there's something in the queue for */
- /* them to do; release them now to do work. */
- /* This should only be done when this is the first task enqueued, */
- /* so reset the flag here also. */
+ ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
- tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers are still in spin mode */
+ if (in_parallel) {
+ /* don't need to wait until workers are released before unlocking */
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
- KMP_MB(); /* avoid releasing barrier twice if taskq_task switches threads */
+ if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
+ // If just creating the root queue, the worker threads are waiting at a
+ // join barrier until now, when there's something in the queue for them to
+ // do; release them now to do work. This should only be done when this is
+ // the first task enqueued, so reset the flag here also.
+ tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers
+ are still in spin mode */
+ // avoid releasing barrier twice if taskq_task switches threads
+ KMP_MB();
- __kmpc_end_barrier_master( NULL, global_tid);
- }
+ __kmpc_end_barrier_master(NULL, global_tid);
}
+ }
- return ret;
+ return ret;
}
-static kmpc_thunk_t *
-__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
-{
- kmpc_thunk_t *pt;
- int tid = __kmp_tid_from_gtid( global_tid );
+static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid,
+ kmpc_task_queue_t *queue,
+ int in_parallel) {
+ kmpc_thunk_t *pt;
+ int tid = __kmp_tid_from_gtid(global_tid);
- KMP_DEBUG_ASSERT (queue->tq_nfull > 0); /* check queue not empty */
+ KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /* check queue not empty */
- if (queue->tq.tq_parent != NULL && in_parallel) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- ct = ++(queue->tq_ref_count);
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
- }
+ if (queue->tq.tq_parent != NULL && in_parallel) {
+ int ct;
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ ct = ++(queue->tq_ref_count);
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
+ }
- pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
+ pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
- if (queue->tq_tail >= queue->tq_nslots)
- queue->tq_tail = 0;
+ if (queue->tq_tail >= queue->tq_nslots)
+ queue->tq_tail = 0;
- if (in_parallel) {
- queue->tq_th_thunks[tid].ai_data++;
+ if (in_parallel) {
+ queue->tq_th_thunks[tid].ai_data++;
- KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */
+ KMP_MB(); /* necessary so ai_data increment is propagated to other threads
+ immediately (digital) */
- KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
- global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
- }
+ KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "
+ "thunks from queue %p\n",
+ global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
+ }
- (queue->tq_nfull)--;
+ (queue->tq_nfull)--;
#ifdef KMP_DEBUG
- KMP_MB();
+ KMP_MB();
- /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */
+ /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is
+ * decremented */
- KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
+ KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
- if (in_parallel) {
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
- }
+ if (in_parallel) {
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=
+ __KMP_TASKQ_THUNKS_PER_TH);
+ }
#endif
- return pt;
+ return pt;
}
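
__kmp_enqueue_task and __kmp_dequeue_task above treat tq_queue as a fixed-size
ring buffer: enqueue writes at tq_head and wraps, dequeue reads at tq_tail and
wraps, and tq_nfull tracks occupancy (the enqueue return value reports whether
the queue just became full). A compact sketch of that core, leaving out the
locking and worker-release logic; the ring type and names are illustrative:

typedef struct ring {
  void *slot[8]; /* nslots entries; 8 in this sketch */
  int head, tail, nfull, nslots;
} ring_t;

/* Returns nonzero if the queue just became full, mirroring __kmp_enqueue_task.
   The caller is expected to have checked nfull < nslots first. */
static int ring_enqueue(ring_t *q, void *thunk) {
  q->slot[q->head++] = thunk;
  if (q->head >= q->nslots)
    q->head = 0; /* wrap */
  return ++q->nfull == q->nslots;
}

/* The caller is expected to have checked nfull > 0 first. */
static void *ring_dequeue(ring_t *q) {
  void *thunk = q->slot[q->tail++];
  if (q->tail >= q->nslots)
    q->tail = 0; /* wrap */
  q->nfull--;
  return thunk;
}
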
-/*
- * Find the next (non-null) task to dequeue and return it.
+/* Find the next (non-null) task to dequeue and return it.
* This is never called unless in_parallel=TRUE
*
* Here are the rules for deciding which queue to take the task from:
@@ -792,1241 +778,1252 @@ __kmp_dequeue_task (kmp_int32 global_tid
* TQF_IS_LASTPRIVATE).
*/
-static kmpc_thunk_t *
-__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
-{
- kmpc_thunk_t *pt = NULL;
- int tid = __kmp_tid_from_gtid( global_tid );
-
- /* To prevent deadlock from tq_queue_lck if queue already deallocated */
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- /* Check again to avoid race in __kmpc_end_taskq() */
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
- /* if there's enough room in the queue and the dispatcher */
- /* (taskq task) is available, schedule more tasks */
- pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
- queue->tq_taskq_slot = NULL;
- }
- else if (queue->tq_nfull == 0 ||
- queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
- /* do nothing if no thunks available or this thread can't */
- /* run any because it already is executing too many */
-
- pt = NULL;
- }
- else if (queue->tq_nfull > 1) {
- /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */
+static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid,
+ kmpc_task_queue_t *queue) {
+ kmpc_thunk_t *pt = NULL;
+ int tid = __kmp_tid_from_gtid(global_tid);
+
+ /* To prevent deadlock from tq_queue_lck if queue already deallocated */
+ if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+
+ __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
+
+ /* Check again to avoid race in __kmpc_end_taskq() */
+ if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+
+ if ((queue->tq_taskq_slot != NULL) &&
+ (queue->tq_nfull <= queue->tq_hiwat)) {
+ /* if there's enough room in the queue and the dispatcher */
+ /* (taskq task) is available, schedule more tasks */
+ pt = (kmpc_thunk_t *)queue->tq_taskq_slot;
+ queue->tq_taskq_slot = NULL;
+ } else if (queue->tq_nfull == 0 ||
+ queue->tq_th_thunks[tid].ai_data >=
+ __KMP_TASKQ_THUNKS_PER_TH) {
+ /* do nothing if no thunks available or this thread can't */
+ /* run any because it already is executing too many */
+ pt = NULL;
+ } else if (queue->tq_nfull > 1) {
+ /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */
+
+ pt = __kmp_dequeue_task(global_tid, queue, TRUE);
+ } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
+ // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE
+ pt = __kmp_dequeue_task(global_tid, queue, TRUE);
+ } else if (queue->tq_flags & TQF_IS_LAST_TASK) {
+ /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */
+ /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
+ /* instrumentation does copy-out. */
+ pt = __kmp_dequeue_task(global_tid, queue, TRUE);
+ pt->th_flags |=
+ TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
+ }
+ }
+
+ /* GEH - What happens here if is lastprivate, but not last task? */
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+ }
- pt = __kmp_dequeue_task (global_tid, queue, TRUE);
- }
- else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
- /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */
-
- pt = __kmp_dequeue_task (global_tid, queue, TRUE);
- }
- else if (queue->tq_flags & TQF_IS_LAST_TASK) {
- /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */
- /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
- /* instrumentation does copy-out. */
-
- pt = __kmp_dequeue_task (global_tid, queue, TRUE);
- pt->th_flags |= TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
- }
- }
-
- /* GEH - What happens here if is lastprivate, but not last task? */
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
-
- return pt;
+ return pt;
}
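
The body of __kmp_find_task_in_queue above is essentially a four-step decision
ladder: re-dispatch the parked taskq (dispatcher) task while the queue is at or
below its high-water mark, take nothing if the queue is empty or this thread is
already at its per-thread thunk quota, otherwise dequeue freely unless exactly
one lastprivate task remains, which may only run once it is known to be the
last one. A boiled-down sketch of just that ladder (the struct, field names and
THUNKS_PER_TH are stand-ins, not the runtime's types):

enum { THUNKS_PER_TH = 2 }; /* stands in for __KMP_TASKQ_THUNKS_PER_TH */

typedef struct q {
  void *taskq_slot;         /* dispatcher task, if one is parked here */
  int nfull, nslots, hiwat; /* occupancy, capacity, high-water mark */
  int is_lastpriv, is_last; /* TQF_IS_LASTPRIVATE / TQF_IS_LAST_TASK analogues */
} q_t;

/* Returns 1 if the calling thread may take a task right now, 0 otherwise. */
static int may_dispatch(const q_t *q, int my_outstanding) {
  if (q->taskq_slot != NULL && q->nfull <= q->hiwat)
    return 1; /* room below the high-water mark: run the dispatcher again */
  if (q->nfull == 0 || my_outstanding >= THUNKS_PER_TH)
    return 0; /* nothing queued, or this thread already holds its quota */
  if (q->nfull > 1 || !q->is_lastpriv)
    return 1; /* always safe to take one */
  return q->is_last; /* single lastprivate task: only once it is the last */
}
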
-/*
- * Walk a tree of queues starting at queue's first child
- * and return a non-NULL thunk if one can be scheduled.
- * Must only be called when in_parallel=TRUE
- */
+/* Walk a tree of queues starting at queue's first child and return a non-NULL
+ thunk if one can be scheduled. Must only be called when in_parallel=TRUE */
static kmpc_thunk_t *
-__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
-{
- kmpc_thunk_t *pt = NULL;
- kmpc_task_queue_t *queue = curr_queue;
-
- if (curr_queue->tq_first_child != NULL) {
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
- if (queue == NULL) {
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- return NULL;
- }
-
- while (queue != NULL) {
- int ct;
- kmpc_task_queue_t *next;
-
- ct= ++(queue->tq_ref_count);
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
-
- pt = __kmp_find_task_in_queue (global_tid, queue);
+__kmp_find_task_in_descendant_queue(kmp_int32 global_tid,
+ kmpc_task_queue_t *curr_queue) {
+ kmpc_thunk_t *pt = NULL;
+ kmpc_task_queue_t *queue = curr_queue;
+
+ if (curr_queue->tq_first_child != NULL) {
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- if (pt != NULL) {
- int ct;
+ queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+ if (queue == NULL) {
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+ return NULL;
+ }
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+ while (queue != NULL) {
+ int ct;
+ kmpc_task_queue_t *next;
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ ct = ++(queue->tq_ref_count);
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
+ pt = __kmp_find_task_in_queue(global_tid, queue);
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ if (pt != NULL) {
+ int ct;
- return pt;
- }
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- /* although reference count stays active during descendant walk, shouldn't matter */
- /* since if children still exist, reference counts aren't being monitored anyway */
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+ global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
- pt = __kmp_find_task_in_descendant_queue (global_tid, queue);
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
- if (pt != NULL) {
- int ct;
+ return pt;
+ }
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+ /* although reference count stays active during descendant walk, shouldn't
+ matter since if children still exist, reference counts aren't being
+ monitored anyway */
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ pt = __kmp_find_task_in_descendant_queue(global_tid, queue);
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
+ if (pt != NULL) {
+ int ct;
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- return pt;
- }
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+ global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(ct >= 0);
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ return pt;
+ }
- next = queue->tq_next_child;
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
+ next = queue->tq_next_child;
- queue = next;
- }
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(ct >= 0);
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+ queue = next;
}
- return pt;
-}
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+ }
-/*
- * Walk up the taskq tree looking for a task to execute.
- * If we get to the root, search the tree for a descendent queue task.
- * Must only be called when in_parallel=TRUE
- */
+ return pt;
+}
+/* Walk up the taskq tree looking for a task to execute. If we get to the root,
+ search the tree for a descendent queue task. Must only be called when
+ in_parallel=TRUE */
static kmpc_thunk_t *
-__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
-{
- kmpc_task_queue_t *queue;
- kmpc_thunk_t *pt;
-
- pt = NULL;
-
- if (curr_queue->tq.tq_parent != NULL) {
- queue = curr_queue->tq.tq_parent;
-
- while (queue != NULL) {
- if (queue->tq.tq_parent != NULL) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = ++(queue->tq_ref_count);
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
- }
+__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid,
+ kmpc_task_queue_t *curr_queue) {
+ kmpc_task_queue_t *queue;
+ kmpc_thunk_t *pt;
- pt = __kmp_find_task_in_queue (global_tid, queue);
- if (pt != NULL) {
- if (queue->tq.tq_parent != NULL) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
+ pt = NULL;
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- }
+ if (curr_queue->tq.tq_parent != NULL) {
+ queue = curr_queue->tq.tq_parent;
- return pt;
- }
+ while (queue != NULL) {
+ if (queue->tq.tq_parent != NULL) {
+ int ct;
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- if (queue->tq.tq_parent != NULL) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
- }
- queue = queue->tq.tq_parent;
+ ct = ++(queue->tq_ref_count);
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,
+ global_tid, queue, ct));
+ }
+
+ pt = __kmp_find_task_in_queue(global_tid, queue);
+ if (pt != NULL) {
+ if (queue->tq.tq_parent != NULL) {
+ int ct;
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+ global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(ct >= 0);
- if (queue != NULL)
- __kmp_release_lock(& queue->tq_link_lck, global_tid);
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
}
+ return pt;
+ }
+
+ if (queue->tq.tq_parent != NULL) {
+ int ct;
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+ global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(ct >= 0);
+ }
+ queue = queue->tq.tq_parent;
+
+ if (queue != NULL)
+ __kmp_release_lock(&queue->tq_link_lck, global_tid);
}
+ }
- pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );
+ pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root);
- return pt;
+ return pt;
}
-static int
-__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
-{
- int i;
+static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) {
+ int i;
- /* KMP_MB(); *//* is this really necessary? */
+ /* KMP_MB(); */ /* is this really necessary? */
- for (i=0; i<queue->tq_nproc; i++) {
- if (queue->tq_th_thunks[i].ai_data != 0)
- return FALSE;
- }
+ for (i = 0; i < queue->tq_nproc; i++) {
+ if (queue->tq_th_thunks[i].ai_data != 0)
+ return FALSE;
+ }
- return TRUE;
+ return TRUE;
}
-static int
-__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
-{
- return (queue->tq_first_child != NULL);
+static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) {
+ return (queue->tq_first_child != NULL);
}
-static void
-__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
-{
+static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
+ kmpc_task_queue_t *queue,
+ int in_parallel) {
#ifdef KMP_DEBUG
- kmp_int32 i;
- kmpc_thunk_t *thunk;
+ kmp_int32 i;
+ kmpc_thunk_t *thunk;
#endif
- KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
- KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));
-
- /* sub-queue in a recursion, not the root task queue */
- KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);
-
- if (in_parallel) {
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
+ KF_TRACE(50,
+ ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
+ KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid));
+
+ /* sub-queue in a recursion, not the root task queue */
+ KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL);
+
+ if (in_parallel) {
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+ }
- /* unlink queue from its siblings if any at this level */
- if (queue->tq_prev_child != NULL)
- queue->tq_prev_child->tq_next_child = queue->tq_next_child;
- if (queue->tq_next_child != NULL)
- queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
- if (queue->tq.tq_parent->tq_first_child == queue)
- queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
+ KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);
- queue->tq_prev_child = NULL;
- queue->tq_next_child = NULL;
+ /* unlink queue from its siblings if any at this level */
+ if (queue->tq_prev_child != NULL)
+ queue->tq_prev_child->tq_next_child = queue->tq_next_child;
+ if (queue->tq_next_child != NULL)
+ queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
+ if (queue->tq.tq_parent->tq_first_child == queue)
+ queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
- if (in_parallel) {
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
- __LINE__, global_tid, queue, queue->tq_ref_count));
+ queue->tq_prev_child = NULL;
+ queue->tq_next_child = NULL;
- /* wait until all other threads have stopped accessing this queue */
- while (queue->tq_ref_count > 1) {
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ if (in_parallel) {
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
+ __LINE__, global_tid, queue, queue->tq_ref_count));
- KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);
+ /* wait until all other threads have stopped accessing this queue */
+ while (queue->tq_ref_count > 1) {
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
+ NULL);
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
- }
-
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
}
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
- __LINE__, global_tid, queue));
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ }
-#ifdef KMP_DEBUG
- KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
- KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));
- for (i=0; i<queue->tq_nproc; i++) {
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
- }
+#ifdef KMP_DEBUG
+ KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
+ KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
- i = 0;
- for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
- ++i;
+ for (i = 0; i < queue->tq_nproc; i++) {
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
+ }
+
+ i = 0;
+ for (thunk = queue->tq_free_thunks; thunk != NULL;
+ thunk = thunk->th.th_next_free)
+ ++i;
- KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
+ KMP_ASSERT(i ==
+ queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
#endif
- /* release storage for queue entry */
- __kmp_free_taskq ( tq, queue, TRUE, global_tid );
+ /* release storage for queue entry */
+ __kmp_free_taskq(tq, queue, TRUE, global_tid);
- KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
- KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+ KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
+ KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
}
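
For readers following the locking protocol above: __kmp_remove_queue_from_tree() detaches a queue from its parent's child list while holding the parent's tq_link_lck, then spins until tq_ref_count drains to 1 before freeing storage. The standalone sketch below shows the same unlink-then-drain shape in portable C11/pthreads; node_t, node_unlink_and_free and every other identifier in it are invented for illustration and are not part of the OpenMP runtime.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdlib.h>

/* Simplified tree node standing in for kmpc_task_queue_t. */
typedef struct node {
  struct node *prev, *next;  /* sibling links, guarded by the parent's lock */
  struct node *parent;
  struct node *first_child;
  atomic_int ref_count;      /* 1 means only the deleting thread holds it */
} node_t;

/* Unlink n from its parent's child list, wait for all other references to
   drain, then free the storage. */
static void node_unlink_and_free(pthread_mutex_t *link_lck, node_t *n) {
  pthread_mutex_lock(link_lck);
  if (n->prev != NULL)
    n->prev->next = n->next;
  if (n->next != NULL)
    n->next->prev = n->prev;
  if (n->parent->first_child == n)
    n->parent->first_child = n->next;
  n->prev = n->next = NULL;

  /* Drop the lock while spinning so other threads can release references. */
  while (atomic_load(&n->ref_count) > 1) {
    pthread_mutex_unlock(link_lck);
    sched_yield();
    pthread_mutex_lock(link_lck);
  }
  pthread_mutex_unlock(link_lck);
  free(n);
}

int main(void) {
  pthread_mutex_t lck = PTHREAD_MUTEX_INITIALIZER;
  node_t *parent = calloc(1, sizeof(node_t));
  node_t *child = calloc(1, sizeof(node_t));
  child->parent = parent;
  parent->first_child = child;
  atomic_init(&child->ref_count, 1); /* no concurrent readers in this demo */
  node_unlink_and_free(&lck, child);
  free(parent);
  return 0;
}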
-/*
- * Starting from indicated queue, proceed downward through tree and
- * remove all taskqs which are finished, but only go down to taskqs
- * which have the "nowait" clause present. Assume this is only called
- * when in_parallel=TRUE.
- */
-
-static void
-__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
-{
- kmpc_task_queue_t *queue = curr_queue;
-
- if (curr_queue->tq_first_child != NULL) {
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
- if (queue != NULL) {
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- return;
- }
-
- while (queue != NULL) {
- kmpc_task_queue_t *next;
- int ct = ++(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
- __LINE__, global_tid, queue, ct));
-
-
- /* although reference count stays active during descendant walk, */
- /* shouldn't matter since if children still exist, reference */
- /* counts aren't being monitored anyway */
-
- if (queue->tq_flags & TQF_IS_NOWAIT) {
- __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );
-
- if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
- __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {
-
- /*
- Only remove this if we have not already marked it for deallocation.
- This should prevent multiple threads from trying to free this.
- */
-
- if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-
- __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
-
- /* Can't do any more here since can't be sure where sibling queue is so just exit this level */
- return;
- }
- else {
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
- }
- /* otherwise, just fall through and decrement reference count */
- }
- }
+/* Starting from indicated queue, proceed downward through tree and remove all
+ taskqs which are finished, but only go down to taskqs which have the "nowait"
+ clause present. Assume this is only called when in_parallel=TRUE. */
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- next = queue->tq_next_child;
-
- ct = --(queue->tq_ref_count);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
-
- queue = next;
- }
+static void __kmp_find_and_remove_finished_child_taskq(
+ kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) {
+ kmpc_task_queue_t *queue = curr_queue;
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- }
-}
-
-/*
- * Starting from indicated queue, proceed downward through tree and
- * remove all taskq's assuming all are finished and
- * assuming NO other threads are executing at this point.
- */
-
-static void
-__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
-{
- kmpc_task_queue_t *next_child;
-
- queue = (kmpc_task_queue_t *) queue->tq_first_child;
-
- while (queue != NULL) {
- __kmp_remove_all_child_taskq ( tq, global_tid, queue );
+ if (curr_queue->tq_first_child != NULL) {
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- next_child = queue->tq_next_child;
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
- queue = next_child;
+ queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+ if (queue != NULL) {
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+ return;
}
-}
-static void
-__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
-{
- kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
- kmp_int32 tid = __kmp_tid_from_gtid( global_tid );
-
- KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
- KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
- /*
- * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows:
- *
- * happens exactly once:
- * 1) __kmpc_taskq : push (if returning thunk only)
- * 4) __kmpc_end_taskq_task : pop
- *
- * optionally happens *each* time taskq task is dequeued/enqueued:
- * 2) __kmpc_taskq_task : pop
- * 3) __kmp_execute_task_from_queue : push
- *
- * execution ordering: 1,(2,3)*,4
- */
-
- if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
- kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
- thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;
-
- if ( __kmp_env_consistency_check ) {
- __kmp_push_workshare( global_tid,
- (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
- queue->tq_loc );
- }
- }
- else {
- if ( __kmp_env_consistency_check )
- __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
- }
+ while (queue != NULL) {
+ kmpc_task_queue_t *next;
+ int ct = ++(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
+
+ /* although reference count stays active during descendant walk, */
+      /* it shouldn't matter since if children still exist, reference */
+ /* counts aren't being monitored anyway */
+
+ if (queue->tq_flags & TQF_IS_NOWAIT) {
+ __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
+
+ if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) &&
+ (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) &&
+ !__kmp_taskq_has_any_children(queue)) {
+
+ /* Only remove this if we have not already marked it for deallocation.
+ This should prevent multiple threads from trying to free this. */
+
+ if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) {
+ if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+
+ __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
+
+ /* Can't do any more here since can't be sure where sibling queue
+ * is so just exit this level */
+ return;
+ } else {
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+ }
+ }
+ /* otherwise, just fall through and decrement reference count */
+ }
+ }
+
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+
+ next = queue->tq_next_child;
+
+ ct = --(queue->tq_ref_count);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(ct >= 0);
+
+ queue = next;
+ }
+
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+ }
+}
+
+/* Starting from indicated queue, proceed downward through tree and remove all
+   taskqs assuming all are finished and assuming NO other threads are executing
+ at this point. */
+static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid,
+ kmpc_task_queue_t *queue) {
+ kmpc_task_queue_t *next_child;
+
+ queue = (kmpc_task_queue_t *)queue->tq_first_child;
+
+ while (queue != NULL) {
+ __kmp_remove_all_child_taskq(tq, global_tid, queue);
+
+ next_child = queue->tq_next_child;
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE);
+ queue = next_child;
+ }
+}
+
+static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
+ kmp_int32 global_tid,
+ kmpc_thunk_t *thunk,
+ int in_parallel) {
+ kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
+ kmp_int32 tid = __kmp_tid_from_gtid(global_tid);
+
+ KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
+ KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
+ KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
+
+  /* For the taskq task, the curr_thunk push and pop pairs are set up as
+ * follows:
+ *
+ * happens exactly once:
+ * 1) __kmpc_taskq : push (if returning thunk only)
+ * 4) __kmpc_end_taskq_task : pop
+ *
+ * optionally happens *each* time taskq task is dequeued/enqueued:
+ * 2) __kmpc_taskq_task : pop
+ * 3) __kmp_execute_task_from_queue : push
+ *
+ * execution ordering: 1,(2,3)*,4
+ */
+
+ if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
+ kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
+ thunk->th.th_shareds =
+ (kmpc_shared_vars_t *)queue->tq_shareds[index].ai_data;
+
+ if (__kmp_env_consistency_check) {
+ __kmp_push_workshare(global_tid,
+ (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
+ : ct_task,
+ queue->tq_loc);
+ }
+ } else {
+ if (__kmp_env_consistency_check)
+ __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
+ }
+
+ if (in_parallel) {
+ thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
+ tq->tq_curr_thunk[tid] = thunk;
+
+ KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
+ }
+
+ KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
+ queue, global_tid));
+ thunk->th_task(global_tid, thunk);
+ KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
+ global_tid));
+
+ if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
+ if (__kmp_env_consistency_check)
+ __kmp_pop_workshare(global_tid,
+ (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
+ : ct_task,
+ queue->tq_loc);
if (in_parallel) {
- thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
- tq->tq_curr_thunk[tid] = thunk;
-
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+ tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+ thunk->th_encl_thunk = NULL;
+ KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
}
- KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
- thunk->th_task (global_tid, thunk);
- KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
-
- if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
- queue->tq_loc );
-
- if (in_parallel) {
- tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
- thunk->th_encl_thunk = NULL;
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
-
- if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
- __kmp_taskq_check_ordered(global_tid, thunk);
- }
+ if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
+ __kmp_taskq_check_ordered(global_tid, thunk);
+ }
- __kmp_free_thunk (queue, thunk, in_parallel, global_tid);
+ __kmp_free_thunk(queue, thunk, in_parallel, global_tid);
- KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+ KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
+ global_tid, thunk));
+ KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
- if (in_parallel) {
- KMP_MB(); /* needed so thunk put on free list before outstanding thunk count is decremented */
+ if (in_parallel) {
+ KMP_MB(); /* needed so thunk put on free list before outstanding thunk
+ count is decremented */
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
- KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
- global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));
+ KF_TRACE(
+ 200,
+ ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
+ global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));
- queue->tq_th_thunks[tid].ai_data--;
+ queue->tq_th_thunks[tid].ai_data--;
- /* KMP_MB(); */ /* is MB really necessary ? */
- }
+ /* KMP_MB(); */ /* is MB really necessary ? */
+ }
- if (queue->tq.tq_parent != NULL && in_parallel) {
- int ct;
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- ct = --(queue->tq_ref_count);
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
- __LINE__, global_tid, queue, ct));
- KMP_DEBUG_ASSERT( ct >= 0 );
- }
+ if (queue->tq.tq_parent != NULL && in_parallel) {
+ int ct;
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ ct = --(queue->tq_ref_count);
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ KMP_DEBUG_REF_CTS(
+ ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
+ KMP_DEBUG_ASSERT(ct >= 0);
}
+ }
}
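
The tq_curr_thunk bookkeeping in __kmp_execute_task_from_queue() is a per-thread stack threaded through the thunks themselves: pushing saves the previous top in th_encl_thunk, popping restores it. A minimal standalone sketch of that idiom follows; frame_t, push_frame and pop_frame are invented names, and the single global slot stands in for the per-thread tq_curr_thunk[tid] entry.

#include <assert.h>
#include <stddef.h>

typedef struct frame {
  struct frame *encl; /* enclosing frame, like thunk->th_encl_thunk */
  int payload;
} frame_t;

/* One "current frame" slot; the runtime keeps one such slot per thread. */
static frame_t *current_frame = NULL;

static void push_frame(frame_t *f) {
  f->encl = current_frame; /* remember what we are nested inside */
  current_frame = f;
}

static void pop_frame(frame_t *f) {
  assert(current_frame == f); /* pops must pair with pushes */
  current_frame = f->encl;
  f->encl = NULL;
}

int main(void) {
  frame_t outer = {NULL, 1}, inner = {NULL, 2};
  push_frame(&outer);
  push_frame(&inner); /* e.g. a task executed from within the taskq task */
  pop_frame(&inner);
  pop_frame(&outer);
  return current_frame != NULL; /* 0 when pushes and pops balance */
}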
-/* --------------------------------------------------------------------------- */
-
/* starts a taskq; creates and returns a thunk for the taskq_task */
/* also, returns pointer to shared vars for this thread in "shareds" arg */
+kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
+ kmpc_task_t taskq_task, size_t sizeof_thunk,
+ size_t sizeof_shareds, kmp_int32 flags,
+ kmpc_shared_vars_t **shareds) {
+ int in_parallel;
+ kmp_int32 nslots, nthunks, nshareds, nproc;
+ kmpc_task_queue_t *new_queue, *curr_queue;
+ kmpc_thunk_t *new_taskq_thunk;
+ kmp_info_t *th;
+ kmp_team_t *team;
+ kmp_taskq_t *tq;
+ kmp_int32 tid;
+
+ KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid));
+
+ th = __kmp_threads[global_tid];
+ team = th->th.th_team;
+ tq = &team->t.t_taskq;
+ nproc = team->t.t_nproc;
+ tid = __kmp_tid_from_gtid(global_tid);
-kmpc_thunk_t *
-__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
- size_t sizeof_thunk, size_t sizeof_shareds,
- kmp_int32 flags, kmpc_shared_vars_t **shareds )
-{
- int in_parallel;
- kmp_int32 nslots, nthunks, nshareds, nproc;
- kmpc_task_queue_t *new_queue, *curr_queue;
- kmpc_thunk_t *new_taskq_thunk;
- kmp_info_t *th;
- kmp_team_t *team;
- kmp_taskq_t *tq;
- kmp_int32 tid;
-
- KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));
-
- th = __kmp_threads[ global_tid ];
- team = th -> th.th_team;
- tq = & team -> t.t_taskq;
- nproc = team -> t.t_nproc;
- tid = __kmp_tid_from_gtid( global_tid );
-
- /* find out whether this is a parallel taskq or serialized one. */
- in_parallel = in_parallel_context( team );
-
- if( ! tq->tq_root ) {
- if (in_parallel) {
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
-
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
- }
-
- if (in_parallel) {
- /* This shouldn't be a barrier region boundary, it will confuse the user. */
- /* Need the boundary to be at the end taskq instead. */
- if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
- /* Creating the active root queue, and we are not the master thread. */
- /* The master thread below created the queue and tasks have been */
- /* enqueued, and the master thread released this barrier. This */
- /* worker thread can now proceed and execute tasks. See also the */
- /* TQF_RELEASE_WORKERS which is used to handle this case. */
-
- *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;
-
- KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
-
- return NULL;
- }
- }
-
- /* master thread only executes this code */
-
- if( tq->tq_curr_thunk_capacity < nproc ) {
- if(tq->tq_curr_thunk)
- __kmp_free(tq->tq_curr_thunk);
- else {
- /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */
- __kmp_init_lock( & tq->tq_freelist_lck );
- }
-
- tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
- tq -> tq_curr_thunk_capacity = nproc;
- }
-
- if (in_parallel)
- tq->tq_global_flags = TQF_RELEASE_WORKERS;
- }
-
- /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
- /* on some heuristics (e.g., depth of queue nesting?). */
-
- nslots = (in_parallel) ? (2 * nproc) : 1;
-
- /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
- /* jobs being executed by other threads, and one extra for taskq slot */
-
- nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;
-
- /* Only the root taskq gets a per-thread array of shareds. */
- /* The rest of the taskq's only get one copy of the shared vars. */
-
- nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;
-
- /* create overall queue data structure and its components that require allocation */
-
- new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
- sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );
-
- /* rest of new_queue initializations */
-
- new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;
+ /* find out whether this is a parallel taskq or serialized one. */
+ in_parallel = in_parallel_context(team);
+ if (!tq->tq_root) {
if (in_parallel) {
- new_queue->tq_tasknum_queuing = 0;
- new_queue->tq_tasknum_serving = 0;
- new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
- }
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
- new_queue->tq_taskq_slot = NULL;
- new_queue->tq_nslots = nslots;
- new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
- new_queue->tq_nfull = 0;
- new_queue->tq_head = 0;
- new_queue->tq_tail = 0;
- new_queue->tq_loc = loc;
-
- if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
- /* prepare to serve the first-queued task's ORDERED directive */
- new_queue->tq_tasknum_serving = 1;
-
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
-
- /* Vector ORDERED SECTION to taskq version */
- th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
}
- /* create a new thunk for the taskq_task in the new_queue */
- *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;
-
- new_taskq_thunk->th.th_shareds = *shareds;
- new_taskq_thunk->th_task = taskq_task;
- new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
- new_taskq_thunk->th_status = 0;
-
- KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
-
- /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */
-
- /* insert the new task queue into the tree, but only after all fields initialized */
-
if (in_parallel) {
- if( ! tq->tq_root ) {
- new_queue->tq.tq_parent = NULL;
- new_queue->tq_first_child = NULL;
- new_queue->tq_next_child = NULL;
- new_queue->tq_prev_child = NULL;
- new_queue->tq_ref_count = 1;
- tq->tq_root = new_queue;
- }
- else {
- curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
- new_queue->tq.tq_parent = curr_queue;
- new_queue->tq_first_child = NULL;
- new_queue->tq_prev_child = NULL;
- new_queue->tq_ref_count = 1; /* for this the thread that built the queue */
-
- KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
- __LINE__, global_tid, new_queue, new_queue->tq_ref_count));
-
- __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
- new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;
-
- if (curr_queue->tq_first_child != NULL)
- curr_queue->tq_first_child->tq_prev_child = new_queue;
-
- curr_queue->tq_first_child = new_queue;
-
- __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
- }
-
- /* set up thunk stack only after code that determines curr_queue above */
- new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
- tq->tq_curr_thunk[tid] = new_taskq_thunk;
-
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
- else {
- new_taskq_thunk->th_encl_thunk = 0;
- new_queue->tq.tq_parent = NULL;
- new_queue->tq_first_child = NULL;
- new_queue->tq_next_child = NULL;
- new_queue->tq_prev_child = NULL;
- new_queue->tq_ref_count = 1;
+      // This shouldn't be a barrier region boundary; it will confuse the user.
+      /* Need the boundary to be at the end of the taskq instead. */
+ if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
+ /* Creating the active root queue, and we are not the master thread. */
+ /* The master thread below created the queue and tasks have been */
+ /* enqueued, and the master thread released this barrier. This */
+ /* worker thread can now proceed and execute tasks. See also the */
+ /* TQF_RELEASE_WORKERS which is used to handle this case. */
+ *shareds = (kmpc_shared_vars_t *)tq->tq_root->tq_shareds[tid].ai_data;
+
+ KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));
+
+ return NULL;
+ }
+ }
+
+ /* master thread only executes this code */
+ if (tq->tq_curr_thunk_capacity < nproc) {
+ if (tq->tq_curr_thunk)
+ __kmp_free(tq->tq_curr_thunk);
+ else {
+ /* only need to do this once at outer level, i.e. when tq_curr_thunk is
+ * still NULL */
+ __kmp_init_lock(&tq->tq_freelist_lck);
+ }
+
+ tq->tq_curr_thunk =
+ (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *));
+ tq->tq_curr_thunk_capacity = nproc;
}
-#ifdef KMP_DEBUG
- KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
- KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));
+ if (in_parallel)
+ tq->tq_global_flags = TQF_RELEASE_WORKERS;
+ }
- if (in_parallel) {
- KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
+ /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
+ /* on some heuristics (e.g., depth of queue nesting?). */
+ nslots = (in_parallel) ? (2 * nproc) : 1;
+
+ /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
+ /* jobs being executed by other threads, and one extra for taskq slot */
+ nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1)
+ : nslots + 2;
+
+ /* Only the root taskq gets a per-thread array of shareds. */
+  /* The rest of the taskqs only get one copy of the shared vars. */
+ nshareds = (!tq->tq_root && in_parallel) ? nproc : 1;
+
+ /* create overall queue data structure and its components that require
+ * allocation */
+ new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds,
+ nproc, sizeof_thunk, sizeof_shareds,
+ &new_taskq_thunk, global_tid);
+
+ /* rest of new_queue initializations */
+ new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;
+
+ if (in_parallel) {
+ new_queue->tq_tasknum_queuing = 0;
+ new_queue->tq_tasknum_serving = 0;
+ new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
+ }
+
+ new_queue->tq_taskq_slot = NULL;
+ new_queue->tq_nslots = nslots;
+ new_queue->tq_hiwat = HIGH_WATER_MARK(nslots);
+ new_queue->tq_nfull = 0;
+ new_queue->tq_head = 0;
+ new_queue->tq_tail = 0;
+ new_queue->tq_loc = loc;
+
+ if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
+ /* prepare to serve the first-queued task's ORDERED directive */
+ new_queue->tq_tasknum_serving = 1;
+
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
+
+ /* Vector ORDERED SECTION to taskq version */
+ th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
+ }
+
+ /* create a new thunk for the taskq_task in the new_queue */
+ *shareds = (kmpc_shared_vars_t *)new_queue->tq_shareds[0].ai_data;
+
+ new_taskq_thunk->th.th_shareds = *shareds;
+ new_taskq_thunk->th_task = taskq_task;
+ new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
+ new_taskq_thunk->th_status = 0;
+
+ KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
+
+ // Make sure these inits complete before threads start using this queue
+ /* KMP_MB(); */ // (necessary?)
+
+ /* insert the new task queue into the tree, but only after all fields
+ * initialized */
+
+ if (in_parallel) {
+ if (!tq->tq_root) {
+ new_queue->tq.tq_parent = NULL;
+ new_queue->tq_first_child = NULL;
+ new_queue->tq_next_child = NULL;
+ new_queue->tq_prev_child = NULL;
+ new_queue->tq_ref_count = 1;
+ tq->tq_root = new_queue;
} else {
- KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
- }
+ curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
+ new_queue->tq.tq_parent = curr_queue;
+ new_queue->tq_first_child = NULL;
+ new_queue->tq_prev_child = NULL;
+ new_queue->tq_ref_count =
+          1; /* for the thread that built the queue */
+
+ KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,
+ global_tid, new_queue, new_queue->tq_ref_count));
+
+ __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
+
+ new_queue->tq_next_child =
+ (struct kmpc_task_queue_t *)curr_queue->tq_first_child;
+
+ if (curr_queue->tq_first_child != NULL)
+ curr_queue->tq_first_child->tq_prev_child = new_queue;
+
+ curr_queue->tq_first_child = new_queue;
+
+ __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+ }
+
+ /* set up thunk stack only after code that determines curr_queue above */
+ new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
+ tq->tq_curr_thunk[tid] = new_taskq_thunk;
+
+ KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
+ } else {
+ new_taskq_thunk->th_encl_thunk = 0;
+ new_queue->tq.tq_parent = NULL;
+ new_queue->tq_first_child = NULL;
+ new_queue->tq_next_child = NULL;
+ new_queue->tq_prev_child = NULL;
+ new_queue->tq_ref_count = 1;
+ }
- KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));
+#ifdef KMP_DEBUG
+ KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
+ KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid));
- if (in_parallel) {
- KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
- }
+ if (in_parallel) {
+ KF_TRACE(25,
+ ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
+ } else {
+ KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,
+ global_tid));
+ }
+
+ KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid));
+
+ if (in_parallel) {
+ KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
+ }
#endif /* KMP_DEBUG */
- if ( __kmp_env_consistency_check )
- __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );
+ if (__kmp_env_consistency_check)
+ __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc);
- KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));
- return new_taskq_thunk;
+ return new_taskq_thunk;
}
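
The barrier call in __kmpc_taskq() uses __kmp_barrier()'s return value to split one thread's path (queue creation) from everyone else's (proceed and execute tasks). The exact return-value convention of __kmp_barrier() is not spelled out in this file, so the sketch below only shows the analogous election idiom with POSIX barriers, where pthread_barrier_wait() hands PTHREAD_BARRIER_SERIAL_THREAD to exactly one caller; none of it is runtime code.

#include <pthread.h>
#include <stdio.h>

#define NTHREADS 4
static pthread_barrier_t bar;

/* Exactly one thread gets PTHREAD_BARRIER_SERIAL_THREAD and does the
   setup/teardown work; the others just pass through. */
static void *worker(void *arg) {
  (void)arg;
  if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
    puts("elected thread: set up (or tear down) the shared structure");
  else
    puts("other thread: proceed and execute tasks");
  return NULL;
}

int main(void) {
  pthread_t th[NTHREADS];
  pthread_barrier_init(&bar, NULL, NTHREADS);
  for (int i = 0; i < NTHREADS; i++)
    pthread_create(&th[i], NULL, worker, NULL);
  for (int i = 0; i < NTHREADS; i++)
    pthread_join(th[i], NULL);
  pthread_barrier_destroy(&bar);
  return 0;
}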
-
/* ends a taskq; last thread out destroys the queue */
-void
-__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
-{
+void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *taskq_thunk) {
#ifdef KMP_DEBUG
- kmp_int32 i;
+ kmp_int32 i;
#endif
- kmp_taskq_t *tq;
- int in_parallel;
- kmp_info_t *th;
- kmp_int32 is_outermost;
- kmpc_task_queue_t *queue;
- kmpc_thunk_t *thunk;
- int nproc;
-
- KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));
-
- tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
-
- /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */
- queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;
-
- KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
- is_outermost = (queue == tq->tq_root);
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+ kmp_taskq_t *tq;
+ int in_parallel;
+ kmp_info_t *th;
+ kmp_int32 is_outermost;
+ kmpc_task_queue_t *queue;
+ kmpc_thunk_t *thunk;
+ int nproc;
+
+ KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid));
+
+ tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+ nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
+
+ /* For the outermost taskq only, all but one thread will have taskq_thunk ==
+ * NULL */
+ queue = (taskq_thunk == NULL) ? tq->tq_root
+ : taskq_thunk->th.th_shareds->sv_queue;
+
+ KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
+ is_outermost = (queue == tq->tq_root);
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+
+ if (in_parallel) {
+ kmp_uint32 spins;
+
+ /* this is just a safeguard to release the waiting threads if */
+ /* the outermost taskq never queues a task */
+
+ if (is_outermost && (KMP_MASTER_GTID(global_tid))) {
+ if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
+ /* no lock needed, workers are still in spin mode */
+ tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
+
+ __kmp_end_split_barrier(bs_plain_barrier, global_tid);
+ }
+ }
+
+ /* keep dequeueing work until all tasks are queued and dequeued */
+
+ do {
+ /* wait until something is available to dequeue */
+ KMP_INIT_YIELD(spins);
+
+ while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
+ (!__kmp_taskq_has_any_children(queue)) &&
+ (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
+ KMP_YIELD_WHEN(TRUE, spins);
+ }
+
+ /* check to see if we can execute tasks in the queue */
+ while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) &&
+ (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) {
+ KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,
+ queue, global_tid));
+ __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+ }
+
+      /* see if work can be found in a descendant queue */
+ if ((__kmp_taskq_has_any_children(queue)) &&
+ (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
+ NULL) {
+
+ KF_TRACE(50,
+ ("Stole thunk: %p in descendant queue: %p while waiting in "
+ "queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+
+ __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+ }
+
+ } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) ||
+ (queue->tq_nfull != 0));
+
+ KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,
+ global_tid));
+
+ /* wait while all tasks are not finished and more work found
+ in descendant queues */
+
+ while ((!__kmp_taskq_tasks_finished(queue)) &&
+ (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
+ NULL) {
- if (in_parallel) {
- kmp_uint32 spins;
-
- /* this is just a safeguard to release the waiting threads if */
- /* the outermost taskq never queues a task */
-
- if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
- if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
- /* no lock needed, workers are still in spin mode */
- tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
-
- __kmp_end_split_barrier( bs_plain_barrier, global_tid );
- }
- }
-
- /* keep dequeueing work until all tasks are queued and dequeued */
-
- do {
- /* wait until something is available to dequeue */
- KMP_INIT_YIELD(spins);
-
- while ( (queue->tq_nfull == 0)
- && (queue->tq_taskq_slot == NULL)
- && (! __kmp_taskq_has_any_children(queue) )
- && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
- ) {
- KMP_YIELD_WHEN( TRUE, spins );
- }
-
- /* check to see if we can execute tasks in the queue */
- while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
- && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
- ) {
- KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
-
- /* see if work found can be found in a descendant queue */
- if ( (__kmp_taskq_has_any_children(queue))
- && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
- ) {
+ KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in "
+ "queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
- KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));
+ __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+ }
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
+    KF_TRACE(50, ("No work found in descendant queues or all work finished in "
+ "queue: %p (%d)\n",
+ queue, global_tid));
- } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
- || (queue->tq_nfull != 0)
- );
+ if (!is_outermost) {
+ /* need to return if NOWAIT present and not outermost taskq */
- KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));
+ if (queue->tq_flags & TQF_IS_NOWAIT) {
+ __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+ queue->tq_ref_count--;
+ KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
+ __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
- /* wait while all tasks are not finished and more work found
- in descendant queues */
+ KE_TRACE(
+ 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
- while ( (!__kmp_taskq_tasks_finished(queue))
- && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
- ) {
+ return;
+ }
- KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+ __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+ /* WAIT until all tasks are finished and no child queues exist before
+ * proceeding */
+ KMP_INIT_YIELD(spins);
+
+ while (!__kmp_taskq_tasks_finished(queue) ||
+ __kmp_taskq_has_any_children(queue)) {
+ thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue);
+
+ if (thunk != NULL) {
+ KF_TRACE(50,
+ ("Stole thunk: %p in ancestor queue: %p while waiting in "
+ "queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+ __kmp_execute_task_from_queue(tq, loc, global_tid, thunk,
+ in_parallel);
}
- KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));
+ KMP_YIELD_WHEN(thunk == NULL, spins);
- if (!is_outermost) {
- /* need to return if NOWAIT present and not outermost taskq */
+ __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
+ }
- if (queue->tq_flags & TQF_IS_NOWAIT) {
- __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
- queue->tq_ref_count--;
- KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
- __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
- KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
-
- return;
- }
-
- __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
-
- /* WAIT until all tasks are finished and no child queues exist before proceeding */
- KMP_INIT_YIELD(spins);
-
- while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
- thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );
+ __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
+ if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+ queue->tq_flags |= TQF_DEALLOCATED;
+ }
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
- if (thunk != NULL) {
- KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
+ /* only the allocating thread can deallocate the queue */
+ if (taskq_thunk != NULL) {
+ __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
+ }
- KMP_YIELD_WHEN( thunk == NULL, spins );
+ KE_TRACE(
+ 10,
+ ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",
+ global_tid));
- __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
- }
-
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
- if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
- queue->tq_flags |= TQF_DEALLOCATED;
- }
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+ return;
+ }
- /* only the allocating thread can deallocate the queue */
- if (taskq_thunk != NULL) {
- __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
- }
+ // Outermost Queue: steal work from descendants until all tasks are finished
- KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));
+ KMP_INIT_YIELD(spins);
- return;
- }
+ while (!__kmp_taskq_tasks_finished(queue)) {
+ thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
- /* Outermost Queue: steal work from descendants until all tasks are finished */
+ if (thunk != NULL) {
+ KF_TRACE(50,
+ ("Stole thunk: %p in descendant queue: %p while waiting in "
+ "queue: %p (%d)\n",
+ thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
- KMP_INIT_YIELD(spins);
+ __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+ }
- while (!__kmp_taskq_tasks_finished(queue)) {
- thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
+ KMP_YIELD_WHEN(thunk == NULL, spins);
+ }
- if (thunk != NULL) {
- KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
- thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+ /* Need this barrier to prevent destruction of queue before threads have all
+ * executed above code */
+ /* This may need to be done earlier when NOWAIT is implemented for the
+ * outermost level */
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
+ if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
+ /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */
+ /* for right now, everybody waits, and the master thread destroys the */
+ /* remaining queues. */
- KMP_YIELD_WHEN( thunk == NULL, spins );
- }
-
- /* Need this barrier to prevent destruction of queue before threads have all executed above code */
- /* This may need to be done earlier when NOWAIT is implemented for the outermost level */
+ __kmp_remove_all_child_taskq(tq, global_tid, queue);
- if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
- /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */
- /* for right now, everybody waits, and the master thread destroys the */
- /* remaining queues. */
-
- __kmp_remove_all_child_taskq( tq, global_tid, queue );
-
- /* Now destroy the root queue */
- KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+ /* Now destroy the root queue */
+ KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",
+ global_tid, queue));
+ KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
#ifdef KMP_DEBUG
- /* the root queue entry */
- KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));
-
- /* children must all be gone by now because of barrier above */
- KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
-
- for (i=0; i<nproc; i++) {
- KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
- }
+ /* the root queue entry */
+ KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&
+ (queue->tq_next_child == NULL));
- for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);
+ /* children must all be gone by now because of barrier above */
+ KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);
- KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
+ for (i = 0; i < nproc; i++) {
+ KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
+ }
- for (i = 0; i < nproc; i++) {
- KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
- }
+ for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
+ i++, thunk = thunk->th.th_next_free)
+ ;
+
+ KMP_DEBUG_ASSERT(i ==
+ queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
+
+ for (i = 0; i < nproc; i++) {
+ KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]);
+ }
#endif
- /* unlink the root queue entry */
- tq -> tq_root = NULL;
+ /* unlink the root queue entry */
+ tq->tq_root = NULL;
- /* release storage for root queue entry */
- KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));
+ /* release storage for root queue entry */
+ KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue,
+ global_tid));
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_free_taskq(tq, queue, in_parallel, global_tid);
- KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+ KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
- /* release the workers now that the data structures are up to date */
- __kmp_end_split_barrier( bs_plain_barrier, global_tid );
- }
+ /* release the workers now that the data structures are up to date */
+ __kmp_end_split_barrier(bs_plain_barrier, global_tid);
+ }
- th = __kmp_threads[ global_tid ];
+ th = __kmp_threads[global_tid];
- /* Reset ORDERED SECTION to parallel version */
- th->th.th_dispatch->th_deo_fcn = 0;
+ /* Reset ORDERED SECTION to parallel version */
+ th->th.th_dispatch->th_deo_fcn = 0;
- /* Reset ORDERED SECTION to parallel version */
- th->th.th_dispatch->th_dxo_fcn = 0;
- }
- else {
- /* in serial execution context, dequeue the last task */
- /* and execute it, if there were any tasks encountered */
+ /* Reset ORDERED SECTION to parallel version */
+ th->th.th_dispatch->th_dxo_fcn = 0;
+ } else {
+ /* in serial execution context, dequeue the last task */
+ /* and execute it, if there were any tasks encountered */
- if (queue->tq_nfull > 0) {
- KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
+ if (queue->tq_nfull > 0) {
+ KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
- thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
+ thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
- if (queue->tq_flags & TQF_IS_LAST_TASK) {
- /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
- /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
- /* instrumentation does copy-out. */
+ if (queue->tq_flags & TQF_IS_LAST_TASK) {
+ /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
+ /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
+ /* instrumentation does copy-out. */
- /* no need for test_then_or call since already locked */
- thunk->th_flags |= TQF_IS_LAST_TASK;
- }
+ /* no need for test_then_or call since already locked */
+ thunk->th_flags |= TQF_IS_LAST_TASK;
+ }
- KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));
+ KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
+ thunk, queue));
- __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
- }
+ __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+ }
- /* destroy the unattached serial queue now that there is no more work to do */
- KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+ // destroy the unattached serial queue now that there is no more work to do
+ KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",
+ queue, global_tid));
+ KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
#ifdef KMP_DEBUG
- i = 0;
- for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
- ++i;
- KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);
+ i = 0;
+ for (thunk = queue->tq_free_thunks; thunk != NULL;
+ thunk = thunk->th.th_next_free)
+ ++i;
+ KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1);
#endif
- /* release storage for unattached serial queue */
- KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
-
- queue->tq_flags |= TQF_DEALLOCATED;
- __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
- }
+ /* release storage for unattached serial queue */
+ KF_TRACE(50,
+ ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
+
+ queue->tq_flags |= TQF_DEALLOCATED;
+ __kmp_free_taskq(tq, queue, in_parallel, global_tid);
+ }
- KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid));
}
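
__kmpc_end_taskq() relies on the KMP_INIT_YIELD / KMP_YIELD_WHEN pair to throttle its busy-wait loops. The real macros consult runtime settings that are not visible here, so the following is only a rough standalone analogue of the spin-then-yield idiom, with an arbitrary threshold and invented names.

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>

static atomic_int done;

/* Spin for a while, then start yielding the processor -- roughly the shape
   of the KMP_INIT_YIELD / KMP_YIELD_WHEN usage in __kmpc_end_taskq. */
static void wait_until_done(void) {
  unsigned spins = 0;   /* like KMP_INIT_YIELD(spins) */
  while (!atomic_load_explicit(&done, memory_order_acquire)) {
    if (++spins > 1024) /* threshold is arbitrary for this sketch */
      sched_yield();
  }
}

static void *producer(void *arg) {
  (void)arg;
  atomic_store_explicit(&done, 1, memory_order_release);
  return NULL;
}

int main(void) {
  pthread_t th;
  pthread_create(&th, NULL, producer, NULL);
  wait_until_done();
  pthread_join(th, NULL);
  return 0;
}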
/* Enqueues a task for thunk previously created by __kmpc_task_buffer. */
/* Returns nonzero if just filled up queue */
-kmp_int32
-__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
-{
- kmp_int32 ret;
- kmpc_task_queue_t *queue;
- int in_parallel;
- kmp_taskq_t *tq;
+kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) {
+ kmp_int32 ret;
+ kmpc_task_queue_t *queue;
+ int in_parallel;
+ kmp_taskq_t *tq;
- KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid));
- KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */
+ KMP_DEBUG_ASSERT(!(thunk->th_flags &
+ TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */
- tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- queue = thunk->th.th_shareds->sv_queue;
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+ tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+ queue = thunk->th.th_shareds->sv_queue;
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
- if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
- thunk->th_tasknum = ++queue->tq_tasknum_queuing;
+ if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
+ thunk->th_tasknum = ++queue->tq_tasknum_queuing;
- /* For serial execution dequeue the preceding task and execute it, if one exists */
- /* This cannot be the last task. That one is handled in __kmpc_end_taskq */
+ /* For serial execution dequeue the preceding task and execute it, if one
+ * exists */
+ /* This cannot be the last task. That one is handled in __kmpc_end_taskq */
- if (!in_parallel && queue->tq_nfull > 0) {
- kmpc_thunk_t *prev_thunk;
+ if (!in_parallel && queue->tq_nfull > 0) {
+ kmpc_thunk_t *prev_thunk;
- KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
+ KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
- prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
+ prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
- KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));
+ KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
+ prev_thunk, queue));
- __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
- }
+ __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel);
+ }
- /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */
- /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */
- /* task queue is not full and allocates a thunk (which is then passed to */
- /* __kmpc_task()). So, the enqueue below should never fail due to a full queue. */
+ /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private
+ variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the
+ task queue is not full and allocates a thunk (which is then passed to
+ __kmpc_task()). So, the enqueue below should never fail due to a full
+ queue. */
- KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+ KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
- ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );
+ ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel);
- KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+ KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
- KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid));
- return ret;
+ return ret;
}
/* enqueues a taskq_task for thunk previously created by __kmpc_taskq */
/* this should never be called unless in a parallel context */
-void
-__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
-{
- kmpc_task_queue_t *queue;
- kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- int tid = __kmp_tid_from_gtid( global_tid );
+void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
+ kmp_int32 status) {
+ kmpc_task_queue_t *queue;
+ kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+ int tid = __kmp_tid_from_gtid(global_tid);
- KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
- KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+ KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
+ KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
- queue = thunk->th.th_shareds->sv_queue;
+ queue = thunk->th.th_shareds->sv_queue;
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, ct_taskq, loc );
+ if (__kmp_env_consistency_check)
+ __kmp_pop_workshare(global_tid, ct_taskq, loc);
- /* thunk->th_task is the taskq_task */
- KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);
+ /* thunk->th_task is the taskq_task */
+ KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);
- /* not supposed to call __kmpc_taskq_task if it's already enqueued */
- KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);
+ /* not supposed to call __kmpc_taskq_task if it's already enqueued */
+ KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);
- /* dequeue taskq thunk from curr_thunk stack */
- tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
- thunk->th_encl_thunk = NULL;
+ /* dequeue taskq thunk from curr_thunk stack */
+ tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+ thunk->th_encl_thunk = NULL;
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+ KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
- thunk->th_status = status;
+ thunk->th_status = status;
- KMP_MB(); /* flush thunk->th_status before taskq_task enqueued to avoid race condition */
+ // Flush thunk->th_status before taskq_task enqueued to avoid race condition
+ KMP_MB();
- /* enqueue taskq_task in thunk into special slot in queue */
- /* GEH - probably don't need to lock taskq slot since only one */
- /* thread enqueues & already a lock set at dequeue point */
+ /* enqueue taskq_task in thunk into special slot in queue */
+ /* GEH - probably don't need to lock taskq slot since only one */
+ /* thread enqueues & already a lock set at dequeue point */
- queue->tq_taskq_slot = thunk;
+ queue->tq_taskq_slot = thunk;
- KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}
-/* ends a taskq_task; done generating tasks */
+/* ends a taskq_task; done generating tasks */
-void
-__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
-{
- kmp_taskq_t *tq;
- kmpc_task_queue_t *queue;
- int in_parallel;
- int tid;
+void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *thunk) {
+ kmp_taskq_t *tq;
+ kmpc_task_queue_t *queue;
+ int in_parallel;
+ int tid;
- KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
- tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- queue = thunk->th.th_shareds->sv_queue;
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
- tid = __kmp_tid_from_gtid( global_tid );
+ tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+ queue = thunk->th.th_shareds->sv_queue;
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+ tid = __kmp_tid_from_gtid(global_tid);
- if ( __kmp_env_consistency_check )
- __kmp_pop_workshare( global_tid, ct_taskq, loc );
+ if (__kmp_env_consistency_check)
+ __kmp_pop_workshare(global_tid, ct_taskq, loc);
- if (in_parallel) {
-#if KMP_ARCH_X86 || \
- KMP_ARCH_X86_64
+ if (in_parallel) {
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
+ KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_ALL_TASKS_QUEUED);
#else
- {
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+ {
+ __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+ // Make sure data structures are in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
-
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
-#endif
+ queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
}
+#endif
+ }
- if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
- /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */
- /* queue if TQF_IS_LASTPRIVATE so we can positively identify that last task */
- /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. When */
- /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so */
- /* we know the last one in the queue is the lastprivate task. Mark the queue */
- /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that */
- /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */
- /* bit signals the instrumented code to do copy-outs after execution). */
-
- if (! in_parallel) {
- /* No synchronization needed for serial context */
- queue->tq_flags |= TQF_IS_LAST_TASK;
- }
- else {
-#if KMP_ARCH_X86 || \
- KMP_ARCH_X86_64
+ if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
+ /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in
+ the queue if TQF_IS_LASTPRIVATE so we can positively identify that last
+ task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.
+ When __kmpc_end_taskq_task() is called we are done generating all the
+ tasks, so we know the last one in the queue is the lastprivate task.
+ Mark the queue as having gotten to this state via tq_flags &
+ TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags &
+ TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do
+ copy-outs after execution). */
+ if (!in_parallel) {
+ /* No synchronization needed for serial context */
+ queue->tq_flags |= TQF_IS_LAST_TASK;
+ } else {
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
+ KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_IS_LAST_TASK);
#else
- {
- __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
- KMP_MB(); /* make sure data structures are in consistent state before querying them */
- /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
+ {
+ __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
- queue->tq_flags |= TQF_IS_LAST_TASK;
+ // Make sure data structures in consistent state before querying them
+ // Seems to work without this for digital/alpha, needed for IBM/RS6000
+ KMP_MB();
- __kmp_release_lock(& queue->tq_queue_lck, global_tid);
- }
+ queue->tq_flags |= TQF_IS_LAST_TASK;
+ __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+ }
#endif
- /* to prevent race condition where last task is dequeued but */
- /* flag isn't visible yet (not sure about this) */
- KMP_MB();
- }
+ /* to prevent race condition where last task is dequeued but */
+ /* flag isn't visible yet (not sure about this) */
+ KMP_MB();
}
+ }
- /* dequeue taskq thunk from curr_thunk stack */
- if (in_parallel) {
- tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
- thunk->th_encl_thunk = NULL;
+ /* dequeue taskq thunk from curr_thunk stack */
+ if (in_parallel) {
+ tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+ thunk->th_encl_thunk = NULL;
- KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
- }
+ KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
+ }
- KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}
/* returns thunk for a regular task based on taskq_thunk */
/* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */
-kmpc_thunk_t *
-__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
-{
- kmp_taskq_t *tq;
- kmpc_task_queue_t *queue;
- kmpc_thunk_t *new_thunk;
- int in_parallel;
+kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
+ kmpc_thunk_t *taskq_thunk, kmpc_task_t task) {
+ kmp_taskq_t *tq;
+ kmpc_task_queue_t *queue;
+ kmpc_thunk_t *new_thunk;
+ int in_parallel;
- KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid));
- KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */
+ KMP_DEBUG_ASSERT(
+ taskq_thunk->th_flags &
+ TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */
- tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
- queue = taskq_thunk->th.th_shareds->sv_queue;
- in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+ tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+ queue = taskq_thunk->th.th_shareds->sv_queue;
+ in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
- /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */
- /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */
- /* task queue is not full and allocates a thunk (which is then passed to */
- /* __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be */
- /* the next to be enqueued in __kmpc_task(). */
+ /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private
+ variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the
+ task queue is not full and allocates a thunk (which is then passed to
+ __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be
+ the next to be enqueued in __kmpc_task(). */
- new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
- new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
- new_thunk->th_encl_thunk = NULL;
- new_thunk->th_task = task;
+ new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid);
+ new_thunk->th.th_shareds = (kmpc_shared_vars_t *)queue->tq_shareds[0].ai_data;
+ new_thunk->th_encl_thunk = NULL;
+ new_thunk->th_task = task;
- /* GEH - shouldn't need to lock the read of tq_flags here */
- new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;
+ /* GEH - shouldn't need to lock the read of tq_flags here */
+ new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;
- new_thunk->th_status = 0;
+ new_thunk->th_status = 0;
- KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));
+ KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK));
- KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
- KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));
+ KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
+ KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid));
- KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));
+ KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid));
- return new_thunk;
+ return new_thunk;
}
-
-/* --------------------------------------------------------------------------- */
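
For readers skimming the reformatted taskq code above: the end-of-queue paths set flag bits in tq_flags either with an atomic OR (x86/x86_64, via KMP_TEST_THEN_OR32) or under the queue lock on other targets, and skip synchronization entirely in serial context. Below is a minimal standalone sketch of that pattern, not the runtime's code; the names (queue_t, set_queue_flag) and the placeholder flag values, as well as the use of std::atomic/std::mutex in place of KMP_TEST_THEN_OR32 and __kmp_acquire_lock/__kmp_release_lock, are assumptions for illustration only.

#include <atomic>
#include <cstdint>
#include <mutex>

struct queue_t {
  std::atomic<std::int32_t> tq_flags{0};
  std::mutex tq_queue_lck; // stands in for the kmp queue lock
};

// Placeholder bit values, not the kmp.h definitions.
enum : std::int32_t { TQF_ALL_TASKS_QUEUED = 1 << 0, TQF_IS_LAST_TASK = 1 << 1 };

static void set_queue_flag(queue_t *queue, std::int32_t flag, bool in_parallel) {
  if (!in_parallel) {
    // Serial context: plain update, no synchronization needed.
    queue->tq_flags.store(queue->tq_flags.load(std::memory_order_relaxed) | flag,
                          std::memory_order_relaxed);
    return;
  }
#if defined(__x86_64__) || defined(__i386__)
  // Analogue of KMP_TEST_THEN_OR32(&queue->tq_flags, flag): one atomic RMW.
  queue->tq_flags.fetch_or(flag, std::memory_order_seq_cst);
#else
  // Analogue of the lock-protected fallback (acquire lock, KMP_MB, OR, release).
  std::lock_guard<std::mutex> g(queue->tq_queue_lck);
  queue->tq_flags.fetch_or(flag, std::memory_order_seq_cst);
#endif
}

The serial shortcut mirrors the "No synchronization needed for serial context" branch in __kmpc_end_taskq_task, which only takes the atomic/locked path when TQF_PARALLEL_CONTEXT is set.
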
Modified: openmp/trunk/runtime/src/kmp_threadprivate.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_threadprivate.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_threadprivate.cpp (original)
+++ openmp/trunk/runtime/src/kmp_threadprivate.cpp Fri May 12 13:01:32 2017
@@ -14,502 +14,476 @@
#include "kmp.h"
-#include "kmp_itt.h"
#include "kmp_i18n.h"
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+#include "kmp_itt.h"
#define USE_CHECKS_COMMON
-#define KMP_INLINE_SUBR 1
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+#define KMP_INLINE_SUBR 1
-struct shared_table __kmp_threadprivate_d_table;
+void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
+ void *data_addr, size_t pc_size);
+struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
+ void *data_addr,
+ size_t pc_size);
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+struct shared_table __kmp_threadprivate_d_table;
static
#ifdef KMP_INLINE_SUBR
-__forceinline
+ __forceinline
#endif
-struct private_common *
-__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr )
+ struct private_common *
+ __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
+ void *pc_addr)
{
- struct private_common *tn;
+ struct private_common *tn;
#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n",
- gtid, pc_addr ) );
- dump_list();
+ KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
+ "address %p\n",
+ gtid, pc_addr));
+ dump_list();
#endif
- for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
- if (tn->gbl_addr == pc_addr) {
+ for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
+ if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n",
- gtid, pc_addr ) );
+ KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
+ "node %p on list\n",
+ gtid, pc_addr));
#endif
- return tn;
- }
+ return tn;
}
- return 0;
+ }
+ return 0;
}
static
#ifdef KMP_INLINE_SUBR
-__forceinline
+ __forceinline
#endif
-struct shared_common *
-__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr )
-{
- struct shared_common *tn;
+ struct shared_common *
+ __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
+ void *pc_addr) {
+ struct shared_common *tn;
- for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
- if (tn->gbl_addr == pc_addr) {
+ for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
+ if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
- gtid, pc_addr ) );
+ KC_TRACE(
+ 10,
+ ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
+ gtid, pc_addr));
#endif
- return tn;
- }
- }
- return 0;
-}
-
-
-/*
- * Create a template for the data initialized storage.
- * Either the template is NULL indicating zero fill,
- * or the template is a copy of the original data.
- */
-
-static struct private_data *
-__kmp_init_common_data( void *pc_addr, size_t pc_size )
-{
- struct private_data *d;
- size_t i;
- char *p;
-
- d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) );
-/*
- d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d->next = 0;
-*/
- d->size = pc_size;
- d->more = 1;
-
- p = (char*)pc_addr;
-
- for (i = pc_size; i > 0; --i) {
- if (*p++ != '\0') {
- d->data = __kmp_allocate( pc_size );
- KMP_MEMCPY( d->data, pc_addr, pc_size );
- break;
- }
+ return tn;
}
-
- return d;
+ }
+ return 0;
}
-/*
- * Initialize the data area from the template.
- */
-
-static void
-__kmp_copy_common_data( void *pc_addr, struct private_data *d )
-{
- char *addr = (char *) pc_addr;
- int i, offset;
-
- for (offset = 0; d != 0; d = d->next) {
- for (i = d->more; i > 0; --i) {
- if (d->data == 0)
- memset( & addr[ offset ], '\0', d->size );
- else
- KMP_MEMCPY( & addr[ offset ], d->data, d->size );
- offset += d->size;
- }
+// Create a template for the data initialized storage. Either the template is
+// NULL indicating zero fill, or the template is a copy of the original data.
+static struct private_data *__kmp_init_common_data(void *pc_addr,
+ size_t pc_size) {
+ struct private_data *d;
+ size_t i;
+ char *p;
+
+ d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
+ /*
+ d->data = 0; // AC: commented out because __kmp_allocate zeroes the
+ memory
+ d->next = 0;
+ */
+ d->size = pc_size;
+ d->more = 1;
+
+ p = (char *)pc_addr;
+
+ for (i = pc_size; i > 0; --i) {
+ if (*p++ != '\0') {
+ d->data = __kmp_allocate(pc_size);
+ KMP_MEMCPY(d->data, pc_addr, pc_size);
+ break;
+ }
+ }
+
+ return d;
+}
+
+// Initialize the data area from the template.
+static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
+ char *addr = (char *)pc_addr;
+ int i, offset;
+
+ for (offset = 0; d != 0; d = d->next) {
+ for (i = d->more; i > 0; --i) {
+ if (d->data == 0)
+ memset(&addr[offset], '\0', d->size);
+ else
+ KMP_MEMCPY(&addr[offset], d->data, d->size);
+ offset += d->size;
}
+ }
}
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
-void
-__kmp_common_initialize( void )
-{
- if( ! TCR_4(__kmp_init_common) ) {
- int q;
+void __kmp_common_initialize(void) {
+ if (!TCR_4(__kmp_init_common)) {
+ int q;
#ifdef KMP_DEBUG
- int gtid;
+ int gtid;
#endif
- __kmp_threadpriv_cache_list = NULL;
+ __kmp_threadpriv_cache_list = NULL;
#ifdef KMP_DEBUG
- /* verify the uber masters were initialized */
- for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ )
- if( __kmp_root[gtid] ) {
- KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread );
- for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q)
- KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] );
-/* __kmp_root[ gitd ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/
- }
+ /* verify the uber masters were initialized */
+ for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
+ if (__kmp_root[gtid]) {
+ KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
+ KMP_DEBUG_ASSERT(
+ !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
+        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
+ * th.th_pri_common -> data[ q ] = 0;*/
+ }
#endif /* KMP_DEBUG */
- for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
- __kmp_threadprivate_d_table.data[ q ] = 0;
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
+ __kmp_threadprivate_d_table.data[q] = 0;
- TCW_4(__kmp_init_common, TRUE);
- }
+ TCW_4(__kmp_init_common, TRUE);
+ }
}
/* Call all destructors for threadprivate data belonging to all threads.
Currently unused! */
-void
-__kmp_common_destroy( void )
-{
- if( TCR_4(__kmp_init_common) ) {
- int q;
-
- TCW_4(__kmp_init_common, FALSE);
-
- for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
- int gtid;
- struct private_common *tn;
- struct shared_common *d_tn;
-
- /* C++ destructors need to be called once per thread before exiting */
- /* don't call destructors for master thread though unless we used copy constructor */
-
- for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) {
- if (d_tn->is_vec) {
- if (d_tn->dt.dtorv != 0) {
- for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
- if( __kmp_threads[gtid] ) {
- if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
- (! KMP_UBER_GTID (gtid)) ) {
- tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
- gtid, d_tn->gbl_addr );
- if (tn) {
- (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
- }
- }
- }
- }
- if (d_tn->obj_init != 0) {
- (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
- }
- }
- } else {
- if (d_tn->dt.dtor != 0) {
- for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
- if( __kmp_threads[gtid] ) {
- if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
- (! KMP_UBER_GTID (gtid)) ) {
- tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
- gtid, d_tn->gbl_addr );
- if (tn) {
- (*d_tn->dt.dtor) (tn->par_addr);
- }
- }
- }
- }
- if (d_tn->obj_init != 0) {
- (*d_tn->dt.dtor) (d_tn->obj_init);
- }
- }
+void __kmp_common_destroy(void) {
+ if (TCR_4(__kmp_init_common)) {
+ int q;
+
+ TCW_4(__kmp_init_common, FALSE);
+
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
+ int gtid;
+ struct private_common *tn;
+ struct shared_common *d_tn;
+
+ /* C++ destructors need to be called once per thread before exiting.
+ Don't call destructors for master thread though unless we used copy
+ constructor */
+
+ for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
+ d_tn = d_tn->next) {
+ if (d_tn->is_vec) {
+ if (d_tn->dt.dtorv != 0) {
+ for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
+ if (__kmp_threads[gtid]) {
+ if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
+ : (!KMP_UBER_GTID(gtid))) {
+ tn = __kmp_threadprivate_find_task_common(
+ __kmp_threads[gtid]->th.th_pri_common, gtid,
+ d_tn->gbl_addr);
+ if (tn) {
+ (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
+ }
+ }
+ }
+ }
+ if (d_tn->obj_init != 0) {
+ (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
+ }
+ }
+ } else {
+ if (d_tn->dt.dtor != 0) {
+ for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
+ if (__kmp_threads[gtid]) {
+ if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
+ : (!KMP_UBER_GTID(gtid))) {
+ tn = __kmp_threadprivate_find_task_common(
+ __kmp_threads[gtid]->th.th_pri_common, gtid,
+ d_tn->gbl_addr);
+ if (tn) {
+ (*d_tn->dt.dtor)(tn->par_addr);
+ }
}
+ }
}
- __kmp_threadprivate_d_table.data[ q ] = 0;
+ if (d_tn->obj_init != 0) {
+ (*d_tn->dt.dtor)(d_tn->obj_init);
+ }
+ }
}
+ }
+ __kmp_threadprivate_d_table.data[q] = 0;
}
+ }
}
/* Call all destructors for threadprivate data belonging to this thread */
-void
-__kmp_common_destroy_gtid( int gtid )
-{
- struct private_common *tn;
- struct shared_common *d_tn;
-
- KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) );
- if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
- (! KMP_UBER_GTID (gtid)) ) {
-
- if( TCR_4(__kmp_init_common) ) {
-
- /* Cannot do this here since not all threads have destroyed their data */
- /* TCW_4(__kmp_init_common, FALSE); */
-
- for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) {
-
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- gtid, tn->gbl_addr );
-
- KMP_DEBUG_ASSERT( d_tn );
-
- if (d_tn->is_vec) {
- if (d_tn->dt.dtorv != 0) {
- (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
- }
- if (d_tn->obj_init != 0) {
- (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
- }
- } else {
- if (d_tn->dt.dtor != 0) {
- (void) (*d_tn->dt.dtor) (tn->par_addr);
- }
- if (d_tn->obj_init != 0) {
- (void) (*d_tn->dt.dtor) (d_tn->obj_init);
- }
- }
- }
- KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n",
- gtid ) );
- }
+void __kmp_common_destroy_gtid(int gtid) {
+ struct private_common *tn;
+ struct shared_common *d_tn;
+
+ KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
+ if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {
+
+ if (TCR_4(__kmp_init_common)) {
+
+ /* Cannot do this here since not all threads have destroyed their data */
+ /* TCW_4(__kmp_init_common, FALSE); */
+
+ for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {
+
+ d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
+ tn->gbl_addr);
+
+ KMP_DEBUG_ASSERT(d_tn);
+
+ if (d_tn->is_vec) {
+ if (d_tn->dt.dtorv != 0) {
+ (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
+ }
+ if (d_tn->obj_init != 0) {
+ (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
+ }
+ } else {
+ if (d_tn->dt.dtor != 0) {
+ (void)(*d_tn->dt.dtor)(tn->par_addr);
+ }
+ if (d_tn->obj_init != 0) {
+ (void)(*d_tn->dt.dtor)(d_tn->obj_init);
+ }
+ }
+ }
+ KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
+ "complete\n",
+ gtid));
}
+ }
}
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
#ifdef KMP_TASK_COMMON_DEBUG
-static void
-dump_list( void )
-{
- int p, q;
+static void dump_list(void) {
+ int p, q;
- for (p = 0; p < __kmp_all_nth; ++p) {
- if( !__kmp_threads[p] ) continue;
- for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
- if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) {
- struct private_common *tn;
-
- KC_TRACE( 10, ( "\tdump_list: gtid:%d addresses\n", p ) );
-
- for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next) {
- KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
- tn->gbl_addr, tn->par_addr ) );
- }
- }
+ for (p = 0; p < __kmp_all_nth; ++p) {
+ if (!__kmp_threads[p])
+ continue;
+ for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
+ if (__kmp_threads[p]->th.th_pri_common->data[q]) {
+ struct private_common *tn;
+
+ KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));
+
+ for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
+ tn = tn->next) {
+ KC_TRACE(10,
+ ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
+ tn->gbl_addr, tn->par_addr));
}
+ }
}
+ }
}
#endif /* KMP_TASK_COMMON_DEBUG */
+// NOTE: this routine is to be called only from the serial part of the program.
+void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
+ void *data_addr, size_t pc_size) {
+ struct shared_common **lnk_tn, *d_tn;
+ KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
+ __kmp_threads[gtid]->th.th_root->r.r_active == 0);
-/*
- * NOTE: this routine is to be called only from the serial part of the program.
- */
+ d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
+ pc_addr);
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
-{
- struct shared_common **lnk_tn, *d_tn;
- KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] &&
- __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 );
-
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- gtid, pc_addr );
-
- if (d_tn == 0) {
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
-
- d_tn->gbl_addr = pc_addr;
- d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
-/*
- d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->ct.ctor = 0;
- d_tn->cct.cctor = 0;;
- d_tn->dt.dtor = 0;
- d_tn->is_vec = FALSE;
- d_tn->vec_len = 0L;
-*/
- d_tn->cmn_size = pc_size;
-
- __kmp_acquire_lock( &__kmp_global_lock, gtid );
-
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
-
- __kmp_release_lock( &__kmp_global_lock, gtid );
- }
-}
+ if (d_tn == 0) {
+ d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
-{
- struct private_common *tn, **tt;
- struct shared_common *d_tn;
-
- /* +++++++++ START OF CRITICAL SECTION +++++++++ */
+ d_tn->gbl_addr = pc_addr;
+ d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
+ /*
+ d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
+ zeroes the memory
+ d_tn->ct.ctor = 0;
+ d_tn->cct.cctor = 0;;
+ d_tn->dt.dtor = 0;
+ d_tn->is_vec = FALSE;
+ d_tn->vec_len = 0L;
+ */
+ d_tn->cmn_size = pc_size;
- __kmp_acquire_lock( & __kmp_global_lock, gtid );
+ __kmp_acquire_lock(&__kmp_global_lock, gtid);
- tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) );
+ lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
- tn->gbl_addr = pc_addr;
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- gtid, pc_addr ); /* Only the MASTER data table exists. */
+ __kmp_release_lock(&__kmp_global_lock, gtid);
+ }
+}
- if (d_tn != 0) {
- /* This threadprivate variable has already been seen. */
+struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
+ void *data_addr,
+ size_t pc_size) {
+ struct private_common *tn, **tt;
+ struct shared_common *d_tn;
- if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) {
- d_tn->cmn_size = pc_size;
+ /* +++++++++ START OF CRITICAL SECTION +++++++++ */
+ __kmp_acquire_lock(&__kmp_global_lock, gtid);
- if (d_tn->is_vec) {
- if (d_tn->ct.ctorv != 0) {
- /* Construct from scratch so no prototype exists */
- d_tn->obj_init = 0;
- }
- else if (d_tn->cct.cctorv != 0) {
- /* Now data initialize the prototype since it was previously registered */
- d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
- (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len);
- }
- else {
- d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
- }
- } else {
- if (d_tn->ct.ctor != 0) {
- /* Construct from scratch so no prototype exists */
- d_tn->obj_init = 0;
- }
- else if (d_tn->cct.cctor != 0) {
- /* Now data initialize the prototype since it was previously registered */
- d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
- (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr);
- }
- else {
- d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
- }
- }
- }
- }
- else {
- struct shared_common **lnk_tn;
+ tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
- d_tn->gbl_addr = pc_addr;
- d_tn->cmn_size = pc_size;
- d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
-/*
- d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->ct.ctor = 0;
- d_tn->cct.cctor = 0;
- d_tn->dt.dtor = 0;
- d_tn->is_vec = FALSE;
- d_tn->vec_len = 0L;
-*/
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
+ tn->gbl_addr = pc_addr;
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
- }
+ d_tn = __kmp_find_shared_task_common(
+ &__kmp_threadprivate_d_table, gtid,
+ pc_addr); /* Only the MASTER data table exists. */
- tn->cmn_size = d_tn->cmn_size;
+ if (d_tn != 0) {
+ /* This threadprivate variable has already been seen. */
- if ( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) {
- tn->par_addr = (void *) pc_addr;
- }
- else {
- tn->par_addr = (void *) __kmp_allocate( tn->cmn_size );
- }
+ if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
+ d_tn->cmn_size = pc_size;
- __kmp_release_lock( & __kmp_global_lock, gtid );
+ if (d_tn->is_vec) {
+ if (d_tn->ct.ctorv != 0) {
+ /* Construct from scratch so no prototype exists */
+ d_tn->obj_init = 0;
+ } else if (d_tn->cct.cctorv != 0) {
+ /* Now data initialize the prototype since it was previously
+ * registered */
+ d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
+ (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
+ } else {
+ d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
+ }
+ } else {
+ if (d_tn->ct.ctor != 0) {
+ /* Construct from scratch so no prototype exists */
+ d_tn->obj_init = 0;
+ } else if (d_tn->cct.cctor != 0) {
+ /* Now data initialize the prototype since it was previously
+ registered */
+ d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
+ (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
+ } else {
+ d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
+ }
+ }
+ }
+ } else {
+ struct shared_common **lnk_tn;
+
+ d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
+ d_tn->gbl_addr = pc_addr;
+ d_tn->cmn_size = pc_size;
+ d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
+ /*
+ d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
+ zeroes the memory
+ d_tn->ct.ctor = 0;
+ d_tn->cct.cctor = 0;
+ d_tn->dt.dtor = 0;
+ d_tn->is_vec = FALSE;
+ d_tn->vec_len = 0L;
+ */
+ lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+ }
+
+ tn->cmn_size = d_tn->cmn_size;
+
+ if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
+ tn->par_addr = (void *)pc_addr;
+ } else {
+ tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
+ }
- /* +++++++++ END OF CRITICAL SECTION +++++++++ */
+ __kmp_release_lock(&__kmp_global_lock, gtid);
+/* +++++++++ END OF CRITICAL SECTION +++++++++ */
#ifdef USE_CHECKS_COMMON
- if (pc_size > d_tn->cmn_size) {
- KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%"
- KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
- pc_addr, pc_size, d_tn->cmn_size ) );
- KMP_FATAL( TPCommonBlocksInconsist );
- }
+ if (pc_size > d_tn->cmn_size) {
+ KC_TRACE(
+ 10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
+ " ,%" KMP_UINTPTR_SPEC ")\n",
+ pc_addr, pc_size, d_tn->cmn_size));
+ KMP_FATAL(TPCommonBlocksInconsist);
+ }
#endif /* USE_CHECKS_COMMON */
- tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]);
+ tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);
#ifdef KMP_TASK_COMMON_DEBUG
- if (*tt != 0) {
- KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
- gtid, pc_addr ) );
- }
+ if (*tt != 0) {
+ KC_TRACE(
+ 10,
+ ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
+ gtid, pc_addr));
+ }
#endif
- tn->next = *tt;
- *tt = tn;
+ tn->next = *tt;
+ *tt = tn;
#ifdef KMP_TASK_COMMON_DEBUG
- KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
- gtid, pc_addr ) );
- dump_list( );
+ KC_TRACE(10,
+ ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
+ gtid, pc_addr));
+ dump_list();
#endif
- /* Link the node into a simple list */
+ /* Link the node into a simple list */
- tn->link = __kmp_threads[ gtid ]->th.th_pri_head;
- __kmp_threads[ gtid ]->th.th_pri_head = tn;
+ tn->link = __kmp_threads[gtid]->th.th_pri_head;
+ __kmp_threads[gtid]->th.th_pri_head = tn;
#ifdef BUILD_TV
- __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr );
+ __kmp_tv_threadprivate_store(__kmp_threads[gtid], tn->gbl_addr, tn->par_addr);
#endif
- if( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) )
- return tn;
-
- /*
- * if C++ object with copy constructor, use it;
- * else if C++ object with constructor, use it for the non-master copies only;
- * else use pod_init and memcpy
- *
- * C++ constructors need to be called once for each non-master thread on allocate
- * C++ copy constructors need to be called once for each thread on allocate
- */
+ if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
+ return tn;
- /*
- * C++ object with constructors/destructors;
- * don't call constructors for master thread though
- */
- if (d_tn->is_vec) {
- if ( d_tn->ct.ctorv != 0) {
- (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len);
- } else if (d_tn->cct.cctorv != 0) {
- (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len);
- } else if (tn->par_addr != tn->gbl_addr) {
- __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
- }
- } else {
- if ( d_tn->ct.ctor != 0 ) {
- (void) (*d_tn->ct.ctor) (tn->par_addr);
- } else if (d_tn->cct.cctor != 0) {
- (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init);
- } else if (tn->par_addr != tn->gbl_addr) {
- __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
- }
- }
-/* !BUILD_OPENMP_C
- if (tn->par_addr != tn->gbl_addr)
- __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
+ /* if C++ object with copy constructor, use it;
+ * else if C++ object with constructor, use it for the non-master copies only;
+ * else use pod_init and memcpy
+ *
+ * C++ constructors need to be called once for each non-master thread on
+ * allocate
+ * C++ copy constructors need to be called once for each thread on allocate */
+
+ /* C++ object with constructors/destructors; don't call constructors for
+ master thread though */
+ if (d_tn->is_vec) {
+ if (d_tn->ct.ctorv != 0) {
+ (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
+ } else if (d_tn->cct.cctorv != 0) {
+ (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
+ } else if (tn->par_addr != tn->gbl_addr) {
+ __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
+ }
+ } else {
+ if (d_tn->ct.ctor != 0) {
+ (void)(*d_tn->ct.ctor)(tn->par_addr);
+ } else if (d_tn->cct.cctor != 0) {
+ (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
+ } else if (tn->par_addr != tn->gbl_addr) {
+ __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
+ }
+ }
+ /* !BUILD_OPENMP_C
+ if (tn->par_addr != tn->gbl_addr)
+ __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
- return tn;
+ return tn;
}
/* ------------------------------------------------------------------------ */
@@ -528,91 +502,95 @@ kmp_threadprivate_insert( int gtid, void
Register constructors and destructors for thread private data.
This function is called when executing in parallel, when we know the thread id.
*/
-void
-__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor)
-{
- struct shared_common *d_tn, **lnk_tn;
+void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
+ kmpc_cctor cctor, kmpc_dtor dtor) {
+ struct shared_common *d_tn, **lnk_tn;
- KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) );
+ KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));
#ifdef USE_CHECKS_COMMON
- /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
- KMP_ASSERT( cctor == 0);
+ /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
+ KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */
- /* Only the global data table exists. */
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data );
-
- if (d_tn == 0) {
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
- d_tn->gbl_addr = data;
-
- d_tn->ct.ctor = ctor;
- d_tn->cct.cctor = cctor;
- d_tn->dt.dtor = dtor;
-/*
- d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->vec_len = 0L;
- d_tn->obj_init = 0;
- d_tn->pod_init = 0;
-*/
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
- }
-}
+ /* Only the global data table exists. */
+ d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);
-void *
-__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size)
-{
- void *ret;
- struct private_common *tn;
+ if (d_tn == 0) {
+ d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
+ d_tn->gbl_addr = data;
+
+ d_tn->ct.ctor = ctor;
+ d_tn->cct.cctor = cctor;
+ d_tn->dt.dtor = dtor;
+ /*
+ d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate
+ zeroes the memory
+ d_tn->vec_len = 0L;
+ d_tn->obj_init = 0;
+ d_tn->pod_init = 0;
+ */
+ lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+ }
+}
+
+void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
+ size_t size) {
+ void *ret;
+ struct private_common *tn;
- KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) );
+ KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));
#ifdef USE_CHECKS_COMMON
- if (! __kmp_init_serial)
- KMP_FATAL( RTLNotInitialized );
+ if (!__kmp_init_serial)
+ KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */
- if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! __kmp_foreign_tp ) {
- /* The parallel address will NEVER overlap with the data_address */
- /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */
-
- KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) );
- kmp_threadprivate_insert_private_data( global_tid, data, data, size );
-
- ret = data;
- }
- else {
- KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
- global_tid, data ) );
- tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data );
+ if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
+ /* The parallel address will NEVER overlap with the data_address */
+ /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
+ * data_address; use data_address = data */
+
+ KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
+ global_tid));
+ kmp_threadprivate_insert_private_data(global_tid, data, data, size);
+
+ ret = data;
+ } else {
+ KC_TRACE(
+ 50,
+ ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
+ global_tid, data));
+ tn = __kmp_threadprivate_find_task_common(
+ __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);
- if ( tn ) {
- KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) );
+ if (tn) {
+ KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
- if ((size_t) size > tn->cmn_size) {
- KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
- data, size, tn->cmn_size ) );
- KMP_FATAL( TPCommonBlocksInconsist );
- }
+ if ((size_t)size > tn->cmn_size) {
+ KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
+ " ,%" KMP_UINTPTR_SPEC ")\n",
+ data, size, tn->cmn_size));
+ KMP_FATAL(TPCommonBlocksInconsist);
+ }
#endif /* USE_CHECKS_COMMON */
- }
- else {
- /* The parallel address will NEVER overlap with the data_address */
- /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */
- KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) );
- tn = kmp_threadprivate_insert( global_tid, data, data, size );
- }
-
- ret = tn->par_addr;
+ } else {
+ /* The parallel address will NEVER overlap with the data_address */
+ /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
+ * data_address = data */
+ KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
+ tn = kmp_threadprivate_insert(global_tid, data, data, size);
}
- KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
- global_tid, ret ) );
- return ret;
+ ret = tn->par_addr;
+ }
+ KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
+ global_tid, ret));
+
+ return ret;
}
/*!
@@ -627,62 +605,63 @@ __kmpc_threadprivate(ident_t *loc, kmp_i
Allocate private storage for threadprivate data.
*/
void *
-__kmpc_threadprivate_cached(
- ident_t * loc,
- kmp_int32 global_tid, // gtid.
- void * data, // Pointer to original global variable.
- size_t size, // Size of original global variable.
- void *** cache
-) {
- KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %"
- KMP_SIZE_T_SPEC "\n",
- global_tid, *cache, data, size ) );
-
- if ( TCR_PTR(*cache) == 0) {
- __kmp_acquire_lock( & __kmp_global_lock, global_tid );
-
- if ( TCR_PTR(*cache) == 0) {
- __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
- __kmp_tp_cached = 1;
- __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
- void ** my_cache;
- KMP_ITT_IGNORE(
- my_cache = (void**)
- __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t ));
- );
- // No need to zero the allocated memory; __kmp_allocate does that.
- KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n",
- global_tid, my_cache ) );
-
- /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */
- /* Add address of mycache to linked list for cleanup later */
- kmp_cached_addr_t *tp_cache_addr;
-
- tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity];
- tp_cache_addr -> addr = my_cache;
- tp_cache_addr -> next = __kmp_threadpriv_cache_list;
- __kmp_threadpriv_cache_list = tp_cache_addr;
+__kmpc_threadprivate_cached(ident_t *loc,
+ kmp_int32 global_tid, // gtid.
+ void *data, // Pointer to original global variable.
+ size_t size, // Size of original global variable.
+ void ***cache) {
+ KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
+ "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
+ global_tid, *cache, data, size));
+
+ if (TCR_PTR(*cache) == 0) {
+ __kmp_acquire_lock(&__kmp_global_lock, global_tid);
+
+ if (TCR_PTR(*cache) == 0) {
+ __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
+ __kmp_tp_cached = 1;
+ __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
+ void **my_cache;
+ KMP_ITT_IGNORE(
+ my_cache = (void **)__kmp_allocate(
+ sizeof(void *) * __kmp_tp_capacity + sizeof(kmp_cached_addr_t)););
+ // No need to zero the allocated memory; __kmp_allocate does that.
+ KC_TRACE(
+ 50,
+ ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n",
+ global_tid, my_cache));
+
+ /* TODO: free all this memory in __kmp_common_destroy using
+ * __kmp_threadpriv_cache_list */
+ /* Add address of mycache to linked list for cleanup later */
+ kmp_cached_addr_t *tp_cache_addr;
+
+ tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
+ tp_cache_addr->addr = my_cache;
+ tp_cache_addr->next = __kmp_threadpriv_cache_list;
+ __kmp_threadpriv_cache_list = tp_cache_addr;
+
+ KMP_MB();
+
+ TCW_PTR(*cache, my_cache);
+
+ KMP_MB();
+ }
+
+ __kmp_release_lock(&__kmp_global_lock, global_tid);
+ }
+
+ void *ret;
+ if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
+ ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);
+
+ TCW_PTR((*cache)[global_tid], ret);
+ }
+ KC_TRACE(10,
+ ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
+ global_tid, ret));
- KMP_MB();
-
- TCW_PTR( *cache, my_cache);
-
- KMP_MB();
- }
-
- __kmp_release_lock( & __kmp_global_lock, global_tid );
- }
-
- void *ret;
- if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) {
- ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size);
-
- TCW_PTR( (*cache)[ global_tid ], ret);
- }
- KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
- global_tid, ret ) );
-
- return ret;
+ return ret;
}
/*!
@@ -695,39 +674,40 @@ __kmpc_threadprivate_cached(
@param vector_length length of the vector (bytes or elements?)
Register vector constructors and destructors for thread private data.
*/
-void
-__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor,
- kmpc_cctor_vec cctor, kmpc_dtor_vec dtor,
- size_t vector_length )
-{
- struct shared_common *d_tn, **lnk_tn;
+void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
+ kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
+ kmpc_dtor_vec dtor,
+ size_t vector_length) {
+ struct shared_common *d_tn, **lnk_tn;
- KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) );
+ KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));
#ifdef USE_CHECKS_COMMON
- /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
- KMP_ASSERT( cctor == 0);
+ /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
+ KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */
- d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
- -1, data ); /* Only the global data table exists. */
-
- if (d_tn == 0) {
- d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
- d_tn->gbl_addr = data;
-
- d_tn->ct.ctorv = ctor;
- d_tn->cct.cctorv = cctor;
- d_tn->dt.dtorv = dtor;
- d_tn->is_vec = TRUE;
- d_tn->vec_len = (size_t) vector_length;
-/*
- d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory
- d_tn->pod_init = 0;
-*/
- lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
-
- d_tn->next = *lnk_tn;
- *lnk_tn = d_tn;
- }
+ d_tn = __kmp_find_shared_task_common(
+ &__kmp_threadprivate_d_table, -1,
+ data); /* Only the global data table exists. */
+
+ if (d_tn == 0) {
+ d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
+ d_tn->gbl_addr = data;
+
+ d_tn->ct.ctorv = ctor;
+ d_tn->cct.cctorv = cctor;
+ d_tn->dt.dtorv = dtor;
+ d_tn->is_vec = TRUE;
+ d_tn->vec_len = (size_t)vector_length;
+ /*
+ d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
+ zeroes the memory
+ d_tn->pod_init = 0;
+ */
+ lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
+
+ d_tn->next = *lnk_tn;
+ *lnk_tn = d_tn;
+ }
}
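
The __kmpc_threadprivate_cached changes above reformat a double-checked initialization: the per-variable cache array is allocated at most once under __kmp_global_lock, after which each thread fills its own slot via __kmpc_threadprivate(). A hedged standalone sketch of that shape follows; the names (threadprivate_cached, make_private_copy, global_lock) and the use of std::atomic/std::mutex/new in place of TCR_PTR/TCW_PTR, __kmp_global_lock and __kmp_allocate are illustrative assumptions, not the runtime's API.

#include <atomic>
#include <mutex>

static std::mutex global_lock; // stands in for __kmp_global_lock

void *threadprivate_cached(std::atomic<void **> &cache, int gtid, int capacity,
                           void *(*make_private_copy)(int)) {
  if (cache.load(std::memory_order_acquire) == nullptr) { // first check, no lock
    std::lock_guard<std::mutex> g(global_lock);
    if (cache.load(std::memory_order_relaxed) == nullptr) { // second check, locked
      void **slots = new void *[capacity](); // zero-initialized, like __kmp_allocate
      cache.store(slots, std::memory_order_release); // publish the cache once
    }
  }
  void **slots = cache.load(std::memory_order_acquire);
  if (slots[gtid] == nullptr)        // slot is written only by its own thread
    slots[gtid] = make_private_copy(gtid); // analogue of __kmpc_threadprivate()
  return slots[gtid];
}

Each gtid writes only its own slot, so the slot fill needs no extra locking; this mirrors the TCR_PTR((*cache)[global_tid]) fast path in the hunk above.
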
Modified: openmp/trunk/runtime/src/kmp_utility.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_utility.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_utility.cpp (original)
+++ openmp/trunk/runtime/src/kmp_utility.cpp Fri May 12 13:01:32 2017
@@ -14,416 +14,396 @@
#include "kmp.h"
-#include "kmp_wrapper_getpid.h"
+#include "kmp_i18n.h"
#include "kmp_str.h"
+#include "kmp_wrapper_getpid.h"
#include <float.h>
-#include "kmp_i18n.h"
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
static const char *unknown = "unknown";
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
-/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */
-/* the debugging package has not been initialized yet, and only "0" will print */
-/* debugging output since the environment variables have not been read. */
+/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then
+ the debugging package has not been initialized yet, and only "0" will print
+ debugging output since the environment variables have not been read. */
#ifdef KMP_DEBUG
static int trace_level = 5;
#endif
-/*
- * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
+/* LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
* APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
* PHY_ID = APIC_ID >> LOG_ID_BITS
*/
-int
-__kmp_get_physical_id( int log_per_phy, int apic_id )
-{
- int index_lsb, index_msb, temp;
-
- if (log_per_phy > 1) {
- index_lsb = 0;
- index_msb = 31;
-
- temp = log_per_phy;
- while ( (temp & 1) == 0 ) {
- temp >>= 1;
- index_lsb++;
- }
-
- temp = log_per_phy;
- while ( (temp & 0x80000000)==0 ) {
- temp <<= 1;
- index_msb--;
- }
+int __kmp_get_physical_id(int log_per_phy, int apic_id) {
+ int index_lsb, index_msb, temp;
- /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
- if (index_lsb != index_msb) index_msb++;
+ if (log_per_phy > 1) {
+ index_lsb = 0;
+ index_msb = 31;
+
+ temp = log_per_phy;
+ while ((temp & 1) == 0) {
+ temp >>= 1;
+ index_lsb++;
+ }
- return ( (int) (apic_id >> index_msb) );
- }
+ temp = log_per_phy;
+ while ((temp & 0x80000000) == 0) {
+ temp <<= 1;
+ index_msb--;
+ }
- return apic_id;
-}
+ /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
+ if (index_lsb != index_msb)
+ index_msb++;
+
+ return ((int)(apic_id >> index_msb));
+ }
+ return apic_id;
+}
/*
* LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
* APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID
* LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 )
*/
-int
-__kmp_get_logical_id( int log_per_phy, int apic_id )
-{
- unsigned current_bit;
- int bits_seen;
-
- if (log_per_phy <= 1) return ( 0 );
-
- bits_seen = 0;
-
- for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
- if ( log_per_phy & current_bit ) {
- log_per_phy &= ~current_bit;
- bits_seen++;
- }
- }
-
- /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
- if (bits_seen == 1) {
- current_bit >>= 1;
- }
+int __kmp_get_logical_id(int log_per_phy, int apic_id) {
+ unsigned current_bit;
+ int bits_seen;
+
+ if (log_per_phy <= 1)
+ return (0);
+
+ bits_seen = 0;
+
+ for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
+ if (log_per_phy & current_bit) {
+ log_per_phy &= ~current_bit;
+ bits_seen++;
+ }
+ }
- return ( (int) ((current_bit - 1) & apic_id) );
-}
+ /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
+ if (bits_seen == 1) {
+ current_bit >>= 1;
+ }
+ return ((int)((current_bit - 1) & apic_id));
+}
-static
-kmp_uint64
-__kmp_parse_frequency( // R: Frequency in Hz.
- char const * frequency // I: Float number and unit: MHz, GHz, or TGz.
-) {
-
- double value = 0.0;
- char const * unit = NULL;
- kmp_uint64 result = 0; /* Zero is a better unknown value than all ones. */
+static kmp_uint64 __kmp_parse_frequency( // R: Frequency in Hz.
+    char const *frequency // I: Float number and unit: MHz, GHz, or THz.
+ ) {
+
+ double value = 0.0;
+ char const *unit = NULL;
+ kmp_uint64 result = 0; /* Zero is a better unknown value than all ones. */
- if ( frequency == NULL ) {
- return result;
- }; // if
- value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char const *".
- if ( 0 < value && value <= DBL_MAX ) { // Good value (not overflow, underflow, etc).
- if ( strcmp( unit, "MHz" ) == 0 ) {
- value = value * 1.0E+6;
- } else if ( strcmp( unit, "GHz" ) == 0 ) {
- value = value * 1.0E+9;
- } else if ( strcmp( unit, "THz" ) == 0 ) {
- value = value * 1.0E+12;
- } else { // Wrong unit.
- return result;
- }; // if
- result = value;
- }; // if
+ if (frequency == NULL) {
return result;
+ }; // if
+ value = strtod(frequency,
+ (char **)&unit); // strtod() does not like "char const *".
+ if (0 < value &&
+ value <= DBL_MAX) { // Good value (not overflow, underflow, etc).
+ if (strcmp(unit, "MHz") == 0) {
+ value = value * 1.0E+6;
+ } else if (strcmp(unit, "GHz") == 0) {
+ value = value * 1.0E+9;
+ } else if (strcmp(unit, "THz") == 0) {
+ value = value * 1.0E+12;
+ } else { // Wrong unit.
+ return result;
+ }; // if
+ result = value;
+ }; // if
+ return result;
}; // func __kmp_parse_frequency
-void
-__kmp_query_cpuid( kmp_cpuinfo_t *p )
-{
- struct kmp_cpuid buf;
- int max_arg;
- int log_per_phy;
+void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
+ struct kmp_cpuid buf;
+ int max_arg;
+ int log_per_phy;
#ifdef KMP_DEBUG
- int cflush_size;
+ int cflush_size;
#endif
- p->initialized = 1;
+ p->initialized = 1;
- p->sse2 = 1; // Assume SSE2 by default.
+ p->sse2 = 1; // Assume SSE2 by default.
- __kmp_x86_cpuid( 0, 0, &buf );
+ __kmp_x86_cpuid(0, 0, &buf);
- KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
- 0, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+ KA_TRACE(trace_level,
+ ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", 0,
+ buf.eax, buf.ebx, buf.ecx, buf.edx));
- max_arg = buf.eax;
+ max_arg = buf.eax;
- p->apic_id = -1;
+ p->apic_id = -1;
- if (max_arg >= 1) {
- int i;
- kmp_uint32 t, data[ 4 ];
+ if (max_arg >= 1) {
+ int i;
+ kmp_uint32 t, data[4];
- __kmp_x86_cpuid( 1, 0, &buf );
- KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
- 1, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+ __kmp_x86_cpuid(1, 0, &buf);
+ KA_TRACE(trace_level,
+ ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+ 1, buf.eax, buf.ebx, buf.ecx, buf.edx));
- {
-#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask ) )
+ {
+#define get_value(reg, lo, mask) (((reg) >> (lo)) & (mask))
- p->signature = buf.eax;
- p->family = get_value( buf.eax, 20, 0xff ) + get_value( buf.eax, 8, 0x0f );
- p->model = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f );
- p->stepping = get_value( buf.eax, 0, 0x0f );
+ p->signature = buf.eax;
+ p->family = get_value(buf.eax, 20, 0xff) + get_value(buf.eax, 8, 0x0f);
+ p->model =
+ (get_value(buf.eax, 16, 0x0f) << 4) + get_value(buf.eax, 4, 0x0f);
+ p->stepping = get_value(buf.eax, 0, 0x0f);
#undef get_value
- KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) );
- }
+ KA_TRACE(trace_level, (" family = %d, model = %d, stepping = %d\n",
+ p->family, p->model, p->stepping));
+ }
- for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) {
- data[ i ] = (t & 0xff);
- }; // for
+ for (t = buf.ebx, i = 0; i < 4; t >>= 8, ++i) {
+ data[i] = (t & 0xff);
+ }; // for
- p->sse2 = ( buf.edx >> 26 ) & 1;
+ p->sse2 = (buf.edx >> 26) & 1;
#ifdef KMP_DEBUG
- if ( (buf.edx >> 4) & 1 ) {
- /* TSC - Timestamp Counter Available */
- KA_TRACE( trace_level, (" TSC" ) );
- }
- if ( (buf.edx >> 8) & 1 ) {
- /* CX8 - CMPXCHG8B Instruction Available */
- KA_TRACE( trace_level, (" CX8" ) );
- }
- if ( (buf.edx >> 9) & 1 ) {
- /* APIC - Local APIC Present (multi-processor operation support */
- KA_TRACE( trace_level, (" APIC" ) );
- }
- if ( (buf.edx >> 15) & 1 ) {
- /* CMOV - Conditional MOVe Instruction Available */
- KA_TRACE( trace_level, (" CMOV" ) );
- }
- if ( (buf.edx >> 18) & 1 ) {
- /* PSN - Processor Serial Number Available */
- KA_TRACE( trace_level, (" PSN" ) );
- }
- if ( (buf.edx >> 19) & 1 ) {
- /* CLFULSH - Cache Flush Instruction Available */
- cflush_size = data[ 1 ] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
- KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) );
-
- }
- if ( (buf.edx >> 21) & 1 ) {
- /* DTES - Debug Trace & EMON Store */
- KA_TRACE( trace_level, (" DTES" ) );
- }
- if ( (buf.edx >> 22) & 1 ) {
- /* ACPI - ACPI Support Available */
- KA_TRACE( trace_level, (" ACPI" ) );
- }
- if ( (buf.edx >> 23) & 1 ) {
- /* MMX - Multimedia Extensions */
- KA_TRACE( trace_level, (" MMX" ) );
- }
- if ( (buf.edx >> 25) & 1 ) {
- /* SSE - SSE Instructions */
- KA_TRACE( trace_level, (" SSE" ) );
- }
- if ( (buf.edx >> 26) & 1 ) {
- /* SSE2 - SSE2 Instructions */
- KA_TRACE( trace_level, (" SSE2" ) );
- }
- if ( (buf.edx >> 27) & 1 ) {
- /* SLFSNP - Self-Snooping Cache */
- KA_TRACE( trace_level, (" SLFSNP" ) );
- }
+ if ((buf.edx >> 4) & 1) {
+ /* TSC - Timestamp Counter Available */
+ KA_TRACE(trace_level, (" TSC"));
+ }
+ if ((buf.edx >> 8) & 1) {
+ /* CX8 - CMPXCHG8B Instruction Available */
+ KA_TRACE(trace_level, (" CX8"));
+ }
+ if ((buf.edx >> 9) & 1) {
+      /* APIC - Local APIC Present (multi-processor operation support) */
+ KA_TRACE(trace_level, (" APIC"));
+ }
+ if ((buf.edx >> 15) & 1) {
+ /* CMOV - Conditional MOVe Instruction Available */
+ KA_TRACE(trace_level, (" CMOV"));
+ }
+ if ((buf.edx >> 18) & 1) {
+ /* PSN - Processor Serial Number Available */
+ KA_TRACE(trace_level, (" PSN"));
+ }
+ if ((buf.edx >> 19) & 1) {
+      /* CLFLUSH - Cache Flush Instruction Available */
+ cflush_size =
+ data[1] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
+ KA_TRACE(trace_level, (" CLFLUSH(%db)", cflush_size));
+ }
+ if ((buf.edx >> 21) & 1) {
+ /* DTES - Debug Trace & EMON Store */
+ KA_TRACE(trace_level, (" DTES"));
+ }
+ if ((buf.edx >> 22) & 1) {
+ /* ACPI - ACPI Support Available */
+ KA_TRACE(trace_level, (" ACPI"));
+ }
+ if ((buf.edx >> 23) & 1) {
+ /* MMX - Multimedia Extensions */
+ KA_TRACE(trace_level, (" MMX"));
+ }
+ if ((buf.edx >> 25) & 1) {
+ /* SSE - SSE Instructions */
+ KA_TRACE(trace_level, (" SSE"));
+ }
+ if ((buf.edx >> 26) & 1) {
+ /* SSE2 - SSE2 Instructions */
+ KA_TRACE(trace_level, (" SSE2"));
+ }
+ if ((buf.edx >> 27) & 1) {
+ /* SLFSNP - Self-Snooping Cache */
+ KA_TRACE(trace_level, (" SLFSNP"));
+ }
#endif /* KMP_DEBUG */
- if ( (buf.edx >> 28) & 1 ) {
- /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
- log_per_phy = data[ 2 ];
- p->apic_id = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */
- KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) );
+ if ((buf.edx >> 28) & 1) {
+ /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
+ log_per_phy = data[2];
+ p->apic_id = data[3]; /* Bits 31-24: Processor Initial APIC ID (X) */
+ KA_TRACE(trace_level, (" HT(%d TPUs)", log_per_phy));
- if( log_per_phy > 1 ) {
- /* default to 1k FOR JT-enabled processors (4k on OS X*) */
+ if (log_per_phy > 1) {
+/* default to 1k FOR JT-enabled processors (4k on OS X*) */
#if KMP_OS_DARWIN
- p->cpu_stackoffset = 4 * 1024;
+ p->cpu_stackoffset = 4 * 1024;
#else
- p->cpu_stackoffset = 1 * 1024;
+ p->cpu_stackoffset = 1 * 1024;
#endif
- }
+ }
- p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id );
- p->logical_id = __kmp_get_logical_id( log_per_phy, p->apic_id );
- }
+ p->physical_id = __kmp_get_physical_id(log_per_phy, p->apic_id);
+ p->logical_id = __kmp_get_logical_id(log_per_phy, p->apic_id);
+ }
#ifdef KMP_DEBUG
- if ( (buf.edx >> 29) & 1 ) {
- /* ATHROTL - Automatic Throttle Control */
- KA_TRACE( trace_level, (" ATHROTL" ) );
- }
- KA_TRACE( trace_level, (" ]\n" ) );
+ if ((buf.edx >> 29) & 1) {
+ /* ATHROTL - Automatic Throttle Control */
+ KA_TRACE(trace_level, (" ATHROTL"));
+ }
+ KA_TRACE(trace_level, (" ]\n"));
- for (i = 2; i <= max_arg; ++i) {
- __kmp_x86_cpuid( i, 0, &buf );
- KA_TRACE( trace_level,
- ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
- i, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
- }
+ for (i = 2; i <= max_arg; ++i) {
+ __kmp_x86_cpuid(i, 0, &buf);
+ KA_TRACE(trace_level,
+ ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+ i, buf.eax, buf.ebx, buf.ecx, buf.edx));
+ }
#endif
#if KMP_USE_ADAPTIVE_LOCKS
- p->rtm = 0;
- if (max_arg > 7)
- {
- /* RTM bit CPUID.07:EBX, bit 11 */
- __kmp_x86_cpuid(7, 0, &buf);
- p->rtm = (buf.ebx >> 11) & 1;
- KA_TRACE( trace_level, (" RTM" ) );
- }
+ p->rtm = 0;
+ if (max_arg > 7) {
+ /* RTM bit CPUID.07:EBX, bit 11 */
+ __kmp_x86_cpuid(7, 0, &buf);
+ p->rtm = (buf.ebx >> 11) & 1;
+ KA_TRACE(trace_level, (" RTM"));
+ }
#endif
- }; // if
+ }; // if
- { // Parse CPU brand string for frequency, saving the string for later.
- int i;
- kmp_cpuid_t * base = (kmp_cpuid_t *)&p->name[0];
-
- // Get CPU brand string.
- for ( i = 0; i < 3; ++ i ) {
- __kmp_x86_cpuid( 0x80000002 + i, 0, base+i );
- }; // for
- p->name[ sizeof(p->name) - 1 ] = 0; // Just in case. ;-)
- KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", &p->name[0] ) );
-
- // Parse frequency.
- p->frequency = __kmp_parse_frequency( strrchr( &p->name[0], ' ' ) );
- KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) );
- }
+ { // Parse CPU brand string for frequency, saving the string for later.
+ int i;
+ kmp_cpuid_t *base = (kmp_cpuid_t *)&p->name[0];
+
+ // Get CPU brand string.
+ for (i = 0; i < 3; ++i) {
+ __kmp_x86_cpuid(0x80000002 + i, 0, base + i);
+ }; // for
+ p->name[sizeof(p->name) - 1] = 0; // Just in case. ;-)
+ KA_TRACE(trace_level, ("cpu brand string: \"%s\"\n", &p->name[0]));
+
+ // Parse frequency.
+ p->frequency = __kmp_parse_frequency(strrchr(&p->name[0], ' '));
+ KA_TRACE(trace_level,
+ ("cpu frequency from brand string: %" KMP_UINT64_SPEC "\n",
+ p->frequency));
+ }
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
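A minimal sketch of the CPUID feature-bit pattern used throughout the function
above: query a leaf with __kmp_x86_cpuid() and test a single bit of the
returned register. It mirrors the RTM check in this hunk; the surrounding
setup and variable names are illustrative only.

    kmp_cpuid_t buf;
    __kmp_x86_cpuid(7, 0, &buf);        // leaf 7, subleaf 0
    int has_rtm = (buf.ebx >> 11) & 1;  // CPUID.07:EBX bit 11 -> RTM support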
-/* ------------------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------------------ */
-
-void
-__kmp_expand_host_name( char *buffer, size_t size )
-{
- KMP_DEBUG_ASSERT(size >= sizeof(unknown));
+void __kmp_expand_host_name(char *buffer, size_t size) {
+ KMP_DEBUG_ASSERT(size >= sizeof(unknown));
#if KMP_OS_WINDOWS
- {
- DWORD s = size;
+ {
+ DWORD s = size;
- if (! GetComputerNameA( buffer, & s ))
- KMP_STRCPY_S( buffer, size, unknown );
- }
+ if (!GetComputerNameA(buffer, &s))
+ KMP_STRCPY_S(buffer, size, unknown);
+ }
#else
- buffer[size - 2] = 0;
- if (gethostname( buffer, size ) || buffer[size - 2] != 0)
- KMP_STRCPY_S( buffer, size, unknown );
+ buffer[size - 2] = 0;
+ if (gethostname(buffer, size) || buffer[size - 2] != 0)
+ KMP_STRCPY_S(buffer, size, unknown);
#endif
}
/* Expand the meta characters in the filename:
- *
* Currently defined characters are:
- *
* %H the hostname
* %P the number of threads used.
* %I the unique identifier for this run.
*/
-void
-__kmp_expand_file_name( char *result, size_t rlen, char *pattern )
-{
- char *pos = result, *end = result + rlen - 1;
- char buffer[256];
- int default_cpu_width = 1;
- int snp_result;
-
- KMP_DEBUG_ASSERT(rlen > 0);
- *end = 0;
- {
- int i;
- for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width);
- }
-
- if (pattern != NULL) {
- while (*pattern != '\0' && pos < end) {
- if (*pattern != '%') {
- *pos++ = *pattern++;
- } else {
- char *old_pattern = pattern;
- int width = 1;
- int cpu_width = default_cpu_width;
-
- ++pattern;
-
- if (*pattern >= '0' && *pattern <= '9') {
- width = 0;
- do {
- width = (width * 10) + *pattern++ - '0';
- } while (*pattern >= '0' && *pattern <= '9');
- if (width < 0 || width > 1024)
- width = 1;
-
- cpu_width = width;
- }
-
- switch (*pattern) {
- case 'H':
- case 'h':
- {
- __kmp_expand_host_name( buffer, sizeof( buffer ) );
- KMP_STRNCPY( pos, buffer, end - pos + 1);
- if(*end == 0) {
- while ( *pos )
- ++pos;
- ++pattern;
- } else
- pos = end;
- }
- break;
- case 'P':
- case 'p':
- {
- snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth );
- if(snp_result >= 0 && snp_result <= end - pos) {
- while ( *pos )
- ++pos;
- ++pattern;
- } else
- pos = end;
- }
- break;
- case 'I':
- case 'i':
- {
- pid_t id = getpid();
- snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id );
- if(snp_result >= 0 && snp_result <= end - pos) {
- while ( *pos )
- ++pos;
- ++pattern;
- } else
- pos = end;
- break;
- }
- case '%':
- {
- *pos++ = '%';
- ++pattern;
- break;
- }
- default:
- {
- *pos++ = '%';
- pattern = old_pattern + 1;
- break;
- }
- }
- }
- }
- /* TODO: How do we get rid of this? */
- if(*pattern != '\0')
- KMP_FATAL( FileNameTooLong );
+void __kmp_expand_file_name(char *result, size_t rlen, char *pattern) {
+ char *pos = result, *end = result + rlen - 1;
+ char buffer[256];
+ int default_cpu_width = 1;
+ int snp_result;
+
+ KMP_DEBUG_ASSERT(rlen > 0);
+ *end = 0;
+ {
+ int i;
+ for (i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width)
+ ;
+ }
+
+ if (pattern != NULL) {
+ while (*pattern != '\0' && pos < end) {
+ if (*pattern != '%') {
+ *pos++ = *pattern++;
+ } else {
+ char *old_pattern = pattern;
+ int width = 1;
+ int cpu_width = default_cpu_width;
+
+ ++pattern;
+
+ if (*pattern >= '0' && *pattern <= '9') {
+ width = 0;
+ do {
+ width = (width * 10) + *pattern++ - '0';
+ } while (*pattern >= '0' && *pattern <= '9');
+ if (width < 0 || width > 1024)
+ width = 1;
+
+ cpu_width = width;
+ }
+
+ switch (*pattern) {
+ case 'H':
+ case 'h': {
+ __kmp_expand_host_name(buffer, sizeof(buffer));
+ KMP_STRNCPY(pos, buffer, end - pos + 1);
+ if (*end == 0) {
+ while (*pos)
+ ++pos;
+ ++pattern;
+ } else
+ pos = end;
+ } break;
+ case 'P':
+ case 'p': {
+ snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", cpu_width,
+ __kmp_dflt_team_nth);
+ if (snp_result >= 0 && snp_result <= end - pos) {
+ while (*pos)
+ ++pos;
+ ++pattern;
+ } else
+ pos = end;
+ } break;
+ case 'I':
+ case 'i': {
+ pid_t id = getpid();
+ snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id);
+ if (snp_result >= 0 && snp_result <= end - pos) {
+ while (*pos)
+ ++pos;
+ ++pattern;
+ } else
+ pos = end;
+ break;
+ }
+ case '%': {
+ *pos++ = '%';
+ ++pattern;
+ break;
+ }
+ default: {
+ *pos++ = '%';
+ pattern = old_pattern + 1;
+ break;
+ }
+ }
+ }
}
+ /* TODO: How do we get rid of this? */
+ if (*pattern != '\0')
+ KMP_FATAL(FileNameTooLong);
+ }
- *pos = '\0';
+ *pos = '\0';
}
-
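A minimal usage sketch of the %-expansion implemented above; the pattern
string, hostname, team size, and pid mentioned in the comments are assumed
values for illustration, not taken from any real run.

    char out[256];
    char pattern[] = "stats.%H.%3P.%i.log"; // hypothetical pattern
    __kmp_expand_file_name(out, sizeof(out), pattern);
    // With a hostname of "node17", __kmp_dflt_team_nth == 8 and pid 4242,
    // out would read roughly "stats.node17.008.4242.log": %H expands to the
    // host name, %P to the zero-padded team size, %I/%i to the pid, and %%
    // to a literal '%'.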
Modified: openmp/trunk/runtime/src/kmp_version.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_version.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_version.cpp (original)
+++ openmp/trunk/runtime/src/kmp_version.cpp Fri May 12 13:01:32 2017
@@ -18,199 +18,191 @@
#include "kmp_version.h"
// Replace with snapshot date YYYYMMDD for promotion build.
-#define KMP_VERSION_BUILD 20140926
+#define KMP_VERSION_BUILD 20140926
// Helper macros to convert value of macro to string literal.
-#define _stringer( x ) #x
-#define stringer( x ) _stringer( x )
+#define _stringer(x) #x
+#define stringer(x) _stringer(x)
// Detect compiler.
#if KMP_COMPILER_ICC
- #if __INTEL_COMPILER == 1010
- #define KMP_COMPILER "Intel C++ Compiler 10.1"
- #elif __INTEL_COMPILER == 1100
- #define KMP_COMPILER "Intel C++ Compiler 11.0"
- #elif __INTEL_COMPILER == 1110
- #define KMP_COMPILER "Intel C++ Compiler 11.1"
- #elif __INTEL_COMPILER == 1200
- #define KMP_COMPILER "Intel C++ Compiler 12.0"
- #elif __INTEL_COMPILER == 1210
- #define KMP_COMPILER "Intel C++ Compiler 12.1"
- #elif __INTEL_COMPILER == 1300
- #define KMP_COMPILER "Intel C++ Compiler 13.0"
- #elif __INTEL_COMPILER == 1310
- #define KMP_COMPILER "Intel C++ Compiler 13.1"
- #elif __INTEL_COMPILER == 1400
- #define KMP_COMPILER "Intel C++ Compiler 14.0"
- #elif __INTEL_COMPILER == 1410
- #define KMP_COMPILER "Intel C++ Compiler 14.1"
- #elif __INTEL_COMPILER == 1500
- #define KMP_COMPILER "Intel C++ Compiler 15.0"
- #elif __INTEL_COMPILER == 1600
- #define KMP_COMPILER "Intel C++ Compiler 16.0"
- #elif __INTEL_COMPILER == 1700
- #define KMP_COMPILER "Intel C++ Compiler 17.0"
- #elif __INTEL_COMPILER == 9998
- #define KMP_COMPILER "Intel C++ Compiler mainline"
- #elif __INTEL_COMPILER == 9999
- #define KMP_COMPILER "Intel C++ Compiler mainline"
- #endif
+#if __INTEL_COMPILER == 1010
+#define KMP_COMPILER "Intel C++ Compiler 10.1"
+#elif __INTEL_COMPILER == 1100
+#define KMP_COMPILER "Intel C++ Compiler 11.0"
+#elif __INTEL_COMPILER == 1110
+#define KMP_COMPILER "Intel C++ Compiler 11.1"
+#elif __INTEL_COMPILER == 1200
+#define KMP_COMPILER "Intel C++ Compiler 12.0"
+#elif __INTEL_COMPILER == 1210
+#define KMP_COMPILER "Intel C++ Compiler 12.1"
+#elif __INTEL_COMPILER == 1300
+#define KMP_COMPILER "Intel C++ Compiler 13.0"
+#elif __INTEL_COMPILER == 1310
+#define KMP_COMPILER "Intel C++ Compiler 13.1"
+#elif __INTEL_COMPILER == 1400
+#define KMP_COMPILER "Intel C++ Compiler 14.0"
+#elif __INTEL_COMPILER == 1410
+#define KMP_COMPILER "Intel C++ Compiler 14.1"
+#elif __INTEL_COMPILER == 1500
+#define KMP_COMPILER "Intel C++ Compiler 15.0"
+#elif __INTEL_COMPILER == 1600
+#define KMP_COMPILER "Intel C++ Compiler 16.0"
+#elif __INTEL_COMPILER == 1700
+#define KMP_COMPILER "Intel C++ Compiler 17.0"
+#elif __INTEL_COMPILER == 9998
+#define KMP_COMPILER "Intel C++ Compiler mainline"
+#elif __INTEL_COMPILER == 9999
+#define KMP_COMPILER "Intel C++ Compiler mainline"
+#endif
#elif KMP_COMPILER_CLANG
- #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ )
+#define KMP_COMPILER \
+ "Clang " stringer(__clang_major__) "." stringer(__clang_minor__)
#elif KMP_COMPILER_GCC
- #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ )
+#define KMP_COMPILER "GCC " stringer(__GNUC__) "." stringer(__GNUC_MINOR__)
#elif KMP_COMPILER_MSVC
- #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER )
+#define KMP_COMPILER "MSVC " stringer(_MSC_FULL_VER)
#endif
#ifndef KMP_COMPILER
- #warning "Unknown compiler"
- #define KMP_COMPILER "unknown compiler"
+#warning "Unknown compiler"
+#define KMP_COMPILER "unknown compiler"
#endif
// Detect library type (perf, stub).
#ifdef KMP_STUB
- #define KMP_LIB_TYPE "stub"
+#define KMP_LIB_TYPE "stub"
#else
- #define KMP_LIB_TYPE "performance"
+#define KMP_LIB_TYPE "performance"
#endif // KMP_LIB_TYPE
// Detect link type (static, dynamic).
#ifdef KMP_DYNAMIC_LIB
- #define KMP_LINK_TYPE "dynamic"
+#define KMP_LINK_TYPE "dynamic"
#else
- #define KMP_LINK_TYPE "static"
+#define KMP_LINK_TYPE "static"
#endif // KMP_LINK_TYPE
// Finally, define strings.
-#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")"
+#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")"
#define KMP_COPYRIGHT ""
int const __kmp_version_major = KMP_VERSION_MAJOR;
int const __kmp_version_minor = KMP_VERSION_MINOR;
int const __kmp_version_build = KMP_VERSION_BUILD;
int const __kmp_openmp_version =
- #if OMP_50_ENABLED
- 201611;
- #elif OMP_45_ENABLED
- 201511;
- #elif OMP_40_ENABLED
- 201307;
- #else
- 201107;
- #endif
-
-/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for a
- specific format some changes in the recognition routine there need to
- be made before this is changed.
-*/
-char const __kmp_copyright[] =
- KMP_VERSION_PREFIX KMP_LIBRARY
- " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR )
- "." stringer( KMP_VERSION_BUILD ) " "
- KMP_COPYRIGHT;
-
-char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT;
-char const __kmp_version_lib_ver[] = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD );
-char const __kmp_version_lib_type[] = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE;
-char const __kmp_version_link_type[] = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE;
-char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " "no_timestamp";
+#if OMP_50_ENABLED
+ 201611;
+#elif OMP_45_ENABLED
+ 201511;
+#elif OMP_40_ENABLED
+ 201307;
+#else
+ 201107;
+#endif
+
+/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for
+   a specific format; some changes in the recognition routine there need to be
+   made before this is changed. */
+char const __kmp_copyright[] = KMP_VERSION_PREFIX KMP_LIBRARY
+ " ver. " stringer(KMP_VERSION_MAJOR) "." stringer(
+ KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD) " " KMP_COPYRIGHT;
+
+char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT;
+char const __kmp_version_lib_ver[] =
+ KMP_VERSION_PREFIX "version: " stringer(KMP_VERSION_MAJOR) "." stringer(
+ KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD);
+char const __kmp_version_lib_type[] =
+ KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE;
+char const __kmp_version_link_type[] =
+ KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE;
+char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: "
+ "no_timestamp";
#if KMP_MIC2
- char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2";
+char const __kmp_version_target_env[] =
+ KMP_VERSION_PREFIX "target environment: MIC2";
#endif
-char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER;
+char const __kmp_version_build_compiler[] =
+ KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER;
-//
// Called at serial initialization time.
-//
static int __kmp_version_1_printed = FALSE;
-void
-__kmp_print_version_1( void )
-{
- if ( __kmp_version_1_printed ) {
- return;
- }; // if
- __kmp_version_1_printed = TRUE;
-
- #ifndef KMP_STUB
- kmp_str_buf_t buffer;
- __kmp_str_buf_init( & buffer );
- // Print version strings skipping initial magic.
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] );
- #if KMP_MIC
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] );
- #endif
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] );
- #if defined(KMP_GOMP_COMPAT)
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] );
- #endif /* defined(KMP_GOMP_COMPAT) */
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] );
- __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" ) );
- #ifdef KMP_DEBUG
- for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
- __kmp_str_buf_print(
- & buffer,
- "%s%s barrier branch bits: gather=%u, release=%u\n",
- KMP_VERSION_PREF_STR,
- __kmp_barrier_type_name[ i ],
- __kmp_barrier_gather_branch_bits[ i ],
- __kmp_barrier_release_branch_bits[ i ]
- ); // __kmp_str_buf_print
- }; // for i
- for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
- __kmp_str_buf_print(
- & buffer,
- "%s%s barrier pattern: gather=%s, release=%s\n",
- KMP_VERSION_PREF_STR,
- __kmp_barrier_type_name[ i ],
- __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ],
- __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ]
- ); // __kmp_str_buf_print
- }; // for i
- __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] );
- #endif
- __kmp_str_buf_print(
- & buffer,
- "%sthread affinity support: %s\n",
- KMP_VERSION_PREF_STR,
- #if KMP_AFFINITY_SUPPORTED
- (
- KMP_AFFINITY_CAPABLE()
- ?
- (
- __kmp_affinity_type == affinity_none
- ?
- "not used"
- :
- "yes"
- )
- :
- "no"
- )
- #else
- "no"
- #endif
- );
- __kmp_printf( "%s", buffer.str );
- __kmp_str_buf_free( & buffer );
- K_DIAG( 1, ( "KMP_VERSION is true\n" ) );
- #endif // KMP_STUB
+void __kmp_print_version_1(void) {
+ if (__kmp_version_1_printed) {
+ return;
+ }; // if
+ __kmp_version_1_printed = TRUE;
+
+#ifndef KMP_STUB
+ kmp_str_buf_t buffer;
+ __kmp_str_buf_init(&buffer);
+ // Print version strings skipping initial magic.
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]);
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_lib_type[KMP_VERSION_MAGIC_LEN]);
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_link_type[KMP_VERSION_MAGIC_LEN]);
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_build_time[KMP_VERSION_MAGIC_LEN]);
+#if KMP_MIC
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_target_env[KMP_VERSION_MAGIC_LEN]);
+#endif
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_build_compiler[KMP_VERSION_MAGIC_LEN]);
+#if defined(KMP_GOMP_COMPAT)
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_alt_comp[KMP_VERSION_MAGIC_LEN]);
+#endif /* defined(KMP_GOMP_COMPAT) */
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_omp_api[KMP_VERSION_MAGIC_LEN]);
+ __kmp_str_buf_print(&buffer, "%sdynamic error checking: %s\n",
+ KMP_VERSION_PREF_STR,
+ (__kmp_env_consistency_check ? "yes" : "no"));
+#ifdef KMP_DEBUG
+ for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) {
+ __kmp_str_buf_print(
+ &buffer, "%s%s barrier branch bits: gather=%u, release=%u\n",
+ KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i],
+ __kmp_barrier_gather_branch_bits[i],
+ __kmp_barrier_release_branch_bits[i]); // __kmp_str_buf_print
+ }; // for i
+ for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) {
+ __kmp_str_buf_print(
+ &buffer, "%s%s barrier pattern: gather=%s, release=%s\n",
+ KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i],
+ __kmp_barrier_pattern_name[__kmp_barrier_gather_pattern[i]],
+ __kmp_barrier_pattern_name
+ [__kmp_barrier_release_pattern[i]]); // __kmp_str_buf_print
+ }; // for i
+ __kmp_str_buf_print(&buffer, "%s\n",
+ &__kmp_version_lock[KMP_VERSION_MAGIC_LEN]);
+#endif
+ __kmp_str_buf_print(
+ &buffer, "%sthread affinity support: %s\n", KMP_VERSION_PREF_STR,
+#if KMP_AFFINITY_SUPPORTED
+ (KMP_AFFINITY_CAPABLE()
+ ? (__kmp_affinity_type == affinity_none ? "not used" : "yes")
+ : "no")
+#else
+ "no"
+#endif
+ );
+ __kmp_printf("%s", buffer.str);
+ __kmp_str_buf_free(&buffer);
+ K_DIAG(1, ("KMP_VERSION is true\n"));
+#endif // KMP_STUB
} // __kmp_print_version_1
-//
// Called at parallel initialization time.
-//
static int __kmp_version_2_printed = FALSE;
-void
-__kmp_print_version_2( void ) {
- if ( __kmp_version_2_printed ) {
- return;
- }; // if
- __kmp_version_2_printed = TRUE;
+void __kmp_print_version_2(void) {
+ if (__kmp_version_2_printed) {
+ return;
+ }; // if
+ __kmp_version_2_printed = TRUE;
} // __kmp_print_version_2
// end of file //
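A small sketch of why the stringer()/_stringer() pair used above needs two
levels of expansion; the example value is an assumption, not taken from this
build.

    #define EXAMPLE_MAJOR 5               // stand-in for KMP_VERSION_MAJOR
    #define _stringer(x) #x
    #define stringer(x) _stringer(x)
    // stringer(EXAMPLE_MAJOR) expands to "5": the outer macro substitutes
    // the argument first, then the inner one stringizes it. Applying #x
    // directly would instead yield the literal text "EXAMPLE_MAJOR".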
Modified: openmp/trunk/runtime/src/kmp_version.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_version.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_version.h (original)
+++ openmp/trunk/runtime/src/kmp_version.h Fri May 12 13:01:32 2017
@@ -17,31 +17,32 @@
#define KMP_VERSION_H
#ifdef __cplusplus
- extern "C" {
+extern "C" {
#endif // __cplusplus
#ifndef KMP_VERSION_MAJOR
- #error KMP_VERSION_MAJOR macro is not defined.
+#error KMP_VERSION_MAJOR macro is not defined.
#endif
-#define KMP_VERSION_MINOR 0
-/*
- Using "magic" prefix in all the version strings is rather convenient to get static version info
- from binaries by using standard utilities "strings" and "grep", e. g.:
+#define KMP_VERSION_MINOR 0
+/* Using "magic" prefix in all the version strings is rather convenient to get
+ static version info from binaries by using standard utilities "strings" and
+ "grep", e. g.:
$ strings libomp.so | grep "@(#)"
- gives clean list of all version strings in the library. Leading zero helps to keep version
- string separate from printable characters which may occurs just before version string.
-*/
-#define KMP_VERSION_MAGIC_STR "\x00@(#) "
-#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
-#define KMP_VERSION_PREF_STR "Intel(R) OMP "
-#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
+ gives clean list of all version strings in the library. Leading zero helps
+   to keep version string separate from printable characters which may occur
+ just before version string. */
+#define KMP_VERSION_MAGIC_STR "\x00@(#) "
+#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
+#define KMP_VERSION_PREF_STR "Intel(R) OMP "
+#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
/* declare all the version string constants for KMP_VERSION env. variable */
-extern int const __kmp_version_major;
-extern int const __kmp_version_minor;
-extern int const __kmp_version_build;
-extern int const __kmp_openmp_version;
-extern char const __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP.
+extern int const __kmp_version_major;
+extern int const __kmp_version_minor;
+extern int const __kmp_version_build;
+extern int const __kmp_openmp_version;
+extern char const
+ __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP.
extern char const __kmp_version_copyright[];
extern char const __kmp_version_lib_ver[];
extern char const __kmp_version_lib_type[];
@@ -58,11 +59,11 @@ extern char const __kmp_version_ftnstdca
extern char const __kmp_version_ftncdecl[];
extern char const __kmp_version_ftnextra[];
-void __kmp_print_version_1( void );
-void __kmp_print_version_2( void );
+void __kmp_print_version_1(void);
+void __kmp_print_version_2(void);
#ifdef __cplusplus
- } // extern "C"
+} // extern "C"
#endif // __cplusplus
#endif /* KMP_VERSION_H */
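A minimal sketch of how the KMP_VERSION_MAGIC_STR convention above is consumed
at runtime, mirroring what __kmp_print_version_1() does in kmp_version.cpp;
the standalone helper, the printf call, and the sample output are illustrative
only.

    #include <stdio.h>
    extern char const __kmp_version_lib_ver[];
    static void print_lib_ver(void) {
      // Skip the leading "\x00@(#) " magic so only the readable part,
      // e.g. "Intel(R) OMP version: 5.0.20140926", is printed.
      printf("%s\n", &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]);
    }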
Modified: openmp/trunk/runtime/src/kmp_wait_release.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.cpp (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.cpp Fri May 12 13:01:32 2017
@@ -14,13 +14,10 @@
#include "kmp_wait_release.h"
-void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- __kmp_wait_template(this_thr, flag, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj) );
+void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
+ int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ __kmp_wait_template(this_thr, flag,
+ final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
}
-void __kmp_release_64(kmp_flag_64 *flag) {
- __kmp_release_template(flag);
-}
+void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); }
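A minimal sketch of the wait/release contract these wrappers expose; the
initial value and checker follow the fork-barrier usage elsewhere in the
runtime, and the thread roles in the comments are assumptions for
illustration.

    volatile kmp_uint64 go = KMP_INIT_BARRIER_STATE;
    kmp_flag_64 flag(&go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
    // Waiting thread: spins, runs tasks, and may eventually sleep
    // (see __kmp_wait_template in kmp_wait_release.h below).
    //   __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(NULL));
    // Releasing thread: must bump the flag and wake any sleeper, otherwise
    // the waiter can sleep forever.
    //   __kmp_release_64(&flag);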
Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Fri May 12 13:01:32 2017
@@ -24,8 +24,8 @@
@defgroup WAIT_RELEASE Wait/Release operations
The definitions and functions here implement the lowest level thread
-synchronizations of suspending a thread and awaking it. They are used
-to build higher level operations such as barriers and fork/join.
+synchronizations of suspending a thread and awaking it. They are used to build
+higher level operations such as barriers and fork/join.
*/
/*!
@@ -37,581 +37,647 @@ to build higher level operations such as
* The flag_type describes the storage used for the flag.
*/
enum flag_type {
- flag32, /**< 32 bit flags */
- flag64, /**< 64 bit flags */
- flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
+ flag32, /**< 32 bit flags */
+ flag64, /**< 64 bit flags */
+ flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};
/*!
* Base class for wait/release volatile flag
*/
-template <typename P>
-class kmp_flag {
- volatile P * loc; /**< Pointer to the flag storage that is modified by another thread */
- flag_type t; /**< "Type" of the flag in loc */
- public:
- typedef P flag_t;
- kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
- /*!
- * @result the pointer to the actual flag
- */
- volatile P * get() { return loc; }
- /*!
- * @param new_loc in set loc to point at new_loc
- */
- void set(volatile P *new_loc) { loc = new_loc; }
- /*!
- * @result the flag_type
- */
- flag_type get_type() { return t; }
- // Derived classes must provide the following:
- /*
- kmp_info_t * get_waiter(kmp_uint32 i);
- kmp_uint32 get_num_waiters();
- bool done_check();
- bool done_check_val(P old_loc);
- bool notdone_check();
- P internal_release();
- void suspend(int th_gtid);
- void resume(int th_gtid);
- P set_sleeping();
- P unset_sleeping();
- bool is_sleeping();
- bool is_any_sleeping();
- bool is_sleeping_val(P old_loc);
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained);
- */
+template <typename P> class kmp_flag {
+ volatile P
+ *loc; /**< Pointer to the flag storage that is modified by another thread
+ */
+ flag_type t; /**< "Type" of the flag in loc */
+public:
+ typedef P flag_t;
+ kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
+ /*!
+ * @result the pointer to the actual flag
+ */
+ volatile P *get() { return loc; }
+ /*!
+ * @param new_loc in set loc to point at new_loc
+ */
+ void set(volatile P *new_loc) { loc = new_loc; }
+ /*!
+ * @result the flag_type
+ */
+ flag_type get_type() { return t; }
+ // Derived classes must provide the following:
+ /*
+ kmp_info_t * get_waiter(kmp_uint32 i);
+ kmp_uint32 get_num_waiters();
+ bool done_check();
+ bool done_check_val(P old_loc);
+ bool notdone_check();
+ P internal_release();
+ void suspend(int th_gtid);
+ void resume(int th_gtid);
+ P set_sleeping();
+ P unset_sleeping();
+ bool is_sleeping();
+ bool is_any_sleeping();
+ bool is_sleeping_val(P old_loc);
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+ int *thread_finished
+ USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
+ is_constrained);
+ */
};
-/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
- must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
+/* Spin wait loop that first does pause, then yield, then sleep. A thread that
+ calls __kmp_wait_* must make certain that another thread calls __kmp_release
+ to wake it back up to prevent deadlocks! */
template <class C>
static inline void
-__kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
- // NOTE: We may not belong to a team at this point.
- volatile typename C::flag_t *spin = flag->get();
- kmp_uint32 spins;
- kmp_uint32 hibernate;
- int th_gtid;
- int tasks_completed = FALSE;
- int oversubscribed;
-#if ! KMP_USE_MONITOR
- kmp_uint64 poll_count;
- kmp_uint64 hibernate_goal;
-#endif
-
- KMP_FSYNC_SPIN_INIT(spin, NULL);
- if (flag->done_check()) {
- KMP_FSYNC_SPIN_ACQUIRED(spin);
- return;
- }
- th_gtid = this_thr->th.th_info.ds.ds_gtid;
- KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
+__kmp_wait_template(kmp_info_t *this_thr, C *flag,
+ int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ // NOTE: We may not belong to a team at this point.
+ volatile typename C::flag_t *spin = flag->get();
+ kmp_uint32 spins;
+ kmp_uint32 hibernate;
+ int th_gtid;
+ int tasks_completed = FALSE;
+ int oversubscribed;
+#if !KMP_USE_MONITOR
+ kmp_uint64 poll_count;
+ kmp_uint64 hibernate_goal;
+#endif
+
+ KMP_FSYNC_SPIN_INIT(spin, NULL);
+ if (flag->done_check()) {
+ KMP_FSYNC_SPIN_ACQUIRED(spin);
+ return;
+ }
+ th_gtid = this_thr->th.th_info.ds.ds_gtid;
+ KA_TRACE(20,
+ ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
- stats_state_e thread_state = KMP_GET_THREAD_STATE();
+ stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif
#if OMPT_SUPPORT && OMPT_BLAME
- ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
- if (ompt_enabled &&
- ompt_state != ompt_state_undefined) {
- if (ompt_state == ompt_state_idle) {
- if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
- KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
- ompt_state == ompt_state_wait_barrier_implicit ||
- ompt_state == ompt_state_wait_barrier_explicit);
-
- ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
- ompt_parallel_id_t pId;
- ompt_task_id_t tId;
- if (team){
- pId = team->ompt_team_info.parallel_id;
- tId = team->ompt_task_info.task_id;
- } else {
- pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
- tId = this_thr->th.th_current_task->ompt_task_info.task_id;
- }
- ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
- }
+ ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
+ if (ompt_enabled && ompt_state != ompt_state_undefined) {
+ if (ompt_state == ompt_state_idle) {
+ if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
+ }
+ } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
+ KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
+ ompt_state == ompt_state_wait_barrier_implicit ||
+ ompt_state == ompt_state_wait_barrier_explicit);
+
+ ompt_lw_taskteam_t *team =
+ this_thr->th.th_team->t.ompt_serialized_team_info;
+ ompt_parallel_id_t pId;
+ ompt_task_id_t tId;
+ if (team) {
+ pId = team->ompt_team_info.parallel_id;
+ tId = team->ompt_task_info.task_id;
+ } else {
+ pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
+ tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+ }
+ ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
}
+ }
#endif
- // Setup for waiting
- KMP_INIT_YIELD(spins);
+ // Setup for waiting
+ KMP_INIT_YIELD(spins);
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
- // The worker threads cannot rely on the team struct existing at this point.
- // Use the bt values cached in the thread struct instead.
+// The worker threads cannot rely on the team struct existing at this point.
+// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
- if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
- // Force immediate suspend if not set by user and more threads than available procs
- hibernate = 0;
- else
- hibernate = this_thr->th.th_team_bt_intervals;
+ if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
+ // Force immediate suspend if not set by user and more threads than
+ // available procs
+ hibernate = 0;
+ else
+ hibernate = this_thr->th.th_team_bt_intervals;
#else
- hibernate = this_thr->th.th_team_bt_intervals;
+ hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */
- /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
- of the specified #intervals, plus up to one interval more. This increment make
- certain that this thread doesn't go to sleep too soon. */
- if (hibernate != 0)
- hibernate++;
-
- // Add in the current time value.
- hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
- KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
- th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
- hibernate - __kmp_global.g.g_time.dt.t_value));
+ /* If the blocktime is nonzero, we want to make sure that we spin wait for
+ the entirety of the specified #intervals, plus up to one interval more.
+       This increment makes certain that this thread doesn't go to sleep too
+ soon. */
+ if (hibernate != 0)
+ hibernate++;
+
+ // Add in the current time value.
+ hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
+ KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
+ th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
+ hibernate - __kmp_global.g.g_time.dt.t_value));
#else
- hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
- poll_count = 0;
+ hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
+ poll_count = 0;
#endif // KMP_USE_MONITOR
- }
-
- oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
- KMP_MB();
+ }
- // Main wait spin loop
- while (flag->notdone_check()) {
- int in_pool;
- kmp_task_team_t * task_team = NULL;
- if (__kmp_tasking_mode != tskm_immediate_exec) {
- task_team = this_thr->th.th_task_team;
- /* If the thread's task team pointer is NULL, it means one of 3 things:
- 1) A newly-created thread is first being released by __kmp_fork_barrier(), and
- its task team has not been set up yet.
- 2) All tasks have been executed to completion.
- 3) Tasking is off for this region. This could be because we are in a serialized region
- (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
- if (task_team != NULL) {
- if (TCR_SYNC_4(task_team->tt.tt_active)) {
- if (KMP_TASKING_ENABLED(task_team))
- flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
- USE_ITT_BUILD_ARG(itt_sync_obj), 0);
- else
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- }
- else {
- KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
- this_thr->th.th_task_team = NULL;
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- }
- } else {
- this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
- } // if
- } // if
-
- KMP_FSYNC_SPIN_PREPARE(spin);
- if (TCR_4(__kmp_global.g.g_done)) {
- if (__kmp_global.g.g_abort)
- __kmp_abort_thread();
- break;
- }
+ oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
+ KMP_MB();
- // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
- KMP_YIELD(oversubscribed);
- // TODO: Should it be number of cores instead of thread contexts? Like:
- // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
- // Need performance improvement data to make the change...
- KMP_YIELD_SPIN(spins);
-
- // Check if this thread was transferred from a team
- // to the thread pool (or vice-versa) while spinning.
- in_pool = !!TCR_4(this_thr->th.th_in_pool);
- if (in_pool != !!this_thr->th.th_active_in_pool) {
- if (in_pool) { // Recently transferred from team to pool
- KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
- this_thr->th.th_active_in_pool = TRUE;
- /* Here, we cannot assert that:
- KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
- __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
- lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
- by the workers. The two can get out of sync for brief periods of time. */
- }
- else { // Recently transferred from pool to team
- KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
- KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
- this_thr->th.th_active_in_pool = FALSE;
- }
- }
+ // Main wait spin loop
+ while (flag->notdone_check()) {
+ int in_pool;
+ kmp_task_team_t *task_team = NULL;
+ if (__kmp_tasking_mode != tskm_immediate_exec) {
+ task_team = this_thr->th.th_task_team;
+ /* If the thread's task team pointer is NULL, it means one of 3 things:
+ 1) A newly-created thread is first being released by
+ __kmp_fork_barrier(), and its task team has not been set up yet.
+ 2) All tasks have been executed to completion.
+ 3) Tasking is off for this region. This could be because we are in a
+ serialized region (perhaps the outer one), or else tasking was manually
+ disabled (KMP_TASKING=0). */
+ if (task_team != NULL) {
+ if (TCR_SYNC_4(task_team->tt.tt_active)) {
+ if (KMP_TASKING_ENABLED(task_team))
+ flag->execute_tasks(
+ this_thr, th_gtid, final_spin,
+ &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+ else
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ } else {
+ KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
+ this_thr->th.th_task_team = NULL;
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ }
+ } else {
+ this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+ } // if
+ } // if
+
+ KMP_FSYNC_SPIN_PREPARE(spin);
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
+ }
+
+ // If we are oversubscribed, or have waited a bit (and
+ // KMP_LIBRARY=throughput), then yield
+ KMP_YIELD(oversubscribed);
+ // TODO: Should it be number of cores instead of thread contexts? Like:
+ // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
+ // Need performance improvement data to make the change...
+ KMP_YIELD_SPIN(spins);
+ // Check if this thread was transferred from a team
+ // to the thread pool (or vice-versa) while spinning.
+ in_pool = !!TCR_4(this_thr->th.th_in_pool);
+ if (in_pool != !!this_thr->th.th_active_in_pool) {
+ if (in_pool) { // Recently transferred from team to pool
+ KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
+ this_thr->th.th_active_in_pool = TRUE;
+ /* Here, we cannot assert that:
+ KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
+ __kmp_thread_pool_nth);
+ __kmp_thread_pool_nth is inc/dec'd by the master thread while the
+ fork/join lock is held, whereas __kmp_thread_pool_active_nth is
+ inc/dec'd asynchronously by the workers. The two can get out of sync
+ for brief periods of time. */
+ } else { // Recently transferred from pool to team
+ KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
+ KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
+ this_thr->th.th_active_in_pool = FALSE;
+ }
+ }
#if KMP_STATS_ENABLED
- // Check if thread has been signalled to idle state
- // This indicates that the logical "join-barrier" has finished
- if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
- KMP_SET_THREAD_STATE(IDLE);
- KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
- }
+ // Check if thread has been signalled to idle state
+ // This indicates that the logical "join-barrier" has finished
+ if (this_thr->th.th_stats->isIdle() &&
+ KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
+ KMP_SET_THREAD_STATE(IDLE);
+ KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
+ }
#endif
- // Don't suspend if KMP_BLOCKTIME is set to "infinite"
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
- continue;
-
- // Don't suspend if there is a likelihood of new tasks being spawned.
- if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
- continue;
+ // Don't suspend if KMP_BLOCKTIME is set to "infinite"
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+ continue;
+
+ // Don't suspend if there is a likelihood of new tasks being spawned.
+ if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
+ continue;
#if KMP_USE_MONITOR
- // If we have waited a bit more, fall asleep
- if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
- continue;
+ // If we have waited a bit more, fall asleep
+ if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
+ continue;
#else
- if (KMP_BLOCKING(hibernate_goal, poll_count++))
- continue;
+ if (KMP_BLOCKING(hibernate_goal, poll_count++))
+ continue;
#endif
- KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
-
- flag->suspend(th_gtid);
+ KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
+ flag->suspend(th_gtid);
- if (TCR_4(__kmp_global.g.g_done)) {
- if (__kmp_global.g.g_abort)
- __kmp_abort_thread();
- break;
- }
- else if (__kmp_tasking_mode != tskm_immediate_exec
- && this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
- this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
- }
- // TODO: If thread is done with work and times out, disband/free
+ if (TCR_4(__kmp_global.g.g_done)) {
+ if (__kmp_global.g.g_abort)
+ __kmp_abort_thread();
+ break;
+ } else if (__kmp_tasking_mode != tskm_immediate_exec &&
+ this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
+ this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
}
+ // TODO: If thread is done with work and times out, disband/free
+ }
#if OMPT_SUPPORT && OMPT_BLAME
- if (ompt_enabled &&
- ompt_state != ompt_state_undefined) {
- if (ompt_state == ompt_state_idle) {
- if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
- ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
- }
- } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
- KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
- ompt_state == ompt_state_wait_barrier_implicit ||
- ompt_state == ompt_state_wait_barrier_explicit);
-
- ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
- ompt_parallel_id_t pId;
- ompt_task_id_t tId;
- if (team){
- pId = team->ompt_team_info.parallel_id;
- tId = team->ompt_task_info.task_id;
- } else {
- pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
- tId = this_thr->th.th_current_task->ompt_task_info.task_id;
- }
- ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
- }
+ if (ompt_enabled && ompt_state != ompt_state_undefined) {
+ if (ompt_state == ompt_state_idle) {
+ if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
+ ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
+ }
+ } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
+ KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
+ ompt_state == ompt_state_wait_barrier_implicit ||
+ ompt_state == ompt_state_wait_barrier_explicit);
+
+ ompt_lw_taskteam_t *team =
+ this_thr->th.th_team->t.ompt_serialized_team_info;
+ ompt_parallel_id_t pId;
+ ompt_task_id_t tId;
+ if (team) {
+ pId = team->ompt_team_info.parallel_id;
+ tId = team->ompt_task_info.task_id;
+ } else {
+ pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
+ tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+ }
+ ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
}
+ }
#endif
#if KMP_STATS_ENABLED
- // If we were put into idle state, pop that off the state stack
- if (KMP_GET_THREAD_STATE() == IDLE) {
- KMP_POP_PARTITIONED_TIMER();
- KMP_SET_THREAD_STATE(thread_state);
- this_thr->th.th_stats->resetIdleFlag();
- }
+ // If we were put into idle state, pop that off the state stack
+ if (KMP_GET_THREAD_STATE() == IDLE) {
+ KMP_POP_PARTITIONED_TIMER();
+ KMP_SET_THREAD_STATE(thread_state);
+ this_thr->th.th_stats->resetIdleFlag();
+ }
#endif
- KMP_FSYNC_SPIN_ACQUIRED(spin);
+ KMP_FSYNC_SPIN_ACQUIRED(spin);
}
-/* Release any threads specified as waiting on the flag by releasing the flag and resume the waiting thread
- if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake
- up the potentially sleeping thread and prevent deadlocks! */
-template <class C>
-static inline void
-__kmp_release_template(C *flag)
-{
+/* Release any threads specified as waiting on the flag by releasing the flag
+ and resume the waiting thread if indicated by the sleep bit(s). A thread that
+ calls __kmp_wait_template must call this function to wake up the potentially
+ sleeping thread and prevent deadlocks! */
+template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
- int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
+ int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
- KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
- KMP_DEBUG_ASSERT(flag->get());
- KMP_FSYNC_RELEASING(flag->get());
-
- flag->internal_release();
-
- KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get())));
-
- if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
- // Only need to check sleep stuff if infinite block time not set
- if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping?
- for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
- kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag
- if (waiter) {
- int wait_gtid = waiter->th.th_info.ds.ds_gtid;
- // Wake up thread if needed
- KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
- gtid, wait_gtid, flag->get()));
- flag->resume(wait_gtid); // unsets flag's current_waiter when done
- }
- }
+ KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
+ KMP_DEBUG_ASSERT(flag->get());
+ KMP_FSYNC_RELEASING(flag->get());
+
+ flag->internal_release();
+
+ KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
+ *(flag->get())));
+
+ if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+ // Only need to check sleep stuff if infinite block time not set.
+ // Are *any* threads waiting on flag sleeping?
+ if (flag->is_any_sleeping()) {
+ for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
+ // if sleeping waiter exists at i, sets current_waiter to i inside flag
+ kmp_info_t *waiter = flag->get_waiter(i);
+ if (waiter) {
+ int wait_gtid = waiter->th.th_info.ds.ds_gtid;
+ // Wake up thread if needed
+ KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
+ "flag(%p) set\n",
+ gtid, wait_gtid, flag->get()));
+ flag->resume(wait_gtid); // unsets flag's current_waiter when done
}
+ }
}
+ }
}
-template <typename FlagType>
-struct flag_traits {};
+template <typename FlagType> struct flag_traits {};
-template <>
-struct flag_traits<kmp_uint32> {
- typedef kmp_uint32 flag_t;
- static const flag_type t = flag32;
- static inline flag_t tcr(flag_t f) { return TCR_4(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
+template <> struct flag_traits<kmp_uint32> {
+ typedef kmp_uint32 flag_t;
+ static const flag_type t = flag32;
+ static inline flag_t tcr(flag_t f) { return TCR_4(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f);
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v);
+ }
};
-template <>
-struct flag_traits<kmp_uint64> {
- typedef kmp_uint64 flag_t;
- static const flag_type t = flag64;
- static inline flag_t tcr(flag_t f) { return TCR_8(f); }
- static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
- static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
- static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
+template <> struct flag_traits<kmp_uint64> {
+ typedef kmp_uint64 flag_t;
+ static const flag_type t = flag64;
+ static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+ static inline flag_t test_then_add4(volatile flag_t *f) {
+ return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f);
+ }
+ static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v);
+ }
+ static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+ return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v);
+ }
};
-template <typename FlagType>
-class kmp_basic_flag : public kmp_flag<FlagType> {
- typedef flag_traits<FlagType> traits_type;
- FlagType checker; /**< Value to compare flag to to check if flag has been released. */
- kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this thread. */
- kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this thread. */
- public:
- kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
- kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
- waiting_threads[0] = thr;
- }
- kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
- /*!
- * param i in index into waiting_threads
- * @result the thread that is waiting at index i
- */
- kmp_info_t * get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i<num_waiting_threads);
- return waiting_threads[i];
- }
- /*!
- * @result num_waiting_threads
- */
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- /*!
- * @param thr in the thread which is now waiting
- *
- * Insert a waiting thread at index 0.
- */
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- /*!
- * @result true if the flag object has been released.
- */
- bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
- /*!
- * @param old_loc in old value of flag
- * @result true if the flag's old value indicates it was released.
- */
- bool done_check_val(FlagType old_loc) { return old_loc == checker; }
- /*!
- * @result true if the flag object is not yet released.
- * Used in __kmp_wait_template like:
- * @code
- * while (flag.notdone_check()) { pause(); }
- * @endcode
- */
- bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
- /*!
- * @result Actual flag value before release was applied.
- * Trigger all waiting threads to run by modifying flag to release state.
- */
- void internal_release() {
- (void) traits_type::test_then_add4((volatile FlagType *)this->get());
- }
- /*!
- * @result Actual flag value before sleep bit(s) set.
- * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s).
- */
- FlagType set_sleeping() {
- return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @result Actual flag value before sleep bit(s) cleared.
- * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s).
- */
- FlagType unset_sleeping() {
- return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- /*!
- * @param old_loc in old value of flag
- * Test whether there are threads sleeping on the flag's old value in old_loc.
- */
- bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
- /*!
- * Test whether there are threads sleeping on the flag.
- */
- bool is_sleeping() { return is_sleeping_val(*(this->get())); }
- bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
- kmp_uint8 *get_stolen() { return NULL; }
- enum barrier_type get_bt() { return bs_last_barrier; }
+template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
+ typedef flag_traits<FlagType> traits_type;
+  FlagType checker; /**< Value to compare the flag against to check if the
+                       flag has been released. */
+ kmp_info_t
+ *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
+ kmp_uint32
+ num_waiting_threads; /**< Number of threads sleeping on this thread. */
+public:
+ kmp_basic_flag(volatile FlagType *p)
+ : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
+ kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr)
+ : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
+ waiting_threads[0] = thr;
+ }
+ kmp_basic_flag(volatile FlagType *p, FlagType c)
+ : kmp_flag<FlagType>(p, traits_type::t), checker(c),
+ num_waiting_threads(0) {}
+ /*!
+ * param i in index into waiting_threads
+ * @result the thread that is waiting at index i
+ */
+ kmp_info_t *get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i < num_waiting_threads);
+ return waiting_threads[i];
+ }
+ /*!
+ * @result num_waiting_threads
+ */
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ /*!
+ * @param thr in the thread which is now waiting
+ *
+ * Insert a waiting thread at index 0.
+ */
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
+ /*!
+ * @result true if the flag object has been released.
+ */
+ bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
+ /*!
+ * @param old_loc in old value of flag
+ * @result true if the flag's old value indicates it was released.
+ */
+ bool done_check_val(FlagType old_loc) { return old_loc == checker; }
+ /*!
+ * @result true if the flag object is not yet released.
+ * Used in __kmp_wait_template like:
+ * @code
+ * while (flag.notdone_check()) { pause(); }
+ * @endcode
+ */
+ bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
+ /*!
+ * @result Actual flag value before release was applied.
+ * Trigger all waiting threads to run by modifying flag to release state.
+ */
+ void internal_release() {
+ (void)traits_type::test_then_add4((volatile FlagType *)this->get());
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) set.
+ * Notes that there is at least one thread sleeping on the flag by setting
+ * sleep bit(s).
+ */
+ FlagType set_sleeping() {
+ return traits_type::test_then_or((volatile FlagType *)this->get(),
+ KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @result Actual flag value before sleep bit(s) cleared.
+ * Notes that there are no longer threads sleeping on the flag by clearing
+ * sleep bit(s).
+ */
+ FlagType unset_sleeping() {
+ return traits_type::test_then_and((volatile FlagType *)this->get(),
+ ~KMP_BARRIER_SLEEP_STATE);
+ }
+ /*!
+ * @param old_loc in old value of flag
+ * Test whether there are threads sleeping on the flag's old value in old_loc.
+ */
+ bool is_sleeping_val(FlagType old_loc) {
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ /*!
+ * Test whether there are threads sleeping on the flag.
+ */
+ bool is_sleeping() { return is_sleeping_val(*(this->get())); }
+ bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
+ kmp_uint8 *get_stolen() { return NULL; }
+ enum barrier_type get_bt() { return bs_last_barrier; }
};
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
- public:
- kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
- kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
- kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
- void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
- void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
- return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- void wait(kmp_info_t *this_thr, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
- __kmp_wait_template(this_thr, this, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- flag_type get_ptr_type() { return flag32; }
+public:
+ kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
+ kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr)
+ : kmp_basic_flag<kmp_uint32>(p, thr) {}
+ kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c)
+ : kmp_basic_flag<kmp_uint32>(p, c) {}
+ void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_32(
+ this_thr, gtid, this, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ void wait(kmp_info_t *this_thr,
+ int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ __kmp_wait_template(this_thr, this,
+ final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ flag_type get_ptr_type() { return flag32; }
};
class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
- public:
- kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
- kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
- kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
- void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
- void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
- return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- void wait(kmp_info_t *this_thr, int final_spin
- USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
- __kmp_wait_template(this_thr, this, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- flag_type get_ptr_type() { return flag64; }
+public:
+ kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
+ kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
+ : kmp_basic_flag<kmp_uint64>(p, thr) {}
+ kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
+ : kmp_basic_flag<kmp_uint64>(p, c) {}
+ void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_64(
+ this_thr, gtid, this, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ void wait(kmp_info_t *this_thr,
+ int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+ __kmp_wait_template(this_thr, this,
+ final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ flag_type get_ptr_type() { return flag64; }
};
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
- kmp_uint64 checker;
- kmp_info_t * waiting_threads[1];
- kmp_uint32 num_waiting_threads;
- kmp_uint32 offset; /**< Portion of flag that is of interest for an operation. */
- bool flag_switch; /**< Indicates a switch in flag location. */
- enum barrier_type bt; /**< Barrier type. */
- kmp_info_t * this_thr; /**< Thread that may be redirected to different flag location. */
+ kmp_uint64 checker;
+ kmp_info_t *waiting_threads[1];
+ kmp_uint32 num_waiting_threads;
+ kmp_uint32
+ offset; /**< Portion of flag that is of interest for an operation. */
+ bool flag_switch; /**< Indicates a switch in flag location. */
+ enum barrier_type bt; /**< Barrier type. */
+ kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
+ location. */
#if USE_ITT_BUILD
- void *itt_sync_obj; /**< ITT object that must be passed to new flag location. */
+ void *
+ itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
- unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
+ unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
+ return ((unsigned char *)loc)[offset];
+ }
+
public:
- kmp_flag_oncore(volatile kmp_uint64 *p)
- : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
- kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
- : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
- kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
- kmp_info_t * thr
+ kmp_flag_oncore(volatile kmp_uint64 *p)
+ : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
+ flag_switch(false) {}
+ kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
+ : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
+ offset(idx), flag_switch(false) {}
+ kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
+ enum barrier_type bar_t, kmp_info_t *thr
#if USE_ITT_BUILD
- , void *itt
+ ,
+ void *itt
#endif
- )
- : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
- flag_switch(false), bt(bar_t), this_thr(thr)
+ )
+ : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c),
+ num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
+ this_thr(thr)
#if USE_ITT_BUILD
- , itt_sync_obj(itt)
+ ,
+ itt_sync_obj(itt)
#endif
- {}
- kmp_info_t * get_waiter(kmp_uint32 i) {
- KMP_DEBUG_ASSERT(i<num_waiting_threads);
- return waiting_threads[i];
- }
- kmp_uint32 get_num_waiters() { return num_waiting_threads; }
- void set_waiter(kmp_info_t *thr) {
- waiting_threads[0] = thr;
- num_waiting_threads = 1;
- }
- bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
- bool done_check() { return done_check_val(*get()); }
- bool notdone_check() {
- // Calculate flag_switch
- if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
- flag_switch = true;
- if (byteref(get(),offset) != 1 && !flag_switch)
- return true;
- else if (flag_switch) {
- this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
- kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
- __kmp_wait_64(this_thr, &flag, TRUE
+ {
+ }
+ kmp_info_t *get_waiter(kmp_uint32 i) {
+ KMP_DEBUG_ASSERT(i < num_waiting_threads);
+ return waiting_threads[i];
+ }
+ kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+ void set_waiter(kmp_info_t *thr) {
+ waiting_threads[0] = thr;
+ num_waiting_threads = 1;
+ }
+ bool done_check_val(kmp_uint64 old_loc) {
+ return byteref(&old_loc, offset) == checker;
+ }
+ bool done_check() { return done_check_val(*get()); }
+ bool notdone_check() {
+ // Calculate flag_switch
+ if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
+ flag_switch = true;
+ if (byteref(get(), offset) != 1 && !flag_switch)
+ return true;
+ else if (flag_switch) {
+ this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
+ kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
+ (kmp_uint64)KMP_BARRIER_STATE_BUMP);
+ __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
- , itt_sync_obj
+ ,
+ itt_sync_obj
#endif
- );
- }
- return false;
- }
- void internal_release() {
- if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
- byteref(get(),offset) = 1;
- }
- else {
- kmp_uint64 mask=0;
- byteref(&mask,offset) = 1;
- (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
- }
+ );
}
- kmp_uint64 set_sleeping() {
- return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
- }
- kmp_uint64 unset_sleeping() {
- return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
- }
- bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
- bool is_sleeping() { return is_sleeping_val(*get()); }
- bool is_any_sleeping() { return is_sleeping_val(*get()); }
- void wait(kmp_info_t *this_thr, int final_spin) {
- __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
- USE_ITT_BUILD_ARG(itt_sync_obj));
- }
- void release() { __kmp_release_template(this); }
- void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
- void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
- int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
- USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
- return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
- USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
- }
- kmp_uint8 *get_stolen() { return NULL; }
- enum barrier_type get_bt() { return bt; }
- flag_type get_ptr_type() { return flag_oncore; }
+ return false;
+ }
+ void internal_release() {
+ if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
+ byteref(get(), offset) = 1;
+ } else {
+ kmp_uint64 mask = 0;
+ byteref(&mask, offset) = 1;
+ (void)KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
+ }
+ }
+ kmp_uint64 set_sleeping() {
+ return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(),
+ KMP_BARRIER_SLEEP_STATE);
+ }
+ kmp_uint64 unset_sleeping() {
+ return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(),
+ ~KMP_BARRIER_SLEEP_STATE);
+ }
+ bool is_sleeping_val(kmp_uint64 old_loc) {
+ return old_loc & KMP_BARRIER_SLEEP_STATE;
+ }
+ bool is_sleeping() { return is_sleeping_val(*get()); }
+ bool is_any_sleeping() { return is_sleeping_val(*get()); }
+ void wait(kmp_info_t *this_thr, int final_spin) {
+ __kmp_wait_template<kmp_flag_oncore>(
+ this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+ }
+ void release() { __kmp_release_template(this); }
+ void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
+ void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
+ int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+ int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+ kmp_int32 is_constrained) {
+ return __kmp_execute_tasks_oncore(
+ this_thr, gtid, this, final_spin,
+ thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+ }
+ kmp_uint8 *get_stolen() { return NULL; }
+ enum barrier_type get_bt() { return bt; }
+ flag_type get_ptr_type() { return flag_oncore; }
};
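
kmp_flag_oncore packs several per-thread go flags into one 64-bit word: byteref() selects the byte at `offset`, done_check() compares only that byte against the checker, and internal_release() either writes the byte directly or atomically ORs in a mask when other bytes may be updated concurrently. A hedged standalone sketch of the byte-within-a-word idea follows; byte_of and release_byte are made-up names, and the sketch uses shifts where the runtime indexes raw bytes through a cast.

// Sketch of keeping one go flag per byte inside a single 64-bit word, as the
// on-core barrier does. Illustrative only.
#include <atomic>
#include <cstdint>
#include <cstdio>

static unsigned char byte_of(uint64_t v, size_t offset) {
  return (unsigned char)(v >> (8 * offset));      // byteref() analogue on a copy
}

static void release_byte(std::atomic<uint64_t> &word, size_t offset) {
  uint64_t mask = (uint64_t)1 << (8 * offset);    // set byte `offset` to 1
  word.fetch_or(mask, std::memory_order_release); // KMP_TEST_THEN_OR64 analogue
}

int main() {
  std::atomic<uint64_t> flags{0};
  release_byte(flags, 3);                         // release the waiter at offset 3
  uint64_t snapshot = flags.load(std::memory_order_acquire);
  std::printf("byte 3 = %u, byte 0 = %u\n", (unsigned)byte_of(snapshot, 3),
              (unsigned)byte_of(snapshot, 0));
  return 0;
}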
-// Used to wake up threads, volatile void* flag is usually the th_sleep_loc associated
-// with int gtid.
+// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
+// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
- if (!flag) return;
+ if (!flag)
+ return;
- switch (((kmp_flag_64 *)flag)->get_type()) {
- case flag32: __kmp_resume_32(gtid, NULL); break;
- case flag64: __kmp_resume_64(gtid, NULL); break;
- case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
- }
+ switch (((kmp_flag_64 *)flag)->get_type()) {
+ case flag32:
+ __kmp_resume_32(gtid, NULL);
+ break;
+ case flag64:
+ __kmp_resume_64(gtid, NULL);
+ break;
+ case flag_oncore:
+ __kmp_resume_oncore(gtid, NULL);
+ break;
+ }
}
/*!
Modified: openmp/trunk/runtime/src/kmp_wrapper_getpid.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wrapper_getpid.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wrapper_getpid.h (original)
+++ openmp/trunk/runtime/src/kmp_wrapper_getpid.h Fri May 12 13:01:32 2017
@@ -18,50 +18,52 @@
#if KMP_OS_UNIX
- // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers.
- #include <sys/types.h>
- #include <unistd.h>
- #include <sys/syscall.h>
- #if KMP_OS_DARWIN
- //OS X
- #define __kmp_gettid() syscall(SYS_thread_selfid)
- #elif defined(SYS_gettid)
- // Hopefully other Unix systems define SYS_gettid syscall for getting os thread id
- #define __kmp_gettid() syscall(SYS_gettid)
- #else
- #warning No gettid found, use getpid instead
- #define __kmp_gettid() getpid()
- #endif
+// On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard
+// headers.
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#if KMP_OS_DARWIN
+// OS X
+#define __kmp_gettid() syscall(SYS_thread_selfid)
+#elif defined(SYS_gettid)
+// Hopefully other Unix systems define the SYS_gettid syscall for getting the
+// OS thread id
+#define __kmp_gettid() syscall(SYS_gettid)
+#else
+#warning No gettid found, use getpid instead
+#define __kmp_gettid() getpid()
+#endif
#elif KMP_OS_WINDOWS
- // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h".
- #include <process.h>
- // Let us simulate Unix.
- typedef int pid_t;
- #define getpid _getpid
- #define __kmp_gettid() GetCurrentThreadId()
+// On Windows* OS _getpid() returns int (not pid_t) and is declared in
+// "process.h".
+#include <process.h>
+// Let us simulate Unix.
+typedef int pid_t;
+#define getpid _getpid
+#define __kmp_gettid() GetCurrentThreadId()
#else
- #error Unknown or unsupported OS.
+#error Unknown or unsupported OS.
#endif
-/*
- TODO: All the libomp source code uses pid_t type for storing the result of getpid(), it is good.
- But often it printed as "%d", that is not good, because it ignores pid_t definition (may pid_t
- be longer that int?). It seems all pid prints should be rewritten as
-
- printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid );
+/* TODO: All the libomp source code uses the pid_t type for storing the result
+   of getpid(), which is good. But often it is printed as "%d", which is not
+   good, because it ignores the pid_t definition (may pid_t be longer than
+   int?). It seems all pid prints should be rewritten as:
- or (at least) as
+ printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid );
- printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid );
+ or (at least) as
- (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in "kmp_os.h".)
+ printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid );
-*/
+   (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UINT32_SPEC are defined in
+   "kmp_os.h".) */
#endif // KMP_WRAPPER_GETPID_H
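
Following the TODO above, pid values should be printed through an explicit wide cast rather than "%d". A minimal Unix-only sketch using the standard PRIu64 macro in place of the runtime's KMP_UINT64_SPEC; this sketch does not depend on the LLVM OpenMP headers.

// Unix-only sketch: print the pid through an explicit 64-bit cast instead of
// "%d", as the TODO above suggests. PRIu64 stands in for KMP_UINT64_SPEC.
#include <inttypes.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

int main(void) {
  pid_t pid = getpid();
  printf("pid = %" PRIu64 "\n", (uint64_t)pid);
  return 0;
}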
Modified: openmp/trunk/runtime/src/kmp_wrapper_malloc.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wrapper_malloc.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wrapper_malloc.h (original)
+++ openmp/trunk/runtime/src/kmp_wrapper_malloc.h Fri May 12 13:01:32 2017
@@ -17,21 +17,18 @@
#ifndef KMP_WRAPPER_MALLOC_H
#define KMP_WRAPPER_MALLOC_H
-/*
- This header serves for 3 purposes:
-
- 1. Declaring standard memory allocation rourines in OS-independent way.
- 2. Passing source location info through memory allocation wrappers.
- 3. Enabling native memory debugging capabilities.
-
-
- 1. Declaring standard memory allocation rourines in OS-independent way.
- -----------------------------------------------------------------------
-
- On Linux* OS, alloca() function is declared in <alloca.h> header, while on Windows* OS there is no
- <alloca.h> header, function _alloca() (note underscore!) is declared in <malloc.h>. This header
- eliminates these differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on
- following routines:
+/* This header serves for 3 purposes:
+   1. Declaring standard memory allocation routines in an OS-independent way.
+   2. Passing source location info through memory allocation wrappers.
+   3. Enabling native memory debugging capabilities.
+
+   1. Declaring standard memory allocation routines in an OS-independent way.
+   -------------------------------------------------------------------------
+   On Linux* OS, the alloca() function is declared in the <alloca.h> header,
+   while on Windows* OS there is no <alloca.h> header; the function _alloca()
+   (note the underscore!) is declared in <malloc.h>. This header eliminates
+   these differences, so client code including "kmp_wrapper_malloc.h" can rely
+   on the following routines:
malloc
calloc
@@ -39,60 +36,56 @@
free
alloca
- in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently
- it is available only on Windows* OS.)
-
-
- 2. Passing source location info through memory allocation wrappers.
- -------------------------------------------------------------------
-
- Some tools may help debugging memory errors, for example, report memory leaks. However, memory
- allocation wrappers may hinder source location.
-
- For example:
-
- void * aligned_malloc( int size ) {
- void * ptr = malloc( size ); // All the memory leaks will be reported at this line.
- // some adjustments...
- return ptr;
- };
-
- ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-(
-
- To overcome the problem, information about original source location should be passed through all
- the memory allocation wrappers, for example:
-
- void * aligned_malloc( int size, char const * file, int line ) {
- void * ptr = _malloc_dbg( size, file, line );
- // some adjustments...
- return ptr;
- };
-
- void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
-
- This is a good idea for debug, but passing additional arguments impacts performance. Disabling
- extra arguments in release version of the software introduces too many conditional compilation,
- which makes code unreadable. This header defines few macros and functions facilitating it:
-
- void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
- void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
- // some adjustments...
- return ptr;
- };
- #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
- // Use macro instead of direct call to function.
-
- void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be reported at this line.
-
-
- 3. Enabling native memory debugging capabilities.
- -------------------------------------------------
-
- Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft
- RTL tracks all memory allocations and can report memory leaks. This header enables this, and
- makes report more useful (see "Passing source location info through memory allocation
- wrappers").
+   in an OS-independent way. It also enables memory tracking capabilities in a
+   debug build. (Currently it is available only on Windows* OS.)
+ 2. Passing source location info through memory allocation wrappers.
+ -------------------------------------------------------------------
+   Some tools may help with debugging memory errors, for example by reporting
+   memory leaks. However, memory allocation wrappers may obscure the original
+   source location.
+ For example:
+
+ void * aligned_malloc( int size ) {
+ void * ptr = malloc( size ); // All the memory leaks will be reported at
+ // this line.
+ // some adjustments...
+ return ptr;
+ };
+
+ ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-(
+
+ To overcome the problem, information about original source location should
+ be passed through all the memory allocation wrappers, for example:
+
+ void * aligned_malloc( int size, char const * file, int line ) {
+ void * ptr = _malloc_dbg( size, file, line );
+ // some adjustments...
+ return ptr;
+ };
+ void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
+
+   This is a good idea for debugging, but passing additional arguments impacts
+   performance. Disabling the extra arguments in the release version of the
+   software introduces too much conditional compilation, which makes the code
+   unreadable. This header defines a few macros and functions to facilitate
+   this:
+
+ void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
+ void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
+ // some adjustments...
+ return ptr;
+ };
+ #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
+ // Use macro instead of direct call to function.
+
+ void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be
+ // reported at this line.
+
+ 3. Enabling native memory debugging capabilities.
+ -------------------------------------------------
+   Some platforms may offer memory debugging capabilities. For example, the
+   debug version of the Microsoft RTL tracks all memory allocations and can
+   report memory leaks. This header enables this and makes the report more
+   useful (see "Passing source location info through memory allocation
+   wrappers").
*/
#include <stdlib.h>
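
Purpose 2 above (passing the caller's source location through allocation wrappers) can be illustrated with a small standalone sketch; the MY_SRC_LOC_* macros and traced_malloc below are hypothetical stand-ins for the KMP_SRC_LOC_* machinery defined later in this header.

/* Standalone sketch of passing __FILE__/__LINE__ through an allocation
   wrapper, in the spirit of KMP_SRC_LOC_DECL/PARM/CURR (names here are
   hypothetical, not the runtime's). */
#include <stdio.h>
#include <stdlib.h>

#ifndef NDEBUG
#define MY_SRC_LOC_DECL , const char *file_, int line_
#define MY_SRC_LOC_PARM , file_, line_
#define MY_SRC_LOC_CURR , __FILE__, __LINE__
#else
#define MY_SRC_LOC_DECL
#define MY_SRC_LOC_PARM
#define MY_SRC_LOC_CURR
#endif

static void *traced_malloc_impl(size_t size MY_SRC_LOC_DECL) {
#ifndef NDEBUG
  fprintf(stderr, "alloc %zu bytes from %s:%d\n", size, file_, line_);
#endif
  return malloc(size);
}
/* Callers use the macro so the *caller's* location is recorded, not the
   wrapper's. */
#define traced_malloc(size) traced_malloc_impl((size) MY_SRC_LOC_CURR)

int main(void) {
  void *p = traced_malloc(64); /* reported as main()'s file:line in debug */
  free(p);
  return 0;
}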
@@ -101,102 +94,101 @@
// Include alloca() declaration.
#if KMP_OS_WINDOWS
- #include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
- #define alloca _alloca // Allow to use alloca() with no underscore.
+#include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
+#define alloca _alloca // Allow using alloca() with no underscore.
#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
- // Declared in "stdlib.h".
+// Declared in "stdlib.h".
#elif KMP_OS_UNIX
- #include <alloca.h> // Linux* OS and OS X*: alloc() declared in "alloca".
+#include <alloca.h> // Linux* OS and OS X*: alloca() declared in <alloca.h>.
#else
- #error Unknown or unsupported OS.
+#error Unknown or unsupported OS.
#endif
-/*
- KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in function declaration.
- KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass parameters to underlying
- levels.
- KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at
- top-level.
-
- Typical usage:
-
- void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
- // Note: Comma is missed before KMP_SRC_LOC_DECL.
- KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
- ...
- }
- #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
- // Use macro instead of direct call to function -- macro passes info about current
- // source location to the func.
+/* KMP_SRC_LOC_DECL -- Declares source location parameters, to be used in a
+   function declaration.
+   KMP_SRC_LOC_PARM -- Source location parameters, to be used to pass
+   parameters to underlying levels.
+   KMP_SRC_LOC_CURR -- Source location arguments describing the current
+   location, to be used at the top level.
+
+ Typical usage:
+ void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
+   // Note: the comma before KMP_SRC_LOC_DECL is intentionally omitted
+   // (the macro itself starts with one).
+ KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
+ ...
+ }
+ #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
+ // Use macro instead of direct call to function -- macro passes info
+ // about current source location to the func.
*/
#if KMP_DEBUG
- #define KMP_SRC_LOC_DECL , char const * _file_, int _line_
- #define KMP_SRC_LOC_PARM , _file_, _line_
- #define KMP_SRC_LOC_CURR , __FILE__, __LINE__
+#define KMP_SRC_LOC_DECL , char const *_file_, int _line_
+#define KMP_SRC_LOC_PARM , _file_, _line_
+#define KMP_SRC_LOC_CURR , __FILE__, __LINE__
#else
- #define KMP_SRC_LOC_DECL
- #define KMP_SRC_LOC_PARM
- #define KMP_SRC_LOC_CURR
+#define KMP_SRC_LOC_DECL
+#define KMP_SRC_LOC_PARM
+#define KMP_SRC_LOC_CURR
#endif // KMP_DEBUG
-/*
- malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) with accepts extra
- arguments (source location info) in debug mode. They should be used in place of malloc() and
- free(), this allows enabling native memory debugging capabilities (if any).
-
- Typical usage:
-
- ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
- // Inside memory allocation wrapper, or
- ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
- // Outside of memory allocation wrapper.
-
-
+/* malloc_src_loc() and free_src_loc() are pseudo-functions (really macros)
+   which accept extra arguments (source location info) in debug mode. They
+   should be used in place of malloc() and free(); this allows enabling native
+   memory debugging capabilities (if any).
+
+ Typical usage:
+ ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
+ // Inside memory allocation wrapper, or
+ ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
+ // Outside of memory allocation wrapper.
*/
-#define malloc_src_loc( args ) _malloc_src_loc( args )
-#define free_src_loc( args ) _free_src_loc( args )
- /*
- Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3
- parameters, but calls to malloc_src_loc() are always the same:
-
- ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
-
- Compiler issues warning/error "too few arguments in macro invocation". Declaring two
- macroses, malloc_src_loc() and _malloc_src_loc() overcomes the problem.
- */
+#define malloc_src_loc(args) _malloc_src_loc(args)
+#define free_src_loc(args) _free_src_loc(args)
+/* Depending on the build mode (debug or release), malloc_src_loc is declared
+   with 1 or 3 parameters, but calls to malloc_src_loc() are always the same:
+
+   ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
+
+   With a single macro the compiler would issue the warning/error "too few
+   arguments in macro invocation". Declaring two macros, malloc_src_loc() and
+   _malloc_src_loc(), overcomes the problem. */
#if KMP_DEBUG
- #if KMP_OS_WINDOWS && _DEBUG
- // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined.
+#if KMP_OS_WINDOWS && _DEBUG
+// KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined.
- // Windows* OS has native memory debugging capabilities. Enable them.
+// Windows* OS has native memory debugging capabilities. Enable them.
- #include <crtdbg.h>
+#include <crtdbg.h>
- #define KMP_MEM_BLOCK _CLIENT_BLOCK
- #define malloc( size ) _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
- #define calloc( num, size ) _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
- #define realloc( ptr, size ) _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
- #define free( ptr ) _free_dbg( (ptr), KMP_MEM_BLOCK )
+#define KMP_MEM_BLOCK _CLIENT_BLOCK
+#define malloc(size) _malloc_dbg((size), KMP_MEM_BLOCK, __FILE__, __LINE__)
+#define calloc(num, size) \
+ _calloc_dbg((num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__)
+#define realloc(ptr, size) \
+ _realloc_dbg((ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__)
+#define free(ptr) _free_dbg((ptr), KMP_MEM_BLOCK)
+
+#define _malloc_src_loc(size, file, line) \
+ _malloc_dbg((size), KMP_MEM_BLOCK, (file), (line))
+#define _free_src_loc(ptr, file, line) _free_dbg((ptr), KMP_MEM_BLOCK)
- #define _malloc_src_loc( size, file, line ) _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) )
- #define _free_src_loc( ptr, file, line ) _free_dbg( (ptr), KMP_MEM_BLOCK )
-
- #else
+#else
- // Linux* OS, OS X*, or non-debug Windows* OS.
+// Linux* OS, OS X*, or non-debug Windows* OS.
- #define _malloc_src_loc( size, file, line ) malloc( (size) )
- #define _free_src_loc( ptr, file, line ) free( (ptr) )
+#define _malloc_src_loc(size, file, line) malloc((size))
+#define _free_src_loc(ptr, file, line) free((ptr))
- #endif
+#endif
#else
- // In release build malloc_src_loc() and free_src_loc() do not have extra parameters.
- #define _malloc_src_loc( size ) malloc( (size) )
- #define _free_src_loc( ptr ) free( (ptr) )
+// In a release build, malloc_src_loc() and free_src_loc() do not have extra
+// parameters.
+#define _malloc_src_loc(size) malloc((size))
+#define _free_src_loc(ptr) free((ptr))
#endif // KMP_DEBUG
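
The "too few arguments in macro invocation" remark above rests on a preprocessor subtlety: at the call site, the SRC-LOC macro has not yet been expanded when the outer macro's arguments are collected, so malloc_src_loc() always receives exactly one argument; only after that argument is expanded does the forwarded _malloc_src_loc() see one or three. A hedged, compilable demonstration of that forwarding trick with made-up names:

/* Demonstrates the forwarding-macro trick: the outer macro always sees ONE
   argument; only after that argument is expanded does the inner macro see one
   or three. Hypothetical names; same idea as malloc_src_loc()/_malloc_src_loc(). */
#include <stdio.h>
#include <stdlib.h>

#ifndef NDEBUG
#define SRC_LOC_CURR , __FILE__, __LINE__
#define alloc_impl(size, file, line) \
  (printf("alloc at %s:%d\n", (file), (line)), malloc(size))
#else
#define SRC_LOC_CURR
#define alloc_impl(size) malloc(size)
#endif

/* One-parameter forwarder: without it, the three-parameter debug macro
   invoked as alloc_impl(size SRC_LOC_CURR) would fail with "too few
   arguments". */
#define alloc_src_loc(args) alloc_impl(args)

int main(void) {
  void *p = alloc_src_loc(32 SRC_LOC_CURR);
  free(p);
  return 0;
}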
Modified: openmp/trunk/runtime/src/ompt-event-specific.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-event-specific.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-event-specific.h (original)
+++ openmp/trunk/runtime/src/ompt-event-specific.h Fri May 12 13:01:32 2017
@@ -1,5 +1,5 @@
-#ifndef __OMPT_EVENT_SPECIFIC_H__
-#define __OMPT_EVENT_SPECIFIC_H__
+#ifndef __OMPT_EVENT_SPECIFIC_H__
+#define __OMPT_EVENT_SPECIFIC_H__
/******************************************************************************
* File: ompt-event-specific.h
@@ -10,10 +10,9 @@
* and the level of their implementation by a runtime system.
*****************************************************************************/
-#define _ompt_tokenpaste_helper(x,y) x ## y
-#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y)
-#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented)
-
+#define _ompt_tokenpaste_helper(x, y) x##y
+#define _ompt_tokenpaste(x, y) _ompt_tokenpaste_helper(x, y)
+#define ompt_event_implementation_status(e) _ompt_tokenpaste(e, _implemented)
/*----------------------------------------------------------------------------
| Specify whether an event may occur or not, and whether event callbacks
@@ -23,130 +22,132 @@
| the OMPT TR. They are exposed to tools through ompt_set_callback.
+--------------------------------------------------------------------------*/
-#define ompt_event_NEVER ompt_set_result_event_never_occurs
-#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
-#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
-#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
+#define ompt_event_NEVER ompt_set_result_event_never_occurs
+#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
+#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
+#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
#if OMPT_TRACE
-#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
+#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
#else
-#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
+#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
#endif
#if OMPT_BLAME
-#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
+#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
#else
-#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
+#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
#endif
/*----------------------------------------------------------------------------
| Mandatory Events
+--------------------------------------------------------------------------*/
-#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
-
-#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
-#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
/*----------------------------------------------------------------------------
| Optional Events (blame shifting)
+--------------------------------------------------------------------------*/
-#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
-
-#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
-
-#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_nest_lock_last_implemented \
+ ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
/*----------------------------------------------------------------------------
| Optional Events (synchronous events)
+--------------------------------------------------------------------------*/
-#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_release_nest_lock_prev_implemented \
+ ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_next_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
-#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_nest_lock_first_implemented \
+ ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_nest_lock_next_implemented \
+ ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
#if OMP_40_ENABLED
-# define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE
-# define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE
#else
-# define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED
-# define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED
#endif /* OMP_40_ENABLED */
#endif
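
The _ompt_tokenpaste machinery above maps an event name onto its *_implemented status macro, which is how ompt_set_callback() reports whether an event can ever fire. A small standalone sketch of the same token-pasting pattern with generic names (not the actual OMPT macros):

// Token-pasting sketch: status(e) expands to e_implemented, mirroring
// ompt_event_implementation_status(). Names and values here are generic.
#include <stdio.h>

#define TOKENPASTE_HELPER(x, y) x##y
#define TOKENPASTE(x, y) TOKENPASTE_HELPER(x, y)
#define status(e) TOKENPASTE(e, _implemented)

#define event_never 0
#define event_always 1

#define my_event_parallel_begin_implemented event_always
#define my_event_sections_begin_implemented event_never

int main(void) {
  // status(my_event_parallel_begin) -> my_event_parallel_begin_implemented -> 1
  printf("parallel_begin: %d\n", status(my_event_parallel_begin));
  printf("sections_begin: %d\n", status(my_event_sections_begin));
  return 0;
}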
Modified: openmp/trunk/runtime/src/ompt-general.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-general.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-general.cpp (original)
+++ openmp/trunk/runtime/src/ompt-general.cpp Fri May 12 13:01:32 2017
@@ -9,16 +9,12 @@
#include <stdlib.h>
#include <string.h>
-
-
/*****************************************************************************
* ompt include files
****************************************************************************/
#include "ompt-specific.cpp"
-
-
/*****************************************************************************
* macros
****************************************************************************/
@@ -34,32 +30,25 @@
#define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
#endif
-
/*****************************************************************************
* types
****************************************************************************/
typedef struct {
- const char *state_name;
- ompt_state_t state_id;
+ const char *state_name;
+ ompt_state_t state_id;
} ompt_state_info_t;
-
enum tool_setting_e {
- omp_tool_error,
- omp_tool_unset,
- omp_tool_disabled,
- omp_tool_enabled
+ omp_tool_error,
+ omp_tool_unset,
+ omp_tool_disabled,
+ omp_tool_enabled
};
-
-typedef void (*ompt_initialize_t) (
- ompt_function_lookup_t ompt_fn_lookup,
- const char *version,
- unsigned int ompt_version
-);
-
-
+typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup,
+ const char *version,
+ unsigned int ompt_version);
/*****************************************************************************
* global variables
@@ -68,16 +57,14 @@ typedef void (*ompt_initialize_t) (
int ompt_enabled = 0;
ompt_state_info_t ompt_state_info[] = {
-#define ompt_state_macro(state, code) { # state, state },
+#define ompt_state_macro(state, code) {#state, state},
FOREACH_OMPT_STATE(ompt_state_macro)
#undef ompt_state_macro
};
ompt_callbacks_t ompt_callbacks;
-static ompt_initialize_t ompt_initialize_fn = NULL;
-
-
+static ompt_initialize_t ompt_initialize_fn = NULL;
/*****************************************************************************
* forward declarations
@@ -87,7 +74,6 @@ static ompt_interface_fn_t ompt_fn_looku
OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void);
-
/*****************************************************************************
* initialization and finalization (private operations)
****************************************************************************/
@@ -102,13 +88,11 @@ OMPT_API_ROUTINE ompt_thread_id_t ompt_g
* NULL is returned and OMPT won't be enabled */
#if OMPT_HAVE_WEAK_ATTRIBUTE
_OMP_EXTERN
-__attribute__ (( weak ))
-ompt_initialize_t ompt_tool()
-{
+__attribute__((weak)) ompt_initialize_t ompt_tool() {
#if OMPT_DEBUG
- printf("ompt_tool() is called from the RTL\n");
+ printf("ompt_tool() is called from the RTL\n");
#endif
- return NULL;
+ return NULL;
}
#elif OMPT_HAVE_PSAPI
@@ -120,161 +104,154 @@ ompt_initialize_t ompt_tool()
// The number of loaded modules to start enumeration with EnumProcessModules()
#define NUM_MODULES 128
-static
-ompt_initialize_t ompt_tool_windows()
-{
- int i;
- DWORD needed, new_size;
- HMODULE *modules;
- HANDLE process = GetCurrentProcess();
- modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) );
- ompt_initialize_t (*ompt_tool_p)() = NULL;
+static ompt_initialize_t ompt_tool_windows() {
+ int i;
+ DWORD needed, new_size;
+ HMODULE *modules;
+ HANDLE process = GetCurrentProcess();
+ modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE));
+ ompt_initialize_t (*ompt_tool_p)() = NULL;
#if OMPT_DEBUG
- printf("ompt_tool_windows(): looking for ompt_tool\n");
+ printf("ompt_tool_windows(): looking for ompt_tool\n");
#endif
- if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE),
- &needed)) {
- // Regardless of the error reason use the stub initialization function
- free(modules);
- return NULL;
- }
- // Check if NUM_MODULES is enough to list all modules
- new_size = needed / sizeof(HMODULE);
- if (new_size > NUM_MODULES) {
+ if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE),
+ &needed)) {
+ // Regardless of the error reason use the stub initialization function
+ free(modules);
+ return NULL;
+ }
+ // Check if NUM_MODULES is enough to list all modules
+ new_size = needed / sizeof(HMODULE);
+ if (new_size > NUM_MODULES) {
#if OMPT_DEBUG
printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed);
#endif
- modules = (HMODULE*)realloc( modules, needed );
- // If resizing failed use the stub function.
- if (!EnumProcessModules(process, modules, needed, &needed)) {
- free(modules);
- return NULL;
- }
- }
- for (i = 0; i < new_size; ++i) {
- (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
- if (ompt_tool_p) {
+ modules = (HMODULE *)realloc(modules, needed);
+ // If resizing failed use the stub function.
+ if (!EnumProcessModules(process, modules, needed, &needed)) {
+ free(modules);
+ return NULL;
+ }
+ }
+ for (i = 0; i < new_size; ++i) {
+ (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
+ if (ompt_tool_p) {
#if OMPT_DEBUG
- TCHAR modName[MAX_PATH];
- if (GetModuleFileName(modules[i], modName, MAX_PATH))
- printf("ompt_tool_windows(): ompt_tool found in module %s\n",
- modName);
+ TCHAR modName[MAX_PATH];
+ if (GetModuleFileName(modules[i], modName, MAX_PATH))
+ printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName);
#endif
- free(modules);
- return ompt_tool_p();
- }
+ free(modules);
+ return ompt_tool_p();
+ }
#if OMPT_DEBUG
- else {
- TCHAR modName[MAX_PATH];
- if (GetModuleFileName(modules[i], modName, MAX_PATH))
- printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
- modName);
- }
-#endif
+ else {
+ TCHAR modName[MAX_PATH];
+ if (GetModuleFileName(modules[i], modName, MAX_PATH))
+ printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
+ modName);
}
- free(modules);
- return NULL;
+#endif
+ }
+ free(modules);
+ return NULL;
}
#else
-# error Either __attribute__((weak)) or psapi.dll are required for OMPT support
+#error Either __attribute__((weak)) or psapi.dll are required for OMPT support
#endif // OMPT_HAVE_WEAK_ATTRIBUTE
-void ompt_pre_init()
-{
- //--------------------------------------------------
- // Execute the pre-initialization logic only once.
- //--------------------------------------------------
- static int ompt_pre_initialized = 0;
-
- if (ompt_pre_initialized) return;
-
- ompt_pre_initialized = 1;
-
- //--------------------------------------------------
- // Use a tool iff a tool is enabled and available.
- //--------------------------------------------------
- const char *ompt_env_var = getenv("OMP_TOOL");
- tool_setting_e tool_setting = omp_tool_error;
-
- if (!ompt_env_var || !strcmp(ompt_env_var, ""))
- tool_setting = omp_tool_unset;
- else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
- tool_setting = omp_tool_disabled;
- else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
- tool_setting = omp_tool_enabled;
+void ompt_pre_init() {
+ //--------------------------------------------------
+ // Execute the pre-initialization logic only once.
+ //--------------------------------------------------
+ static int ompt_pre_initialized = 0;
+
+ if (ompt_pre_initialized)
+ return;
+
+ ompt_pre_initialized = 1;
+
+ //--------------------------------------------------
+ // Use a tool iff a tool is enabled and available.
+ //--------------------------------------------------
+ const char *ompt_env_var = getenv("OMP_TOOL");
+ tool_setting_e tool_setting = omp_tool_error;
+
+ if (!ompt_env_var || !strcmp(ompt_env_var, ""))
+ tool_setting = omp_tool_unset;
+ else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
+ tool_setting = omp_tool_disabled;
+ else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
+ tool_setting = omp_tool_enabled;
#if OMPT_DEBUG
- printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
+ printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
#endif
- switch(tool_setting) {
- case omp_tool_disabled:
- break;
-
- case omp_tool_unset:
- case omp_tool_enabled:
- ompt_initialize_fn = ompt_tool();
- if (ompt_initialize_fn) {
- ompt_enabled = 1;
- }
- break;
-
- case omp_tool_error:
- fprintf(stderr,
- "Warning: OMP_TOOL has invalid value \"%s\".\n"
- " legal values are (NULL,\"\",\"disabled\","
- "\"enabled\").\n", ompt_env_var);
- break;
- }
+ switch (tool_setting) {
+ case omp_tool_disabled:
+ break;
+
+ case omp_tool_unset:
+ case omp_tool_enabled:
+ ompt_initialize_fn = ompt_tool();
+ if (ompt_initialize_fn) {
+ ompt_enabled = 1;
+ }
+ break;
+
+ case omp_tool_error:
+ fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n"
+ " legal values are (NULL,\"\",\"disabled\","
+ "\"enabled\").\n",
+ ompt_env_var);
+ break;
+ }
#if OMPT_DEBUG
- printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
+ printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
#endif
}
+void ompt_post_init() {
+ //--------------------------------------------------
+ // Execute the post-initialization logic only once.
+ //--------------------------------------------------
+ static int ompt_post_initialized = 0;
-void ompt_post_init()
-{
- //--------------------------------------------------
- // Execute the post-initialization logic only once.
- //--------------------------------------------------
- static int ompt_post_initialized = 0;
-
- if (ompt_post_initialized) return;
-
- ompt_post_initialized = 1;
-
- //--------------------------------------------------
- // Initialize the tool if so indicated.
- //--------------------------------------------------
- if (ompt_enabled) {
- ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
- OMPT_VERSION);
-
- ompt_thread_t *root_thread = ompt_get_thread();
-
- ompt_set_thread_state(root_thread, ompt_state_overhead);
-
- if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
- ompt_callbacks.ompt_callback(ompt_event_thread_begin)
- (ompt_thread_initial, ompt_get_thread_id());
- }
+ if (ompt_post_initialized)
+ return;
- ompt_set_thread_state(root_thread, ompt_state_work_serial);
- }
-}
+ ompt_post_initialized = 1;
+
+ //--------------------------------------------------
+ // Initialize the tool if so indicated.
+ //--------------------------------------------------
+ if (ompt_enabled) {
+ ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
+ OMPT_VERSION);
+ ompt_thread_t *root_thread = ompt_get_thread();
-void ompt_fini()
-{
- if (ompt_enabled) {
- if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
- ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
- }
+ ompt_set_thread_state(root_thread, ompt_state_overhead);
+
+ if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
+ ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
+ ompt_thread_initial, ompt_get_thread_id());
}
- ompt_enabled = 0;
+ ompt_set_thread_state(root_thread, ompt_state_work_serial);
+ }
}
+void ompt_fini() {
+ if (ompt_enabled) {
+ if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
+ ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
+ }
+ }
+
+ ompt_enabled = 0;
+}
/*****************************************************************************
* interface operations
@@ -285,148 +262,122 @@ void ompt_fini()
****************************************************************************/
OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
- const char **next_state_name)
-{
- const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
- int i = 0;
-
- for (i = 0; i < len - 1; i++) {
- if (ompt_state_info[i].state_id == current_state) {
- *next_state = ompt_state_info[i+1].state_id;
- *next_state_name = ompt_state_info[i+1].state_name;
- return 1;
- }
+ const char **next_state_name) {
+ const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
+ int i = 0;
+
+ for (i = 0; i < len - 1; i++) {
+ if (ompt_state_info[i].state_id == current_state) {
+ *next_state = ompt_state_info[i + 1].state_id;
+ *next_state_name = ompt_state_info[i + 1].state_name;
+ return 1;
}
+ }
- return 0;
+ return 0;
}
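
ompt_enumerate_state() hands back one entry of ompt_state_info per call and returns 0 after the last one. A hedged tool-side sketch of driving it: the function pointer is assumed to have been obtained through the lookup passed to the tool's initializer (see the skeleton after ompt_fn_lookup below), and the caller supplies the starting state.

// Hedged sketch: walk the runtime's state table via the ompt_enumerate_state
// inquiry function. The typedef matches the signature above; obtaining the
// pointer and choosing the starting state are left to the tool initializer.
#include <stdio.h>

typedef int (*enumerate_state_fn)(int current_state, int *next_state,
                                  const char **next_state_name);

void dump_states(enumerate_state_fn enumerate_state, int first_state) {
  int state = first_state;
  int next_state;
  const char *name;
  while (enumerate_state(state, &next_state, &name)) {
    printf("state 0x%x: %s\n", next_state, name);
    state = next_state;
  }
}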
-
-
/*****************************************************************************
* callbacks
****************************************************************************/
-OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb)
-{
- switch (evid) {
+OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) {
+ switch (evid) {
#define ompt_event_macro(event_name, callback_type, event_id) \
- case event_name: \
- if (ompt_event_implementation_status(event_name)) { \
- ompt_callbacks.ompt_callback(event_name) = (callback_type) cb; \
- } \
- return ompt_event_implementation_status(event_name);
+ case event_name: \
+ if (ompt_event_implementation_status(event_name)) { \
+ ompt_callbacks.ompt_callback(event_name) = (callback_type)cb; \
+ } \
+ return ompt_event_implementation_status(event_name);
FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
- default: return ompt_set_result_registration_error;
- }
+ default:
+ return ompt_set_result_registration_error;
+ }
}
-
-OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb)
-{
- switch (evid) {
+OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
+ switch (evid) {
#define ompt_event_macro(event_name, callback_type, event_id) \
- case event_name: \
- if (ompt_event_implementation_status(event_name)) { \
- ompt_callback_t mycb = \
- (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \
- if (mycb) { \
- *cb = mycb; \
- return ompt_get_callback_success; \
- } \
- } \
- return ompt_get_callback_failure;
+ case event_name: \
+ if (ompt_event_implementation_status(event_name)) { \
+ ompt_callback_t mycb = \
+ (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \
+ if (mycb) { \
+ *cb = mycb; \
+ return ompt_get_callback_success; \
+ } \
+ } \
+ return ompt_get_callback_failure;
FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
- default: return ompt_get_callback_failure;
- }
+ default:
+ return ompt_get_callback_failure;
+ }
}
-
/*****************************************************************************
* parallel regions
****************************************************************************/
-OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level)
-{
- return __ompt_get_parallel_id_internal(ancestor_level);
+OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) {
+ return __ompt_get_parallel_id_internal(ancestor_level);
}
-
-OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level)
-{
- return __ompt_get_parallel_team_size_internal(ancestor_level);
+OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) {
+ return __ompt_get_parallel_team_size_internal(ancestor_level);
}
-
-OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level)
-{
- return __ompt_get_parallel_function_internal(ancestor_level);
+OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) {
+ return __ompt_get_parallel_function_internal(ancestor_level);
}
+OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) {
+ ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
-OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id)
-{
- ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
-
- if (thread_state == ompt_state_undefined) {
- thread_state = ompt_state_work_serial;
- }
+ if (thread_state == ompt_state_undefined) {
+ thread_state = ompt_state_work_serial;
+ }
- return thread_state;
+ return thread_state;
}
-
-
/*****************************************************************************
* threads
****************************************************************************/
-
-OMPT_API_ROUTINE void *ompt_get_idle_frame()
-{
- return __ompt_get_idle_frame_internal();
+OMPT_API_ROUTINE void *ompt_get_idle_frame() {
+ return __ompt_get_idle_frame_internal();
}
-
-
/*****************************************************************************
* tasks
****************************************************************************/
-
-OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void)
-{
- return __ompt_get_thread_id_internal();
+OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) {
+ return __ompt_get_thread_id_internal();
}
-OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth)
-{
- return __ompt_get_task_id_internal(depth);
+OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) {
+ return __ompt_get_task_id_internal(depth);
}
-
-OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth)
-{
- return __ompt_get_task_frame_internal(depth);
+OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) {
+ return __ompt_get_task_frame_internal(depth);
}
-
-OMPT_API_ROUTINE void *ompt_get_task_function(int depth)
-{
- return __ompt_get_task_function_internal(depth);
+OMPT_API_ROUTINE void *ompt_get_task_function(int depth) {
+ return __ompt_get_task_function_internal(depth);
}
-
/*****************************************************************************
* placeholders
****************************************************************************/
@@ -440,96 +391,76 @@ OMPT_API_ROUTINE void *ompt_get_task_fun
extern "C" {
#endif
-
-OMPT_API_PLACEHOLDER void ompt_idle(void)
-{
- // This function is a placeholder used to represent the calling context of
- // idle OpenMP worker threads. It is not meant to be invoked.
- assert(0);
+OMPT_API_PLACEHOLDER void ompt_idle(void) {
+ // This function is a placeholder used to represent the calling context of
+ // idle OpenMP worker threads. It is not meant to be invoked.
+ assert(0);
}
-
-OMPT_API_PLACEHOLDER void ompt_overhead(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads working in the OpenMP runtime. It is not meant to be invoked.
- assert(0);
+OMPT_API_PLACEHOLDER void ompt_overhead(void) {
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads working in the OpenMP runtime. It is not meant to be invoked.
+ assert(0);
}
-
-OMPT_API_PLACEHOLDER void ompt_barrier_wait(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads waiting for a barrier in the OpenMP runtime. It is not meant
- // to be invoked.
- assert(0);
+OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) {
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads waiting for a barrier in the OpenMP runtime. It is not meant
+ // to be invoked.
+ assert(0);
}
-
-OMPT_API_PLACEHOLDER void ompt_task_wait(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads waiting for a task in the OpenMP runtime. It is not meant
- // to be invoked.
- assert(0);
+OMPT_API_PLACEHOLDER void ompt_task_wait(void) {
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads waiting for a task in the OpenMP runtime. It is not meant
+ // to be invoked.
+ assert(0);
}
-
-OMPT_API_PLACEHOLDER void ompt_mutex_wait(void)
-{
- // This function is a placeholder used to represent the OpenMP context of
- // threads waiting for a mutex in the OpenMP runtime. It is not meant
- // to be invoked.
- assert(0);
+OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) {
+ // This function is a placeholder used to represent the OpenMP context of
+ // threads waiting for a mutex in the OpenMP runtime. It is not meant
+ // to be invoked.
+ assert(0);
}
#ifdef __cplusplus
};
#endif
-
/*****************************************************************************
 * compatibility
****************************************************************************/
-OMPT_API_ROUTINE int ompt_get_ompt_version()
-{
- return OMPT_VERSION;
-}
-
-
+OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; }
/*****************************************************************************
* application-facing API
****************************************************************************/
-
/*----------------------------------------------------------------------------
| control
---------------------------------------------------------------------------*/
-_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier)
-{
- if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
- ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
- }
+_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) {
+ if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
+ ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
+ }
}
-
-
/*****************************************************************************
* API inquiry for tool
****************************************************************************/
-static ompt_interface_fn_t ompt_fn_lookup(const char *s)
-{
+static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
-#define ompt_interface_fn(fn) \
- if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn;
+#define ompt_interface_fn(fn) \
+ if (strcmp(s, #fn) == 0) \
+ return (ompt_interface_fn_t)fn;
- FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
+ FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
- FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn)
+ FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn)
- return (ompt_interface_fn_t) 0;
+ return (ompt_interface_fn_t)0;
}
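
For reference (not part of this patch): ompt_fn_lookup above is the string-to-pointer dispatcher that a tool receives during OMPT initialization and uses to resolve inquiry and placeholder entry points by name. A minimal tool-side sketch, assuming the OMPT v1 initialization signature; the typedefs spelled out below are local stand-ins for the ones declared in ompt.h:

    #include <cstdio>
    #include <cstdint>

    // Local stand-ins (assumptions) for the ompt.h typedefs.
    typedef void (*ompt_interface_fn_t)(void);
    typedef ompt_interface_fn_t (*ompt_function_lookup_t)(const char *);
    typedef uint64_t (*get_thread_id_fn_t)(void); // shape of ompt_get_thread_id

    extern "C" int ompt_initialize(ompt_function_lookup_t lookup,
                                   const char *runtime_version,
                                   unsigned int ompt_version) {
      // Resolve an inquiry function by name, mirroring the strcmp-based
      // dispatch in ompt_fn_lookup above.
      get_thread_id_fn_t get_thread_id =
          (get_thread_id_fn_t)lookup("ompt_get_thread_id");
      if (get_thread_id)
        std::printf("%s (OMPT %u): thread id %llu\n", runtime_version,
                    ompt_version, (unsigned long long)get_thread_id());
      return 1; // nonzero keeps OMPT support active
    }
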
Modified: openmp/trunk/runtime/src/ompt-internal.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-internal.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-internal.h (original)
+++ openmp/trunk/runtime/src/ompt-internal.h Fri May 12 13:01:32 2017
@@ -1,79 +1,71 @@
#ifndef __OMPT_INTERNAL_H__
#define __OMPT_INTERNAL_H__
-#include "ompt.h"
#include "ompt-event-specific.h"
+#include "ompt.h"
#define OMPT_VERSION 1
#define _OMP_EXTERN extern "C"
-#define OMPT_INVOKER(x) \
+#define OMPT_INVOKER(x) \
((x == fork_context_gnu) ? ompt_invoker_program : ompt_invoker_runtime)
-
-#define ompt_callback(e) e ## _callback
-
+#define ompt_callback(e) e##_callback
typedef struct ompt_callbacks_s {
-#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event);
+#define ompt_event_macro(event, callback, eventid) \
+ callback ompt_callback(event);
- FOREACH_OMPT_EVENT(ompt_event_macro)
+ FOREACH_OMPT_EVENT(ompt_event_macro)
#undef ompt_event_macro
} ompt_callbacks_t;
-
-
typedef struct {
- ompt_frame_t frame;
- void* function;
- ompt_task_id_t task_id;
+ ompt_frame_t frame;
+ void *function;
+ ompt_task_id_t task_id;
#if OMP_40_ENABLED
- int ndeps;
- ompt_task_dependence_t *deps;
+ int ndeps;
+ ompt_task_dependence_t *deps;
#endif /* OMP_40_ENABLED */
} ompt_task_info_t;
-
typedef struct {
- ompt_parallel_id_t parallel_id;
- void *microtask;
+ ompt_parallel_id_t parallel_id;
+ void *microtask;
} ompt_team_info_t;
-
typedef struct ompt_lw_taskteam_s {
- ompt_team_info_t ompt_team_info;
- ompt_task_info_t ompt_task_info;
- struct ompt_lw_taskteam_s *parent;
+ ompt_team_info_t ompt_team_info;
+ ompt_task_info_t ompt_task_info;
+ struct ompt_lw_taskteam_s *parent;
} ompt_lw_taskteam_t;
-
typedef struct ompt_parallel_info_s {
- ompt_task_id_t parent_task_id; /* id of parent task */
- ompt_parallel_id_t parallel_id; /* id of parallel region */
- ompt_frame_t *parent_task_frame; /* frame data of parent task */
- void *parallel_function; /* pointer to outlined function */
+ ompt_task_id_t parent_task_id; /* id of parent task */
+ ompt_parallel_id_t parallel_id; /* id of parallel region */
+ ompt_frame_t *parent_task_frame; /* frame data of parent task */
+ void *parallel_function; /* pointer to outlined function */
} ompt_parallel_info_t;
-
typedef struct {
- ompt_state_t state;
- ompt_wait_id_t wait_id;
- void *idle_frame;
+ ompt_state_t state;
+ ompt_wait_id_t wait_id;
+ void *idle_frame;
} ompt_thread_info_t;
-
extern ompt_callbacks_t ompt_callbacks;
#if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
#if USE_FAST_MEMORY
-# define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate
-# define KMP_OMPT_DEPS_FREE __kmp_fast_free
-# else
-# define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc
-# define KMP_OMPT_DEPS_FREE __kmp_thread_free
-# endif
+#define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate
+#define KMP_OMPT_DEPS_FREE __kmp_fast_free
+#else
+#define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc
+#define KMP_OMPT_DEPS_FREE __kmp_thread_free
+#endif
#endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
#ifdef __cplusplus
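
Not part of the patch: the ompt_callback(e) token-pasting macro combined with FOREACH_OMPT_EVENT generates one member of ompt_callbacks_t per event. An illustration of the expansion for a single event; the callback type name below is an assumption standing in for whatever ompt.h declares for ompt_event_control:

    #include <cstdint>

    // Assumed callback type for ompt_event_control (see ompt.h).
    typedef void (*ompt_control_callback_t)(uint64_t command, uint64_t modifier);

    struct ompt_callbacks_expanded {
      // ompt_event_macro(ompt_event_control, ompt_control_callback_t, id)
      // pastes ompt_callback(ompt_event_control) into this member name:
      ompt_control_callback_t ompt_event_control_callback;
      // ...one member per event listed by FOREACH_OMPT_EVENT...
    };

    // Dispatch then reads as in ompt_control() above:
    //   ompt_callbacks.ompt_event_control_callback(command, modifier);
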
Modified: openmp/trunk/runtime/src/ompt-specific.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-specific.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-specific.cpp (original)
+++ openmp/trunk/runtime/src/ompt-specific.cpp Fri May 12 13:01:32 2017
@@ -10,7 +10,7 @@
// macros
//******************************************************************************
-#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0)
+#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0)
#define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
@@ -26,10 +26,10 @@
// when using fetch_and_add to generate the IDs, there isn't any reason to waste
// bits for thread id.
#if 0
-#define NEXT_ID(id_ptr,tid) \
+#define NEXT_ID(id_ptr, tid) \
((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
#else
-#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
+#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
#endif
//******************************************************************************
@@ -43,89 +43,87 @@
// kept consistent
//----------------------------------------------------------
-ompt_team_info_t *
-__ompt_get_teaminfo(int depth, int *size)
-{
- kmp_info_t *thr = ompt_get_thread();
-
- if (thr) {
- kmp_team *team = thr->th.th_team;
- if (team == NULL) return NULL;
-
- ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
-
- while(depth > 0) {
- // next lightweight team (if any)
- if (lwt) lwt = lwt->parent;
-
- // next heavyweight team (if any) after
- // lightweight teams are exhausted
- if (!lwt && team) {
- team=team->t.t_parent;
- if (team) {
- lwt = LWT_FROM_TEAM(team);
- }
- }
+ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
+ kmp_info_t *thr = ompt_get_thread();
- depth--;
+ if (thr) {
+ kmp_team *team = thr->th.th_team;
+ if (team == NULL)
+ return NULL;
+
+ ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
+
+ while (depth > 0) {
+ // next lightweight team (if any)
+ if (lwt)
+ lwt = lwt->parent;
+
+ // next heavyweight team (if any) after
+ // lightweight teams are exhausted
+ if (!lwt && team) {
+ team = team->t.t_parent;
+ if (team) {
+ lwt = LWT_FROM_TEAM(team);
}
+ }
- if (lwt) {
- // lightweight teams have one task
- if (size) *size = 1;
-
- // return team info for lightweight team
- return &lwt->ompt_team_info;
- } else if (team) {
- // extract size from heavyweight team
- if (size) *size = team->t.t_nproc;
+ depth--;
+ }
- // return team info for heavyweight team
- return &team->t.ompt_team_info;
- }
+ if (lwt) {
+ // lightweight teams have one task
+ if (size)
+ *size = 1;
+
+ // return team info for lightweight team
+ return &lwt->ompt_team_info;
+ } else if (team) {
+ // extract size from heavyweight team
+ if (size)
+ *size = team->t.t_nproc;
+
+ // return team info for heavyweight team
+ return &team->t.ompt_team_info;
}
+ }
- return NULL;
+ return NULL;
}
-
-ompt_task_info_t *
-__ompt_get_taskinfo(int depth)
-{
- ompt_task_info_t *info = NULL;
- kmp_info_t *thr = ompt_get_thread();
-
- if (thr) {
- kmp_taskdata_t *taskdata = thr->th.th_current_task;
- ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
-
- while (depth > 0) {
- // next lightweight team (if any)
- if (lwt) lwt = lwt->parent;
-
- // next heavyweight team (if any) after
- // lightweight teams are exhausted
- if (!lwt && taskdata) {
- taskdata = taskdata->td_parent;
- if (taskdata) {
- lwt = LWT_FROM_TEAM(taskdata->td_team);
- }
- }
- depth--;
+ompt_task_info_t *__ompt_get_taskinfo(int depth) {
+ ompt_task_info_t *info = NULL;
+ kmp_info_t *thr = ompt_get_thread();
+
+ if (thr) {
+ kmp_taskdata_t *taskdata = thr->th.th_current_task;
+ ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+ while (depth > 0) {
+ // next lightweight team (if any)
+ if (lwt)
+ lwt = lwt->parent;
+
+ // next heavyweight team (if any) after
+ // lightweight teams are exhausted
+ if (!lwt && taskdata) {
+ taskdata = taskdata->td_parent;
+ if (taskdata) {
+ lwt = LWT_FROM_TEAM(taskdata->td_team);
}
+ }
+ depth--;
+ }
- if (lwt) {
- info = &lwt->ompt_task_info;
- } else if (taskdata) {
- info = &taskdata->ompt_task_info;
- }
+ if (lwt) {
+ info = &lwt->ompt_task_info;
+ } else if (taskdata) {
+ info = &taskdata->ompt_task_info;
}
+ }
- return info;
+ return info;
}
-
-
//******************************************************************************
// interface operations
//******************************************************************************
@@ -134,204 +132,151 @@ __ompt_get_taskinfo(int depth)
// thread support
//----------------------------------------------------------
-ompt_parallel_id_t
-__ompt_thread_id_new()
-{
- static uint64_t ompt_thread_id = 1;
- return NEXT_ID(&ompt_thread_id, 0);
+ompt_parallel_id_t __ompt_thread_id_new() {
+ static uint64_t ompt_thread_id = 1;
+ return NEXT_ID(&ompt_thread_id, 0);
}
-void
-__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid)
-{
- ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
- thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
+void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) {
+ ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
+ thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
}
-
-void
-__ompt_thread_end(ompt_thread_type_t thread_type, int gtid)
-{
- ompt_callbacks.ompt_callback(ompt_event_thread_end)(
- thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
+void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) {
+ ompt_callbacks.ompt_callback(ompt_event_thread_end)(
+ thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
}
+ompt_thread_id_t __ompt_get_thread_id_internal() {
+ // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid
+ // since the return value might be negative, we need to test that before
+ // assigning it to an ompt_thread_id_t, which is unsigned.
+ int id = __kmp_get_gtid();
+ assert(id >= 0);
-ompt_thread_id_t
-__ompt_get_thread_id_internal()
-{
- // FIXME
- // until we have a better way of assigning ids, use __kmp_get_gtid
- // since the return value might be negative, we need to test that before
- // assigning it to an ompt_thread_id_t, which is unsigned.
- int id = __kmp_get_gtid();
- assert(id >= 0);
-
- return GTID_TO_OMPT_THREAD_ID(id);
+ return GTID_TO_OMPT_THREAD_ID(id);
}
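
Not part of the patch: GTID_TO_OMPT_THREAD_ID shifts nonnegative gtids up by one so that 0 can stand for an unknown or invalid thread, since ompt_thread_id_t is unsigned. A small sketch of the intended mapping (the typedef is assumed to match ompt.h):

    #include <cstdint>
    typedef uint64_t ompt_thread_id_t; // assumed, as in ompt.h

    static ompt_thread_id_t gtid_to_ompt_thread_id(int gtid) {
      // gtid 0 (master) -> id 1, gtid 7 -> id 8, gtid -1 (no thread) -> 0
      return gtid >= 0 ? (ompt_thread_id_t)(gtid + 1) : 0;
    }
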
//----------------------------------------------------------
// state support
//----------------------------------------------------------
-void
-__ompt_thread_assign_wait_id(void *variable)
-{
- int gtid = __kmp_gtid_get_specific();
- kmp_info_t *ti = ompt_get_thread_gtid(gtid);
-
- ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable;
-}
-
-ompt_state_t
-__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id)
-{
- kmp_info_t *ti = ompt_get_thread();
-
- if (ti) {
- if (ompt_wait_id)
- *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
- return ti->th.ompt_thread_info.state;
- }
- return ompt_state_undefined;
+void __ompt_thread_assign_wait_id(void *variable) {
+ int gtid = __kmp_gtid_get_specific();
+ kmp_info_t *ti = ompt_get_thread_gtid(gtid);
+
+ ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
+}
+
+ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
+ kmp_info_t *ti = ompt_get_thread();
+
+ if (ti) {
+ if (ompt_wait_id)
+ *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
+ return ti->th.ompt_thread_info.state;
+ }
+ return ompt_state_undefined;
}
//----------------------------------------------------------
// idle frame support
//----------------------------------------------------------
-void *
-__ompt_get_idle_frame_internal(void)
-{
- kmp_info_t *ti = ompt_get_thread();
- return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
+void *__ompt_get_idle_frame_internal(void) {
+ kmp_info_t *ti = ompt_get_thread();
+ return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
}
-
//----------------------------------------------------------
// parallel region support
//----------------------------------------------------------
-ompt_parallel_id_t
-__ompt_parallel_id_new(int gtid)
-{
- static uint64_t ompt_parallel_id = 1;
- return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
-}
-
-
-void *
-__ompt_get_parallel_function_internal(int depth)
-{
- ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
- void *function = info ? info->microtask : NULL;
- return function;
+ompt_parallel_id_t __ompt_parallel_id_new(int gtid) {
+ static uint64_t ompt_parallel_id = 1;
+ return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
+}
+
+void *__ompt_get_parallel_function_internal(int depth) {
+ ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
+ void *function = info ? info->microtask : NULL;
+ return function;
+}
+
+ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) {
+ ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
+ ompt_parallel_id_t id = info ? info->parallel_id : 0;
+ return id;
+}
+
+int __ompt_get_parallel_team_size_internal(int depth) {
+ // initialize the return value with the error value.
+ // if there is a team at the specified depth, the default
+ // value will be overwritten with the size of that team.
+ int size = -1;
+ (void)__ompt_get_teaminfo(depth, &size);
+ return size;
}
-
-ompt_parallel_id_t
-__ompt_get_parallel_id_internal(int depth)
-{
- ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
- ompt_parallel_id_t id = info ? info->parallel_id : 0;
- return id;
-}
-
-
-int
-__ompt_get_parallel_team_size_internal(int depth)
-{
- // initialize the return value with the error value.
- // if there is a team at the specified depth, the default
- // value will be overwritten the size of that team.
- int size = -1;
- (void) __ompt_get_teaminfo(depth, &size);
- return size;
-}
-
-
//----------------------------------------------------------
// lightweight task team support
//----------------------------------------------------------
-void
-__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
- int gtid, void *microtask,
- ompt_parallel_id_t ompt_pid)
-{
- lwt->ompt_team_info.parallel_id = ompt_pid;
- lwt->ompt_team_info.microtask = microtask;
- lwt->ompt_task_info.task_id = 0;
- lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
- lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
- lwt->ompt_task_info.function = NULL;
- lwt->parent = 0;
-}
-
-
-void
-__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr)
-{
- ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
- lwt->parent = my_parent;
- thr->th.th_team->t.ompt_serialized_team_info = lwt;
+void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
+ void *microtask, ompt_parallel_id_t ompt_pid) {
+ lwt->ompt_team_info.parallel_id = ompt_pid;
+ lwt->ompt_team_info.microtask = microtask;
+ lwt->ompt_task_info.task_id = 0;
+ lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
+ lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
+ lwt->ompt_task_info.function = NULL;
+ lwt->parent = 0;
+}
+
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) {
+ ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
+ lwt->parent = my_parent;
+ thr->th.th_team->t.ompt_serialized_team_info = lwt;
+}
+
+ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) {
+ ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
+ if (lwtask)
+ thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
+ return lwtask;
}
-
-ompt_lw_taskteam_t *
-__ompt_lw_taskteam_unlink(kmp_info_t *thr)
-{
- ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
- if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
- return lwtask;
-}
-
-
//----------------------------------------------------------
// task support
//----------------------------------------------------------
-ompt_task_id_t
-__ompt_task_id_new(int gtid)
-{
- static uint64_t ompt_task_id = 1;
- return NEXT_ID(&ompt_task_id, gtid);
+ompt_task_id_t __ompt_task_id_new(int gtid) {
+ static uint64_t ompt_task_id = 1;
+ return NEXT_ID(&ompt_task_id, gtid);
}
-
-ompt_task_id_t
-__ompt_get_task_id_internal(int depth)
-{
- ompt_task_info_t *info = __ompt_get_taskinfo(depth);
- ompt_task_id_t task_id = info ? info->task_id : 0;
- return task_id;
+ompt_task_id_t __ompt_get_task_id_internal(int depth) {
+ ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+ ompt_task_id_t task_id = info ? info->task_id : 0;
+ return task_id;
}
-
-void *
-__ompt_get_task_function_internal(int depth)
-{
- ompt_task_info_t *info = __ompt_get_taskinfo(depth);
- void *function = info ? info->function : NULL;
- return function;
+void *__ompt_get_task_function_internal(int depth) {
+ ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+ void *function = info ? info->function : NULL;
+ return function;
}
-
-ompt_frame_t *
-__ompt_get_task_frame_internal(int depth)
-{
- ompt_task_info_t *info = __ompt_get_taskinfo(depth);
- ompt_frame_t *frame = info ? frame = &info->frame : NULL;
- return frame;
+ompt_frame_t *__ompt_get_task_frame_internal(int depth) {
+ ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+ ompt_frame_t *frame = info ? frame = &info->frame : NULL;
+ return frame;
}
-
//----------------------------------------------------------
// team support
//----------------------------------------------------------
-void
-__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid)
-{
- team->t.ompt_team_info.parallel_id = ompt_pid;
+void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) {
+ team->t.ompt_team_info.parallel_id = ompt_pid;
}
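
Not part of the patch: __ompt_get_teaminfo and __ompt_get_taskinfo walk outward from the current team or task, exhausting lightweight (serialized) teams before stepping to the parent heavyweight team, one step per unit of depth. A sketch of how a tool is expected to consume that depth parameter, stopping at the 0 that __ompt_get_parallel_id_internal returns when no team exists at the requested depth; the function-pointer typedef is an assumption, and a real tool would obtain the routine through its lookup function:

    #include <cstdio>
    #include <cstdint>

    typedef uint64_t ompt_parallel_id_t;                    // assumed, as in ompt.h
    typedef ompt_parallel_id_t (*get_parallel_id_fn_t)(int); // hypothetical typedef

    void print_enclosing_regions(get_parallel_id_fn_t get_parallel_id) {
      for (int depth = 0;; ++depth) {
        ompt_parallel_id_t id = get_parallel_id(depth);
        if (id == 0) // no team at this depth
          break;
        std::printf("depth %d: parallel id %llu\n", depth,
                    (unsigned long long)id);
      }
    }
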
Modified: openmp/trunk/runtime/src/ompt-specific.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-specific.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-specific.h (original)
+++ openmp/trunk/runtime/src/ompt-specific.h Fri May 12 13:01:32 2017
@@ -9,8 +9,6 @@
typedef kmp_info_t ompt_thread_t;
-
-
/*****************************************************************************
* forward declarations
****************************************************************************/
@@ -22,9 +20,9 @@ void __ompt_lw_taskteam_init(ompt_lw_tas
int gtid, void *microtask,
ompt_parallel_id_t ompt_pid);
-void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
-ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr);
+ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr);
ompt_parallel_id_t __ompt_parallel_id_new(int gtid);
ompt_task_id_t __ompt_task_id_new(int gtid);
@@ -43,8 +41,6 @@ ompt_task_id_t __ompt_get_task_id_intern
ompt_frame_t *__ompt_get_task_frame_internal(int depth);
-
-
/*****************************************************************************
* macros
****************************************************************************/
@@ -53,38 +49,25 @@ ompt_frame_t *__ompt_get_task_frame_inte
#define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
#define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
-
-
//******************************************************************************
// inline functions
//******************************************************************************
-inline ompt_thread_t *
-ompt_get_thread_gtid(int gtid)
-{
- return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL;
+inline ompt_thread_t *ompt_get_thread_gtid(int gtid) {
+ return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL;
}
-
-inline ompt_thread_t *
-ompt_get_thread()
-{
- int gtid = __kmp_get_gtid();
- return ompt_get_thread_gtid(gtid);
+inline ompt_thread_t *ompt_get_thread() {
+ int gtid = __kmp_get_gtid();
+ return ompt_get_thread_gtid(gtid);
}
-
-inline void
-ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state)
-{
- thread->th.ompt_thread_info.state = state;
+inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) {
+ thread->th.ompt_thread_info.state = state;
}
-
-inline const char *
-ompt_get_runtime_version()
-{
- return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
+inline const char *ompt_get_runtime_version() {
+ return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
}
#endif
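
Not part of the patch: ompt_set_thread_state and the wait-id helper simply record values in th.ompt_thread_info that __ompt_get_state_internal later reads back for the tool. A miniature, self-contained model of that pattern; mini_thread_info and mini_barrier_wait are invented names, not runtime code:

    #include <cstdint>

    enum mini_state { work_parallel, wait_barrier };

    struct mini_thread_info {
      mini_state state;
      uintptr_t wait_id;
    };

    static void mini_barrier_wait(mini_thread_info *ti, uintptr_t barrier_addr) {
      ti->state = wait_barrier;   // like ompt_set_thread_state(thr, ompt_state_wait_barrier)
      ti->wait_id = barrier_addr; // like __ompt_thread_assign_wait_id(&barrier)
      // ... spin or park until the barrier releases ...
      ti->state = work_parallel;  // back to a working state
    }

A tool polling ompt_get_state() during the wait then sees the barrier state and the associated wait id, which is the whole point of keeping these two fields per thread.
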
Modified: openmp/trunk/runtime/src/tsan_annotations.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/tsan_annotations.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/tsan_annotations.cpp (original)
+++ openmp/trunk/runtime/src/tsan_annotations.cpp Fri May 12 13:01:32 2017
@@ -3,7 +3,6 @@
* race detection in OpenMP programs.
*/
-
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
@@ -18,46 +17,92 @@
#include <stdio.h>
typedef unsigned long uptr;
-typedef signed long sptr;
+typedef signed long sptr;
-extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f, int l, uptr cv) {}
-extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f, int l, uptr cv) {}
-extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f, int l, uptr mu) {}
-extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l, uptr cv, uptr lock) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l, uptr m) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f, int l, uptr m) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {}
-extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l, uptr mem) {}
-extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size) {}
-extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l, uptr mem) {}
-extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateEnableRaceDetection( const char *f, int l, int enable) {}
-extern "C" __attribute__((weak)) void AnnotateMutexIsUsedAsCondVar( const char *f, int l, uptr mu) {}
-extern "C" __attribute__((weak)) void AnnotatePCQGet( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotatePCQPut( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotatePCQDestroy( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotatePCQCreate( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotateExpectRace( const char *f, int l, uptr mem, char *desc) {}
-extern "C" __attribute__((weak)) void AnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr size, char *desc) {}
-extern "C" __attribute__((weak)) void AnnotateBenignRace( const char *f, int l, uptr mem, char *desc) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotatePublishMemoryRange( const char *f, int l, uptr addr, uptr size) {}
-extern "C" __attribute__((weak)) void AnnotateUnpublishMemoryRange( const char *f, int l, uptr addr, uptr size) {}
-extern "C" __attribute__((weak)) void AnnotateThreadName( const char *f, int l, char *name) {}
-extern "C" __attribute__((weak)) void WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void WTFAnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr sz, char *desc) {}
-extern "C" __attribute__((weak)) int RunningOnValgrind() {return 0;}
-extern "C" __attribute__((weak)) double ValgrindSlowdown(void) {return 0;}
-extern "C" __attribute__((weak)) const char __attribute__((weak))* ThreadSanitizerQuery(const char *query) {return 0;}
-extern "C" __attribute__((weak)) void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {}
+extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f,
+ int l, uptr addr) {}
+extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l,
+ uptr addr) {}
+extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f,
+ int l, uptr cv) {}
+extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f,
+ int l, uptr cv) {
+}
+extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f,
+ int l, uptr mu) {}
+extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l,
+ uptr cv, uptr lock) {}
+extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l,
+ uptr m) {}
+extern "C" __attribute__((weak)) void
+AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {}
+extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f,
+ int l, uptr m) {}
+extern "C" __attribute__((weak)) void
+AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {}
+extern "C" __attribute__((weak)) void
+AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {}
+extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l,
+ uptr mem) {}
+extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) {
+}
+extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l,
+ uptr mem, uptr size) {}
+extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l,
+ uptr mem) {}
+extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void
+AnnotateEnableRaceDetection(const char *f, int l, int enable) {}
+extern "C" __attribute__((weak)) void
+AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu) {}
+extern "C" __attribute__((weak)) void AnnotatePCQGet(const char *f, int l,
+ uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotatePCQPut(const char *f, int l,
+ uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotatePCQDestroy(const char *f, int l,
+ uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotatePCQCreate(const char *f, int l,
+ uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotateExpectRace(const char *f, int l,
+ uptr mem, char *desc) {
+}
+extern "C" __attribute__((weak)) void
+AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size, char *desc) {
+}
+extern "C" __attribute__((weak)) void AnnotateBenignRace(const char *f, int l,
+ uptr mem, char *desc) {
+}
+extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f,
+ int l) {}
+extern "C" __attribute__((weak)) void
+AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size) {}
+extern "C" __attribute__((weak)) void
+AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size) {}
+extern "C" __attribute__((weak)) void AnnotateThreadName(const char *f, int l,
+ char *name) {}
+extern "C" __attribute__((weak)) void
+WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {}
+extern "C" __attribute__((weak)) void
+WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {}
+extern "C" __attribute__((weak)) void
+WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz,
+ char *desc) {}
+extern "C" __attribute__((weak)) int RunningOnValgrind() { return 0; }
+extern "C" __attribute__((weak)) double ValgrindSlowdown(void) { return 0; }
+extern "C" __attribute__((weak)) const char __attribute__((weak)) *
+ ThreadSanitizerQuery(const char *query) {
+ return 0;
+}
+extern "C" __attribute__((weak)) void
+AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {}
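
Not part of the patch: every definition in this file is weak, so when the program actually runs under ThreadSanitizer the strong definitions exported by the TSan runtime take precedence and these empty bodies are discarded; otherwise the annotations collapse to no-ops. A hypothetical strong override, purely to illustrate the linkage behavior:

    #include <cstdio>
    typedef unsigned long uptr; // matches the typedef at the top of this file

    // A strong (non-weak) definition such as this one would win over the
    // weak no-op above at link time; the real TSan runtime does exactly
    // that with its own implementations.
    extern "C" void AnnotateHappensBefore(const char *f, int l, uptr addr) {
      std::fprintf(stderr, "happens-before at %s:%d, addr=0x%lx\n", f, l, addr);
    }
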
Modified: openmp/trunk/runtime/src/tsan_annotations.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/tsan_annotations.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/tsan_annotations.h (original)
+++ openmp/trunk/runtime/src/tsan_annotations.h Fri May 12 13:01:32 2017
@@ -4,7 +4,6 @@
* race detection in OpenMP programs.
*/
-
//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
@@ -21,7 +20,7 @@
/* types as used in tsan/rtl/tsan_interface_ann.cc */
typedef unsigned long uptr;
-typedef signed long sptr;
+typedef signed long sptr;
#ifdef __cplusplus
extern "C" {
@@ -44,30 +43,32 @@ void AnnotateFlushState(const char *f, i
void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size);
void AnnotateNoOp(const char *f, int l, uptr mem);
void AnnotateFlushExpectedRaces(const char *f, int l);
-void AnnotateEnableRaceDetection( const char *f, int l, int enable);
-void AnnotateMutexIsUsedAsCondVar( const char *f, int l, uptr mu);
-void AnnotatePCQGet( const char *f, int l, uptr pcq);
-void AnnotatePCQPut( const char *f, int l, uptr pcq);
-void AnnotatePCQDestroy( const char *f, int l, uptr pcq);
-void AnnotatePCQCreate( const char *f, int l, uptr pcq);
-void AnnotateExpectRace( const char *f, int l, uptr mem, char *desc);
-void AnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr size, char *desc);
-void AnnotateBenignRace( const char *f, int l, uptr mem, char *desc);
+void AnnotateEnableRaceDetection(const char *f, int l, int enable);
+void AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu);
+void AnnotatePCQGet(const char *f, int l, uptr pcq);
+void AnnotatePCQPut(const char *f, int l, uptr pcq);
+void AnnotatePCQDestroy(const char *f, int l, uptr pcq);
+void AnnotatePCQCreate(const char *f, int l, uptr pcq);
+void AnnotateExpectRace(const char *f, int l, uptr mem, char *desc);
+void AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size,
+ char *desc);
+void AnnotateBenignRace(const char *f, int l, uptr mem, char *desc);
void AnnotateIgnoreReadsBegin(const char *f, int l);
void AnnotateIgnoreReadsEnd(const char *f, int l);
void AnnotateIgnoreWritesBegin(const char *f, int l);
void AnnotateIgnoreWritesEnd(const char *f, int l);
void AnnotateIgnoreSyncBegin(const char *f, int l);
void AnnotateIgnoreSyncEnd(const char *f, int l);
-void AnnotatePublishMemoryRange( const char *f, int l, uptr addr, uptr size);
-void AnnotateUnpublishMemoryRange( const char *f, int l, uptr addr, uptr size);
-void AnnotateThreadName( const char *f, int l, char *name);
+void AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size);
+void AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size);
+void AnnotateThreadName(const char *f, int l, char *name);
void WTFAnnotateHappensBefore(const char *f, int l, uptr addr);
void WTFAnnotateHappensAfter(const char *f, int l, uptr addr);
-void WTFAnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr sz, char *desc);
+void WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz,
+ char *desc);
int RunningOnValgrind();
double ValgrindSlowdown(void);
-const char * ThreadSanitizerQuery(const char *query);
+const char *ThreadSanitizerQuery(const char *query);
void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz);
#ifdef __cplusplus
@@ -75,17 +76,27 @@ void AnnotateMemoryIsInitialized(const c
#endif
#ifdef TSAN_SUPPORT
-#define ANNOTATE_HAPPENS_AFTER(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_HAPPENS_BEFORE(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_IGNORE_WRITES_BEGIN() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+#define ANNOTATE_HAPPENS_AFTER(addr) \
+ AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_HAPPENS_BEFORE(addr) \
+ AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_IGNORE_WRITES_BEGIN() \
+ AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
#define ANNOTATE_IGNORE_WRITES_END() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
-#define ANNOTATE_RWLOCK_CREATE(lck) AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck)
-#define ANNOTATE_RWLOCK_RELEASED(lck) AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1)
-#define ANNOTATE_RWLOCK_ACQUIRED(lck) AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1)
-#define ANNOTATE_BARRIER_BEGIN(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_BARRIER_END(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_REDUCE_AFTER(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_REDUCE_BEFORE(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_RWLOCK_CREATE(lck) \
+ AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck)
+#define ANNOTATE_RWLOCK_RELEASED(lck) \
+ AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1)
+#define ANNOTATE_RWLOCK_ACQUIRED(lck) \
+ AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1)
+#define ANNOTATE_BARRIER_BEGIN(addr) \
+ AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_BARRIER_END(addr) \
+ AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_REDUCE_AFTER(addr) \
+ AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_REDUCE_BEFORE(addr) \
+ AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
#else
#define ANNOTATE_HAPPENS_AFTER(addr)
#define ANNOTATE_HAPPENS_BEFORE(addr)
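
Not part of the patch: under TSAN_SUPPORT the ANNOTATE_* macros expand to the functions declared above, teaching TSan about orderings it cannot infer from hand-rolled synchronization; without TSAN_SUPPORT they expand to nothing. A minimal sketch of the usage pattern, with a hypothetical flag/payload pair rather than runtime code:

    // Fall back to no-ops when built outside the runtime, so the sketch
    // stays self-contained.
    #ifndef ANNOTATE_HAPPENS_BEFORE
    #define ANNOTATE_HAPPENS_BEFORE(addr)
    #define ANNOTATE_HAPPENS_AFTER(addr)
    #endif

    static int payload;
    static volatile int flag;

    void producer() {
      payload = 42;
      ANNOTATE_HAPPENS_BEFORE(&flag); // writes before this release of flag...
      flag = 1;
    }

    void consumer() {
      while (!flag) {
      }
      ANNOTATE_HAPPENS_AFTER(&flag); // ...are ordered before reads after the acquire
      int observed = payload;        // no false race reported on payload
      (void)observed;
    }
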
Modified: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s Fri May 12 13:01:32 2017
@@ -21,7 +21,6 @@
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
# if KMP_MIC
-//
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
// the round-robin HT mechanism, and therefore speeds up the issue rate of
@@ -70,9 +69,10 @@
KMP_PREFIX_UNDERSCORE($0):
.endmacro
# else // KMP_OS_DARWIN
-# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
+# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
-// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*)
+// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
+// on OS X*)
# if KMP_MIC
# define KMP_LABEL(x) L_##x // local label
# else
@@ -163,12 +163,10 @@ KMP_PREFIX_UNDERSCORE(\proc):
#ifdef KMP_GOMP_COMPAT
-//
// Support for unnamed common blocks.
//
// Because the symbol ".gomp_critical_user_" contains a ".", we have to
// put this stuff in assembly.
-//
# if KMP_ARCH_X86
# if KMP_OS_DARWIN
@@ -221,14 +219,12 @@ __kmp_unnamed_critical_addr:
// microtasking routines specifically written for IA-32 architecture
// running Linux* OS
// -----------------------------------------------------------------------
-//
.ident "Intel Corporation"
.data
ALIGN 4
// void
// __kmp_x86_pause( void );
-//
.text
PROC __kmp_x86_pause
@@ -238,10 +234,9 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_x86_pause
-//
// void
// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
-//
+
PROC __kmp_x86_cpuid
pushl %ebp
@@ -253,7 +248,7 @@ __kmp_unnamed_critical_addr:
movl 8(%ebp), %eax
movl 12(%ebp), %ecx
- cpuid // Query the CPUID for the current processor
+ cpuid // Query the CPUID for the current processor
movl 16(%ebp), %edi
movl %eax, 0(%edi)
@@ -275,10 +270,8 @@ __kmp_unnamed_critical_addr:
# if !KMP_ASM_INTRINS
//------------------------------------------------------------------------
-//
// kmp_int32
// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-//
PROC __kmp_test_then_add32
@@ -291,7 +284,6 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_test_then_add32
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
@@ -302,7 +294,6 @@ __kmp_unnamed_critical_addr:
// d: 8(%esp)
//
// return: %al
-
PROC __kmp_xchg_fixed8
movl 4(%esp), %ecx // "p"
@@ -316,7 +307,6 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
@@ -326,7 +316,6 @@ __kmp_unnamed_critical_addr:
// p: 4(%esp)
// d: 8(%esp)
// return: %ax
-
PROC __kmp_xchg_fixed16
movl 4(%esp), %ecx // "p"
@@ -340,7 +329,6 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
@@ -351,7 +339,6 @@ __kmp_unnamed_critical_addr:
// d: 8(%esp)
//
// return: %eax
-
PROC __kmp_xchg_fixed32
movl 4(%esp), %ecx // "p"
@@ -364,11 +351,8 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_xchg_fixed32
-//
// kmp_int8
// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-
PROC __kmp_compare_and_store8
movl 4(%esp), %ecx
@@ -382,11 +366,8 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_compare_and_store8
-//
// kmp_int16
-// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-
+// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
PROC __kmp_compare_and_store16
movl 4(%esp), %ecx
@@ -400,11 +381,8 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_compare_and_store16
-//
// kmp_int32
-// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-
+// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
PROC __kmp_compare_and_store32
movl 4(%esp), %ecx
@@ -412,16 +390,14 @@ __kmp_unnamed_critical_addr:
movl 12(%esp), %edx
lock
cmpxchgl %edx,(%ecx)
- sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0
- and $1, %eax // sign extend previous instruction
+ sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0
+ and $1, %eax // sign extend previous instruction
ret
DEBUG_INFO __kmp_compare_and_store32
-//
// kmp_int32
-// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
+// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv);
PROC __kmp_compare_and_store64
pushl %ebp
@@ -435,8 +411,8 @@ __kmp_unnamed_critical_addr:
movl 24(%ebp), %ecx // "sv" high order word
lock
cmpxchg8b (%edi)
- sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0
- and $1, %eax // sign extend previous instruction
+ sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0
+ and $1, %eax // sign extend previous instruction
popl %edi
popl %ebx
movl %ebp, %esp
@@ -445,11 +421,8 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_compare_and_store64
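
Not part of the patch: the comment blocks above document the C-callable contract of the compare-and-store primitives, which return nonzero exactly when *p matched cv and was replaced by sv (the sete/and pair at the end of each routine). A sketch of that contract from the C side; the kmp_int32 typedef is assumed to match kmp_os.h, and linking requires this assembly file:

    #include <cstdint>
    typedef std::int32_t kmp_int32; // assumed to match kmp_os.h

    // Implemented above in assembly when !KMP_ASM_INTRINS.
    extern "C" kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p,
                                                   kmp_int32 cv, kmp_int32 sv);

    // Returns nonzero if the slot was moved from 0 (free) to 1 (claimed).
    int try_claim(volatile kmp_int32 *slot) {
      return __kmp_compare_and_store32(slot, 0, 1);
    }
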
-//
// kmp_int8
-// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-
+// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
PROC __kmp_compare_and_store_ret8
movl 4(%esp), %ecx
@@ -461,11 +434,9 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_compare_and_store_ret8
-//
// kmp_int16
-// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-
+// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
+// kmp_int16 sv);
PROC __kmp_compare_and_store_ret16
movl 4(%esp), %ecx
@@ -477,11 +448,9 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_compare_and_store_ret16
-//
// kmp_int32
-// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-
+// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
+// kmp_int32 sv);
PROC __kmp_compare_and_store_ret32
movl 4(%esp), %ecx
@@ -493,10 +462,9 @@ __kmp_unnamed_critical_addr:
DEBUG_INFO __kmp_compare_and_store_ret32
-//
// kmp_int64
-// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
+// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
+// kmp_int64 sv);
PROC __kmp_compare_and_store_ret64
pushl %ebp
@@ -520,7 +488,6 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
@@ -531,8 +498,6 @@ __kmp_unnamed_critical_addr:
// data: 8(%esp)
//
// return: %eax
-
-
PROC __kmp_xchg_real32
pushl %ebp
@@ -565,7 +530,6 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
@@ -573,8 +537,6 @@ __kmp_unnamed_critical_addr:
//
// parameters:
// p: 4(%esp)
-//
-
PROC __kmp_load_x87_fpu_control_word
movl 4(%esp), %eax
@@ -585,7 +547,6 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
@@ -593,8 +554,6 @@ __kmp_unnamed_critical_addr:
//
// parameters:
// p: 4(%esp)
-//
-
PROC __kmp_store_x87_fpu_control_word
movl 4(%esp), %eax
@@ -605,14 +564,10 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
-//
-//
-
PROC __kmp_clear_x87_fpu_status_word
fnclex
@@ -622,7 +577,6 @@ __kmp_unnamed_critical_addr:
//------------------------------------------------------------------------
-//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
@@ -714,7 +668,6 @@ KMP_LABEL(invoke_3):
DEBUG_INFO __kmp_hardware_timestamp
// -- End __kmp_hardware_timestamp
-// -----------------------------------------------------------------------
#endif /* KMP_ARCH_X86 */
@@ -732,9 +685,9 @@ KMP_LABEL(invoke_3):
.data
ALIGN 4
-// To prevent getting our code into .data section .text added to every routine definition for x86_64.
+// To prevent getting our code into .data section .text added to every routine
+// definition for x86_64.
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_x86_cpuid
//
// void
@@ -744,7 +697,6 @@ KMP_LABEL(invoke_3):
// mode: %edi
// mode2: %esi
// cpuid_buffer: %rdx
-
.text
PROC __kmp_x86_cpuid
@@ -774,7 +726,6 @@ KMP_LABEL(invoke_3):
# if !KMP_ASM_INTRINS
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_test_then_add32
//
// kmp_int32
@@ -785,7 +736,6 @@ KMP_LABEL(invoke_3):
// d: %esi
//
// return: %eax
-
.text
PROC __kmp_test_then_add32
@@ -798,7 +748,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_test_then_add64
//
// kmp_int64
@@ -808,7 +757,6 @@ KMP_LABEL(invoke_3):
// p: %rdi
// d: %rsi
// return: %rax
-
.text
PROC __kmp_test_then_add64
@@ -821,7 +769,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed8
//
// kmp_int32
@@ -832,7 +779,6 @@ KMP_LABEL(invoke_3):
// d: %sil
//
// return: %al
-
.text
PROC __kmp_xchg_fixed8
@@ -846,7 +792,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed16
//
// kmp_int16
@@ -856,7 +801,6 @@ KMP_LABEL(invoke_3):
// p: %rdi
// d: %si
// return: %ax
-
.text
PROC __kmp_xchg_fixed16
@@ -870,7 +814,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed32
//
// kmp_int32
@@ -881,7 +824,6 @@ KMP_LABEL(invoke_3):
// d: %esi
//
// return: %eax
-
.text
PROC __kmp_xchg_fixed32
@@ -895,7 +837,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_fixed64
//
// kmp_int64
@@ -905,7 +846,6 @@ KMP_LABEL(invoke_3):
// p: %rdi
// d: %rsi
// return: %rax
-
.text
PROC __kmp_xchg_fixed64
@@ -919,7 +859,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store8
//
// kmp_int8
@@ -931,7 +870,6 @@ KMP_LABEL(invoke_3):
// sv: %edx
//
// return: %eax
-
.text
PROC __kmp_compare_and_store8
@@ -946,7 +884,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store16
//
// kmp_int16
@@ -958,7 +895,6 @@ KMP_LABEL(invoke_3):
// sv: %dx
//
// return: %eax
-
.text
PROC __kmp_compare_and_store16
@@ -973,7 +909,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store32
//
// kmp_int32
@@ -985,7 +920,6 @@ KMP_LABEL(invoke_3):
// sv: %edx
//
// return: %eax
-
.text
PROC __kmp_compare_and_store32
@@ -1000,7 +934,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store64
//
// kmp_int32
@@ -1011,7 +944,6 @@ KMP_LABEL(invoke_3):
// cv: %rsi
// sv: %rdx
// return: %eax
-
.text
PROC __kmp_compare_and_store64
@@ -1025,7 +957,6 @@ KMP_LABEL(invoke_3):
DEBUG_INFO __kmp_compare_and_store64
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store_ret8
//
// kmp_int8
@@ -1037,7 +968,6 @@ KMP_LABEL(invoke_3):
// sv: %edx
//
// return: %eax
-
.text
PROC __kmp_compare_and_store_ret8
@@ -1050,7 +980,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store_ret16
//
// kmp_int16
@@ -1062,7 +991,6 @@ KMP_LABEL(invoke_3):
// sv: %dx
//
// return: %eax
-
.text
PROC __kmp_compare_and_store_ret16
@@ -1075,7 +1003,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store_ret32
//
// kmp_int32
@@ -1087,7 +1014,6 @@ KMP_LABEL(invoke_3):
// sv: %edx
//
// return: %eax
-
.text
PROC __kmp_compare_and_store_ret32
@@ -1100,7 +1026,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_compare_and_store_ret64
//
// kmp_int64
@@ -1111,7 +1036,6 @@ KMP_LABEL(invoke_3):
// cv: %rsi
// sv: %rdx
// return: %eax
-
.text
PROC __kmp_compare_and_store_ret64
@@ -1130,7 +1054,6 @@ KMP_LABEL(invoke_3):
# if !KMP_ASM_INTRINS
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_real32
//
// kmp_real32
@@ -1141,7 +1064,6 @@ KMP_LABEL(invoke_3):
// data: %xmm0 (lower 4 bytes)
//
// return: %xmm0 (lower 4 bytes)
-
.text
PROC __kmp_xchg_real32
@@ -1158,7 +1080,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_xchg_real64
//
// kmp_real64
@@ -1168,8 +1089,6 @@ KMP_LABEL(invoke_3):
// addr: %rdi
// data: %xmm0 (lower 8 bytes)
// return: %xmm0 (lower 8 bytes)
-//
-
.text
PROC __kmp_xchg_real64
@@ -1190,7 +1109,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_load_x87_fpu_control_word
//
// void
@@ -1198,8 +1116,6 @@ KMP_LABEL(invoke_3):
//
// parameters:
// p: %rdi
-//
-
.text
PROC __kmp_load_x87_fpu_control_word
@@ -1210,7 +1126,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_store_x87_fpu_control_word
//
// void
@@ -1218,8 +1133,6 @@ KMP_LABEL(invoke_3):
//
// parameters:
// p: %rdi
-//
-
.text
PROC __kmp_store_x87_fpu_control_word
@@ -1230,14 +1143,10 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_clear_x87_fpu_status_word
//
// void
// __kmp_clear_x87_fpu_status_word();
-//
-//
-
.text
PROC __kmp_clear_x87_fpu_status_word
@@ -1256,7 +1165,6 @@ KMP_LABEL(invoke_3):
//------------------------------------------------------------------------
-//
// typedef void (*microtask_t)( int *gtid, int *tid, ... );
//
// int
@@ -1267,8 +1175,7 @@ KMP_LABEL(invoke_3):
// return 1;
// }
//
-// note:
-// at call to pkfn must have %rsp 128-byte aligned for compiler
+// note: at call to pkfn must have %rsp 128-byte aligned for compiler
//
// parameters:
// %rdi: pkfn
@@ -1291,8 +1198,6 @@ KMP_LABEL(invoke_3):
// %rbx: used to hold pkfn address, and zero constant, callee-save
//
// return: %eax (always 1/TRUE)
-//
-
__gtid = -16
__tid = -24
@@ -1442,13 +1347,10 @@ KMP_LABEL(kmp_1_exit):
// -- End __kmp_hardware_timestamp
//------------------------------------------------------------------------
-//
// FUNCTION __kmp_bsr32
//
// int
// __kmp_bsr32( int );
-//
-
.text
PROC __kmp_bsr32