[Openmp-commits] [openmp] r302929 - Clang-format and whitespace cleanup of source code

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Fri May 12 11:01:35 PDT 2017


Modified: openmp/trunk/runtime/src/kmp_stats.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats.cpp Fri May 12 13:01:32 2017
@@ -12,196 +12,186 @@
 //
 //===----------------------------------------------------------------------===//
 
+
 #include "kmp.h"
-#include "kmp_str.h"
 #include "kmp_lock.h"
 #include "kmp_stats.h"
+#include "kmp_str.h"
 
 #include <algorithm>
-#include <sstream>
-#include <iomanip>
-#include <stdlib.h>                             // for atexit
 #include <ctime>
+#include <iomanip>
+#include <sstream>
+#include <stdlib.h> // for atexit
 
 #define STRINGIZE2(x) #x
 #define STRINGIZE(x) STRINGIZE2(x)
 
-#define expandName(name,flags,ignore)  {STRINGIZE(name),flags},
+#define expandName(name, flags, ignore) {STRINGIZE(name), flags},
 statInfo timeStat::timerInfo[] = {
-    KMP_FOREACH_TIMER(expandName,0)
-    {"TIMER_LAST", 0}
-};
+    KMP_FOREACH_TIMER(expandName, 0){"TIMER_LAST", 0}};
 const statInfo counter::counterInfo[] = {
-    KMP_FOREACH_COUNTER(expandName,0)
-    {"COUNTER_LAST", 0}
-};
+    KMP_FOREACH_COUNTER(expandName, 0){"COUNTER_LAST", 0}};
 #undef expandName
 
-#define expandName(ignore1,ignore2,ignore3)  {0.0,0.0,0.0},
+#define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0},
 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
-    KMP_FOREACH_TIMER(expandName,0)
-    {0.0,0.0,0.0}
-};
+    KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
 #undef expandName
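For anyone reading these tables for the first time: timerInfo, counterInfo and timerColorInfo are all generated with the X-macro trick, where KMP_FOREACH_TIMER/KMP_FOREACH_COUNTER (defined in kmp_stats.h) invoke the supplied macro once per entry. A small self-contained sketch of the same pattern, using hypothetical names rather than the runtime's:

#include <cstdio>

// Stand-in for KMP_FOREACH_TIMER: expands macro(name, flags, arg) per entry.
#define MY_FOREACH_TIMER(macro, arg)                                           \
  macro(OMP_parallel, 1, arg)                                                  \
  macro(OMP_serial, 0, arg)

struct info { const char *name; int flags; };

// One initializer per entry, exactly like expandName above.
#define expandEntry(name, flags, ignore) {#name, flags},
static const info table[] = {MY_FOREACH_TIMER(expandEntry, 0){"LAST", 0}};
#undef expandEntry

int main() {
  for (const info &e : table)
    std::printf("%-16s flags=%d\n", e.name, e.flags);
  return 0;
}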
 
-const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = {
-    {1.0, 0.0, 0.0}, // red
-    {1.0, 0.6, 0.0}, // orange
-    {1.0, 1.0, 0.0}, // yellow
-    {0.0, 1.0, 0.0}, // green
-    {0.0, 0.0, 1.0}, // blue
-    {0.6, 0.2, 0.8}, // purple
-    {1.0, 0.0, 1.0}, // magenta
-    {0.0, 0.4, 0.2}, // dark green
-    {1.0, 1.0, 0.6}, // light yellow
-    {0.6, 0.4, 0.6}, // dirty purple
-    {0.0, 1.0, 1.0}, // cyan
-    {1.0, 0.4, 0.8}, // pink
-    {0.5, 0.5, 0.5}, // grey
-    {0.8, 0.7, 0.5}, // brown
-    {0.6, 0.6, 1.0}, // light blue
-    {1.0, 0.7, 0.5}, // peach
-    {0.8, 0.5, 1.0}, // lavender
-    {0.6, 0.0, 0.0}, // dark red
-    {0.7, 0.6, 0.0}, // gold
-    {0.0, 0.0, 0.0}  // black
+const kmp_stats_output_module::rgb_color
+    kmp_stats_output_module::globalColorArray[] = {
+        {1.0, 0.0, 0.0}, // red
+        {1.0, 0.6, 0.0}, // orange
+        {1.0, 1.0, 0.0}, // yellow
+        {0.0, 1.0, 0.0}, // green
+        {0.0, 0.0, 1.0}, // blue
+        {0.6, 0.2, 0.8}, // purple
+        {1.0, 0.0, 1.0}, // magenta
+        {0.0, 0.4, 0.2}, // dark green
+        {1.0, 1.0, 0.6}, // light yellow
+        {0.6, 0.4, 0.6}, // dirty purple
+        {0.0, 1.0, 1.0}, // cyan
+        {1.0, 0.4, 0.8}, // pink
+        {0.5, 0.5, 0.5}, // grey
+        {0.8, 0.7, 0.5}, // brown
+        {0.6, 0.6, 1.0}, // light blue
+        {1.0, 0.7, 0.5}, // peach
+        {0.8, 0.5, 1.0}, // lavender
+        {0.6, 0.0, 0.0}, // dark red
+        {0.7, 0.6, 0.0}, // gold
+        {0.0, 0.0, 0.0} // black
 };
 
 // Ensure that the atexit handler only runs once.
 static uint32_t statsPrinted = 0;
 
 // output interface
-static kmp_stats_output_module* __kmp_stats_global_output = NULL;
+static kmp_stats_output_module *__kmp_stats_global_output = NULL;
 
-/* ****************************************************** */
 /* ************* statistic member functions ************* */
 
-void statistic::addSample(double sample)
-{
-    double delta = sample - meanVal;
-
-    sampleCount = sampleCount + 1;
-    meanVal     = meanVal + delta/sampleCount;
-    m2          = m2 + delta*(sample - meanVal);
-
-    minVal = std::min(minVal, sample);
-    maxVal = std::max(maxVal, sample);
-}
-
-statistic & statistic::operator+= (const statistic & other)
-{
-    if (sampleCount == 0)
-    {
-        *this = other;
-        return *this;
-    }
+void statistic::addSample(double sample) {
+  double delta = sample - meanVal;
 
-    uint64_t newSampleCount = sampleCount + other.sampleCount;
-    double dnsc  = double(newSampleCount);
-    double dsc   = double(sampleCount);
-    double dscBydnsc = dsc/dnsc;
-    double dosc  = double(other.sampleCount);
-    double delta = other.meanVal - meanVal;
-
-    // Try to order these calculations to avoid overflows.
-    // If this were Fortran, then the compiler would not be able to re-order over brackets.
-    // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition
-    // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation
-    // really is associative (which floating addition isn't...)).
-    meanVal     = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc);
-    m2          = m2 + other.m2 + dscBydnsc*dosc*delta*delta;
-    minVal      = std::min (minVal, other.minVal);
-    maxVal      = std::max (maxVal, other.maxVal);
-    sampleCount = newSampleCount;
+  sampleCount = sampleCount + 1;
+  meanVal = meanVal + delta / sampleCount;
+  m2 = m2 + delta * (sample - meanVal);
 
-
-    return *this;
+  minVal = std::min(minVal, sample);
+  maxVal = std::max(maxVal, sample);
 }
 
-void statistic::scale(double factor)
-{
-    minVal = minVal*factor;
-    maxVal = maxVal*factor;
-    meanVal= meanVal*factor;
-    m2     = m2*factor*factor;
-    return;
-}
+statistic &statistic::operator+=(const statistic &other) {
+  if (sampleCount == 0) {
+    *this = other;
+    return *this;
+  }
 
-std::string statistic::format(char unit, bool total) const
-{
-    std::string result = formatSI(sampleCount,9,' ');
-
-    if (sampleCount == 0)
-    {
-        result = result + std::string(", ") + formatSI(0.0, 9, unit);
-        result = result + std::string(", ") + formatSI(0.0, 9, unit);
-        result = result + std::string(", ") + formatSI(0.0, 9, unit);
-        if (total)
-            result = result + std::string(", ") + formatSI(0.0, 9, unit);
-        result = result + std::string(", ") + formatSI(0.0, 9, unit);
-    }
-    else
-    {
-        result = result + std::string(", ") + formatSI(minVal,  9, unit);
-        result = result + std::string(", ") + formatSI(meanVal, 9, unit);
-        result = result + std::string(", ") + formatSI(maxVal,  9, unit);
-        if (total)
-            result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit);
-        result = result + std::string(", ") + formatSI(getSD(), 9, unit);
-    }
-    return result;
+  uint64_t newSampleCount = sampleCount + other.sampleCount;
+  double dnsc = double(newSampleCount);
+  double dsc = double(sampleCount);
+  double dscBydnsc = dsc / dnsc;
+  double dosc = double(other.sampleCount);
+  double delta = other.meanVal - meanVal;
+
+  // Try to order these calculations to avoid overflows. If this were Fortran,
+  // then the compiler would not be able to re-order over brackets. In C++ it
+  may be legal to do that (we certainly hope it doesn't, and the C++ Programming
+  Language, 2nd edition, suggests it shouldn't, since it says that exploitation
+  // of associativity can only be made if the operation really is associative
+  // (which floating addition isn't...)).
+  meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
+  m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
+  minVal = std::min(minVal, other.minVal);
+  maxVal = std::max(maxVal, other.maxVal);
+  sampleCount = newSampleCount;
+
+  return *this;
+}
+
+void statistic::scale(double factor) {
+  minVal = minVal * factor;
+  maxVal = maxVal * factor;
+  meanVal = meanVal * factor;
+  m2 = m2 * factor * factor;
+  return;
+}
+
+std::string statistic::format(char unit, bool total) const {
+  std::string result = formatSI(sampleCount, 9, ' ');
+
+  if (sampleCount == 0) {
+    result = result + std::string(", ") + formatSI(0.0, 9, unit);
+    result = result + std::string(", ") + formatSI(0.0, 9, unit);
+    result = result + std::string(", ") + formatSI(0.0, 9, unit);
+    if (total)
+      result = result + std::string(", ") + formatSI(0.0, 9, unit);
+    result = result + std::string(", ") + formatSI(0.0, 9, unit);
+  } else {
+    result = result + std::string(", ") + formatSI(minVal, 9, unit);
+    result = result + std::string(", ") + formatSI(meanVal, 9, unit);
+    result = result + std::string(", ") + formatSI(maxVal, 9, unit);
+    if (total)
+      result =
+          result + std::string(", ") + formatSI(meanVal * sampleCount, 9, unit);
+    result = result + std::string(", ") + formatSI(getSD(), 9, unit);
+  }
+  return result;
 }
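addSample() above is Welford's online mean/variance update, and operator+= is the standard pairwise merge of two partial results (often attributed to Chan et al.; it is the parallel variant on the same Wikipedia page cited in kmp_stats.h). A stand-alone sketch of the same algebra, with hypothetical local names, that can be compiled to check the merge against a direct single-pass computation:

#include <cstdio>
#include <vector>

struct Welford {
  double mean = 0.0, m2 = 0.0;
  unsigned long long n = 0;
  void add(double x) { // same update as statistic::addSample()
    double delta = x - mean;
    n += 1;
    mean += delta / n;
    m2 += delta * (x - mean);
  }
  void merge(const Welford &o) { // same algebra as statistic::operator+=
    if (n == 0) { *this = o; return; }
    unsigned long long nn = n + o.n;
    double frac = double(n) / double(nn);
    double delta = o.mean - mean;
    mean = mean * frac + o.mean * (1 - frac);
    m2 = m2 + o.m2 + frac * double(o.n) * delta * delta;
    n = nn;
  }
};

int main() {
  std::vector<double> a = {1, 2, 3, 4}, b = {10, 20, 30};
  Welford wa, wb, wall;
  for (double x : a) { wa.add(x); wall.add(x); }
  for (double x : b) { wb.add(x); wall.add(x); }
  wa.merge(wb);
  // The merged and direct results should agree up to rounding.
  std::printf("merged: mean=%g m2=%g  direct: mean=%g m2=%g\n",
              wa.mean, wa.m2, wall.mean, wall.m2);
  return 0;
}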
 
-/* ********************************************************** */
 /* ************* explicitTimer member functions ************* */
 
 void explicitTimer::start(timer_e timerEnumValue) {
-    startTime = tsc_tick_count::now();
-    totalPauseTime = 0;
-    if(timeStat::logEvent(timerEnumValue)) {
-        __kmp_stats_thread_ptr->incrementNestValue();
-    }
-    return;
+  startTime = tsc_tick_count::now();
+  totalPauseTime = 0;
+  if (timeStat::logEvent(timerEnumValue)) {
+    __kmp_stats_thread_ptr->incrementNestValue();
+  }
+  return;
 }
 
-void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) {
-    if (startTime.getValue() == 0)
-        return;
-
-    tsc_tick_count finishTime = tsc_tick_count::now();
-
-    //stat->addSample ((tsc_tick_count::now() - startTime).ticks());
-    stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
-
-    if(timeStat::logEvent(timerEnumValue)) {
-        if(!stats_ptr)
-            stats_ptr = __kmp_stats_thread_ptr;
-        stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
-        stats_ptr->decrementNestValue();
-    }
-
-    /* We accept the risk that we drop a sample because it really did start at t==0. */
-    startTime = 0;
+void explicitTimer::stop(timer_e timerEnumValue,
+                         kmp_stats_list *stats_ptr /* = nullptr */) {
+  if (startTime.getValue() == 0)
     return;
+
+  tsc_tick_count finishTime = tsc_tick_count::now();
+
+  // stat->addSample ((tsc_tick_count::now() - startTime).ticks());
+  stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
+
+  if (timeStat::logEvent(timerEnumValue)) {
+    if (!stats_ptr)
+      stats_ptr = __kmp_stats_thread_ptr;
+    stats_ptr->push_event(
+        startTime.getValue() - __kmp_stats_start_time.getValue(),
+        finishTime.getValue() - __kmp_stats_start_time.getValue(),
+        __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
+    stats_ptr->decrementNestValue();
+  }
+
+  /* We accept the risk that we drop a sample because it really did start at
+     t==0. */
+  startTime = 0;
+  return;
 }
 
-/* ************************************************************** */
 /* ************* partitionedTimers member functions ************* */
-partitionedTimers::partitionedTimers() {
-    timer_stack.reserve(8);
-}
+partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
 
 // add a timer to this collection of partitioned timers.
-void partitionedTimers::add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer) {
-    KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST+1);
-    timers[timer_index] = timer_pointer;
+void partitionedTimers::add_timer(explicit_timer_e timer_index,
+                                  explicitTimer *timer_pointer) {
+  KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST + 1);
+  timers[timer_index] = timer_pointer;
 }
 
 // initialize the partitioned timers to an initial timer
 void partitionedTimers::init(timerPair init_timer_pair) {
-    KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
-    timer_stack.push_back(init_timer_pair);
-    timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
+  KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
+  timer_stack.push_back(init_timer_pair);
+  timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
 }
 
 // stop/save the current timer, and start the new timer (timer_pair)
@@ -209,33 +199,33 @@ void partitionedTimers::init(timerPair i
 // the one you are trying to push, then it only manipulates the stack,
 // and it won't stop/start the currently running timer.
 void partitionedTimers::push(timerPair timer_pair) {
-    // get the current timer
-    // stop current timer
-    // push new timer
-    // start the new timer
-    KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
-    timerPair current_timer = timer_stack.back();
-    timer_stack.push_back(timer_pair);
-    if(current_timer != timer_pair) {
-        timers[current_timer.get_index()]->pause();
-        timers[timer_pair.get_index()]->start(timer_pair.get_timer());
-    }
+  // get the current timer
+  // stop current timer
+  // push new timer
+  // start the new timer
+  KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
+  timerPair current_timer = timer_stack.back();
+  timer_stack.push_back(timer_pair);
+  if (current_timer != timer_pair) {
+    timers[current_timer.get_index()]->pause();
+    timers[timer_pair.get_index()]->start(timer_pair.get_timer());
+  }
 }
 
 // stop/discard the current timer, and start the previously saved timer
 void partitionedTimers::pop() {
-    // get the current timer
-    // stop current timer
-    // pop current timer
-    // get the new current timer and start it back up
-    KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
-    timerPair current_timer = timer_stack.back();
-    timer_stack.pop_back();
-    timerPair new_timer = timer_stack.back();
-    if(current_timer != new_timer) {
-        timers[current_timer.get_index()]->stop(current_timer.get_timer());
-        timers[new_timer.get_index()]->resume();
-    }
+  // get the current timer
+  // stop current timer
+  // pop current timer
+  // get the new current timer and start it back up
+  KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
+  timerPair current_timer = timer_stack.back();
+  timer_stack.pop_back();
+  timerPair new_timer = timer_stack.back();
+  if (current_timer != new_timer) {
+    timers[current_timer.get_index()]->stop(current_timer.get_timer());
+    timers[new_timer.get_index()]->resume();
+  }
 }
 
 // Wind up all the currently running timers.
@@ -243,481 +233,483 @@ void partitionedTimers::pop() {
 // After this is called, init() must be run again to initialize the
 // stack of timers
 void partitionedTimers::windup() {
-    while(timer_stack.size() > 1) {
-        this->pop();
-    }
-    if(timer_stack.size() > 0) {
-        timerPair last_timer = timer_stack.back();
-        timer_stack.pop_back();
-        timers[last_timer.get_index()]->stop(last_timer.get_timer());
-    }
+  while (timer_stack.size() > 1) {
+    this->pop();
+  }
+  if (timer_stack.size() > 0) {
+    timerPair last_timer = timer_stack.back();
+    timer_stack.pop_back();
+    timers[last_timer.get_index()]->stop(last_timer.get_timer());
+  }
 }
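The push()/pop() pair above keep a stack of (timer, index) pairs: push pauses whatever is currently running and starts the new timer unless the new pair is identical to the current one, and pop undoes that, resuming the previous timer. A minimal stand-alone model of that stack discipline (names and the printouts are illustrative only, not from the runtime):

#include <cstdio>
#include <string>
#include <vector>

struct TimerStack {
  std::vector<std::string> stack;
  void init(const std::string &t) {
    stack.push_back(t);
    std::printf("start  %s\n", t.c_str());
  }
  void push(const std::string &t) {
    std::string cur = stack.back();
    stack.push_back(t);
    if (cur != t) { // same rule as partitionedTimers::push()
      std::printf("pause  %s\n", cur.c_str());
      std::printf("start  %s\n", t.c_str());
    }
  }
  void pop() {
    std::string cur = stack.back();
    stack.pop_back();
    if (cur != stack.back()) { // same rule as partitionedTimers::pop()
      std::printf("stop   %s\n", cur.c_str());
      std::printf("resume %s\n", stack.back().c_str());
    }
  }
};

int main() {
  TimerStack ts;
  ts.init("OMP_serial");
  ts.push("OMP_parallel"); // OMP_serial paused, OMP_parallel started
  ts.push("OMP_parallel"); // identical pair: stack grows, timers untouched
  ts.pop();                // identical pair removed: still no timer change
  ts.pop();                // OMP_parallel stopped, OMP_serial resumed
  return 0;
}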
 
-/* ******************************************************************* */
 /* ************* kmp_stats_event_vector member functions ************* */
 
 void kmp_stats_event_vector::deallocate() {
-    __kmp_free(events);
-    internal_size = 0;
-    allocated_size = 0;
-    events = NULL;
+  __kmp_free(events);
+  internal_size = 0;
+  allocated_size = 0;
+  events = NULL;
 }
 
 // This function is for qsort() which requires the compare function to return
-// either a negative number if event1 < event2, a positive number if event1 > event2
-// or zero if event1 == event2.
-// This sorts by start time (lowest to highest).
-int compare_two_events(const void* event1, const void* event2) {
-    kmp_stats_event* ev1 = (kmp_stats_event*)event1;
-    kmp_stats_event* ev2 = (kmp_stats_event*)event2;
-
-    if(ev1->getStart() < ev2->getStart()) return -1;
-    else if(ev1->getStart() > ev2->getStart()) return 1;
-    else return 0;
+// either a negative number if event1 < event2, a positive number if event1 >
+// event2 or zero if event1 == event2. This sorts by start time (lowest to
+// highest).
+int compare_two_events(const void *event1, const void *event2) {
+  kmp_stats_event *ev1 = (kmp_stats_event *)event1;
+  kmp_stats_event *ev2 = (kmp_stats_event *)event2;
+
+  if (ev1->getStart() < ev2->getStart())
+    return -1;
+  else if (ev1->getStart() > ev2->getStart())
+    return 1;
+  else
+    return 0;
 }
 
 void kmp_stats_event_vector::sort() {
-    qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
+  qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
 }
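Since compare_two_events() is handed to the C qsort() interface, it has to follow the usual negative/zero/positive contract described in the comment above. A generic stand-alone illustration of that contract ("Ev" is a placeholder here, not kmp_stats_event):

#include <cstdio>
#include <cstdlib>

struct Ev {
  unsigned long start;
};

static int cmp_ev(const void *a, const void *b) {
  const Ev *ea = (const Ev *)a, *eb = (const Ev *)b;
  if (ea->start < eb->start)
    return -1;
  if (ea->start > eb->start)
    return 1;
  return 0;
}

int main() {
  Ev evs[] = {{30}, {10}, {20}};
  std::qsort(evs, sizeof(evs) / sizeof(evs[0]), sizeof(Ev), cmp_ev);
  for (const Ev &e : evs)
    std::printf("%lu\n", e.start); // prints 10 20 30
  return 0;
}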
 
-/* *********************************************************** */
 /* ************* kmp_stats_list member functions ************* */
 
 // returns a pointer to newly created stats node
-kmp_stats_list* kmp_stats_list::push_back(int gtid) {
-    kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list));
-    // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used)
-    new (newnode) kmp_stats_list();
-    newnode->setGtid(gtid);
-    newnode->prev = this->prev;
-    newnode->next = this;
-    newnode->prev->next = newnode;
-    newnode->next->prev = newnode;
-    return newnode;
+kmp_stats_list *kmp_stats_list::push_back(int gtid) {
+  kmp_stats_list *newnode =
+      (kmp_stats_list *)__kmp_allocate(sizeof(kmp_stats_list));
+  // placement new only requires space and a pointer, and it runs the
+  // constructor (so __kmp_allocate is used instead of C++ new[])
+  new (newnode) kmp_stats_list();
+  newnode->setGtid(gtid);
+  newnode->prev = this->prev;
+  newnode->next = this;
+  newnode->prev->next = newnode;
+  newnode->next->prev = newnode;
+  return newnode;
 }
 void kmp_stats_list::deallocate() {
-    kmp_stats_list* ptr = this->next;
-    kmp_stats_list* delptr = this->next;
-    while(ptr != this) {
-        delptr = ptr;
-        ptr=ptr->next;
-        // placement new means we have to explicitly call destructor.
-        delptr->_event_vector.deallocate();
-        delptr->~kmp_stats_list();
-        __kmp_free(delptr);
-    }
+  kmp_stats_list *ptr = this->next;
+  kmp_stats_list *delptr = this->next;
+  while (ptr != this) {
+    delptr = ptr;
+    ptr = ptr->next;
+    // placement new means we have to explicitly call destructor.
+    delptr->_event_vector.deallocate();
+    delptr->~kmp_stats_list();
+    __kmp_free(delptr);
+  }
 }
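push_back() and deallocate() above rely on raw allocation plus placement new, which is why the destructor has to be called explicitly before the memory is released. A minimal stand-alone illustration of that pattern, using malloc/free where the runtime uses __kmp_allocate/__kmp_free:

#include <cstdlib>
#include <new>

struct Node {
  Node *prev, *next;
  Node() : prev(this), next(this) {} // self-linked, like the list sentinel
  ~Node() {}
};

int main() {
  // allocate raw storage, then construct in place (what push_back() does)
  void *raw = std::malloc(sizeof(Node));
  if (!raw)
    return 1;
  Node *n = new (raw) Node();
  // ... link n into a list, use it ...
  // placement new means we have to explicitly call the destructor
  n->~Node();
  std::free(raw); // the runtime calls __kmp_free(delptr) here
  return 0;
}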
 kmp_stats_list::iterator kmp_stats_list::begin() {
-    kmp_stats_list::iterator it;
-    it.ptr = this->next;
-    return it;
+  kmp_stats_list::iterator it;
+  it.ptr = this->next;
+  return it;
 }
 kmp_stats_list::iterator kmp_stats_list::end() {
-    kmp_stats_list::iterator it;
-    it.ptr = this;
-    return it;
+  kmp_stats_list::iterator it;
+  it.ptr = this;
+  return it;
 }
 int kmp_stats_list::size() {
-    int retval;
-    kmp_stats_list::iterator it;
-    for(retval=0, it=begin(); it!=end(); it++, retval++) {}
-    return retval;
+  int retval;
+  kmp_stats_list::iterator it;
+  for (retval = 0, it = begin(); it != end(); it++, retval++) {
+  }
+  return retval;
 }
 
-/* ********************************************************************* */
 /* ************* kmp_stats_list::iterator member functions ************* */
 
 kmp_stats_list::iterator::iterator() : ptr(NULL) {}
 kmp_stats_list::iterator::~iterator() {}
 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
-    this->ptr = this->ptr->next;
-    return *this;
+  this->ptr = this->ptr->next;
+  return *this;
 }
 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
-    this->ptr = this->ptr->next;
-    return *this;
+  this->ptr = this->ptr->next;
+  return *this;
 }
 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
-    this->ptr = this->ptr->prev;
-    return *this;
+  this->ptr = this->ptr->prev;
+  return *this;
 }
 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
-    this->ptr = this->ptr->prev;
-    return *this;
+  this->ptr = this->ptr->prev;
+  return *this;
 }
-bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) {
-   return this->ptr!=rhs.ptr;
+bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator &rhs) {
+  return this->ptr != rhs.ptr;
 }
-bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) {
-   return this->ptr==rhs.ptr;
+bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator &rhs) {
+  return this->ptr == rhs.ptr;
 }
-kmp_stats_list* kmp_stats_list::iterator::operator*() const {
-    return this->ptr;
+kmp_stats_list *kmp_stats_list::iterator::operator*() const {
+  return this->ptr;
 }
 
-/* *************************************************************** */
 /* *************  kmp_stats_output_module functions ************** */
 
-const char* kmp_stats_output_module::eventsFileName = NULL;
-const char* kmp_stats_output_module::plotFileName   = NULL;
-int kmp_stats_output_module::printPerThreadFlag       = 0;
+const char *kmp_stats_output_module::eventsFileName = NULL;
+const char *kmp_stats_output_module::plotFileName = NULL;
+int kmp_stats_output_module::printPerThreadFlag = 0;
 int kmp_stats_output_module::printPerThreadEventsFlag = 0;
 
-// init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output
-void kmp_stats_output_module::init()
-{
-    char * statsFileName  = getenv("KMP_STATS_FILE");
-    eventsFileName        = getenv("KMP_STATS_EVENTS_FILE");
-    plotFileName          = getenv("KMP_STATS_PLOT_FILE");
-    char * threadStats    = getenv("KMP_STATS_THREADS");
-    char * threadEvents   = getenv("KMP_STATS_EVENTS");
-
-    // set the stats output filenames based on environment variables and defaults
-    if(statsFileName) {
-        // append the process id to the output filename
-        // events.csv --> events-pid.csv
-        size_t index;
-        std::string baseFileName, pid, suffix;
-        std::stringstream ss;
-        outputFileName = std::string(statsFileName);
-        index = outputFileName.find_last_of('.');
-        if(index == std::string::npos) {
-            baseFileName = outputFileName;
-        } else {
-            baseFileName = outputFileName.substr(0, index);
-            suffix = outputFileName.substr(index);
-        }
-        ss << getpid();
-        pid = ss.str();
-        outputFileName = baseFileName + "-" + pid + suffix;
-    }
-    eventsFileName = eventsFileName ? eventsFileName : "events.dat";
-    plotFileName   = plotFileName   ? plotFileName   : "events.plt";
-
-    // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes
-    printPerThreadFlag        = __kmp_str_match_true(threadStats);
-    printPerThreadEventsFlag  = __kmp_str_match_true(threadEvents);
-
-    if(printPerThreadEventsFlag) {
-        // assigns a color to each timer for printing
-        setupEventColors();
+// init() is called very near the beginning of execution time in the constructor
+// of __kmp_stats_global_output
+void kmp_stats_output_module::init() {
+  char *statsFileName = getenv("KMP_STATS_FILE");
+  eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
+  plotFileName = getenv("KMP_STATS_PLOT_FILE");
+  char *threadStats = getenv("KMP_STATS_THREADS");
+  char *threadEvents = getenv("KMP_STATS_EVENTS");
+
+  // set the stats output filenames based on environment variables and defaults
+  if (statsFileName) {
+    // append the process id to the output filename
+    // events.csv --> events-pid.csv
+    size_t index;
+    std::string baseFileName, pid, suffix;
+    std::stringstream ss;
+    outputFileName = std::string(statsFileName);
+    index = outputFileName.find_last_of('.');
+    if (index == std::string::npos) {
+      baseFileName = outputFileName;
     } else {
-        // will clear flag so that no event will be logged
-        timeStat::clearEventFlags();
+      baseFileName = outputFileName.substr(0, index);
+      suffix = outputFileName.substr(index);
     }
+    ss << getpid();
+    pid = ss.str();
+    outputFileName = baseFileName + "-" + pid + suffix;
+  }
+  eventsFileName = eventsFileName ? eventsFileName : "events.dat";
+  plotFileName = plotFileName ? plotFileName : "events.plt";
+
+  // set the flags based on environment variables matching: true, on, 1,
+  // .true., .t., yes
+  printPerThreadFlag = __kmp_str_match_true(threadStats);
+  printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
+
+  if (printPerThreadEventsFlag) {
+    // assigns a color to each timer for printing
+    setupEventColors();
+  } else {
+    // will clear flag so that no event will be logged
+    timeStat::clearEventFlags();
+  }
 
-    return;
+  return;
 }
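init() above takes its whole configuration from the environment. An illustrative set of settings (the file names are made up; the variable names, the pid-suffix behaviour, and the accepted true/on/1/.true./.t./yes spellings are from the code above):

  KMP_STATS_FILE=stats.csv          # written as stats-<pid>.csv, one per process
  KMP_STATS_THREADS=true            # print per-thread statistics
  KMP_STATS_EVENTS=true             # log timer events for the timeline
  KMP_STATS_EVENTS_FILE=events.dat  # default when unset: events.dat
  KMP_STATS_PLOT_FILE=events.plt    # default when unset: events.plt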
 
 void kmp_stats_output_module::setupEventColors() {
-    int i;
-    int globalColorIndex = 0;
-    int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
-    for(i=0;i<TIMER_LAST;i++) {
-        if(timeStat::logEvent((timer_e)i)) {
-            timerColorInfo[i] = globalColorArray[globalColorIndex];
-            globalColorIndex = (globalColorIndex+1)%numGlobalColors;
-        }
-    }
-    return;
-}
-
-void kmp_stats_output_module::printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats)
-{
-    fprintf (statsOut, "Timer,                      SampleCount,    Min,      Mean,       Max,     Total,        SD\n");
-    for (timer_e s = timer_e(0); s<TIMER_LAST; s = timer_e(s+1)) {
-        statistic const * stat = &theStats[s];
-        char tag = timeStat::noUnits(s) ? ' ' : 'T';
-
-        fprintf (statsOut, "%-28s, %s\n", timeStat::name(s), stat->format(tag, true).c_str());
-    }
-    // Also print the Total_ versions of times.
-    for (timer_e s = timer_e(0); s<TIMER_LAST; s = timer_e(s+1)) {
-        char tag = timeStat::noUnits(s) ? ' ' : 'T';
-        if (totalStats && !timeStat::noTotal(s))
-            fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s), totalStats[s].format(tag, true).c_str());
-    }
-}
-
-void kmp_stats_output_module::printCounterStats(FILE *statsOut, statistic const * theStats)
-{
-    fprintf (statsOut, "Counter,                 ThreadCount,    Min,      Mean,       Max,     Total,        SD\n");
-    for (int s = 0; s<COUNTER_LAST; s++) {
-        statistic const * stat = &theStats[s];
-        fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str());
-    }
-}
-
-void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters)
-{
-    // We print all the counters even if they are zero.
-    // That makes it easier to slice them into a spreadsheet if you need to.
-    fprintf (statsOut, "\nCounter,                    Count\n");
-    for (int c = 0; c<COUNTER_LAST; c++) {
-        counter const * stat = &theCounters[c];
-        fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str());
-    }
-}
-
-void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) {
-    // sort by start time before printing
-    theEvents->sort();
-    for (int i = 0; i < theEvents->size(); i++) {
-        kmp_stats_event ev = theEvents->at(i);
-        rgb_color color = getEventColor(ev.getTimerName());
-        fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n",
-                gtid,
-                ev.getStart(),
-                ev.getStop(),
-                1.2 - (ev.getNestLevel() * 0.2),
-                color.r, color.g, color.b,
-                timeStat::name(ev.getTimerName())
-               );
-    }
-    return;
-}
-
-void kmp_stats_output_module::windupExplicitTimers()
-{
-    // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads
-    // and say "it's over".
-    // If the timer wasn't running, this won't record anything anyway.
-    kmp_stats_list::iterator it;
-    for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
-        kmp_stats_list* ptr = *it;
-        ptr->getPartitionedTimers()->windup();
-        for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
-            ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
-        }
+  int i;
+  int globalColorIndex = 0;
+  int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
+  for (i = 0; i < TIMER_LAST; i++) {
+    if (timeStat::logEvent((timer_e)i)) {
+      timerColorInfo[i] = globalColorArray[globalColorIndex];
+      globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
+    }
+  }
+  return;
+}
+
+void kmp_stats_output_module::printTimerStats(FILE *statsOut,
+                                              statistic const *theStats,
+                                              statistic const *totalStats) {
+  fprintf(statsOut, "Timer,                      SampleCount,    Min,      "
+                    "Mean,       Max,     Total,        SD\n");
+  for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
+    statistic const *stat = &theStats[s];
+    char tag = timeStat::noUnits(s) ? ' ' : 'T';
+
+    fprintf(statsOut, "%-28s, %s\n", timeStat::name(s),
+            stat->format(tag, true).c_str());
+  }
+  // Also print the Total_ versions of times.
+  for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
+    char tag = timeStat::noUnits(s) ? ' ' : 'T';
+    if (totalStats && !timeStat::noTotal(s))
+      fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s),
+              totalStats[s].format(tag, true).c_str());
+  }
+}
+
+void kmp_stats_output_module::printCounterStats(FILE *statsOut,
+                                                statistic const *theStats) {
+  fprintf(statsOut, "Counter,                 ThreadCount,    Min,      Mean,  "
+                    "     Max,     Total,        SD\n");
+  for (int s = 0; s < COUNTER_LAST; s++) {
+    statistic const *stat = &theStats[s];
+    fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)),
+            stat->format(' ', true).c_str());
+  }
+}
+
+void kmp_stats_output_module::printCounters(FILE *statsOut,
+                                            counter const *theCounters) {
+  // We print all the counters even if they are zero.
+  // That makes it easier to slice them into a spreadsheet if you need to.
+  fprintf(statsOut, "\nCounter,                    Count\n");
+  for (int c = 0; c < COUNTER_LAST; c++) {
+    counter const *stat = &theCounters[c];
+    fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(c)),
+            formatSI(stat->getValue(), 9, ' ').c_str());
+  }
+}
+
+void kmp_stats_output_module::printEvents(FILE *eventsOut,
+                                          kmp_stats_event_vector *theEvents,
+                                          int gtid) {
+  // sort by start time before printing
+  theEvents->sort();
+  for (int i = 0; i < theEvents->size(); i++) {
+    kmp_stats_event ev = theEvents->at(i);
+    rgb_color color = getEventColor(ev.getTimerName());
+    fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
+            ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2),
+            color.r, color.g, color.b, timeStat::name(ev.getTimerName()));
+  }
+  return;
+}
+
+void kmp_stats_output_module::windupExplicitTimers() {
+  // Wind up any explicit timers. We assume that it's fair at this point to just
+  // walk all the explicit timers in all threads and say "it's over".
+  // If the timer wasn't running, this won't record anything anyway.
+  kmp_stats_list::iterator it;
+  for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
+    kmp_stats_list *ptr = *it;
+    ptr->getPartitionedTimers()->windup();
+    for (int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) {
+      ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
     }
+  }
 }
 
 void kmp_stats_output_module::printPloticusFile() {
-    int i;
-    int size = __kmp_stats_list->size();
-    FILE* plotOut = fopen(plotFileName, "w+");
-
-    fprintf(plotOut, "#proc page\n"
-                     "   pagesize: 15 10\n"
-                     "   scale: 1.0\n\n");
-
-    fprintf(plotOut, "#proc getdata\n"
-                     "   file: %s\n\n",
-                     eventsFileName);
-
-    fprintf(plotOut, "#proc areadef\n"
-                     "   title: OpenMP Sampling Timeline\n"
-                     "   titledetails: align=center size=16\n"
-                     "   rectangle: 1 1 13 9\n"
-                     "   xautorange: datafield=2,3\n"
-                     "   yautorange: -1 %d\n\n",
-                     size);
-
-    fprintf(plotOut, "#proc xaxis\n"
-                     "   stubs: inc\n"
-                     "   stubdetails: size=12\n"
-                     "   label: Time (ticks)\n"
-                     "   labeldetails: size=14\n\n");
-
-    fprintf(plotOut, "#proc yaxis\n"
-                     "   stubs: inc 1\n"
-                     "   stubrange: 0 %d\n"
-                     "   stubdetails: size=12\n"
-                     "   label: Thread #\n"
-                     "   labeldetails: size=14\n\n",
-                     size-1);
-
-    fprintf(plotOut, "#proc bars\n"
-                     "   exactcolorfield: 5\n"
-                     "   axis: x\n"
-                     "   locfield: 1\n"
-                     "   segmentfields: 2 3\n"
-                     "   barwidthfield: 4\n\n");
-
-    // create legend entries corresponding to the timer color
-    for(i=0;i<TIMER_LAST;i++) {
-        if(timeStat::logEvent((timer_e)i)) {
-            rgb_color c = getEventColor((timer_e)i);
-            fprintf(plotOut, "#proc legendentry\n"
-                             "   sampletype: color\n"
-                             "   label: %s\n"
-                             "   details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
-                             timeStat::name((timer_e)i),
-                             c.r, c.g, c.b);
-
-        }
-    }
-
-    fprintf(plotOut, "#proc legend\n"
-                     "   format: down\n"
-                     "   location: max max\n\n");
-    fclose(plotOut);
-    return;
-}
-
-/*
- * Print some useful information about
- *    * the date and time this experiment ran.
- *    * the machine on which it ran.
- * We output all of this as stylised comments, though we may decide to parse some of it.
- */
-void kmp_stats_output_module::printHeaderInfo(FILE * statsOut)
-{
-    std::time_t now = std::time(0);
-    char buffer[40];
-    char hostName[80];
-
-    std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
-    fprintf (statsOut, "# Time of run: %s\n", &buffer[0]);
-    if (gethostname(&hostName[0], sizeof(hostName)) == 0)
-        fprintf (statsOut,"# Hostname: %s\n", &hostName[0]);
+  int i;
+  int size = __kmp_stats_list->size();
+  FILE *plotOut = fopen(plotFileName, "w+");
+
+  fprintf(plotOut, "#proc page\n"
+                   "   pagesize: 15 10\n"
+                   "   scale: 1.0\n\n");
+
+  fprintf(plotOut, "#proc getdata\n"
+                   "   file: %s\n\n",
+          eventsFileName);
+
+  fprintf(plotOut, "#proc areadef\n"
+                   "   title: OpenMP Sampling Timeline\n"
+                   "   titledetails: align=center size=16\n"
+                   "   rectangle: 1 1 13 9\n"
+                   "   xautorange: datafield=2,3\n"
+                   "   yautorange: -1 %d\n\n",
+          size);
+
+  fprintf(plotOut, "#proc xaxis\n"
+                   "   stubs: inc\n"
+                   "   stubdetails: size=12\n"
+                   "   label: Time (ticks)\n"
+                   "   labeldetails: size=14\n\n");
+
+  fprintf(plotOut, "#proc yaxis\n"
+                   "   stubs: inc 1\n"
+                   "   stubrange: 0 %d\n"
+                   "   stubdetails: size=12\n"
+                   "   label: Thread #\n"
+                   "   labeldetails: size=14\n\n",
+          size - 1);
+
+  fprintf(plotOut, "#proc bars\n"
+                   "   exactcolorfield: 5\n"
+                   "   axis: x\n"
+                   "   locfield: 1\n"
+                   "   segmentfields: 2 3\n"
+                   "   barwidthfield: 4\n\n");
+
+  // create legend entries corresponding to the timer color
+  for (i = 0; i < TIMER_LAST; i++) {
+    if (timeStat::logEvent((timer_e)i)) {
+      rgb_color c = getEventColor((timer_e)i);
+      fprintf(plotOut, "#proc legendentry\n"
+                       "   sampletype: color\n"
+                       "   label: %s\n"
+                       "   details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
+              timeStat::name((timer_e)i), c.r, c.g, c.b);
+    }
+  }
+
+  fprintf(plotOut, "#proc legend\n"
+                   "   format: down\n"
+                   "   location: max max\n\n");
+  fclose(plotOut);
+  return;
+}
+
+/* Print some useful information about
+   * the date and time this experiment ran.
+   * the machine on which it ran.
+   We output all of this as stylised comments, though we may decide to parse
+   some of it. */
+void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
+  std::time_t now = std::time(0);
+  char buffer[40];
+  char hostName[80];
+
+  std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
+  fprintf(statsOut, "# Time of run: %s\n", &buffer[0]);
+  if (gethostname(&hostName[0], sizeof(hostName)) == 0)
+    fprintf(statsOut, "# Hostname: %s\n", &hostName[0]);
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
-    fprintf (statsOut, "# CPU:  %s\n", &__kmp_cpuinfo.name[0]);
-    fprintf (statsOut, "# Family: %d, Model: %d, Stepping: %d\n", __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
-    if (__kmp_cpuinfo.frequency == 0)
-        fprintf (statsOut, "# Nominal frequency: Unknown\n");
-    else
-        fprintf (statsOut, "# Nominal frequency: %sz\n", formatSI(double(__kmp_cpuinfo.frequency),9,'H').c_str());
+  fprintf(statsOut, "# CPU:  %s\n", &__kmp_cpuinfo.name[0]);
+  fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n",
+          __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
+  if (__kmp_cpuinfo.frequency == 0)
+    fprintf(statsOut, "# Nominal frequency: Unknown\n");
+  else
+    fprintf(statsOut, "# Nominal frequency: %sz\n",
+            formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str());
 #endif
 }
 
-void kmp_stats_output_module::outputStats(const char* heading)
-{
-    // Stop all the explicit timers in all threads
-    // Do this before declaring the local statistics because thay have constructors so will take time to create.
-    windupExplicitTimers();
-
-    statistic allStats[TIMER_LAST];
-    statistic totalStats[TIMER_LAST];           /* Synthesized, cross threads versions of normal timer stats */
-    statistic allCounters[COUNTER_LAST];
-
-    FILE * statsOut = !outputFileName.empty() ? fopen (outputFileName.c_str(), "a+") : stderr;
-    if (!statsOut)
-        statsOut = stderr;
-
-    FILE * eventsOut;
-    if (eventPrintingEnabled()) {
-        eventsOut = fopen(eventsFileName, "w+");
-    }
-
-    printHeaderInfo (statsOut);
-    fprintf(statsOut, "%s\n",heading);
-    // Accumulate across threads.
-    kmp_stats_list::iterator it;
-    for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
-        int t = (*it)->getGtid();
-        // Output per thread stats if requested.
-        if (printPerThreadFlag) {
-            fprintf (statsOut, "Thread %d\n", t);
-            printTimerStats (statsOut, (*it)->getTimers(), 0);
-            printCounters   (statsOut, (*it)->getCounters());
-            fprintf (statsOut,"\n");
-        }
-        // Output per thread events if requested.
-        if (eventPrintingEnabled()) {
-            kmp_stats_event_vector events = (*it)->getEventVector();
-            printEvents(eventsOut, &events, t);
-        }
-
-        // Accumulate timers.
-        for (timer_e s = timer_e(0); s<TIMER_LAST; s = timer_e(s+1)) {
-            // See if we should ignore this timer when aggregating
-            if ((timeStat::masterOnly(s) && (t != 0)) || // Timer is only valid on the master and this thread is a worker
-                (timeStat::workerOnly(s) && (t == 0))    // Timer is only valid on a worker and this thread is the master
-               )
-            {
-                continue;
-            }
-
-            statistic * threadStat = (*it)->getTimer(s);
-            allStats[s] += *threadStat;
-
-            // Add Total stats for timers that are valid in more than one thread
-            if (!timeStat::noTotal(s))
-                totalStats[s].addSample(threadStat->getTotal());
-        }
-
-        // Accumulate counters.
-        for (counter_e c = counter_e(0); c<COUNTER_LAST; c = counter_e(c+1)) {
-            if (counter::masterOnly(c) && t != 0)
-                continue;
-            allCounters[c].addSample ((*it)->getCounter(c)->getValue());
-        }
+void kmp_stats_output_module::outputStats(const char *heading) {
+  // Stop all the explicit timers in all threads
+  // Do this before declaring the local statistics because they have
+  // constructors so will take time to create.
+  windupExplicitTimers();
+
+  statistic allStats[TIMER_LAST];
+  statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of
+                                       normal timer stats */
+  statistic allCounters[COUNTER_LAST];
+
+  FILE *statsOut =
+      !outputFileName.empty() ? fopen(outputFileName.c_str(), "a+") : stderr;
+  if (!statsOut)
+    statsOut = stderr;
+
+  FILE *eventsOut;
+  if (eventPrintingEnabled()) {
+    eventsOut = fopen(eventsFileName, "w+");
+  }
+
+  printHeaderInfo(statsOut);
+  fprintf(statsOut, "%s\n", heading);
+  // Accumulate across threads.
+  kmp_stats_list::iterator it;
+  for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
+    int t = (*it)->getGtid();
+    // Output per thread stats if requested.
+    if (printPerThreadFlag) {
+      fprintf(statsOut, "Thread %d\n", t);
+      printTimerStats(statsOut, (*it)->getTimers(), 0);
+      printCounters(statsOut, (*it)->getCounters());
+      fprintf(statsOut, "\n");
     }
-
+    // Output per thread events if requested.
     if (eventPrintingEnabled()) {
-        printPloticusFile();
-        fclose(eventsOut);
+      kmp_stats_event_vector events = (*it)->getEventVector();
+      printEvents(eventsOut, &events, t);
     }
 
-    fprintf (statsOut, "Aggregate for all threads\n");
-    printTimerStats (statsOut, &allStats[0], &totalStats[0]);
-    fprintf (statsOut, "\n");
-    printCounterStats (statsOut, &allCounters[0]);
+    // Accumulate timers.
+    for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
+      // See if we should ignore this timer when aggregating
+      if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master
+          // and this thread is worker
+          (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker
+          // and this thread is the master
+          ) {
+        continue;
+      }
+
+      statistic *threadStat = (*it)->getTimer(s);
+      allStats[s] += *threadStat;
+
+      // Add Total stats for timers that are valid in more than one thread
+      if (!timeStat::noTotal(s))
+        totalStats[s].addSample(threadStat->getTotal());
+    }
+
+    // Accumulate counters.
+    for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
+      if (counter::masterOnly(c) && t != 0)
+        continue;
+      allCounters[c].addSample((*it)->getCounter(c)->getValue());
+    }
+  }
+
+  if (eventPrintingEnabled()) {
+    printPloticusFile();
+    fclose(eventsOut);
+  }
+
+  fprintf(statsOut, "Aggregate for all threads\n");
+  printTimerStats(statsOut, &allStats[0], &totalStats[0]);
+  fprintf(statsOut, "\n");
+  printCounterStats(statsOut, &allCounters[0]);
 
-    if (statsOut != stderr)
-        fclose(statsOut);
+  if (statsOut != stderr)
+    fclose(statsOut);
 }
 
-/* ************************************************** */
 /* *************  exported C functions ************** */
 
-// no name mangling for these functions, we want the c files to be able to get at these functions
+// no name mangling for these functions; we want the C files to be able to get
+// at these functions
 extern "C" {
 
-void __kmp_reset_stats()
-{
-    kmp_stats_list::iterator it;
-    for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
-        timeStat * timers     = (*it)->getTimers();
-        counter * counters    = (*it)->getCounters();
-        explicitTimer * eTimers = (*it)->getExplicitTimers();
-
-        for (int t = 0; t<TIMER_LAST; t++)
-            timers[t].reset();
+void __kmp_reset_stats() {
+  kmp_stats_list::iterator it;
+  for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
+    timeStat *timers = (*it)->getTimers();
+    counter *counters = (*it)->getCounters();
+    explicitTimer *eTimers = (*it)->getExplicitTimers();
 
-        for (int c = 0; c<COUNTER_LAST; c++)
-            counters[c].reset();
+    for (int t = 0; t < TIMER_LAST; t++)
+      timers[t].reset();
 
-        for (int t=0; t<EXPLICIT_TIMER_LAST; t++)
-            eTimers[t].reset();
+    for (int c = 0; c < COUNTER_LAST; c++)
+      counters[c].reset();
 
-        // reset the event vector so all previous events are "erased"
-        (*it)->resetEventVector();
-    }
+    for (int t = 0; t < EXPLICIT_TIMER_LAST; t++)
+      eTimers[t].reset();
+
+    // reset the event vector so all previous events are "erased"
+    (*it)->resetEventVector();
+  }
 }
 
-// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
-void __kmp_output_stats(const char * heading)
-{
-    __kmp_stats_global_output->outputStats(heading);
-    __kmp_reset_stats();
-}
-
-void __kmp_accumulate_stats_at_exit(void)
-{
-    // Only do this once.
-    if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
-        return;
-
-    __kmp_output_stats("Statistics on exit");
-}
-
-void __kmp_stats_init(void)
-{
-    __kmp_init_tas_lock( & __kmp_stats_lock );
-    __kmp_stats_start_time = tsc_tick_count::now();
-    __kmp_stats_global_output = new kmp_stats_output_module();
-    __kmp_stats_list = new kmp_stats_list();
-}
-
-void __kmp_stats_fini(void)
-{
-    __kmp_accumulate_stats_at_exit();
-    __kmp_stats_list->deallocate();
-    delete __kmp_stats_global_output;
-    delete __kmp_stats_list;
+// This function will reset all stats and stop all threads' explicit timers if
+// they haven't been stopped already.
+void __kmp_output_stats(const char *heading) {
+  __kmp_stats_global_output->outputStats(heading);
+  __kmp_reset_stats();
 }
 
-} // extern "C"
+void __kmp_accumulate_stats_at_exit(void) {
+  // Only do this once.
+  if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
+    return;
+
+  __kmp_output_stats("Statistics on exit");
+}
 
+void __kmp_stats_init(void) {
+  __kmp_init_tas_lock(&__kmp_stats_lock);
+  __kmp_stats_start_time = tsc_tick_count::now();
+  __kmp_stats_global_output = new kmp_stats_output_module();
+  __kmp_stats_list = new kmp_stats_list();
+}
+
+void __kmp_stats_fini(void) {
+  __kmp_accumulate_stats_at_exit();
+  __kmp_stats_list->deallocate();
+  delete __kmp_stats_global_output;
+  delete __kmp_stats_list;
+}
+
+} // extern "C"

Modified: openmp/trunk/runtime/src/kmp_stats.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats.h (original)
+++ openmp/trunk/runtime/src/kmp_stats.h Fri May 12 13:01:32 2017
@@ -15,28 +15,29 @@
 //
 //===----------------------------------------------------------------------===//
 
+
 #include "kmp_config.h"
 
 #if KMP_STATS_ENABLED
-/*
- * Statistics accumulator.
- * Accumulates number of samples and computes min, max, mean, standard deviation on the fly.
- *
- * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
+/* Statistics accumulator.
+   Accumulates number of samples and computes min, max, mean, standard deviation
+   on the fly.
+
+   Online variance calculation algorithm from
+   http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
  */
 
+#include "kmp_stats_timing.h"
 #include <limits>
 #include <math.h>
-#include <vector>
-#include <string>
-#include <stdint.h>
 #include <new> // placement new
-#include "kmp_stats_timing.h"
+#include <stdint.h>
+#include <string>
+#include <vector>
 
-/*
- * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and
- * are intended for the runtime library developer.
- */
+/* Enable developer statistics here if you want them. They are more detailed
+   than is useful for application characterisation and are intended for the
+   runtime library developer. */
 // #define KMP_DEVELOPER_STATS 1
 
 /*!
@@ -45,11 +46,13 @@
  *
  */
 enum stats_flags_e {
-    noTotal      = 1<<0,     //!< do not show a TOTAL_aggregation for this statistic
-    onlyInMaster = 1<<1,     //!< statistic is valid only for master
-    noUnits      = 1<<2,     //!< statistic doesn't need units printed next to it in output
-    notInMaster  = 1<<3,     //!< statistic is valid only for non-master threads
-    logEvent     = 1<<4      //!< statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers)
+  noTotal = 1 << 0, //!< do not show a TOTAL_aggregation for this statistic
+  onlyInMaster = 1 << 1, //!< statistic is valid only for master
+  noUnits =
+      1 << 2, //!< statistic doesn't need units printed next to it in output
+  notInMaster = 1 << 3, //!< statistic is valid only for non-master threads
+  logEvent = 1 << 4 //!< statistic can be logged on the event timeline when
+  //! KMP_STATS_EVENTS is on (valid only for timers)
 };
 
 /*!
@@ -58,123 +61,143 @@ enum stats_flags_e {
  *
  */
 enum stats_state_e {
-    IDLE,
-    SERIAL_REGION,
-    FORK_JOIN_BARRIER,
-    PLAIN_BARRIER,
-    TASKWAIT,
-    TASKYIELD,
-    TASKGROUP,
-    IMPLICIT_TASK,
-    EXPLICIT_TASK
+  IDLE,
+  SERIAL_REGION,
+  FORK_JOIN_BARRIER,
+  PLAIN_BARRIER,
+  TASKWAIT,
+  TASKYIELD,
+  TASKGROUP,
+  IMPLICIT_TASK,
+  EXPLICIT_TASK
 };
 
 /*!
  * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h
  *
- * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg)
+ * @param macro a user defined macro that takes three arguments -
+ * macro(COUNTER_NAME, flags, arg)
  * @param arg a user defined argument to send to the user defined macro
  *
- * \details A counter counts the occurrence of some event.
- * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread
- * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement).
- * The min,mean,max are therefore the values for the threads.
- * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do.
- * All of the tables and printing is generated from this macro.
+ * \details A counter counts the occurrence of some event. Each thread
+ * accumulates its own count, at the end of execution the counts are aggregated
+ * treating each thread as a separate measurement. (Unless onlyInMaster is set,
+ * in which case there's only a single measurement). The min,mean,max are
+ * therefore the values for the threads. Adding the counter here and then
+ * putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you
+ * need to do. All of the tables and printing is generated from this macro.
  * Format is "macro(name, flags, arg)"
  *
  * @ingroup STATS_GATHERING
  */
-#define KMP_FOREACH_COUNTER(macro, arg)                         \
-    macro (OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \
-    macro (OMP_NESTED_PARALLEL, 0, arg)                         \
-    macro (OMP_FOR_static, 0, arg)                              \
-    macro (OMP_FOR_static_steal, 0, arg)                        \
-    macro (OMP_FOR_dynamic, 0, arg)                             \
-    macro (OMP_DISTRIBUTE, 0, arg)                              \
-    macro (OMP_BARRIER, 0, arg)                                 \
-    macro (OMP_CRITICAL,0, arg)                                 \
-    macro (OMP_SINGLE, 0, arg)                                  \
-    macro (OMP_MASTER, 0, arg)                                  \
-    macro (OMP_TEAMS, 0, arg)                                   \
-    macro (OMP_set_lock, 0, arg)                                \
-    macro (OMP_test_lock, 0, arg)                               \
-    macro (REDUCE_wait, 0, arg)                                 \
-    macro (REDUCE_nowait, 0, arg)                               \
-    macro (OMP_TASKYIELD, 0, arg)                               \
-    macro (OMP_TASKLOOP, 0, arg)                                \
-    macro (TASK_executed, 0, arg)                               \
-    macro (TASK_cancelled, 0, arg)                              \
-    macro (TASK_stolen, 0, arg)
+// clang-format off
+#define KMP_FOREACH_COUNTER(macro, arg)                                        \
+  macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal,    \
+        arg) macro(OMP_NESTED_PARALLEL, 0, arg) macro(OMP_FOR_static, 0, arg)  \
+      macro(OMP_FOR_static_steal, 0, arg) macro(OMP_FOR_dynamic, 0, arg)       \
+          macro(OMP_DISTRIBUTE, 0, arg) macro(OMP_BARRIER, 0, arg)             \
+              macro(OMP_CRITICAL, 0, arg) macro(OMP_SINGLE, 0, arg)            \
+                  macro(OMP_MASTER, 0, arg) macro(OMP_TEAMS, 0, arg)           \
+                      macro(OMP_set_lock, 0, arg) macro(OMP_test_lock, 0, arg) \
+                          macro(REDUCE_wait, 0, arg)                           \
+                              macro(REDUCE_nowait, 0, arg)                     \
+                                  macro(OMP_TASKYIELD, 0, arg)                 \
+                                      macro(OMP_TASKLOOP, 0, arg)              \
+                                          macro(TASK_executed, 0, arg)         \
+                                              macro(TASK_cancelled, 0, arg)    \
+                                                  macro(TASK_stolen, 0, arg)
+// clang-format on
 
 /*!
  * \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h
  *
- * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
+ * @param macro a user defined macro that takes three arguments -
+ * macro(TIMER_NAME, flags, arg)
  * @param arg a user defined argument to send to the user defined macro
  *
- * \details A timer collects multiple samples of some count in each thread and then finally aggregates alll of the samples from all of the threads.
- * For most timers the printing code also provides an aggregation over the thread totals. These are printed as TOTAL_foo.
- * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork"
- * as well).
- * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level.
- * Format is "macro(name, flags, arg)"
+ * \details A timer collects multiple samples of some count in each thread and
+ * then finally aggregates all of the samples from all of the threads. For most
+ * timers the printing code also provides an aggregation over the thread totals.
+ * These are printed as TOTAL_foo. The count is normally a time (in ticks),
+ * hence the name "timer". (But can be any value, so we use this for "number of
+ * arguments passed to fork" as well). For timers the threads are not
+ * significant, it's the individual observations that count, so the statistics
+ * are at that level. Format is "macro(name, flags, arg)"
  *
  * @ingroup STATS_GATHERING2
  */
-#define KMP_FOREACH_TIMER(macro, arg)                              \
-    macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg)   \
-    macro (FOR_static_scheduling, 0, arg)                          \
-    macro (FOR_dynamic_scheduling, 0, arg)                         \
-    macro (OMP_critical,  0, arg)                                  \
-    macro (OMP_critical_wait,  0, arg)                             \
-    macro (OMP_single,    0, arg)                                  \
-    macro (OMP_master,    0, arg)                                  \
-    macro (OMP_idle, stats_flags_e::logEvent, arg)                 \
-    macro (OMP_plain_barrier, stats_flags_e::logEvent, arg)        \
-    macro (OMP_fork_barrier, stats_flags_e::logEvent, arg)         \
-    macro (OMP_join_barrier, stats_flags_e::logEvent, arg)         \
-    macro (OMP_parallel, stats_flags_e::logEvent, arg)             \
-    macro (OMP_task_immediate, 0, arg)                             \
-    macro (OMP_task_taskwait, 0, arg)                              \
-    macro (OMP_task_taskyield, 0, arg)                             \
-    macro (OMP_task_taskgroup, 0, arg)                             \
-    macro (OMP_task_join_bar, 0, arg)                              \
-    macro (OMP_task_plain_bar, 0, arg)                             \
-    macro (OMP_serial, stats_flags_e::logEvent, arg)               \
-    macro (OMP_taskloop_scheduling, 0, arg)                        \
-    macro (OMP_set_numthreads,    stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
-    macro (OMP_PARALLEL_args,     stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
-    macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
-    macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
-    macro (FOR_static_steal_stolen,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
-    macro (FOR_static_steal_chunks,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \
+// clang-format off
+#define KMP_FOREACH_TIMER(macro, arg)                                          \
+    macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg)               \
+    macro (FOR_static_scheduling, 0, arg)                                      \
+    macro (FOR_dynamic_scheduling, 0, arg)                                     \
+    macro (OMP_critical, 0, arg)                                               \
+    macro (OMP_critical_wait, 0, arg)                                          \
+    macro (OMP_single, 0, arg)                                                 \
+    macro (OMP_master, 0, arg)                                                 \
+    macro (OMP_idle, stats_flags_e::logEvent, arg)                             \
+    macro (OMP_plain_barrier, stats_flags_e::logEvent, arg)                    \
+    macro (OMP_fork_barrier, stats_flags_e::logEvent, arg)                     \
+    macro (OMP_join_barrier, stats_flags_e::logEvent, arg)                     \
+    macro (OMP_parallel, stats_flags_e::logEvent, arg)                         \
+    macro (OMP_task_immediate, 0, arg)                                         \
+    macro (OMP_task_taskwait, 0, arg)                                          \
+    macro (OMP_task_taskyield, 0, arg)                                         \
+    macro (OMP_task_taskgroup, 0, arg)                                         \
+    macro (OMP_task_join_bar, 0, arg)                                          \
+    macro (OMP_task_plain_bar, 0, arg)                                         \
+    macro (OMP_serial, stats_flags_e::logEvent, arg)                           \
+    macro (OMP_taskloop_scheduling, 0, arg)                                    \
+    macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,\
+           arg)                                                                \
+    macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \
+           arg)                                                                \
+    macro (FOR_static_iterations,                                              \
+           stats_flags_e::noUnits | stats_flags_e::noTotal, arg)               \
+    macro (FOR_dynamic_iterations,                                             \
+           stats_flags_e::noUnits | stats_flags_e::noTotal, arg)               \
+    macro (FOR_static_steal_stolen,                                            \
+           stats_flags_e::noUnits | stats_flags_e::noTotal, arg)               \
+    macro (FOR_static_steal_chunks,                                            \
+           stats_flags_e::noUnits | stats_flags_e::noTotal, arg)               \
     KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+// clang-format on
 
-
-// OMP_start_end          -- Time from when OpenMP is initialized until the stats are printed at exit
+// OMP_start_end          -- Time from when OpenMP is initialized until the
+//                           stats are printed at exit
 // OMP_serial             -- Thread zero time executing serial code
-// OMP_work               -- Elapsed time in code dispatched by a fork (measured in the thread)
+// OMP_work               -- Elapsed time in code dispatched by a fork (measured
+//                           in the thread)
 // OMP_barrier            -- Time at "real" barriers (includes task time)
 // FOR_static_scheduling  -- Time spent doing scheduling for a static "for"
 // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
-// OMP_idle               -- Worker threads time spent waiting for inclusion in a parallel region
+// OMP_idle               -- Worker threads time spent waiting for inclusion in
+//                           a parallel region
 // OMP_plain_barrier      -- Time spent in a barrier construct
-// OMP_fork_join_barrier  -- Time spent in a the fork-join barrier surrounding a parallel region
+// OMP_fork_join_barrier  -- Time spent in the fork-join barrier surrounding a
+//                           parallel region
 // OMP_parallel           -- Time spent inside a parallel construct
 // OMP_task_immediate     -- Time spent executing non-deferred tasks
-// OMP_task_taskwait      -- Time spent executing tasks inside a taskwait construct
-// OMP_task_taskyield     -- Time spent executing tasks inside a taskyield construct
-// OMP_task_taskgroup     -- Time spent executing tasks inside a taskygroup construct
+// OMP_task_taskwait      -- Time spent executing tasks inside a taskwait
+//                           construct
+// OMP_task_taskyield     -- Time spent executing tasks inside a taskyield
+//                           construct
+// OMP_task_taskgroup     -- Time spent executing tasks inside a taskgroup
+//                           construct
 // OMP_task_join_bar      -- Time spent executing tasks inside a join barrier
-// OMP_task_plain_bar     -- Time spent executing tasks inside a barrier construct
+// OMP_task_plain_bar     -- Time spent executing tasks inside a barrier
+//                           construct
 // OMP_single             -- Time spent executing a "single" region
 // OMP_master             -- Time spent executing a "master" region
 // OMP_set_numthreads     -- Values passed to omp_set_num_threads
 // OMP_PARALLEL_args      -- Number of arguments passed to a parallel region
-// FOR_static_iterations  -- Number of available parallel chunks of work in a static for
-// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for
-//                           Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2.
+// FOR_static_iterations  -- Number of available parallel chunks of work in a
+//                           static for
+// FOR_dynamic_iterations -- Number of available parallel chunks of work in a
+//                           dynamic for
+//                           Both adjust for any chunking, so if there were an
+//                           iteration count of 20 but a chunk size of 10, we'd
+//                           record 2.
 
 #if (KMP_DEVELOPER_STATS)
 // Timers which are of interest to runtime library developers, not end users.
@@ -192,227 +215,239 @@ enum stats_state_e {
 // KMP_tree_release       -- time in __kmp_tree_barrier_release
 // KMP_hyper_gather       -- time in __kmp_hyper_barrier_gather
 // KMP_hyper_release      -- time in __kmp_hyper_barrier_release
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \
-    macro (KMP_fork_call, 0, arg)                \
-    macro (KMP_join_call, 0, arg)                \
-    macro (KMP_end_split_barrier, 0, arg)        \
-    macro (KMP_hier_gather, 0, arg)              \
-    macro (KMP_hier_release, 0, arg)             \
-    macro (KMP_hyper_gather, 0, arg)             \
-    macro (KMP_hyper_release, 0, arg)            \
-    macro (KMP_linear_gather, 0, arg)            \
-    macro (KMP_linear_release, 0, arg)           \
-    macro (KMP_tree_gather, 0, arg)              \
-    macro (KMP_tree_release, 0, arg)             \
-    macro (USER_resume, 0, arg)                  \
-    macro (USER_suspend, 0, arg)                 \
-    macro (KMP_allocate_team, 0, arg)            \
-    macro (KMP_setup_icv_copy, 0, arg)           \
-    macro (USER_icv_copy, 0, arg)
+#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
+  macro(KMP_fork_call, 0, arg) macro(KMP_join_call, 0, arg) macro(             \
+      KMP_end_split_barrier, 0, arg) macro(KMP_hier_gather, 0, arg)            \
+      macro(KMP_hier_release, 0, arg) macro(KMP_hyper_gather, 0, arg)          \
+          macro(KMP_hyper_release, 0, arg) macro(KMP_linear_gather, 0, arg)    \
+              macro(KMP_linear_release, 0, arg) macro(KMP_tree_gather, 0, arg) \
+                  macro(KMP_tree_release, 0, arg) macro(USER_resume, 0, arg)   \
+                      macro(USER_suspend, 0, arg)                              \
+                          macro(KMP_allocate_team, 0, arg)                     \
+                              macro(KMP_setup_icv_copy, 0, arg)                \
+                                  macro(USER_icv_copy, 0, arg)
 #else
-# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
+#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
 #endif
 
 /*!
  * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
  *
- * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg)
+ * @param macro a user defined macro that takes three arguments -
+ * macro(TIMER_NAME, flags, arg)
  * @param arg a user defined argument to send to the user defined macro
  *
- * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN!
+ * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE
+ * BAD THINGS WILL HAPPEN!
  *
- * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics).
- * We allocate these on a per-thread basis, and explicitly start and stop them.
- * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't
- * need to be defined here.
- * The name here should be the same as that of a timer above.
+ * \details Explicit timers are ones where we need to allocate a timer itself
+ * (as well as the accumulated timing statistics). We allocate these on a
+ * per-thread basis, and explicitly start and stop them. Block timers just
+ * allocate the timer itself on the stack, and use the destructor to notice
+ * block exit; they don't need to be defined here. The name here should be the
+ * same as that of a timer above.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \
-    KMP_FOREACH_TIMER(macro, arg)
+#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
 
-#define ENUMERATE(name,ignore,prefix) prefix##name,
-enum timer_e {
-    KMP_FOREACH_TIMER(ENUMERATE, TIMER_)
-    TIMER_LAST
-};
+#define ENUMERATE(name, ignore, prefix) prefix##name,
+enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
 
 enum explicit_timer_e {
-    KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_)
-    EXPLICIT_TIMER_LAST
+  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
 };
 
-enum counter_e {
-    KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_)
-    COUNTER_LAST
-};
+enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
 #undef ENUMERATE
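
The ENUMERATE define above is the standard X-macro idiom: the single
KMP_FOREACH_TIMER()/KMP_FOREACH_COUNTER() lists drive the enums here, the
statInfo name tables, and the timer color table, so a new timer only has to be
added in one place. A minimal, self-contained sketch of the same idiom (the
item names are illustrative, not the real timer list):

// A self-contained sketch of the X-macro idiom used above (illustrative item
// names, not the real timer list).
#include <cstdio>

#define MY_FOREACH_ITEM(macro, arg)                                            \
  macro(alpha, 0, arg) macro(beta, 0, arg) macro(gamma, 0, arg)

#define ENUM_IT(name, ignore, prefix) prefix##name,
enum item_e { MY_FOREACH_ITEM(ENUM_IT, ITEM_) ITEM_LAST };
#undef ENUM_IT

#define NAME_IT(name, ignore1, ignore2) #name,
static const char *itemNames[] = {MY_FOREACH_ITEM(NAME_IT, 0) "ITEM_LAST"};
#undef NAME_IT

int main() {
  // The enum constants and the name table stay in sync automatically.
  for (int i = 0; i < ITEM_LAST; i++)
    std::printf("%d -> %s\n", i, itemNames[i]);
  return 0;
}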
 
 class timerPair {
-    explicit_timer_e timer_index;
-    timer_e timer;
- public:
-    timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
-    inline explicit_timer_e get_index() const { return timer_index; }
-    inline timer_e get_timer() const { return timer; }
-    bool operator==(const timerPair & rhs) {
-        return this->get_index() == rhs.get_index();
-    }
-    bool operator!=(const timerPair & rhs) {
-        return !(*this == rhs);
-    }
-};
+  explicit_timer_e timer_index;
+  timer_e timer;
 
-class statistic
-{
-    double   minVal;
-    double   maxVal;
-    double   meanVal;
-    double   m2;
-    uint64_t sampleCount;
-
- public:
-    statistic() { reset(); }
-    statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {}
-
-    double   getMin()   const { return minVal; }
-    double   getMean()  const { return meanVal; }
-    double   getMax()   const { return maxVal; }
-    uint64_t getCount() const { return sampleCount; }
-    double   getSD()    const { return sqrt(m2/sampleCount); }
-    double   getTotal() const { return sampleCount*meanVal; }
-
-    void reset()
-    {
-        minVal =  std::numeric_limits<double>::max();
-        maxVal = -std::numeric_limits<double>::max();
-        meanVal= 0.0;
-        m2     = 0.0;
-        sampleCount = 0;
-    }
-    void addSample(double sample);
-    void scale    (double factor);
-    void scaleDown(double f)  { scale (1./f); }
-    statistic & operator+= (statistic const & other);
-
-    std::string format(char unit, bool total=false) const;
-};
-
-struct statInfo
-{
-    const char * name;
-    uint32_t     flags;
-};
-
-class timeStat : public statistic
-{
-    static statInfo timerInfo[];
-
- public:
-    timeStat() : statistic() {}
-    static const char * name(timer_e e) { return timerInfo[e].name; }
-    static bool  noTotal    (timer_e e) { return timerInfo[e].flags & stats_flags_e::noTotal;      }
-    static bool  masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; }
-    static bool  workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster;  }
-    static bool  noUnits    (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits;      }
-    static bool  logEvent   (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent;     }
-    static void  clearEventFlags()      {
-        for(int i=0;i<TIMER_LAST;i++) {
-            timerInfo[i].flags &= (~(stats_flags_e::logEvent));
-        }
+public:
+  timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
+  inline explicit_timer_e get_index() const { return timer_index; }
+  inline timer_e get_timer() const { return timer; }
+  bool operator==(const timerPair &rhs) {
+    return this->get_index() == rhs.get_index();
+  }
+  bool operator!=(const timerPair &rhs) { return !(*this == rhs); }
+};
+
+class statistic {
+  double minVal;
+  double maxVal;
+  double meanVal;
+  double m2;
+  uint64_t sampleCount;
+
+public:
+  statistic() { reset(); }
+  statistic(statistic const &o)
+      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
+        sampleCount(o.sampleCount) {}
+
+  double getMin() const { return minVal; }
+  double getMean() const { return meanVal; }
+  double getMax() const { return maxVal; }
+  uint64_t getCount() const { return sampleCount; }
+  double getSD() const { return sqrt(m2 / sampleCount); }
+  double getTotal() const { return sampleCount * meanVal; }
+
+  void reset() {
+    minVal = std::numeric_limits<double>::max();
+    maxVal = -std::numeric_limits<double>::max();
+    meanVal = 0.0;
+    m2 = 0.0;
+    sampleCount = 0;
+  }
+  void addSample(double sample);
+  void scale(double factor);
+  void scaleDown(double f) { scale(1. / f); }
+  statistic &operator+=(statistic const &other);
+
+  std::string format(char unit, bool total = false) const;
+};
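
addSample() and scale() are only declared here; their definitions live in
kmp_stats.cpp. Given the minVal/maxVal/meanVal/m2/sampleCount members and
getSD() = sqrt(m2/sampleCount), the update is presumably an online
(Welford-style) one. A hedged sketch of such an update, which may differ from
the real implementation:

// A hedged sketch of an online (Welford-style) update consistent with the
// members above; the real addSample() is defined in kmp_stats.cpp and may
// differ in detail.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

struct stat_sketch {
  double minVal = std::numeric_limits<double>::max();
  double maxVal = -std::numeric_limits<double>::max();
  double meanVal = 0.0;
  double m2 = 0.0; // running sum of squared deviations from the mean
  uint64_t sampleCount = 0;

  void addSample(double sample) {
    sampleCount++;
    minVal = std::min(minVal, sample);
    maxVal = std::max(maxVal, sample);
    double delta = sample - meanVal;
    meanVal += delta / sampleCount;   // running mean
    m2 += delta * (sample - meanVal); // Welford update of the deviation sum
  }
  double getSD() const { return std::sqrt(m2 / sampleCount); } // population SD
};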
+
+struct statInfo {
+  const char *name;
+  uint32_t flags;
+};
+
+class timeStat : public statistic {
+  static statInfo timerInfo[];
+
+public:
+  timeStat() : statistic() {}
+  static const char *name(timer_e e) { return timerInfo[e].name; }
+  static bool noTotal(timer_e e) {
+    return timerInfo[e].flags & stats_flags_e::noTotal;
+  }
+  static bool masterOnly(timer_e e) {
+    return timerInfo[e].flags & stats_flags_e::onlyInMaster;
+  }
+  static bool workerOnly(timer_e e) {
+    return timerInfo[e].flags & stats_flags_e::notInMaster;
+  }
+  static bool noUnits(timer_e e) {
+    return timerInfo[e].flags & stats_flags_e::noUnits;
+  }
+  static bool logEvent(timer_e e) {
+    return timerInfo[e].flags & stats_flags_e::logEvent;
+  }
+  static void clearEventFlags() {
+    for (int i = 0; i < TIMER_LAST; i++) {
+      timerInfo[i].flags &= (~(stats_flags_e::logEvent));
     }
+  }
 };
 
 // Where we need explicitly to start and end the timer, this version can be used
-// Since these timers normally aren't nicely scoped, so don't have a good place to live
-// on the stack of the thread, they're more work to use.
-class explicitTimer
-{
-    timeStat * stat;
-    tsc_tick_count startTime;
-    tsc_tick_count pauseStartTime;
-    tsc_tick_count::tsc_interval_t totalPauseTime;
-
- public:
-    explicitTimer () : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() { }
-    explicitTimer (timeStat * s) : stat(s), startTime(), pauseStartTime(0), totalPauseTime() { }
-
-    void setStat (timeStat *s) { stat = s; }
-    void start(timer_e timerEnumValue);
-    void pause() { pauseStartTime = tsc_tick_count::now(); }
-    void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
-    void stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr = nullptr);
-    void reset() { startTime = 0; pauseStartTime = 0; totalPauseTime = 0; }
+// Since these timers normally aren't nicely scoped and don't have a good place
+// to live on the stack of the thread, they're more work to use.
+class explicitTimer {
+  timeStat *stat;
+  tsc_tick_count startTime;
+  tsc_tick_count pauseStartTime;
+  tsc_tick_count::tsc_interval_t totalPauseTime;
+
+public:
+  explicitTimer()
+      : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
+  explicitTimer(timeStat *s)
+      : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
+
+  void setStat(timeStat *s) { stat = s; }
+  void start(timer_e timerEnumValue);
+  void pause() { pauseStartTime = tsc_tick_count::now(); }
+  void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
+  void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr = nullptr);
+  void reset() {
+    startTime = 0;
+    pauseStartTime = 0;
+    totalPauseTime = 0;
+  }
 };
 
 // Where all you need is to time a block, this is enough.
 // (It avoids the need for an explicit end; leaving the scope suffices.)
-class blockTimer : public explicitTimer
-{
-    timer_e timerEnumValue;
- public:
-    blockTimer (timeStat * s, timer_e newTimerEnumValue) : timerEnumValue(newTimerEnumValue), explicitTimer(s) { start(timerEnumValue); }
-    ~blockTimer() { stop(timerEnumValue); }
+class blockTimer : public explicitTimer {
+  timer_e timerEnumValue;
+
+public:
+  blockTimer(timeStat *s, timer_e newTimerEnumValue)
+      : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
+    start(timerEnumValue);
+  }
+  ~blockTimer() { stop(timerEnumValue); }
 };
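
blockTimer is a plain RAII wrapper: the constructor starts the timer and the
destructor stops it when the enclosing scope exits, including on early returns.
A standalone illustration of the same lifetime using std::chrono rather than
the runtime's tsc_tick_count:

// Illustrative RAII timer using std::chrono; the runtime uses tsc_tick_count
// instead, but the scoping behaviour is the same.
#include <chrono>
#include <cstdio>

class scoped_timer {
  std::chrono::steady_clock::time_point start_;
  const char *name_;

public:
  explicit scoped_timer(const char *name)
      : start_(std::chrono::steady_clock::now()), name_(name) {}
  ~scoped_timer() {
    auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
                  std::chrono::steady_clock::now() - start_)
                  .count();
    std::printf("%s: %lld ns\n", name_, (long long)ns);
  }
};

void work() {
  scoped_timer t("work"); // stops automatically when work() returns
  // ... body being timed ...
}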
 
 // Where you need to partition a thread's clock ticks into separate states
 // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
-//   DOING_NOTHING would render these conditions:
-//   time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
-//   No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice versa
-class partitionedTimers
-{
- private:
-    explicitTimer* timers[EXPLICIT_TIMER_LAST+1];
-    std::vector<timerPair> timer_stack;
- public:
-    partitionedTimers();
-    void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer);
-    void init(timerPair timer_index);
-    void push(timerPair timer_index);
-    void pop();
-    void windup();
+// DOING_NOTHING would render these conditions:
+// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
+// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
+// versa
+class partitionedTimers {
+private:
+  explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
+  std::vector<timerPair> timer_stack;
+
+public:
+  partitionedTimers();
+  void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
+  void init(timerPair timer_index);
+  void push(timerPair timer_index);
+  void pop();
+  void windup();
 };
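
The invariants stated above (the per-state times sum to the thread's lifetime
and no tick is counted in two states) are presumably maintained by pausing the
timer on top of the stack when a new state is pushed and resuming it when that
state is popped. A simplified sketch of that stack discipline, with toy types
standing in for explicitTimer/timeStat:

// A simplified sketch of the push/pause/pop/resume discipline; the real class
// also records samples into timeStat objects on pop and windup.
#include <vector>

struct toy_timer {
  bool running = false;
  void start() { running = true; }
  void pause() { running = false; }
  void resume() { running = true; }
  void stop() { running = false; }
};

struct toy_partitioned_timers {
  std::vector<toy_timer *> stack;
  void push(toy_timer *t) {
    if (!stack.empty())
      stack.back()->pause(); // outer state stops accumulating ticks
    t->start();
    stack.push_back(t);
  }
  void pop() {
    stack.back()->stop();
    stack.pop_back();
    if (!stack.empty())
      stack.back()->resume(); // ticks flow back to the enclosing state
  }
};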
 
 // Special wrapper around the partitioned timers to aid timing code blocks.
 // It avoids the need for an explicit end; leaving the scope suffices.
-class blockPartitionedTimer
-{
-    partitionedTimers* part_timers;
-    timerPair timer_pair;
- public:
-    blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); }
-   ~blockPartitionedTimer() { part_timers->pop(); }
-};
-
-// Special wrapper around the thread state to aid in keeping state in code blocks
-// It avoids the need to have an explicit end, leaving the scope suffices.
-class blockThreadState
-{
-    stats_state_e* state_pointer;
-    stats_state_e  old_state;
- public:
-    blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
-        *state_pointer = new_state;
-    }
-   ~blockThreadState() { *state_pointer = old_state;  }
+class blockPartitionedTimer {
+  partitionedTimers *part_timers;
+  timerPair timer_pair;
+
+public:
+  blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
+      : part_timers(pt), timer_pair(tp) {
+    part_timers->push(timer_pair);
+  }
+  ~blockPartitionedTimer() { part_timers->pop(); }
+};
+
+// Special wrapper around the thread state to aid in keeping state in code
+// blocks. It avoids the need to have an explicit end; leaving the scope
+// suffices.
+class blockThreadState {
+  stats_state_e *state_pointer;
+  stats_state_e old_state;
+
+public:
+  blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
+      : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
+    *state_pointer = new_state;
+  }
+  ~blockThreadState() { *state_pointer = old_state; }
 };
 
 // If all you want is a count, then you can use this...
-// The individual per-thread counts will be aggregated into a statistic at program exit.
-class counter
-{
-    uint64_t value;
-    static const statInfo counterInfo[];
-
- public:
-    counter() : value(0) {}
-    void increment() { value++; }
-    uint64_t getValue() const { return value; }
-    void reset() { value = 0; }
-    static const char * name(counter_e e) { return counterInfo[e].name; }
-    static bool  masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; }
+// The individual per-thread counts will be aggregated into a statistic at
+// program exit.
+class counter {
+  uint64_t value;
+  static const statInfo counterInfo[];
+
+public:
+  counter() : value(0) {}
+  void increment() { value++; }
+  uint64_t getValue() const { return value; }
+  void reset() { value = 0; }
+  static const char *name(counter_e e) { return counterInfo[e].name; }
+  static bool masterOnly(counter_e e) {
+    return counterInfo[e].flags & stats_flags_e::onlyInMaster;
+  }
 };
 
 /* ****************************************************************
@@ -449,17 +484,20 @@ Begin ----------------------------------
 
 **************************************************************** */
 class kmp_stats_event {
-    uint64_t start;
-    uint64_t stop;
-    int nest_level;
-    timer_e timer_name;
- public:
-    kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
-    kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
-    inline uint64_t  getStart() const { return start; }
-    inline uint64_t  getStop() const  { return stop;  }
-    inline int       getNestLevel() const { return nest_level; }
-    inline timer_e   getTimerName() const { return timer_name; }
+  uint64_t start;
+  uint64_t stop;
+  int nest_level;
+  timer_e timer_name;
+
+public:
+  kmp_stats_event()
+      : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
+  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
+      : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
+  inline uint64_t getStart() const { return start; }
+  inline uint64_t getStop() const { return stop; }
+  inline int getNestLevel() const { return nest_level; }
+  inline timer_e getTimerName() const { return timer_name; }
 };
 
 /* ****************************************************************
@@ -479,48 +517,54 @@ class kmp_stats_event {
     to avoid reallocations, then set INIT_SIZE to a large value.
 
     the interface to this class is through six operations:
-    1) reset() -- sets the internal_size back to 0 but does not deallocate any memory
+    1) reset() -- sets the internal_size back to 0 but does not deallocate any
+       memory
     2) size()  -- returns the number of valid elements in the vector
     3) push_back(start, stop, nest, timer_name) -- pushes an event onto
-                                                   the back of the array
+       the back of the array
     4) deallocate() -- frees all memory associated with the vector
     5) sort() -- sorts the vector by start time
     6) operator[index] or at(index) -- returns event reference at that index
-
 **************************************************************** */
 class kmp_stats_event_vector {
-    kmp_stats_event* events;
-    int internal_size;
-    int allocated_size;
-    static const int INIT_SIZE = 1024;
- public:
-    kmp_stats_event_vector() {
-        events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE);
-        internal_size = 0;
-        allocated_size = INIT_SIZE;
-    }
-   ~kmp_stats_event_vector() {}
-    inline void reset() { internal_size = 0; }
-    inline int  size() const { return internal_size; }
-    void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
-        int i;
-        if(internal_size == allocated_size) {
-            kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2);
-            for(i=0;i<internal_size;i++) tmp[i] = events[i];
-            __kmp_free(events);
-            events = tmp;
-            allocated_size*=2;
-        }
-        events[internal_size] = kmp_stats_event(start_time, stop_time, nest_level, name);
-        internal_size++;
-        return;
+  kmp_stats_event *events;
+  int internal_size;
+  int allocated_size;
+  static const int INIT_SIZE = 1024;
+
+public:
+  kmp_stats_event_vector() {
+    events =
+        (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
+    internal_size = 0;
+    allocated_size = INIT_SIZE;
+  }
+  ~kmp_stats_event_vector() {}
+  inline void reset() { internal_size = 0; }
+  inline int size() const { return internal_size; }
+  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
+                 timer_e name) {
+    int i;
+    if (internal_size == allocated_size) {
+      kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
+          sizeof(kmp_stats_event) * allocated_size * 2);
+      for (i = 0; i < internal_size; i++)
+        tmp[i] = events[i];
+      __kmp_free(events);
+      events = tmp;
+      allocated_size *= 2;
     }
-    void deallocate();
-    void sort();
-    const kmp_stats_event & operator[](int index) const { return events[index]; }
-          kmp_stats_event & operator[](int index) { return events[index]; }
-    const kmp_stats_event & at(int index) const { return events[index]; }
-          kmp_stats_event & at(int index) { return events[index]; }
+    events[internal_size] =
+        kmp_stats_event(start_time, stop_time, nest_level, name);
+    internal_size++;
+    return;
+  }
+  void deallocate();
+  void sort();
+  const kmp_stats_event &operator[](int index) const { return events[index]; }
+  kmp_stats_event &operator[](int index) { return events[index]; }
+  const kmp_stats_event &at(int index) const { return events[index]; }
+  kmp_stats_event &at(int index) { return events[index]; }
 };
 
 /* ****************************************************************
@@ -536,13 +580,12 @@ class kmp_stats_event_vector {
     The first node corresponds to thread 0's statistics.
     The second node corresponds to thread 1's statistics and so on...
 
-    Each node has a _timers, _counters, and _explicitTimers array to
-    hold that thread's statistics.  The _explicitTimers
-    point to the correct _timer and update its statistics at every stop() call.
-    The explicitTimers' pointers are set up in the constructor.
-    Each node also has an event vector to hold that thread's timing events.
-    The event vector expands as necessary and records the start-stop times
-    for each timer.
+    Each node has a _timers, _counters, and _explicitTimers array to hold that
+    thread's statistics. The _explicitTimers point to the correct _timer and
+    update its statistics at every stop() call. The explicitTimers' pointers are
+    set up in the constructor. Each node also has an event vector to hold that
+    thread's timing events. The event vector expands as necessary and records
+    the start-stop times for each timer.
 
     The nestLevel variable is for plotting events and is related
     to the bar width in the timeline graph.
@@ -550,138 +593,148 @@ class kmp_stats_event_vector {
     Every thread will have a __thread local pointer to its node in
     the list.  The sentinel node is used by the master thread to
     store "dummy" statistics before __kmp_create_worker() is called.
-
 **************************************************************** */
 class kmp_stats_list {
-    int gtid;
-    timeStat      _timers[TIMER_LAST+1];
-    counter       _counters[COUNTER_LAST+1];
-    explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST+1];
-    partitionedTimers _partitionedTimers;
-    int           _nestLevel; // one per thread
-    kmp_stats_event_vector _event_vector;
-    kmp_stats_list* next;
-    kmp_stats_list* prev;
-    stats_state_e state;
-    int thread_is_idle_flag;
- public:
-    kmp_stats_list() : _nestLevel(0), _event_vector(), next(this), prev(this),
-      state(IDLE), thread_is_idle_flag(0) {
-#define doInit(name,ignore1,ignore2) \
-        getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
-        _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name));
-        KMP_FOREACH_EXPLICIT_TIMER(doInit,0);
+  int gtid;
+  timeStat _timers[TIMER_LAST + 1];
+  counter _counters[COUNTER_LAST + 1];
+  explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
+  partitionedTimers _partitionedTimers;
+  int _nestLevel; // one per thread
+  kmp_stats_event_vector _event_vector;
+  kmp_stats_list *next;
+  kmp_stats_list *prev;
+  stats_state_e state;
+  int thread_is_idle_flag;
+
+public:
+  kmp_stats_list()
+      : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
+        thread_is_idle_flag(0) {
+#define doInit(name, ignore1, ignore2)                                         \
+  getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name));    \
+  _partitionedTimers.add_timer(EXPLICIT_TIMER_##name,                          \
+                               getExplicitTimer(EXPLICIT_TIMER_##name));
+    KMP_FOREACH_EXPLICIT_TIMER(doInit, 0);
 #undef doInit
-    }
-   ~kmp_stats_list() { }
-    inline timeStat *      getTimer(timer_e idx)                  { return &_timers[idx]; }
-    inline counter  *      getCounter(counter_e idx)              { return &_counters[idx]; }
-    inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; }
-    inline partitionedTimers * getPartitionedTimers()             { return &_partitionedTimers; }
-    inline timeStat *      getTimers()                            { return _timers; }
-    inline counter  *      getCounters()                          { return _counters; }
-    inline explicitTimer * getExplicitTimers()                    { return _explicitTimers; }
-    inline kmp_stats_event_vector & getEventVector()              { return _event_vector; }
-    inline void resetEventVector()                                { _event_vector.reset(); }
-    inline void incrementNestValue()                              { _nestLevel++; }
-    inline int  getNestValue()                                    { return _nestLevel; }
-    inline void decrementNestValue()                              { _nestLevel--; }
-    inline int  getGtid() const                                   { return gtid; }
-    inline void setGtid(int newgtid)                              { gtid = newgtid; }
-    inline void setState(stats_state_e newstate)                  { state = newstate; }
-    inline stats_state_e getState() const                         { return state; }
-    inline stats_state_e * getStatePointer()                      { return &state; }
-    inline bool  isIdle()                                         { return thread_is_idle_flag==1; }
-    inline void setIdleFlag()                                     { thread_is_idle_flag = 1; }
-    inline void resetIdleFlag()                                   { thread_is_idle_flag = 0; }
-    kmp_stats_list* push_back(int gtid); // returns newly created list node
-    inline void     push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) {
-        _event_vector.push_back(start_time, stop_time, nest_level, name);
-    }
-    void deallocate();
-    class iterator;
-    kmp_stats_list::iterator begin();
-    kmp_stats_list::iterator end();
-    int size();
-    class iterator {
-        kmp_stats_list* ptr;
-        friend kmp_stats_list::iterator kmp_stats_list::begin();
-        friend kmp_stats_list::iterator kmp_stats_list::end();
-      public:
-        iterator();
-       ~iterator();
-        iterator operator++();
-        iterator operator++(int dummy);
-        iterator operator--();
-        iterator operator--(int dummy);
-        bool operator!=(const iterator & rhs);
-        bool operator==(const iterator & rhs);
-        kmp_stats_list* operator*() const; // dereference operator
-    };
+  }
+  ~kmp_stats_list() {}
+  inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
+  inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
+  inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
+    return &_explicitTimers[idx];
+  }
+  inline partitionedTimers *getPartitionedTimers() {
+    return &_partitionedTimers;
+  }
+  inline timeStat *getTimers() { return _timers; }
+  inline counter *getCounters() { return _counters; }
+  inline explicitTimer *getExplicitTimers() { return _explicitTimers; }
+  inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
+  inline void resetEventVector() { _event_vector.reset(); }
+  inline void incrementNestValue() { _nestLevel++; }
+  inline int getNestValue() { return _nestLevel; }
+  inline void decrementNestValue() { _nestLevel--; }
+  inline int getGtid() const { return gtid; }
+  inline void setGtid(int newgtid) { gtid = newgtid; }
+  inline void setState(stats_state_e newstate) { state = newstate; }
+  inline stats_state_e getState() const { return state; }
+  inline stats_state_e *getStatePointer() { return &state; }
+  inline bool isIdle() { return thread_is_idle_flag == 1; }
+  inline void setIdleFlag() { thread_is_idle_flag = 1; }
+  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
+  kmp_stats_list *push_back(int gtid); // returns newly created list node
+  inline void push_event(uint64_t start_time, uint64_t stop_time,
+                         int nest_level, timer_e name) {
+    _event_vector.push_back(start_time, stop_time, nest_level, name);
+  }
+  void deallocate();
+  class iterator;
+  kmp_stats_list::iterator begin();
+  kmp_stats_list::iterator end();
+  int size();
+  class iterator {
+    kmp_stats_list *ptr;
+    friend kmp_stats_list::iterator kmp_stats_list::begin();
+    friend kmp_stats_list::iterator kmp_stats_list::end();
+
+  public:
+    iterator();
+    ~iterator();
+    iterator operator++();
+    iterator operator++(int dummy);
+    iterator operator--();
+    iterator operator--(int dummy);
+    bool operator!=(const iterator &rhs);
+    bool operator==(const iterator &rhs);
+    kmp_stats_list *operator*() const; // dereference operator
+  };
 };
 
 /* ****************************************************************
    Class to encapsulate all output functions and the environment variables
 
-   This module holds filenames for various outputs (normal stats, events, plot file),
-   as well as coloring information for the plot file.
+   This module holds filenames for various outputs (normal stats, events, plot
+   file), as well as coloring information for the plot file.
 
    The filenames and flags variables are read from environment variables.
-   These are read once by the constructor of the global variable __kmp_stats_output
-   which calls init().
+   These are read once by the constructor of the global variable
+   __kmp_stats_output which calls init().
 
-   During this init() call, event flags for the timeStat::timerInfo[] global array
-   are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
+   During this init() call, event flags for the timeStat::timerInfo[] global
+   array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
 
-   The only interface function that is public is outputStats(heading).  This function
-   should print out everything it needs to, either to files or stderr,
+   The only interface function that is public is outputStats(heading).  This
+   function should print out everything it needs to, either to files or stderr,
    depending on the environment variables described below
 
    ENVIRONMENT VARIABLES:
-   KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file,
-                     otherwise, print to stderr
-   KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either
-                        KMP_STATS_FILE or stderr
+   KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
+                     file, otherwise, print to stderr
+   KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
+                        either KMP_STATS_FILE or stderr
    KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
                           otherwise, the plot file is sent to "events.plt"
-   KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events
+   KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
+                       events
    KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
                             otherwise, output is sent to "events.dat"
-
 **************************************************************** */
 class kmp_stats_output_module {
 
- public:
-    struct rgb_color {
-        float r;
-        float g;
-        float b;
-    };
-
- private:
-    std::string outputFileName;
-    static const char* eventsFileName;
-    static const char* plotFileName;
-    static int printPerThreadFlag;
-    static int printPerThreadEventsFlag;
-    static const rgb_color globalColorArray[];
-    static       rgb_color timerColorInfo[];
-
-    void init();
-    static void setupEventColors();
-    static void printPloticusFile();
-    static void printHeaderInfo(FILE *statsOut);
-    static void printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats);
-    static void printCounterStats(FILE *statsOut, statistic const * theStats);
-    static void printCounters(FILE * statsOut, counter const * theCounters);
-    static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid);
-    static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
-    static void windupExplicitTimers();
-    bool eventPrintingEnabled() const         { return printPerThreadEventsFlag; }
-
- public:
-    kmp_stats_output_module() { init(); }
-    void outputStats(const char* heading);
+public:
+  struct rgb_color {
+    float r;
+    float g;
+    float b;
+  };
+
+private:
+  std::string outputFileName;
+  static const char *eventsFileName;
+  static const char *plotFileName;
+  static int printPerThreadFlag;
+  static int printPerThreadEventsFlag;
+  static const rgb_color globalColorArray[];
+  static rgb_color timerColorInfo[];
+
+  void init();
+  static void setupEventColors();
+  static void printPloticusFile();
+  static void printHeaderInfo(FILE *statsOut);
+  static void printTimerStats(FILE *statsOut, statistic const *theStats,
+                              statistic const *totalStats);
+  static void printCounterStats(FILE *statsOut, statistic const *theStats);
+  static void printCounters(FILE *statsOut, counter const *theCounters);
+  static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
+                          int gtid);
+  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
+  static void windupExplicitTimers();
+  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
+
+public:
+  kmp_stats_output_module() { init(); }
+  void outputStats(const char *heading);
 };
 
 #ifdef __cplusplus
@@ -693,11 +746,11 @@ void __kmp_reset_stats();
 void __kmp_output_stats(const char *);
 void __kmp_accumulate_stats_at_exit(void);
 // thread local pointer to stats node within list
-extern __thread kmp_stats_list* __kmp_stats_thread_ptr;
+extern __thread kmp_stats_list *__kmp_stats_thread_ptr;
 // head to stats list.
-extern kmp_stats_list* __kmp_stats_list;
+extern kmp_stats_list *__kmp_stats_list;
 // lock for __kmp_stats_list
-extern kmp_tas_lock_t  __kmp_stats_lock;
+extern kmp_tas_lock_t __kmp_stats_lock;
 // reference start time
 extern tsc_tick_count __kmp_stats_start_time;
 // interface to output
@@ -709,21 +762,21 @@ extern kmp_stats_output_module __kmp_sta
 
 // Simple, standard interfaces that drop out completely if stats aren't enabled
 
-
 /*!
  * \brief Uses specified timer (name) to time code block.
  *
  * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
  *
- * \details Use KMP_TIME_BLOCK(name) macro to time a code block.  This will record the time taken in the block
- * and use the destructor to stop the timer.  Convenient!
- * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block.
- * I don't think that's a problem.
+ * \details Use KMP_TIME_BLOCK(name) macro to time a code block.  This will
+ * record the time taken in the block and use the destructor to stop the timer.
+ * Convenient! With this definition you can't have more than one KMP_TIME_BLOCK
+ * in the same code block. I don't think that's a problem.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_TIME_BLOCK(name) \
-    blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)
+#define KMP_TIME_BLOCK(name)                                                   \
+  blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name),     \
+                           TIMER_##name)
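
For illustration, a hypothetical call site (the function is made up;
FOR_static_scheduling is one of the timers declared under KMP_FOREACH_TIMER()):

// Hypothetical call site, not code from the runtime.
static void example_static_schedule() {
  KMP_TIME_BLOCK(FOR_static_scheduling); // starts the timer here
  // ... compute the per-thread iteration bounds ...
} // __BLOCKTIME__ is destroyed here, stopping the timer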
 
 /*!
  * \brief Adds value to specified timer (name).
@@ -731,69 +784,83 @@ extern kmp_stats_output_module __kmp_sta
  * @param name timer name as specified under the KMP_FOREACH_TIMER() macro
  * @param value double precision sample value to add to statistics for the timer
  *
- * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics.
+ * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to
+ * a timer statistics.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_COUNT_VALUE(name, value) \
-    __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
+#define KMP_COUNT_VALUE(name, value)                                           \
+  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)
 
 /*!
  * \brief Increments specified counter (name).
  *
  * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro
  *
- * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread.
+ * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics
+ * counter for the executing thread.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_COUNT_BLOCK(name) \
-   __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
+#define KMP_COUNT_BLOCK(name)                                                  \
+  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()
 
 /*!
- * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro.
+ * \brief "Starts" an explicit timer which will need a corresponding
+ * KMP_STOP_EXPLICIT_TIMER() macro.
  *
- * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
+ * @param name explicit timer name as specified under the
+ * KMP_FOREACH_EXPLICIT_TIMER() macro
  *
- * \details Use to start a timer.  This will need a corresponding KMP_STOP_EXPLICIT_TIMER()
- * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end
- * of the code block.  All explicit timers are stopped at library exit time before the final statistics are outputted.
+ * \details Use to start a timer.  This will need a corresponding
+ * KMP_STOP_EXPLICIT_TIMER() macro to stop the timer unlike the
+ * KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end
+ * of the code block.  All explicit timers are stopped at library exit time
+ * before the final statistics are outputted.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_START_EXPLICIT_TIMER(name) \
-    __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name)
+#define KMP_START_EXPLICIT_TIMER(name)                                         \
+  __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)              \
+      ->start(TIMER_##name)
 
 /*!
  * \brief "Stops" an explicit timer.
  *
- * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro
+ * @param name explicit timer name as specified under the
+ * KMP_FOREACH_EXPLICIT_TIMER() macro
  *
- * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer.  When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name)
- * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value.  The timer will then be reset.
- * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again.
+ * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer.  When this is
+ * done, the time between the last KMP_START_EXPLICIT_TIMER(name) and this
+ * KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The
+ * timer will then be reset. After the KMP_STOP_EXPLICIT_TIMER(name) macro is
+ * called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer
+ * once again.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_STOP_EXPLICIT_TIMER(name) \
-    __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name)
+#define KMP_STOP_EXPLICIT_TIMER(name)                                          \
+  __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)              \
+      ->stop(TIMER_##name)
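
Unlike the block form, the explicit form brackets work that does not fit in a
single scope; a hypothetical pairing (OMP_serial is a real timer name, the
surrounding code is not):

// Hypothetical pairing; every timer declared under KMP_FOREACH_TIMER() also
// has an explicit timer, so OMP_serial can be used here.
KMP_START_EXPLICIT_TIMER(OMP_serial);
// ... serial (non-parallel) work, possibly spanning several functions ...
KMP_STOP_EXPLICIT_TIMER(OMP_serial); // sample recorded, timer reset for reuse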
 
 /*!
  * \brief Outputs the current thread statistics and reset them.
  *
  * @param heading_string heading put above the final stats output
  *
- * \details Explicitly stops all timers and outputs all stats.
- * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr
- * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats
- * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread
- * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed
- * It should be noted that all statistics are reset when this macro is called.
+ * \details Explicitly stops all timers and outputs all stats. Environment
+ * variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a
+ * filename instead of stderr. Environment variable,
+ * `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific
+ * stats. For now the `OMPTB_STATSTHREADS` environment variable can either be
+ * defined with any value, which will print out thread specific stats, or it can
+ * be undefined (not specified in the environment) and thread specific stats
+ * won't be printed. It should be noted that all statistics are reset when this
+ * macro is called.
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_OUTPUT_STATS(heading_string) \
-    __kmp_output_stats(heading_string)
+#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)
 
 /*!
  * \brief Initializes the partitioned timers to begin with name.
@@ -802,27 +869,30 @@ extern kmp_stats_output_module __kmp_sta
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_INIT_PARTITIONED_TIMERS(name) \
-    __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
-
-#define KMP_TIME_PARTITIONED_BLOCK(name) \
-    blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \
-        timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
-
-#define KMP_PUSH_PARTITIONED_TIMER(name) \
-    __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
-
-#define KMP_POP_PARTITIONED_TIMER() \
-    __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
-
-#define KMP_SET_THREAD_STATE(state_name) \
-    __kmp_stats_thread_ptr->setState(state_name)
-
-#define KMP_GET_THREAD_STATE() \
-    __kmp_stats_thread_ptr->getState()
-
-#define KMP_SET_THREAD_STATE_BLOCK(state_name) \
-    blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name)
+#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
+  __kmp_stats_thread_ptr->getPartitionedTimers()->init(                        \
+      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
+  blockPartitionedTimer __PBLOCKTIME__(                                        \
+      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
+      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
+  __kmp_stats_thread_ptr->getPartitionedTimers()->push(                        \
+      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))
+
+#define KMP_POP_PARTITIONED_TIMER()                                            \
+  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()
+
+#define KMP_SET_THREAD_STATE(state_name)                                       \
+  __kmp_stats_thread_ptr->setState(state_name)
+
+#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()
+
+#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
+  blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \
+                                    state_name)
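
Taken together, a hypothetical state-accounting sequence looks like this
(OMP_serial and OMP_parallel are real timer names; the surrounding structure is
illustrative):

// Hypothetical sequence, not code from the runtime.
KMP_INIT_PARTITIONED_TIMERS(OMP_serial);    // thread starts in the serial state
{
  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel); // serial time stops accumulating
  // ... work inside the parallel construct ...
  KMP_POP_PARTITIONED_TIMER();              // back to accumulating serial time
}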
 
 /*!
  * \brief resets all stats (counters to 0, timers to 0 elapsed ticks)
@@ -831,50 +901,50 @@ extern kmp_stats_output_module __kmp_sta
  *
  * @ingroup STATS_GATHERING
 */
-#define KMP_RESET_STATS()  __kmp_reset_stats()
+#define KMP_RESET_STATS() __kmp_reset_stats()
 
 #if (KMP_DEVELOPER_STATS)
-# define KMP_TIME_DEVELOPER_BLOCK(n)             KMP_TIME_BLOCK(n)
-# define KMP_COUNT_DEVELOPER_VALUE(n,v)          KMP_COUNT_VALUE(n,v)
-# define KMP_COUNT_DEVELOPER_BLOCK(n)            KMP_COUNT_BLOCK(n)
-# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   KMP_START_EXPLICIT_TIMER(n)
-# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    KMP_STOP_EXPLICIT_TIMER(n)
-# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
+#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
+#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
+#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
 #else
 // Null definitions
-# define KMP_TIME_DEVELOPER_BLOCK(n)             ((void)0)
-# define KMP_COUNT_DEVELOPER_VALUE(n,v)          ((void)0)
-# define KMP_COUNT_DEVELOPER_BLOCK(n)            ((void)0)
-# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   ((void)0)
-# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
-# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
 #endif
 
 #else // KMP_STATS_ENABLED
 
 // Null definitions
-#define KMP_TIME_BLOCK(n)             ((void)0)
-#define KMP_COUNT_VALUE(n,v)          ((void)0)
-#define KMP_COUNT_BLOCK(n)            ((void)0)
-#define KMP_START_EXPLICIT_TIMER(n)   ((void)0)
-#define KMP_STOP_EXPLICIT_TIMER(n)    ((void)0)
+#define KMP_TIME_BLOCK(n) ((void)0)
+#define KMP_COUNT_VALUE(n, v) ((void)0)
+#define KMP_COUNT_BLOCK(n) ((void)0)
+#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)
 
 #define KMP_OUTPUT_STATS(heading_string) ((void)0)
-#define KMP_RESET_STATS()  ((void)0)
+#define KMP_RESET_STATS() ((void)0)
 
-#define KMP_TIME_DEVELOPER_BLOCK(n)             ((void)0)
-#define KMP_COUNT_DEVELOPER_VALUE(n,v)          ((void)0)
-#define KMP_COUNT_DEVELOPER_BLOCK(n)            ((void)0)
-#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n)   ((void)0)
-#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n)    ((void)0)
-#define KMP_INIT_PARTITIONED_TIMERS(name)       ((void)0)
-#define KMP_TIME_PARTITIONED_BLOCK(name)        ((void)0)
+#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
+#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
+#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
+#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
+#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
 #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
-#define KMP_PUSH_PARTITIONED_TIMER(name)        ((void)0)
-#define KMP_POP_PARTITIONED_TIMER()             ((void)0)
-#define KMP_SET_THREAD_STATE(state_name)        ((void)0)
-#define KMP_GET_THREAD_STATE()                  ((void)0)
-#define KMP_SET_THREAD_STATE_BLOCK(state_name)  ((void)0)
-#endif  // KMP_STATS_ENABLED
+#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
+#define KMP_POP_PARTITIONED_TIMER() ((void)0)
+#define KMP_SET_THREAD_STATE(state_name) ((void)0)
+#define KMP_GET_THREAD_STATE() ((void)0)
+#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)
+#endif // KMP_STATS_ENABLED
 
 #endif // KMP_STATS_H

Modified: openmp/trunk/runtime/src/kmp_stats_timing.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats_timing.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats_timing.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stats_timing.cpp Fri May 12 13:01:32 2017
@@ -16,8 +16,8 @@
 #include <stdlib.h>
 #include <unistd.h>
 
-#include <iostream>
 #include <iomanip>
+#include <iostream>
 #include <sstream>
 
 #include "kmp.h"
@@ -26,119 +26,107 @@
 using namespace std;
 
 #if KMP_HAVE_TICK_TIME
-# if KMP_MIC
-double tsc_tick_count::tick_time()
-{
-    // pretty bad assumption of 1GHz clock for MIC
-    return 1/((double)1000*1.e6);
+#if KMP_MIC
+double tsc_tick_count::tick_time() {
+  // pretty bad assumption of 1GHz clock for MIC
+  return 1 / ((double)1000 * 1.e6);
 }
-# elif KMP_ARCH_X86 || KMP_ARCH_X86_64
-#  include <string.h>
+#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
+#include <string.h>
 // Extract the value from the CPUID information
-double tsc_tick_count::tick_time()
-{
-    static double result = 0.0;
-
-    if (result == 0.0)
-    {
-        kmp_cpuid_t cpuinfo;
-        char brand[256];
-
-        __kmp_x86_cpuid(0x80000000, 0, &cpuinfo);
-        memset(brand, 0, sizeof(brand));
-        int ids = cpuinfo.eax;
-
-        for (unsigned int i=2; i<(ids^0x80000000)+2; i++)
-            __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t)));
-
-        char * start = &brand[0];
-        for (;*start == ' '; start++)
-            ;
-
-        char * end = brand + KMP_STRLEN(brand) - 3;
-        uint64_t multiplier;
-
-        if (*end == 'M') multiplier = 1000LL*1000LL;
-        else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL;
-        else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL;
-        else
-        {
-            cout << "Error determining multiplier '" << *end << "'\n";
-            exit (-1);
-        }
-        *end = 0;
-        while (*end != ' ') end--;
-        end++;
-
-        double freq = strtod(end, &start);
-        if (freq == 0.0)
-        {
-            cout << "Error calculating frequency " <<  end << "\n";
-            exit (-1);
-        }
+double tsc_tick_count::tick_time() {
+  static double result = 0.0;
 
-        result = ((double)1.0)/(freq * multiplier);
+  if (result == 0.0) {
+    kmp_cpuid_t cpuinfo;
+    char brand[256];
+
+    __kmp_x86_cpuid(0x80000000, 0, &cpuinfo);
+    memset(brand, 0, sizeof(brand));
+    int ids = cpuinfo.eax;
+
+    for (unsigned int i = 2; i < (ids ^ 0x80000000) + 2; i++)
+      __kmp_x86_cpuid(i | 0x80000000, 0,
+                      (kmp_cpuid_t *)(brand + (i - 2) * sizeof(kmp_cpuid_t)));
+
+    char *start = &brand[0];
+    for (; *start == ' '; start++)
+      ;
+
+    char *end = brand + KMP_STRLEN(brand) - 3;
+    uint64_t multiplier;
+
+    if (*end == 'M')
+      multiplier = 1000LL * 1000LL;
+    else if (*end == 'G')
+      multiplier = 1000LL * 1000LL * 1000LL;
+    else if (*end == 'T')
+      multiplier = 1000LL * 1000LL * 1000LL * 1000LL;
+    else {
+      cout << "Error determining multiplier '" << *end << "'\n";
+      exit(-1);
+    }
+    *end = 0;
+    while (*end != ' ')
+      end--;
+    end++;
+
+    double freq = strtod(end, &start);
+    if (freq == 0.0) {
+      cout << "Error calculating frequency " << end << "\n";
+      exit(-1);
     }
-    return result;
+
+    result = ((double)1.0) / (freq * multiplier);
+  }
+  return result;
 }
-# endif
+#endif
 #endif
 
 static bool useSI = true;
 
 // Return a formatted string after normalising the value into
 // engineering style and using a suitable unit prefix (e.g. ms, us, ns).
-std::string formatSI(double interval, int width, char unit)
-{
-    std::stringstream os;
-
-    if (useSI)
-    {
-        // Preserve accuracy for small numbers, since we only multiply and the positive powers
-        // of ten are precisely representable.
-        static struct { double scale; char prefix; } ranges[] = {
-            {1.e12,'f'},
-            {1.e9, 'p'},
-            {1.e6, 'n'},
-            {1.e3, 'u'},
-            {1.0,  'm'},
-            {1.e-3,' '},
-            {1.e-6,'k'},
-            {1.e-9,'M'},
-            {1.e-12,'G'},
-            {1.e-15,'T'},
-            {1.e-18,'P'},
-            {1.e-21,'E'},
-            {1.e-24,'Z'},
-            {1.e-27,'Y'}
-        };
-
-        if (interval == 0.0)
-        {
-            os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit;
-            return os.str();
-        }
-
-        bool negative = false;
-        if (interval < 0.0)
-        {
-            negative = true;
-            interval = -interval;
-        }
-
-        for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++)
-        {
-            if (interval*ranges[i].scale < 1.e0)
-            {
-                interval = interval * 1000.e0 * ranges[i].scale;
-                os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right <<
-                    (negative ? -interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit;
-
-                return os.str();
-            }
-        }
+std::string formatSI(double interval, int width, char unit) {
+  std::stringstream os;
+
+  if (useSI) {
+    // Preserve accuracy for small numbers, since we only multiply and the
+    // positive powers of ten are precisely representable.
+    static struct {
+      double scale;
+      char prefix;
+    } ranges[] = {{1.e12, 'f'},  {1.e9, 'p'},   {1.e6, 'n'},   {1.e3, 'u'},
+                  {1.0, 'm'},    {1.e-3, ' '},  {1.e-6, 'k'},  {1.e-9, 'M'},
+                  {1.e-12, 'G'}, {1.e-15, 'T'}, {1.e-18, 'P'}, {1.e-21, 'E'},
+                  {1.e-24, 'Z'}, {1.e-27, 'Y'}};
+
+    if (interval == 0.0) {
+      os << std::setw(width - 3) << std::right << "0.00" << std::setw(3)
+         << unit;
+      return os.str();
+    }
+
+    bool negative = false;
+    if (interval < 0.0) {
+      negative = true;
+      interval = -interval;
+    }
+
+    for (int i = 0; i < (int)(sizeof(ranges) / sizeof(ranges[0])); i++) {
+      if (interval * ranges[i].scale < 1.e0) {
+        interval = interval * 1000.e0 * ranges[i].scale;
+        os << std::fixed << std::setprecision(2) << std::setw(width - 3)
+           << std::right << (negative ? -interval : interval) << std::setw(2)
+           << ranges[i].prefix << std::setw(1) << unit;
+
+        return os.str();
+      }
     }
-    os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit;
+  }
+  os << std::setprecision(2) << std::fixed << std::right << std::setw(width - 3)
+     << interval << std::setw(3) << unit;
 
-    return os.str();
+  return os.str();
 }

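As a rough illustration of what formatSI() above does, here is a standalone sketch, not the runtime's code and with a deliberately simplified prefix table: it scales a value into [1, 1000) and appends an SI prefix plus the caller-supplied unit character. The helper name toSI and the sample values are assumptions made for the example.

#include <cstdio>
#include <string>

// Scale a value into [1, 1000) against a small SI-prefix table and append the
// caller-supplied unit character. (Simplified: the real formatSI() also keeps
// fixed field widths and handles zero and negative values specially.)
static std::string toSI(double v, char unit) {
  static const struct {
    double scale;
    const char *prefix;
  } ranges[] = {{1e-9, "n"}, {1e-6, "u"}, {1e-3, "m"},
                {1.0, " "},  {1e3, "k"},  {1e6, "M"}};
  for (const auto &r : ranges) {
    if (v < r.scale * 1000.0) {
      char buf[32];
      std::snprintf(buf, sizeof(buf), "%6.2f %s%c", v / r.scale, r.prefix,
                    unit);
      return buf;
    }
  }
  char buf[32];
  std::snprintf(buf, sizeof(buf), "%6.2e  %c", v, unit); // beyond the table
  return buf;
}

int main() {
  std::printf("%s\n", toSI(1.5e-6, 'S').c_str()); // prints "  1.50 uS"
  std::printf("%s\n", toSI(0.25, 'S').c_str());   // prints "250.00 mS"
  return 0;
}
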
Modified: openmp/trunk/runtime/src/kmp_stats_timing.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stats_timing.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stats_timing.h (original)
+++ openmp/trunk/runtime/src/kmp_stats_timing.h Fri May 12 13:01:32 2017
@@ -16,97 +16,103 @@
 //===----------------------------------------------------------------------===//
 
 
-
+#include "kmp_os.h"
+#include <limits>
 #include <stdint.h>
 #include <string>
-#include <limits>
-#include "kmp_os.h"
 #if KMP_HAVE_X86INTRIN_H
-# include <x86intrin.h>
+#include <x86intrin.h>
 #endif
 
 class tsc_tick_count {
-  private:
-    int64_t my_count;
+private:
+  int64_t my_count;
+
+public:
+  class tsc_interval_t {
+    int64_t value;
+    explicit tsc_interval_t(int64_t _value) : value(_value) {}
 
   public:
-    class tsc_interval_t {
-        int64_t value;
-        explicit tsc_interval_t(int64_t _value) : value(_value) {}
-     public:
-        tsc_interval_t() : value(0) {}; // Construct 0 time duration
+    tsc_interval_t() : value(0){}; // Construct 0 time duration
 #if KMP_HAVE_TICK_TIME
-        double seconds() const; // Return the length of a time interval in seconds
+    double seconds() const; // Return the length of a time interval in seconds
 #endif
-        double ticks() const { return double(value); }
-        int64_t getValue() const { return value; }
-        tsc_interval_t& operator=(int64_t nvalue) { value = nvalue; return *this; }
-
-        friend class tsc_tick_count;
-
-        friend tsc_interval_t operator-(const tsc_tick_count& t1,
-                                        const tsc_tick_count& t0);
-        friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1,
-                                        const tsc_tick_count::tsc_interval_t& i0);
-        friend tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1,
-                                         const tsc_tick_count::tsc_interval_t& i0);
-    };
+    double ticks() const { return double(value); }
+    int64_t getValue() const { return value; }
+    tsc_interval_t &operator=(int64_t nvalue) {
+      value = nvalue;
+      return *this;
+    }
+
+    friend class tsc_tick_count;
+
+    friend tsc_interval_t operator-(const tsc_tick_count &t1,
+                                    const tsc_tick_count &t0);
+    friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t &i1,
+                                    const tsc_tick_count::tsc_interval_t &i0);
+    friend tsc_interval_t &operator+=(tsc_tick_count::tsc_interval_t &i1,
+                                      const tsc_tick_count::tsc_interval_t &i0);
+  };
 
 #if KMP_HAVE___BUILTIN_READCYCLECOUNTER
-    tsc_tick_count() : my_count(static_cast<int64_t>(__builtin_readcyclecounter())) {}
+  tsc_tick_count()
+      : my_count(static_cast<int64_t>(__builtin_readcyclecounter())) {}
 #elif KMP_HAVE___RDTSC
-    tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())) {};
+  tsc_tick_count() : my_count(static_cast<int64_t>(__rdtsc())){};
 #else
-# error Must have high resolution timer defined
+#error Must have high resolution timer defined
 #endif
-    tsc_tick_count(int64_t value) : my_count(value) {};
-    int64_t getValue() const { return my_count; }
-    tsc_tick_count later (tsc_tick_count const other) const {
-        return my_count > other.my_count ? (*this) : other;
-    }
-    tsc_tick_count earlier(tsc_tick_count const other) const {
-        return my_count < other.my_count ? (*this) : other;
-    }
+  tsc_tick_count(int64_t value) : my_count(value){};
+  int64_t getValue() const { return my_count; }
+  tsc_tick_count later(tsc_tick_count const other) const {
+    return my_count > other.my_count ? (*this) : other;
+  }
+  tsc_tick_count earlier(tsc_tick_count const other) const {
+    return my_count < other.my_count ? (*this) : other;
+  }
 #if KMP_HAVE_TICK_TIME
-    static double tick_time(); // returns seconds per cycle (period) of clock
+  static double tick_time(); // returns seconds per cycle (period) of clock
 #endif
-    static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value
-    friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0);
+  static tsc_tick_count now() {
+    return tsc_tick_count();
+  } // returns the rdtsc register value
+  friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1,
+                                                  const tsc_tick_count &t0);
 };
 
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0)
-{
-    return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count );
-}
-
-inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
-{
-    return tsc_tick_count::tsc_interval_t( i1.value-i0.value );
-}
-
-inline tsc_tick_count::tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0)
-{
-    i1.value += i0.value;
-    return i1;
+inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1,
+                                                const tsc_tick_count &t0) {
+  return tsc_tick_count::tsc_interval_t(t1.my_count - t0.my_count);
+}
+
+inline tsc_tick_count::tsc_interval_t
+operator-(const tsc_tick_count::tsc_interval_t &i1,
+          const tsc_tick_count::tsc_interval_t &i0) {
+  return tsc_tick_count::tsc_interval_t(i1.value - i0.value);
+}
+
+inline tsc_tick_count::tsc_interval_t &
+operator+=(tsc_tick_count::tsc_interval_t &i1,
+           const tsc_tick_count::tsc_interval_t &i0) {
+  i1.value += i0.value;
+  return i1;
 }
 
 #if KMP_HAVE_TICK_TIME
-inline double tsc_tick_count::tsc_interval_t::seconds() const
-{
-    return value*tick_time();
+inline double tsc_tick_count::tsc_interval_t::seconds() const {
+  return value * tick_time();
 }
 #endif
 
 extern std::string formatSI(double interval, int width, char unit);
 
-inline std::string formatSeconds(double interval, int width)
-{
-    return formatSI(interval, width, 'S');
+inline std::string formatSeconds(double interval, int width) {
+  return formatSI(interval, width, 'S');
 }
 
-inline std::string formatTicks(double interval, int width)
-{
-    return formatSI(interval, width, 'T');
+inline std::string formatTicks(double interval, int width) {
+  return formatSI(interval, width, 'T');
 }
 
 #endif // KMP_STATS_TIMING_H

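To make the intended use of tsc_tick_count concrete, here is a minimal standalone analogue of the snapshot-and-subtract pattern the interface above supports. It assumes an x86 compiler that provides __rdtsc() via <x86intrin.h>; it is not the runtime's own class, and the timed loop is just filler work:

#include <cstdint>
#include <cstdio>
#include <x86intrin.h> // provides __rdtsc() on x86 compilers

int main() {
  uint64_t t0 = __rdtsc(); // first snapshot, like tsc_tick_count::now()
  volatile double sink = 0.0;
  for (int i = 0; i < 1000000; ++i) // some work to time
    sink += i * 0.5;
  uint64_t t1 = __rdtsc();  // second snapshot
  uint64_t ticks = t1 - t0; // like (t1 - t0).ticks() on a tsc_interval_t
  std::printf("region took %llu ticks (sink=%f)\n",
              (unsigned long long)ticks, (double)sink);
  return 0;
}
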
Modified: openmp/trunk/runtime/src/kmp_str.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_str.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_str.cpp (original)
+++ openmp/trunk/runtime/src/kmp_str.cpp Fri May 12 13:01:32 2017
@@ -15,866 +15,721 @@
 
 #include "kmp_str.h"
 
-#include <stdarg.h>    // va_*
-#include <stdio.h>     // vsnprintf()
-#include <stdlib.h>    // malloc(), realloc()
+#include <stdarg.h> // va_*
+#include <stdio.h> // vsnprintf()
+#include <stdlib.h> // malloc(), realloc()
 
 #include "kmp.h"
 #include "kmp_i18n.h"
 
-/*
-    ------------------------------------------------------------------------------------------------
-    String buffer.
-    ------------------------------------------------------------------------------------------------
+/* String buffer.
 
-    Usage:
+   Usage:
 
-        // Declare buffer and initialize it.
-        kmp_str_buf_t  buffer;
-	__kmp_str_buf_init( & buffer );
-
-        // Print to buffer.
-        __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 );
-        __kmp_str_buf_print( & buffer, "    <%s>\n", line );
-
-        // Use buffer contents. buffer.str is a pointer to data, buffer.used is a number of printed
-        // characters (not including terminating zero).
-        write( fd, buffer.str, buffer.used );
-
-        // Free buffer.
-        __kmp_str_buf_free( & buffer );
-
-        // Alternatively, you can detach allocated memory from buffer:
-        __kmp_str_buf_detach( & buffer );
-        return buffer.str;    // That memory should be freed eventually.
-
-
-    Notes:
-
-        * Buffer users may use buffer.str and buffer.used. Users should not change any fields of
-          buffer directly.
-
-        * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string ("").
-
-        * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is
-          exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc()
-          as amount of used memory grows.
-
-        * Buffer doubles amount of allocated memory each time it is exhausted.
-
-    ------------------------------------------------------------------------------------------------
+   // Declare buffer and initialize it.
+   kmp_str_buf_t  buffer;
+   __kmp_str_buf_init( & buffer );
+
+   // Print to buffer.
+   __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12);
+   __kmp_str_buf_print(& buffer, "    <%s>\n", line);
+
+   // Use buffer contents. buffer.str is a pointer to data, buffer.used is a
+   // number of printed characters (not including terminating zero).
+   write( fd, buffer.str, buffer.used );
+
+   // Free buffer.
+   __kmp_str_buf_free( & buffer );
+
+   // Alternatively, you can detach allocated memory from buffer:
+   __kmp_str_buf_detach( & buffer );
+   return buffer.str;    // That memory should be freed eventually.
+
+   Notes:
+
+   * Buffer users may use buffer.str and buffer.used. Users should not change
+     any fields of buffer directly.
+   * buffer.str is never NULL. If buffer is empty, buffer.str points to empty
+     string ("").
+   * For performance reasons, buffer uses stack memory (buffer.bulk) first. If
+     stack memory is exhausted, buffer allocates memory on heap by malloc(), and
+     reallocates it by realloc() as amount of used memory grows.
+   * Buffer doubles amount of allocated memory each time it is exhausted.
 */
 
 // TODO: __kmp_str_buf_print() can use thread local memory allocator.
 
-#define KMP_STR_BUF_INVARIANT( b )                                                                \
-    {                                                                                             \
-        KMP_DEBUG_ASSERT( (b)->str != NULL );                                                     \
-        KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) );                                     \
-        KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 );                                 \
-        KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size );                                      \
-        KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 );  \
-        KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 );   \
-    }
-
-void
- __kmp_str_buf_clear(
-     kmp_str_buf_t * buffer
-) {
-    KMP_STR_BUF_INVARIANT( buffer );
-    if ( buffer->used > 0 ) {
-        buffer->used = 0;
-        buffer->str[ 0 ] = 0;
-    }; // if
-    KMP_STR_BUF_INVARIANT( buffer );
+#define KMP_STR_BUF_INVARIANT(b)                                               \
+  {                                                                            \
+    KMP_DEBUG_ASSERT((b)->str != NULL);                                        \
+    KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk));                          \
+    KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0);                      \
+    KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size);                         \
+    KMP_DEBUG_ASSERT(                                                          \
+        (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1);       \
+    KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? (b)->str != &(b)->bulk[0] \
+                                                   : 1);                       \
+  }
+
+void __kmp_str_buf_clear(kmp_str_buf_t *buffer) {
+  KMP_STR_BUF_INVARIANT(buffer);
+  if (buffer->used > 0) {
+    buffer->used = 0;
+    buffer->str[0] = 0;
+  }; // if
+  KMP_STR_BUF_INVARIANT(buffer);
 } // __kmp_str_buf_clear
 
+void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size) {
+  KMP_STR_BUF_INVARIANT(buffer);
+  KMP_DEBUG_ASSERT(size >= 0);
 
-void
-__kmp_str_buf_reserve(
-    kmp_str_buf_t * buffer,
-    int             size
-) {
-
-    KMP_STR_BUF_INVARIANT( buffer );
-    KMP_DEBUG_ASSERT( size >= 0 );
-
-    if ( buffer->size < (unsigned int)size ) {
-
-        // Calculate buffer size.
-        do {
-            buffer->size *= 2;
-        } while ( buffer->size < (unsigned int)size );
-
-        // Enlarge buffer.
-        if ( buffer->str == & buffer->bulk[ 0 ] ) {
-            buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
-            if ( buffer->str == NULL ) {
-		KMP_FATAL( MemoryAllocFailed );
-            }; // if
-            KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
-        } else {
-            buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size );
-            if ( buffer->str == NULL ) {
-		KMP_FATAL( MemoryAllocFailed );
-            }; // if
-        }; // if
-
-    }; // if
-
-    KMP_DEBUG_ASSERT( buffer->size > 0 );
-    KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size );
-    KMP_STR_BUF_INVARIANT( buffer );
+  if (buffer->size < (unsigned int)size) {
+    // Calculate buffer size.
+    do {
+      buffer->size *= 2;
+    } while (buffer->size < (unsigned int)size);
 
+    // Enlarge buffer.
+    if (buffer->str == &buffer->bulk[0]) {
+      buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
+      if (buffer->str == NULL) {
+        KMP_FATAL(MemoryAllocFailed);
+      }; // if
+      KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
+    } else {
+      buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size);
+      if (buffer->str == NULL) {
+        KMP_FATAL(MemoryAllocFailed);
+      }; // if
+    }; // if
+
+  }; // if
+
+  KMP_DEBUG_ASSERT(buffer->size > 0);
+  KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size);
+  KMP_STR_BUF_INVARIANT(buffer);
 } // __kmp_str_buf_reserve
 
+void __kmp_str_buf_detach(kmp_str_buf_t *buffer) {
+  KMP_STR_BUF_INVARIANT(buffer);
 
-void
-__kmp_str_buf_detach(
-    kmp_str_buf_t *  buffer
-) {
-
-    KMP_STR_BUF_INVARIANT( buffer );
-
-    // If internal bulk is used, allocate memory and copy it.
-    if ( buffer->size <= sizeof( buffer->bulk ) ) {
-        buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size );
-        if ( buffer->str == NULL ) {
-		KMP_FATAL( MemoryAllocFailed );
-        }; // if
-        KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 );
+  // If internal bulk is used, allocate memory and copy it.
+  if (buffer->size <= sizeof(buffer->bulk)) {
+    buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size);
+    if (buffer->str == NULL) {
+      KMP_FATAL(MemoryAllocFailed);
     }; // if
-
+    KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1);
+  }; // if
 } // __kmp_str_buf_detach
 
-
-void
-__kmp_str_buf_free(
-    kmp_str_buf_t * buffer
-) {
-    KMP_STR_BUF_INVARIANT( buffer );
-    if ( buffer->size > sizeof( buffer->bulk ) ) {
-        KMP_INTERNAL_FREE( buffer->str );
-    }; // if
-    buffer->str  = buffer->bulk;
-    buffer->size = sizeof( buffer->bulk );
-    buffer->used = 0;
-    KMP_STR_BUF_INVARIANT( buffer );
+void __kmp_str_buf_free(kmp_str_buf_t *buffer) {
+  KMP_STR_BUF_INVARIANT(buffer);
+  if (buffer->size > sizeof(buffer->bulk)) {
+    KMP_INTERNAL_FREE(buffer->str);
+  }; // if
+  buffer->str = buffer->bulk;
+  buffer->size = sizeof(buffer->bulk);
+  buffer->used = 0;
+  KMP_STR_BUF_INVARIANT(buffer);
 } // __kmp_str_buf_free
 
-
-void
-__kmp_str_buf_cat(
-    kmp_str_buf_t * buffer,
-    char const *    str,
-    int             len
-) {
-    KMP_STR_BUF_INVARIANT( buffer );
-    KMP_DEBUG_ASSERT( str != NULL );
-    KMP_DEBUG_ASSERT( len >= 0 );
-    __kmp_str_buf_reserve( buffer, buffer->used + len + 1 );
-    KMP_MEMCPY( buffer->str + buffer->used, str, len );
-    buffer->str[ buffer->used + len ] = 0;
-    buffer->used += len;
-    KMP_STR_BUF_INVARIANT( buffer );
+void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) {
+  KMP_STR_BUF_INVARIANT(buffer);
+  KMP_DEBUG_ASSERT(str != NULL);
+  KMP_DEBUG_ASSERT(len >= 0);
+  __kmp_str_buf_reserve(buffer, buffer->used + len + 1);
+  KMP_MEMCPY(buffer->str + buffer->used, str, len);
+  buffer->str[buffer->used + len] = 0;
+  buffer->used += len;
+  KMP_STR_BUF_INVARIANT(buffer);
 } // __kmp_str_buf_cat
 
+void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+                          va_list args) {
+  KMP_STR_BUF_INVARIANT(buffer);
+
+  for (;;) {
+    int const free = buffer->size - buffer->used;
+    int rc;
+    int size;
+
+    // Try to format string.
+    {
+/* On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf()
+   crashes if it is called for the second time with the same args. To prevent
+   the crash, we have to pass a fresh intact copy of args to vsnprintf() on each
+   iteration.
 
-void
-__kmp_str_buf_vprint(
-    kmp_str_buf_t *  buffer,
-    char const *     format,
-    va_list          args
-) {
-
-    KMP_STR_BUF_INVARIANT( buffer );
-
-    for ( ; ; ) {
-
-        int const free = buffer->size - buffer->used;
-        int       rc;
-        int       size;
-
-        // Try to format string.
-        {
-            /*
-                On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it
-                is called for the second time with the same args. To prevent the crash, we have to
-                pass a fresh intact copy of args to vsnprintf() on each iteration.
-
-                Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it
-                seems vsnprintf() does not modify args argument on Windows* OS.
-            */
-
-            #if ! KMP_OS_WINDOWS
-                va_list _args;
-                __va_copy( _args, args );  // Make copy of args.
-                #define args _args         // Substitute args with its copy, _args.
-            #endif // KMP_OS_WINDOWS
-            rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args );
-            #if ! KMP_OS_WINDOWS
-                #undef args                // Remove substitution.
-                va_end( _args );
-            #endif // KMP_OS_WINDOWS
-        }
-
-        // No errors, string has been formatted.
-        if ( rc >= 0 && rc < free ) {
-            buffer->used += rc;
-            break;
-        }; // if
-
-        // Error occurred, buffer is too small.
-        if ( rc >= 0 ) {
-            // C99-conforming implementation of vsnprintf returns required buffer size.
-            size = buffer->used + rc + 1;
-        } else {
-            // Older implementations just return -1. Double buffer size.
-            size = buffer->size * 2;
-        }; // if
-
-        // Enlarge buffer.
-        __kmp_str_buf_reserve( buffer, size );
+   Unfortunately, standard va_copy() macro is not available on Windows* OS.
+   However, it seems vsnprintf() does not modify args argument on Windows* OS.
+*/
 
-        // And try again.
+#if !KMP_OS_WINDOWS
+      va_list _args;
+      __va_copy(_args, args); // Make copy of args.
+#define args _args // Substitute args with its copy, _args.
+#endif // KMP_OS_WINDOWS
+      rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args);
+#if !KMP_OS_WINDOWS
+#undef args // Remove substitution.
+      va_end(_args);
+#endif // KMP_OS_WINDOWS
+    }
 
-    }; // forever
+    // No errors, string has been formatted.
+    if (rc >= 0 && rc < free) {
+      buffer->used += rc;
+      break;
+    }; // if
 
-    KMP_DEBUG_ASSERT( buffer->size > 0 );
-    KMP_STR_BUF_INVARIANT( buffer );
+    // Error occurred, buffer is too small.
+    if (rc >= 0) {
+      // C99-conforming implementation of vsnprintf returns required buffer size
+      size = buffer->used + rc + 1;
+    } else {
+      // Older implementations just return -1. Double buffer size.
+      size = buffer->size * 2;
+    }; // if
 
-} // __kmp_str_buf_vprint
+    // Enlarge buffer.
+    __kmp_str_buf_reserve(buffer, size);
 
+    // And try again.
+  }; // forever
 
-void
-__kmp_str_buf_print(
-    kmp_str_buf_t *  buffer,
-    char const *     format,
-    ...
-) {
-
-    va_list args;
-    va_start( args, format );
-    __kmp_str_buf_vprint( buffer, format, args );
-    va_end( args );
+  KMP_DEBUG_ASSERT(buffer->size > 0);
+  KMP_STR_BUF_INVARIANT(buffer);
+} // __kmp_str_buf_vprint
 
+void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
+  va_list args;
+  va_start(args, format);
+  __kmp_str_buf_vprint(buffer, format, args);
+  va_end(args);
 } // __kmp_str_buf_print
 
+/* The function prints the specified size to the buffer. The size is expressed
+   using the biggest possible unit; for example, 1024 is printed as "1k". */
+void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) {
+  char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"};
+  int const units = sizeof(names) / sizeof(char const *);
+  int u = 0;
+  if (size > 0) {
+    while ((size % 1024 == 0) && (u + 1 < units)) {
+      size = size / 1024;
+      ++u;
+    }; // while
+  }; // if
 
-/*
-    The function prints specified size to buffer. Size is expressed using biggest possible unit, for
-    example 1024 is printed as "1k".
-*/
-
-void
-__kmp_str_buf_print_size(
-    kmp_str_buf_t * buf,
-    size_t          size
-) {
-
-    char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" };
-    int const    units   = sizeof( names ) / sizeof( char const * );
-    int          u       = 0;
-    if ( size > 0 ) {
-        while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) {
-            size = size / 1024;
-            ++ u;
-        }; // while
-    }; // if
-
-    __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] );
-
+  __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]);
 } // __kmp_str_buf_print_size
 
-
-void
-__kmp_str_fname_init(
-    kmp_str_fname_t * fname,
-    char const *      path
-) {
-
-    fname->path = NULL;
-    fname->dir  = NULL;
-    fname->base = NULL;
-
-    if ( path != NULL ) {
-        char * slash = NULL;    // Pointer to the last character of dir.
-        char * base  = NULL;    // Pointer to the beginning of basename.
-        fname->path = __kmp_str_format( "%s", path );
-            // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it
-            // causes assertioon id debug heap, so I had to replace strdup with __kmp_str_format().
-        if ( KMP_OS_WINDOWS ) {
-            __kmp_str_replace( fname->path, '\\', '/' );
-        }; // if
-        fname->dir = __kmp_str_format( "%s", fname->path );
-        slash = strrchr( fname->dir, '/' );
-        if ( KMP_OS_WINDOWS && slash == NULL ) {           // On Windows* OS, if slash not found,
-            char first = TOLOWER( fname->dir[ 0 ] );     // look for drive.
-            if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) {
-                slash = & fname->dir[ 1 ];
-            }; // if
-        }; // if
-        base = ( slash == NULL ? fname->dir : slash + 1 );
-        fname->base = __kmp_str_format( "%s", base );    // Copy basename
-        * base = 0;                    // and truncate dir.
-    }; // if
+void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) {
+  fname->path = NULL;
+  fname->dir = NULL;
+  fname->base = NULL;
+
+  if (path != NULL) {
+    char *slash = NULL; // Pointer to the last character of dir.
+    char *base = NULL; // Pointer to the beginning of basename.
+    fname->path = __kmp_str_format("%s", path);
+    // Original code used strdup() function to copy a string, but on Windows* OS
+    // Intel(R) 64 it causes an assertion in debug heap, so I had to replace
+    // strdup with __kmp_str_format().
+    if (KMP_OS_WINDOWS) {
+      __kmp_str_replace(fname->path, '\\', '/');
+    }; // if
+    fname->dir = __kmp_str_format("%s", fname->path);
+    slash = strrchr(fname->dir, '/');
+    if (KMP_OS_WINDOWS &&
+        slash == NULL) { // On Windows* OS, if slash not found,
+      char first = TOLOWER(fname->dir[0]); // look for drive.
+      if ('a' <= first && first <= 'z' && fname->dir[1] == ':') {
+        slash = &fname->dir[1];
+      }; // if
+    }; // if
+    base = (slash == NULL ? fname->dir : slash + 1);
+    fname->base = __kmp_str_format("%s", base); // Copy basename
+    *base = 0; // and truncate dir.
+  }; // if
 
 } // kmp_str_fname_init
 
-
-void
-__kmp_str_fname_free(
-    kmp_str_fname_t * fname
-) {
-    __kmp_str_free( (char const **)( & fname->path ) );
-    __kmp_str_free( (char const **)( & fname->dir  ) );
-    __kmp_str_free( (char const **)( & fname->base ) );
+void __kmp_str_fname_free(kmp_str_fname_t *fname) {
+  __kmp_str_free((char const **)(&fname->path));
+  __kmp_str_free((char const **)(&fname->dir));
+  __kmp_str_free((char const **)(&fname->base));
 } // kmp_str_fname_free
 
+int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) {
+  int dir_match = 1;
+  int base_match = 1;
+
+  if (pattern != NULL) {
+    kmp_str_fname_t ptrn;
+    __kmp_str_fname_init(&ptrn, pattern);
+    dir_match = strcmp(ptrn.dir, "*/") == 0 ||
+                (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir));
+    base_match = strcmp(ptrn.base, "*") == 0 ||
+                 (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base));
+    __kmp_str_fname_free(&ptrn);
+  }; // if
 
-int
-__kmp_str_fname_match(
-    kmp_str_fname_t const * fname,
-    char const *            pattern
-) {
-
-    int dir_match  = 1;
-    int base_match = 1;
-
-    if ( pattern != NULL ) {
-        kmp_str_fname_t ptrn;
-        __kmp_str_fname_init( & ptrn, pattern );
-        dir_match =
-            strcmp( ptrn.dir, "*/" ) == 0
-            ||
-            ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) );
-        base_match =
-            strcmp( ptrn.base, "*" ) == 0
-            ||
-            ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) );
-        __kmp_str_fname_free( & ptrn );
-    }; // if
-
-    return dir_match && base_match;
-
+  return dir_match && base_match;
 } // __kmp_str_fname_match
 
+kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname) {
+  kmp_str_loc_t loc;
 
-kmp_str_loc_t
-__kmp_str_loc_init(
-    char const * psource,
-    int          init_fname
-) {
-
-    kmp_str_loc_t loc;
-
-    loc._bulk = NULL;
-    loc.file  = NULL;
-    loc.func  = NULL;
-    loc.line  = 0;
-    loc.col   = 0;
-
-    if ( psource != NULL ) {
-
-        char * str   = NULL;
-        char * dummy = NULL;
-        char * line  = NULL;
-        char * col   = NULL;
-
-        // Copy psource to keep it intact.
-        loc._bulk = __kmp_str_format( "%s", psource );
-
-        // Parse psource string: ";file;func;line;col;;"
-        str = loc._bulk;
-        __kmp_str_split( str, ';', & dummy,    & str );
-        __kmp_str_split( str, ';', & loc.file, & str );
-        __kmp_str_split( str, ';', & loc.func, & str );
-        __kmp_str_split( str, ';', & line,     & str );
-        __kmp_str_split( str, ';', & col,      & str );
-
-        // Convert line and col into numberic values.
-        if ( line != NULL ) {
-            loc.line = atoi( line );
-            if ( loc.line < 0 ) {
-                loc.line = 0;
-            }; // if
-        }; // if
-        if ( col != NULL ) {
-            loc.col = atoi( col );
-            if ( loc.col < 0 ) {
-                loc.col = 0;
-            }; // if
-        }; // if
-
+  loc._bulk = NULL;
+  loc.file = NULL;
+  loc.func = NULL;
+  loc.line = 0;
+  loc.col = 0;
+
+  if (psource != NULL) {
+    char *str = NULL;
+    char *dummy = NULL;
+    char *line = NULL;
+    char *col = NULL;
+
+    // Copy psource to keep it intact.
+    loc._bulk = __kmp_str_format("%s", psource);
+
+    // Parse psource string: ";file;func;line;col;;"
+    str = loc._bulk;
+    __kmp_str_split(str, ';', &dummy, &str);
+    __kmp_str_split(str, ';', &loc.file, &str);
+    __kmp_str_split(str, ';', &loc.func, &str);
+    __kmp_str_split(str, ';', &line, &str);
+    __kmp_str_split(str, ';', &col, &str);
+
+    // Convert line and col into numeric values.
+    if (line != NULL) {
+      loc.line = atoi(line);
+      if (loc.line < 0) {
+        loc.line = 0;
+      }; // if
+    }; // if
+    if (col != NULL) {
+      loc.col = atoi(col);
+      if (loc.col < 0) {
+        loc.col = 0;
+      }; // if
     }; // if
 
-    __kmp_str_fname_init( & loc.fname, init_fname ? loc.file : NULL );
+  }; // if
 
-    return loc;
+  __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL);
 
+  return loc;
 } // kmp_str_loc_init
 
-
-void
-__kmp_str_loc_free(
-    kmp_str_loc_t * loc
-) {
-    __kmp_str_fname_free( & loc->fname );
-    __kmp_str_free((const char **) &(loc->_bulk));
-    loc->file  = NULL;
-    loc->func  = NULL;
+void __kmp_str_loc_free(kmp_str_loc_t *loc) {
+  __kmp_str_fname_free(&loc->fname);
+  __kmp_str_free((const char **)&(loc->_bulk));
+  loc->file = NULL;
+  loc->func = NULL;
 } // kmp_str_loc_free
 
-
-
-/*
-    This function is intended to compare file names. On Windows* OS file names are case-insensitive,
-    so functions performs case-insensitive comparison. On Linux* OS it performs case-sensitive
-    comparison.
-    Note: The function returns *true* if strings are *equal*.
-*/
-
-int
-__kmp_str_eqf(         // True, if strings are equal, false otherwise.
-    char const * lhs,  // First string.
-    char const * rhs   // Second string.
-) {
-    int result;
-    #if KMP_OS_WINDOWS
-        result = ( _stricmp( lhs, rhs ) == 0 );
-    #else
-        result = ( strcmp( lhs, rhs ) == 0 );
-    #endif
-    return result;
+/* This function is intended to compare file names. On Windows* OS file names
+   are case-insensitive, so the function performs a case-insensitive comparison. On
+   Linux* OS it performs case-sensitive comparison. Note: The function returns
+   *true* if strings are *equal*. */
+int __kmp_str_eqf( // True, if strings are equal, false otherwise.
+    char const *lhs, // First string.
+    char const *rhs // Second string.
+    ) {
+  int result;
+#if KMP_OS_WINDOWS
+  result = (_stricmp(lhs, rhs) == 0);
+#else
+  result = (strcmp(lhs, rhs) == 0);
+#endif
+  return result;
 } // __kmp_str_eqf
 
-
-/*
-    This function is like sprintf, but it *allocates* new buffer, which must be freed eventually by
-    __kmp_str_free(). The function is very convenient for constructing strings, it successfully
-    replaces strdup(), strcat(), it frees programmer from buffer allocations and helps to avoid
-    buffer overflows. Examples:
-
-        str = __kmp_str_format( "%s", orig );              // strdup(), do not care about buffer size.
-        __kmp_str_free( & str );
-        str = __kmp_str_format( "%s%s", orig1, orig2 );    // strcat(), do not care about buffer size.
-        __kmp_str_free( & str );
-        str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
-        __kmp_str_free( & str );
-
-    Performance note:
-        This function allocates memory with malloc() calls, so do not call it from
-        performance-critical code. In performance-critical code consider using kmp_str_buf_t
-        instead, since it uses stack-allocated buffer for short strings.
-
-    Why does this function use malloc()?
-        1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There are no
-           reasons in using __kmp_allocate() for strings due to extra overhead while cache-aligned
-           memory is not necessary.
-        2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure.
-           We need to perform string operations during library startup (for example, in
-           __kmp_register_library_startup()) when no thread structures are allocated yet.
-    So standard malloc() is the only available option.
+/* This function is like sprintf, but it *allocates* new buffer, which must be
+   freed eventually by __kmp_str_free(). The function is very convenient for
+   constructing strings: it replaces strdup() and strcat(), frees the
+   programmer from buffer allocations, and helps to avoid buffer overflows.
+   Examples:
+
+   str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size
+   __kmp_str_free( & str );
+   str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care
+                                                   // about buffer size.
+   __kmp_str_free( & str );
+   str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string.
+   __kmp_str_free( & str );
+
+   Performance note:
+   This function allocates memory with malloc() calls, so do not call it from
+   performance-critical code. In performance-critical code consider using
+   kmp_str_buf_t instead, since it uses stack-allocated buffer for short
+   strings.
+
+   Why does this function use malloc()?
+   1. __kmp_allocate() returns cache-aligned memory allocated with malloc().
+      There is no reason to use __kmp_allocate() for strings: it adds extra
+      overhead, and cache-aligned memory is not necessary.
+   2. __kmp_thread_malloc() cannot be used because it requires pointer to thread
+      structure. We need to perform string operations during library startup
+      (for example, in __kmp_register_library_startup()) when no thread
+      structures are allocated yet.
+   So standard malloc() is the only available option.
 */
 
-char *
-__kmp_str_format(           // Allocated string.
-    char const * format,    // Format string.
-    ...                     // Other parameters.
-) {
-
-    va_list args;
-    int     size   = 512;
-    char *  buffer = NULL;
-    int     rc;
-
-    // Allocate buffer.
-    buffer = (char *) KMP_INTERNAL_MALLOC( size );
-    if ( buffer == NULL ) {
-	KMP_FATAL( MemoryAllocFailed );
-    }; // if
-
-    for ( ; ; ) {
-
-        // Try to format string.
-        va_start( args, format );
-        rc = KMP_VSNPRINTF( buffer, size, format, args );
-        va_end( args );
-
-        // No errors, string has been formatted.
-        if ( rc >= 0 && rc < size ) {
-            break;
-        }; // if
-
-        // Error occurred, buffer is too small.
-        if ( rc >= 0 ) {
-            // C99-conforming implementation of vsnprintf returns required buffer size.
-            size = rc + 1;
-        } else {
-            // Older implementations just return -1.
-            size = size * 2;
-        }; // if
-
-        // Enlarge buffer and try again.
-        buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size );
-        if ( buffer == NULL ) {
-    	    KMP_FATAL( MemoryAllocFailed );
-        }; // if
-
-    }; // forever
-
-    return buffer;
+char *__kmp_str_format( // Allocated string.
+    char const *format, // Format string.
+    ... // Other parameters.
+    ) {
+  va_list args;
+  int size = 512;
+  char *buffer = NULL;
+  int rc;
+
+  // Allocate buffer.
+  buffer = (char *)KMP_INTERNAL_MALLOC(size);
+  if (buffer == NULL) {
+    KMP_FATAL(MemoryAllocFailed);
+  }; // if
+
+  for (;;) {
+    // Try to format string.
+    va_start(args, format);
+    rc = KMP_VSNPRINTF(buffer, size, format, args);
+    va_end(args);
+
+    // No errors, string has been formatted.
+    if (rc >= 0 && rc < size) {
+      break;
+    }; // if
+
+    // Error occurred, buffer is too small.
+    if (rc >= 0) {
+      // C99-conforming implementation of vsnprintf returns required buffer
+      // size.
+      size = rc + 1;
+    } else {
+      // Older implementations just return -1.
+      size = size * 2;
+    }; // if
+
+    // Enlarge buffer and try again.
+    buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size);
+    if (buffer == NULL) {
+      KMP_FATAL(MemoryAllocFailed);
+    }; // if
+  }; // forever
 
+  return buffer;
 } // func __kmp_str_format
 
-
-void
-__kmp_str_free(
-    char const * * str
-) {
-    KMP_DEBUG_ASSERT( str != NULL );
-    KMP_INTERNAL_FREE( (void *) * str );
-    * str = NULL;
+void __kmp_str_free(char const **str) {
+  KMP_DEBUG_ASSERT(str != NULL);
+  KMP_INTERNAL_FREE((void *)*str);
+  *str = NULL;
 } // func __kmp_str_free
 
-
-/* If len is zero, returns true iff target and data have exact case-insensitive match.
-   If len is negative, returns true iff target is a case-insensitive substring of data.
-   If len is positive, returns true iff target is a case-insensitive substring of data or
-     vice versa, and neither is shorter than len.
-*/
-int
-__kmp_str_match(
-    char const * target,
-    int          len,
-    char const * data
-) {
-    int i;
-    if ( target == NULL || data == NULL ) {
-        return FALSE;
-    }; // if
-    for ( i = 0; target[i] && data[i]; ++ i ) {
-        if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) {
-            return FALSE;
-        }; // if
-    }; // for i
-    return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! data[i] ) ) );
+/* If len is zero, returns true iff target and data have exact case-insensitive
+   match. If len is negative, returns true iff target is a case-insensitive
+   substring of data. If len is positive, returns true iff target is a
+   case-insensitive substring of data or vice versa, and neither is shorter than
+   len. */
+int __kmp_str_match(char const *target, int len, char const *data) {
+  int i;
+  if (target == NULL || data == NULL) {
+    return FALSE;
+  }; // if
+  for (i = 0; target[i] && data[i]; ++i) {
+    if (TOLOWER(target[i]) != TOLOWER(data[i])) {
+      return FALSE;
+    }; // if
+  }; // for i
+  return ((len > 0) ? i >= len : (!target[i] && (len || !data[i])));
 } // __kmp_str_match
 
-
-int
-__kmp_str_match_false( char const * data ) {
-    int result =
-        __kmp_str_match( "false",   1, data ) ||
-        __kmp_str_match( "off",     2, data ) ||
-        __kmp_str_match( "0",       1, data ) ||
-        __kmp_str_match( ".false.", 2, data ) ||
-        __kmp_str_match( ".f.",     2, data ) ||
-        __kmp_str_match( "no",      1, data );
-    return result;
+int __kmp_str_match_false(char const *data) {
+  int result =
+      __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) ||
+      __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) ||
+      __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data);
+  return result;
 } // __kmp_str_match_false
 
-
-int
-__kmp_str_match_true( char const * data ) {
-    int result =
-        __kmp_str_match( "true",   1, data ) ||
-        __kmp_str_match( "on",     2, data ) ||
-        __kmp_str_match( "1",      1, data ) ||
-        __kmp_str_match( ".true.", 2, data ) ||
-        __kmp_str_match( ".t.",    2, data ) ||
-        __kmp_str_match( "yes",    1, data );
-    return result;
+int __kmp_str_match_true(char const *data) {
+  int result =
+      __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) ||
+      __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) ||
+      __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 1, data);
+  return result;
 } // __kmp_str_match_true
 
-void
-__kmp_str_replace(
-    char * str,
-    char   search_for,
-    char   replace_with
-) {
-
-    char * found = NULL;
-
-    found = strchr( str, search_for );
-    while ( found ) {
-        * found = replace_with;
-        found = strchr( found + 1, search_for );
-    }; // while
+void __kmp_str_replace(char *str, char search_for, char replace_with) {
+  char *found = NULL;
 
+  found = strchr(str, search_for);
+  while (found) {
+    *found = replace_with;
+    found = strchr(found + 1, search_for);
+  }; // while
 } // __kmp_str_replace
 
-
-void
-__kmp_str_split(
-    char *  str,    // I: String to split.
-    char    delim,  // I: Character to split on.
-    char ** head,   // O: Pointer to head (may be NULL).
-    char ** tail    // O: Pointer to tail (may be NULL).
-) {
-    char * h = str;
-    char * t = NULL;
-    if ( str != NULL ) {
-        char * ptr = strchr( str, delim );
-        if ( ptr != NULL ) {
-            * ptr  = 0;
-            t = ptr + 1;
-        }; // if
-    }; // if
-    if ( head != NULL ) {
-        * head = h;
-    }; // if
-    if ( tail != NULL ) {
-        * tail = t;
-    }; // if
+void __kmp_str_split(char *str, // I: String to split.
+                     char delim, // I: Character to split on.
+                     char **head, // O: Pointer to head (may be NULL).
+                     char **tail // O: Pointer to tail (may be NULL).
+                     ) {
+  char *h = str;
+  char *t = NULL;
+  if (str != NULL) {
+    char *ptr = strchr(str, delim);
+    if (ptr != NULL) {
+      *ptr = 0;
+      t = ptr + 1;
+    }; // if
+  }; // if
+  if (head != NULL) {
+    *head = h;
+  }; // if
+  if (tail != NULL) {
+    *tail = t;
+  }; // if
 } // __kmp_str_split
 
-/*
-    strtok_r() is not available on Windows* OS. This function reimplements strtok_r().
-*/
-char *
-__kmp_str_token(
-    char *       str,   // String to split into tokens. Note: String *is* modified!
-    char const * delim, // Delimiters.
-    char **      buf    // Internal buffer.
-) {
-    char * token = NULL;
-    #if KMP_OS_WINDOWS
-        // On Windows* OS there is no strtok_r() function. Let us implement it.
-        if ( str != NULL ) {
-            * buf = str;                       // First call, initialize buf.
-        }; // if
-        * buf += strspn( * buf, delim );       // Skip leading delimiters.
-        if ( ** buf != 0 ) {                   // Rest of the string is not yet empty.
-            token = * buf;                     // Use it as result.
-            * buf += strcspn( * buf, delim );  // Skip non-delimiters.
-            if ( ** buf != 0 ) {               // Rest of the string is not yet empty.
-                ** buf = 0;                    // Terminate token here.
-                * buf += 1;                    // Advance buf to start with the next token next time.
-            }; // if
-        }; // if
-    #else
-        // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
-        token = strtok_r( str, delim, buf );
-    #endif
-    return token;
+/* strtok_r() is not available on Windows* OS. This function reimplements
+   strtok_r(). */
+char *__kmp_str_token(
+    char *str, // String to split into tokens. Note: String *is* modified!
+    char const *delim, // Delimiters.
+    char **buf // Internal buffer.
+    ) {
+  char *token = NULL;
+#if KMP_OS_WINDOWS
+  // On Windows* OS there is no strtok_r() function. Let us implement it.
+  if (str != NULL) {
+    *buf = str; // First call, initialize buf.
+  }; // if
+  *buf += strspn(*buf, delim); // Skip leading delimiters.
+  if (**buf != 0) { // Rest of the string is not yet empty.
+    token = *buf; // Use it as result.
+    *buf += strcspn(*buf, delim); // Skip non-delimiters.
+    if (**buf != 0) { // Rest of the string is not yet empty.
+      **buf = 0; // Terminate token here.
+      *buf += 1; // Advance buf to start with the next token next time.
+    }; // if
+  }; // if
+#else
+  // On Linux* OS and OS X*, strtok_r() is available. Let us use it.
+  token = strtok_r(str, delim, buf);
+#endif
+  return token;
 }; // __kmp_str_token
 
+int __kmp_str_to_int(char const *str, char sentinel) {
+  int result, factor;
+  char const *t;
+
+  result = 0;
+
+  for (t = str; *t != '\0'; ++t) {
+    if (*t < '0' || *t > '9')
+      break;
+    result = (result * 10) + (*t - '0');
+  }
+
+  switch (*t) {
+  case '\0': /* the current default for no suffix is bytes */
+    factor = 1;
+    break;
+  case 'b':
+  case 'B': /* bytes */
+    ++t;
+    factor = 1;
+    break;
+  case 'k':
+  case 'K': /* kilo-bytes */
+    ++t;
+    factor = 1024;
+    break;
+  case 'm':
+  case 'M': /* mega-bytes */
+    ++t;
+    factor = (1024 * 1024);
+    break;
+  default:
+    if (*t != sentinel)
+      return (-1);
+    t = "";
+    factor = 1;
+  }
+
+  if (result > (INT_MAX / factor))
+    result = INT_MAX;
+  else
+    result *= factor;
 
-int
-__kmp_str_to_int(
-    char const * str,
-    char         sentinel
-) {
-    int result, factor;
-    char const * t;
-
-    result = 0;
-
-    for (t = str; *t != '\0'; ++t) {
-        if (*t < '0' || *t > '9')
-            break;
-        result = (result * 10) + (*t - '0');
-    }
-
-    switch (*t) {
-    case '\0':          /* the current default for no suffix is bytes */
-	factor = 1;
-        break;
-    case 'b': case 'B': /* bytes */
-	++t;
-	factor = 1;
-        break;
-    case 'k': case 'K': /* kilo-bytes */
-	++t;
-	factor = 1024;
-        break;
-    case 'm': case 'M': /* mega-bytes */
-	++t;
-	factor = (1024 * 1024);
-        break;
-    default:
-	if(*t != sentinel)
-	    return (-1);
-	t = "";
-	factor = 1;
-    }
-
-    if (result > (INT_MAX / factor))
-	result = INT_MAX;
-    else
-	result *= factor;
-
-    return (*t != 0 ? 0 : result);
-
+  return (*t != 0 ? 0 : result);
 } // __kmp_str_to_int
 
-
-/*
-    The routine parses input string. It is expected it is a unsigned integer with optional unit.
-    Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ..., "yb"
-    or "y" for yottabytes. :-) Unit name is case-insensitive. The routine returns 0 if everything is
-    ok, or error code: -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed
-    value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown unit *size is set
-    to zero.
-*/
-void
-__kmp_str_to_size(         // R: Error code.
-    char const *   str,    // I: String of characters, unsigned number and unit ("b", "kb", etc).
-    size_t *       out,    // O: Parsed number.
-    size_t         dfactor, // I: The factor if none of the letters specified.
-    char const * * error   // O: Null if everything is ok, error message otherwise.
-) {
-
-    size_t value    = 0;
-    size_t factor   = 0;
-    int    overflow = 0;
-    int    i        = 0;
-    int    digit;
-
-
-    KMP_DEBUG_ASSERT( str != NULL );
-
-    // Skip spaces.
-    while ( str[ i ] == ' ' || str[ i ] == '\t') {
-        ++ i;
-    }; // while
-
-    // Parse number.
-    if ( str[ i ] < '0' || str[ i ] > '9' ) {
-        * error = KMP_I18N_STR( NotANumber );
-        return;
-    }; // if
-    do {
-        digit = str[ i ] - '0';
-        overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
-        value = ( value * 10 ) + digit;
-        ++ i;
-    } while ( str[ i ] >= '0' && str[ i ] <= '9' );
-
-    // Skip spaces.
-    while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
-        ++ i;
-    }; // while
-
-    // Parse unit.
-    #define _case( ch, exp )                            \
-        case ch :                                       \
-        case ch - ( 'a' - 'A' ) : {                     \
-            size_t shift = (exp) * 10;                  \
-            ++ i;                                       \
-            if ( shift < sizeof( size_t ) * 8 ) {       \
-                factor = (size_t)( 1 ) << shift;        \
-            } else {                                    \
-                overflow = 1;                           \
-            };                                          \
-        } break;
-    switch ( str[ i ] ) {
-        _case( 'k', 1 ); // Kilo
-        _case( 'm', 2 ); // Mega
-        _case( 'g', 3 ); // Giga
-        _case( 't', 4 ); // Tera
-        _case( 'p', 5 ); // Peta
-        _case( 'e', 6 ); // Exa
-        _case( 'z', 7 ); // Zetta
-        _case( 'y', 8 ); // Yotta
-        // Oops. No more units...
-    }; // switch
-    #undef _case
-    if ( str[ i ] == 'b' || str[ i ] == 'B' ) {    // Skip optional "b".
-	if ( factor == 0 ) {
-	    factor = 1;
-	}
-        ++ i;
-    }; // if
-    if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit
-        * error = KMP_I18N_STR( BadUnit );
-        return;
-    }; // if
-
-    if ( factor == 0 ) {
-	factor = dfactor;
+/* The routine parses input string. It is expected to be an unsigned integer with
+   optional unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb"
+   or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-) Unit name is
+   case-insensitive. The routine returns 0 if everything is ok, or error code:
+   -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed
+   value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown
+   unit *size is set to zero. */
+void __kmp_str_to_size( // R: Error code.
+    char const *str, // I: String of characters, unsigned number and unit ("b",
+    // "kb", etc).
+    size_t *out, // O: Parsed number.
+    size_t dfactor, // I: The factor if none of the letters specified.
+    char const **error // O: Null if everything is ok, error message otherwise.
+    ) {
+
+  size_t value = 0;
+  size_t factor = 0;
+  int overflow = 0;
+  int i = 0;
+  int digit;
+
+  KMP_DEBUG_ASSERT(str != NULL);
+
+  // Skip spaces.
+  while (str[i] == ' ' || str[i] == '\t') {
+    ++i;
+  }; // while
+
+  // Parse number.
+  if (str[i] < '0' || str[i] > '9') {
+    *error = KMP_I18N_STR(NotANumber);
+    return;
+  }; // if
+  do {
+    digit = str[i] - '0';
+    overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
+    value = (value * 10) + digit;
+    ++i;
+  } while (str[i] >= '0' && str[i] <= '9');
+
+  // Skip spaces.
+  while (str[i] == ' ' || str[i] == '\t') {
+    ++i;
+  }; // while
+
+// Parse unit.
+#define _case(ch, exp)                                                         \
+  case ch:                                                                     \
+  case ch - ('a' - 'A'): {                                                     \
+    size_t shift = (exp)*10;                                                   \
+    ++i;                                                                       \
+    if (shift < sizeof(size_t) * 8) {                                          \
+      factor = (size_t)(1) << shift;                                           \
+    } else {                                                                   \
+      overflow = 1;                                                            \
+    };                                                                         \
+  } break;
+  switch (str[i]) {
+    _case('k', 1); // Kilo
+    _case('m', 2); // Mega
+    _case('g', 3); // Giga
+    _case('t', 4); // Tera
+    _case('p', 5); // Peta
+    _case('e', 6); // Exa
+    _case('z', 7); // Zetta
+    _case('y', 8); // Yotta
+    // Oops. No more units...
+  }; // switch
+#undef _case
+  if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b".
+    if (factor == 0) {
+      factor = 1;
     }
+    ++i;
+  }; // if
+  if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit
+    *error = KMP_I18N_STR(BadUnit);
+    return;
+  }; // if
+
+  if (factor == 0) {
+    factor = dfactor;
+  }
+
+  // Apply factor.
+  overflow = overflow || (value > (KMP_SIZE_T_MAX / factor));
+  value *= factor;
+
+  // Skip spaces.
+  while (str[i] == ' ' || str[i] == '\t') {
+    ++i;
+  }; // while
+
+  if (str[i] != 0) {
+    *error = KMP_I18N_STR(IllegalCharacters);
+    return;
+  }; // if
+
+  if (overflow) {
+    *error = KMP_I18N_STR(ValueTooLarge);
+    *out = KMP_SIZE_T_MAX;
+    return;
+  }; // if
 
-    // Apply factor.
-    overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) );
-    value *= factor;
-
-    // Skip spaces.
-    while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
-        ++ i;
-    }; // while
-
-    if ( str[ i ] != 0 ) {
-        * error = KMP_I18N_STR( IllegalCharacters );
-        return;
-    }; // if
-
-    if ( overflow ) {
-        * error = KMP_I18N_STR( ValueTooLarge );
-        * out = KMP_SIZE_T_MAX;
-        return;
-    }; // if
-
-    * error = NULL;
-    * out = value;
-
+  *error = NULL;
+  *out = value;
 } // __kmp_str_to_size
 
+void __kmp_str_to_uint( // R: Nothing; errors are reported via *error.
+    char const *str, // I: String of characters, unsigned number.
+    kmp_uint64 *out, // O: Parsed number.
+    char const **error // O: Null if everything is ok, error message otherwise.
+    ) {
+  size_t value = 0;
+  int overflow = 0;
+  int i = 0;
+  int digit;
+
+  KMP_DEBUG_ASSERT(str != NULL);
+
+  // Skip spaces.
+  while (str[i] == ' ' || str[i] == '\t') {
+    ++i;
+  }; // while
+
+  // Parse number.
+  if (str[i] < '0' || str[i] > '9') {
+    *error = KMP_I18N_STR(NotANumber);
+    return;
+  }; // if
+  do {
+    digit = str[i] - '0';
+    overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10);
+    value = (value * 10) + digit;
+    ++i;
+  } while (str[i] >= '0' && str[i] <= '9');
+
+  // Skip spaces.
+  while (str[i] == ' ' || str[i] == '\t') {
+    ++i;
+  }; // while
+
+  if (str[i] != 0) {
+    *error = KMP_I18N_STR(IllegalCharacters);
+    return;
+  }; // if
+
+  if (overflow) {
+    *error = KMP_I18N_STR(ValueTooLarge);
+    *out = (kmp_uint64)-1;
+    return;
+  }; // if
 
-void
-__kmp_str_to_uint(         // R: Error code.
-    char const *   str,    // I: String of characters, unsigned number.
-    kmp_uint64 *   out,    // O: Parsed number.
-    char const * * error   // O: Null if everything is ok, error message otherwise.
-) {
-
-    size_t value    = 0;
-    int    overflow = 0;
-    int    i        = 0;
-    int    digit;
-
-
-    KMP_DEBUG_ASSERT( str != NULL );
-
-    // Skip spaces.
-    while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
-        ++ i;
-    }; // while
-
-    // Parse number.
-    if ( str[ i ] < '0' || str[ i ] > '9' ) {
-        * error = KMP_I18N_STR( NotANumber );
-        return;
-    }; // if
-    do {
-        digit = str[ i ] - '0';
-        overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 );
-        value = ( value * 10 ) + digit;
-        ++ i;
-    } while ( str[ i ] >= '0' && str[ i ] <= '9' );
-
-    // Skip spaces.
-    while ( str[ i ] == ' ' || str[ i ] == '\t' ) {
-        ++ i;
-    }; // while
-
-    if ( str[ i ] != 0 ) {
-        * error = KMP_I18N_STR( IllegalCharacters );
-        return;
-    }; // if
-
-    if ( overflow ) {
-        * error = KMP_I18N_STR( ValueTooLarge );
-        * out = (kmp_uint64) -1;
-        return;
-    }; // if
-
-    * error = NULL;
-    * out = value;
-
+  *error = NULL;
+  *out = value;
 } // __kmp_str_to_unit
 
-
-
 // end of file //
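For readers skimming the reformatted parser above, here is a small standalone
sketch (not part of this patch) that mirrors the contract of
__kmp_str_to_size for the kilo/mega/giga cases only: digits, an optional
case-insensitive unit letter, an optional trailing "b". The real routine also
skips whitespace, handles the larger units, detects overflow, and reports
i18n error messages.

  // Illustrative only: approximates __kmp_str_to_size for a few units;
  // no whitespace, overflow, or error-message handling.
  #include <cstddef>
  #include <cstdio>

  static bool parse_size(const char *s, size_t *out) {
    size_t value = 0;
    while (*s >= '0' && *s <= '9')
      value = value * 10 + (size_t)(*s++ - '0');
    size_t factor = 1;
    switch (*s | 0x20) { // lower-case the unit letter
    case 'k': factor = (size_t)1 << 10; ++s; break;
    case 'm': factor = (size_t)1 << 20; ++s; break;
    case 'g': factor = (size_t)1 << 30; ++s; break;
    }
    if ((*s | 0x20) == 'b') ++s;  // optional trailing "b"
    if (*s != '\0') return false; // trailing junk is an error
    *out = value * factor;
    return true;
  }

  int main() {
    size_t bytes = 0;
    if (parse_size("512k", &bytes))
      std::printf("%zu\n", bytes); // prints 524288
    return 0;
  }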

Modified: openmp/trunk/runtime/src/kmp_str.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_str.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_str.h (original)
+++ openmp/trunk/runtime/src/kmp_str.h Fri May 12 13:01:32 2017
@@ -16,104 +16,112 @@
 #ifndef KMP_STR_H
 #define KMP_STR_H
 
-#include <string.h>
 #include <stdarg.h>
+#include <string.h>
 
 #include "kmp_os.h"
 
 #ifdef __cplusplus
-    extern "C" {
+extern "C" {
 #endif // __cplusplus
 
 #if KMP_OS_WINDOWS
-# define strdup    _strdup
+#define strdup _strdup
 #endif
 
 /*  some macros to replace ctype.h functions  */
-#define TOLOWER(c)	((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
+#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c))
 
 struct kmp_str_buf {
-    char       * str;         // Pointer to buffer content, read only.
-    unsigned int size;        // Do not change this field!
-    int          used;        // Number of characters printed to buffer, read only.
-    char         bulk[ 512 ]; // Do not use this field!
+  char *str; // Pointer to buffer content, read only.
+  unsigned int size; // Do not change this field!
+  int used; // Number of characters printed to buffer, read only.
+  char bulk[512]; // Do not use this field!
 }; // struct kmp_str_buf
-typedef struct kmp_str_buf  kmp_str_buf_t;
-
-#define __kmp_str_buf_init( b )   { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; }
-
-void   __kmp_str_buf_clear( kmp_str_buf_t * buffer );
-void   __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size );
-void   __kmp_str_buf_detach( kmp_str_buf_t * buffer );
-void   __kmp_str_buf_free( kmp_str_buf_t * buffer );
-void   __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len );
-void   __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args );
-void   __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... );
-void   __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size );
-
-/*
-    File name parser. Usage:
-
-        kmp_str_fname_t fname = __kmp_str_fname_init( path );
-        // Use fname.path (copy of original path ), fname.dir, fname.base.
-        // Note fname.dir concatenated with fname.base gives exact copy of path.
-        __kmp_str_fname_free( & fname );
+typedef struct kmp_str_buf kmp_str_buf_t;
 
+#define __kmp_str_buf_init(b)                                                  \
+  {                                                                            \
+    (b)->str = (b)->bulk;                                                      \
+    (b)->size = sizeof((b)->bulk);                                             \
+    (b)->used = 0;                                                             \
+    (b)->bulk[0] = 0;                                                          \
+  }
+
+void __kmp_str_buf_clear(kmp_str_buf_t *buffer);
+void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size);
+void __kmp_str_buf_detach(kmp_str_buf_t *buffer);
+void __kmp_str_buf_free(kmp_str_buf_t *buffer);
+void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len);
+void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+                          va_list args);
+void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
+void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size);
+
+/* File name parser.
+   Usage:
+
+   kmp_str_fname_t fname = __kmp_str_fname_init( path );
+   // Use fname.path (copy of the original path), fname.dir, fname.base.
+   // Note fname.dir concatenated with fname.base gives exact copy of path.
+   __kmp_str_fname_free( & fname );
 */
 struct kmp_str_fname {
-    char * path;
-    char * dir;
-    char * base;
+  char *path;
+  char *dir;
+  char *base;
 }; // struct kmp_str_fname
 typedef struct kmp_str_fname kmp_str_fname_t;
-void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path );
-void __kmp_str_fname_free( kmp_str_fname_t * fname );
-// Compares file name with specified patern. If pattern is NULL, any fname matched.
-int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern );
-
-/*
-    The compiler provides source locations in string form ";file;func;line;col;;". It not not
-    convenient for manupulation. These structure keeps source location in more convenient form.
-    Usage:
-
-        kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
-        // use loc.file, loc.func, loc.line, loc.col.
-        // loc.fname is available if the second argument of __kmp_str_loc_init is true.
-        __kmp_str_loc_free( & loc );
+void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path);
+void __kmp_str_fname_free(kmp_str_fname_t *fname);
+// Compares the file name with the specified pattern. If pattern is NULL, any
+// fname matches.
+int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern);
+
+/* The compiler provides source locations in string form
+   ";file;func;line;col;;". It is not convenient for manipulation. This
+   structure keeps the source location in a more convenient form.
+   Usage:
+
+   kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 );
+   // use loc.file, loc.func, loc.line, loc.col.
+   // loc.fname is available if second argument of __kmp_str_loc_init is true.
+   __kmp_str_loc_free( & loc );
 
-    If psource is NULL or does not follow format above, file and/or func may be NULL pointers.
+   If psource is NULL or does not follow format above, file and/or func may be
+   NULL pointers.
 */
 struct kmp_str_loc {
-    char *          _bulk;  // Do not use thid field.
-    kmp_str_fname_t fname;  // Will be initialized if init_fname is true.
-    char *          file;
-    char *          func;
-    int             line;
-    int             col;
+  char *_bulk; // Do not use this field.
+  kmp_str_fname_t fname; // Will be initialized if init_fname is true.
+  char *file;
+  char *func;
+  int line;
+  int col;
 }; // struct kmp_str_loc
 typedef struct kmp_str_loc kmp_str_loc_t;
-kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname );
-void __kmp_str_loc_free( kmp_str_loc_t * loc );
-
-int    __kmp_str_eqf( char const * lhs, char const * rhs );
-char * __kmp_str_format( char const * format, ... );
-void   __kmp_str_free( char const * * str );
-int    __kmp_str_match( char const * target, int len, char const * data );
-int    __kmp_str_match_false( char const * data );
-int    __kmp_str_match_true( char const * data );
-void   __kmp_str_replace( char * str, char search_for, char replace_with );
-void   __kmp_str_split( char * str, char delim, char ** head, char ** tail );
-char * __kmp_str_token( char * str, char const * delim, char ** buf );
-int    __kmp_str_to_int( char const * str, char sentinel );
+kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname);
+void __kmp_str_loc_free(kmp_str_loc_t *loc);
 
-void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error );
-void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error );
+int __kmp_str_eqf(char const *lhs, char const *rhs);
+char *__kmp_str_format(char const *format, ...);
+void __kmp_str_free(char const **str);
+int __kmp_str_match(char const *target, int len, char const *data);
+int __kmp_str_match_false(char const *data);
+int __kmp_str_match_true(char const *data);
+void __kmp_str_replace(char *str, char search_for, char replace_with);
+void __kmp_str_split(char *str, char delim, char **head, char **tail);
+char *__kmp_str_token(char *str, char const *delim, char **buf);
+int __kmp_str_to_int(char const *str, char sentinel);
+
+void __kmp_str_to_size(char const *str, size_t *out, size_t dfactor,
+                       char const **error);
+void __kmp_str_to_uint(char const *str, kmp_uint64 *out, char const **error);
 
 #ifdef __cplusplus
-    } // extern "C"
+} // extern "C"
 #endif // __cplusplus
 
 #endif // KMP_STR_H
 
 // end of file //
-
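As a quick orientation for the reworked declarations above, a usage sketch of
the string buffer (illustrative only; it assumes the kmp_str.h declarations
shown in this hunk and is not part of the patch):

  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf); // str points at the on-stack 512-byte bulk[]
  __kmp_str_buf_print(&buf, "tid=%d nthreads=%d\n", 0, 4);
  // buf.str now holds the formatted text, buf.used its length.
  __kmp_str_buf_free(&buf); // releases any heap storage the buffer acquired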

Modified: openmp/trunk/runtime/src/kmp_stub.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stub.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stub.cpp (original)
+++ openmp/trunk/runtime/src/kmp_stub.cpp Fri May 12 13:01:32 2017
@@ -13,258 +13,304 @@
 //===----------------------------------------------------------------------===//
 
 
-#include <stdlib.h>
-#include <limits.h>
 #include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
 
-#include "omp.h"                // Function renamings.
-#include "kmp.h"                // KMP_DEFAULT_STKSIZE
+#include "kmp.h" // KMP_DEFAULT_STKSIZE
 #include "kmp_stub.h"
+#include "omp.h" // Function renamings.
 
 #if KMP_OS_WINDOWS
-    #include <windows.h>
+#include <windows.h>
 #else
-    #include <sys/time.h>
+#include <sys/time.h>
 #endif
 
 // Moved from omp.h
-#define omp_set_max_active_levels    ompc_set_max_active_levels
-#define omp_set_schedule             ompc_set_schedule
-#define omp_get_ancestor_thread_num  ompc_get_ancestor_thread_num
-#define omp_get_team_size            ompc_get_team_size
-
-#define omp_set_num_threads          ompc_set_num_threads
-#define omp_set_dynamic              ompc_set_dynamic
-#define omp_set_nested               ompc_set_nested
-#define kmp_set_stacksize            kmpc_set_stacksize
-#define kmp_set_stacksize_s          kmpc_set_stacksize_s
-#define kmp_set_blocktime            kmpc_set_blocktime
-#define kmp_set_library              kmpc_set_library
-#define kmp_set_defaults             kmpc_set_defaults
-#define kmp_set_disp_num_buffers     kmpc_set_disp_num_buffers
-#define kmp_malloc                   kmpc_malloc
-#define kmp_aligned_malloc           kmpc_aligned_malloc
-#define kmp_calloc                   kmpc_calloc
-#define kmp_realloc                  kmpc_realloc
-#define kmp_free                     kmpc_free
+#define omp_set_max_active_levels ompc_set_max_active_levels
+#define omp_set_schedule ompc_set_schedule
+#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num
+#define omp_get_team_size ompc_get_team_size
+
+#define omp_set_num_threads ompc_set_num_threads
+#define omp_set_dynamic ompc_set_dynamic
+#define omp_set_nested ompc_set_nested
+#define kmp_set_stacksize kmpc_set_stacksize
+#define kmp_set_stacksize_s kmpc_set_stacksize_s
+#define kmp_set_blocktime kmpc_set_blocktime
+#define kmp_set_library kmpc_set_library
+#define kmp_set_defaults kmpc_set_defaults
+#define kmp_set_disp_num_buffers kmpc_set_disp_num_buffers
+#define kmp_malloc kmpc_malloc
+#define kmp_aligned_malloc kmpc_aligned_malloc
+#define kmp_calloc kmpc_calloc
+#define kmp_realloc kmpc_realloc
+#define kmp_free kmpc_free
 
 static double frequency = 0.0;
 
 // Helper functions.
 static size_t __kmps_init() {
-    static int    initialized = 0;
-    static size_t dummy = 0;
-    if ( ! initialized ) {
-
-        // TODO: Analyze KMP_VERSION environment variable, print
-        // __kmp_version_copyright and __kmp_version_build_time.
-        // WARNING: Do not use "fprintf( stderr, ... )" because it will cause
-        // unresolved "__iob" symbol (see C70080). We need to extract
-        // __kmp_printf() stuff from kmp_runtime.cpp and use it.
-
-        // Trick with dummy variable forces linker to keep __kmp_version_copyright
-        // and __kmp_version_build_time strings in executable file (in case of
-        // static linkage). When KMP_VERSION analysis is implemented, dummy
-        // variable should be deleted, function should return void.
-        dummy = __kmp_version_copyright - __kmp_version_build_time;
-
-        #if KMP_OS_WINDOWS
-            LARGE_INTEGER freq;
-            BOOL status = QueryPerformanceFrequency( & freq );
-            if ( status ) {
-                frequency = double( freq.QuadPart );
-            }; // if
-        #endif
+  static int initialized = 0;
+  static size_t dummy = 0;
+  if (!initialized) {
+    // TODO: Analyze KMP_VERSION environment variable, print
+    // __kmp_version_copyright and __kmp_version_build_time.
+    // WARNING: Do not use "fprintf(stderr, ...)" because it will cause
+    // unresolved "__iob" symbol (see C70080). We need to extract __kmp_printf()
+    // stuff from kmp_runtime.cpp and use it.
+
+    // Trick with dummy variable forces linker to keep __kmp_version_copyright
+    // and __kmp_version_build_time strings in executable file (in case of
+    // static linkage). When KMP_VERSION analysis is implemented, dummy
+    // variable should be deleted, function should return void.
+    dummy = __kmp_version_copyright - __kmp_version_build_time;
 
-        initialized = 1;
+#if KMP_OS_WINDOWS
+    LARGE_INTEGER freq;
+    BOOL status = QueryPerformanceFrequency(&freq);
+    if (status) {
+      frequency = double(freq.QuadPart);
     }; // if
-    return dummy;
+#endif
+
+    initialized = 1;
+  }; // if
+  return dummy;
 }; // __kmps_init
 
 #define i __kmps_init();
 
 /* set API functions */
-void omp_set_num_threads( omp_int_t num_threads ) { i; }
-void omp_set_dynamic( omp_int_t dynamic )         { i; __kmps_set_dynamic( dynamic ); }
-void omp_set_nested( omp_int_t nested )           { i; __kmps_set_nested( nested );   }
-void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; }
-void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); }
-int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); }
-int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); }
-int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
-int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; }
+void omp_set_num_threads(omp_int_t num_threads) { i; }
+void omp_set_dynamic(omp_int_t dynamic) {
+  i;
+  __kmps_set_dynamic(dynamic);
+}
+void omp_set_nested(omp_int_t nested) {
+  i;
+  __kmps_set_nested(nested);
+}
+void omp_set_max_active_levels(omp_int_t max_active_levels) { i; }
+void omp_set_schedule(omp_sched_t kind, omp_int_t modifier) {
+  i;
+  __kmps_set_schedule((kmp_sched_t)kind, modifier);
+}
+int omp_get_ancestor_thread_num(omp_int_t level) {
+  i;
+  return (level) ? (-1) : (0);
+}
+int omp_get_team_size(omp_int_t level) {
+  i;
+  return (level) ? (-1) : (1);
+}
+int kmpc_set_affinity_mask_proc(int proc, void **mask) {
+  i;
+  return -1;
+}
+int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
+  i;
+  return -1;
+}
+int kmpc_get_affinity_mask_proc(int proc, void **mask) {
+  i;
+  return -1;
+}
 
 /* kmp API functions */
-void kmp_set_stacksize( omp_int_t arg )   { i; __kmps_set_stacksize( arg ); }
-void kmp_set_stacksize_s( size_t arg )    { i; __kmps_set_stacksize( arg ); }
-void kmp_set_blocktime( omp_int_t arg )   { i; __kmps_set_blocktime( arg ); }
-void kmp_set_library( omp_int_t arg )     { i; __kmps_set_library( arg ); }
-void kmp_set_defaults( char const * str ) { i; }
-void kmp_set_disp_num_buffers( omp_int_t arg ) { i; }
+void kmp_set_stacksize(omp_int_t arg) {
+  i;
+  __kmps_set_stacksize(arg);
+}
+void kmp_set_stacksize_s(size_t arg) {
+  i;
+  __kmps_set_stacksize(arg);
+}
+void kmp_set_blocktime(omp_int_t arg) {
+  i;
+  __kmps_set_blocktime(arg);
+}
+void kmp_set_library(omp_int_t arg) {
+  i;
+  __kmps_set_library(arg);
+}
+void kmp_set_defaults(char const *str) { i; }
+void kmp_set_disp_num_buffers(omp_int_t arg) { i; }
 
 /* KMP memory management functions. */
-void * kmp_malloc( size_t size )                 { i; return malloc( size ); }
-void * kmp_aligned_malloc( size_t sz, size_t a ) {
-    i;
+void *kmp_malloc(size_t size) {
+  i;
+  return malloc(size);
+}
+void *kmp_aligned_malloc(size_t sz, size_t a) {
+  i;
 #if KMP_OS_WINDOWS
-    errno = ENOSYS; // not supported
-    return NULL;    // no standard aligned allocator on Windows (pre - C11)
+  errno = ENOSYS; // not supported
+  return NULL; // no standard aligned allocator on Windows (pre-C11)
 #else
-    void *res;
-    int err;
-    if( err = posix_memalign( &res, a, sz ) ) {
-        errno = err; // can be EINVAL or ENOMEM
-        return NULL;
-    }
-    return res;
+  void *res;
+  int err;
+  if (err = posix_memalign(&res, a, sz)) {
+    errno = err; // can be EINVAL or ENOMEM
+    return NULL;
+  }
+  return res;
 #endif
 }
-void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); }
-void * kmp_realloc( void *ptr, size_t size )     { i; return realloc( ptr, size ); }
-void   kmp_free( void * ptr )                    { i; free( ptr ); }
+void *kmp_calloc(size_t nelem, size_t elsize) {
+  i;
+  return calloc(nelem, elsize);
+}
+void *kmp_realloc(void *ptr, size_t size) {
+  i;
+  return realloc(ptr, size);
+}
+void kmp_free(void *ptr) {
+  i;
+  free(ptr);
+}
 
 static int __kmps_blocktime = INT_MAX;
 
-void __kmps_set_blocktime( int arg ) {
-    i;
-    __kmps_blocktime = arg;
+void __kmps_set_blocktime(int arg) {
+  i;
+  __kmps_blocktime = arg;
 } // __kmps_set_blocktime
 
-int __kmps_get_blocktime( void ) {
-    i;
-    return __kmps_blocktime;
+int __kmps_get_blocktime(void) {
+  i;
+  return __kmps_blocktime;
 } // __kmps_get_blocktime
 
 static int __kmps_dynamic = 0;
 
-void __kmps_set_dynamic( int arg ) {
-    i;
-    __kmps_dynamic = arg;
+void __kmps_set_dynamic(int arg) {
+  i;
+  __kmps_dynamic = arg;
 } // __kmps_set_dynamic
 
-int __kmps_get_dynamic( void ) {
-    i;
-    return __kmps_dynamic;
+int __kmps_get_dynamic(void) {
+  i;
+  return __kmps_dynamic;
 } // __kmps_get_dynamic
 
 static int __kmps_library = 1000;
 
-void __kmps_set_library( int arg ) {
-    i;
-    __kmps_library = arg;
+void __kmps_set_library(int arg) {
+  i;
+  __kmps_library = arg;
 } // __kmps_set_library
 
-int __kmps_get_library( void ) {
-    i;
-    return __kmps_library;
+int __kmps_get_library(void) {
+  i;
+  return __kmps_library;
 } // __kmps_get_library
 
 static int __kmps_nested = 0;
 
-void __kmps_set_nested( int arg ) {
-    i;
-    __kmps_nested = arg;
+void __kmps_set_nested(int arg) {
+  i;
+  __kmps_nested = arg;
 } // __kmps_set_nested
 
-int __kmps_get_nested( void ) {
-    i;
-    return __kmps_nested;
+int __kmps_get_nested(void) {
+  i;
+  return __kmps_nested;
 } // __kmps_get_nested
 
 static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE;
 
-void __kmps_set_stacksize( int arg ) {
-    i;
-    __kmps_stacksize = arg;
+void __kmps_set_stacksize(int arg) {
+  i;
+  __kmps_stacksize = arg;
 } // __kmps_set_stacksize
 
-int __kmps_get_stacksize( void ) {
-    i;
-    return __kmps_stacksize;
+int __kmps_get_stacksize(void) {
+  i;
+  return __kmps_stacksize;
 } // __kmps_get_stacksize
 
-static kmp_sched_t __kmps_sched_kind     = kmp_sched_default;
-static int         __kmps_sched_modifier = 0;
+static kmp_sched_t __kmps_sched_kind = kmp_sched_default;
+static int __kmps_sched_modifier = 0;
 
-    void __kmps_set_schedule( kmp_sched_t kind, int modifier ) {
-        i;
-        __kmps_sched_kind     = kind;
-        __kmps_sched_modifier = modifier;
-    } // __kmps_set_schedule
-
-    void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) {
-        i;
-        *kind     = __kmps_sched_kind;
-        *modifier = __kmps_sched_modifier;
-    } // __kmps_get_schedule
+void __kmps_set_schedule(kmp_sched_t kind, int modifier) {
+  i;
+  __kmps_sched_kind = kind;
+  __kmps_sched_modifier = modifier;
+} // __kmps_set_schedule
+
+void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) {
+  i;
+  *kind = __kmps_sched_kind;
+  *modifier = __kmps_sched_modifier;
+} // __kmps_get_schedule
 
 #if OMP_40_ENABLED
 
 static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false;
 
-void __kmps_set_proc_bind( kmp_proc_bind_t arg ) {
-    i;
-    __kmps_proc_bind = arg;
+void __kmps_set_proc_bind(kmp_proc_bind_t arg) {
+  i;
+  __kmps_proc_bind = arg;
 } // __kmps_set_proc_bind
 
-kmp_proc_bind_t __kmps_get_proc_bind( void ) {
-    i;
-    return __kmps_proc_bind;
+kmp_proc_bind_t __kmps_get_proc_bind(void) {
+  i;
+  return __kmps_proc_bind;
 } // __kmps_get_proc_bind
 
 #endif /* OMP_40_ENABLED */
 
-double __kmps_get_wtime( void ) {
-    // Elapsed wall clock time (in second) from "sometime in the past".
-    double wtime = 0.0;
-    i;
-    #if KMP_OS_WINDOWS
-        if ( frequency > 0.0 ) {
-            LARGE_INTEGER now;
-            BOOL status = QueryPerformanceCounter( & now );
-            if ( status ) {
-                wtime = double( now.QuadPart ) / frequency;
-            }; // if
-        }; // if
-    #else
-        // gettimeofday() returns seconds and microseconds since the Epoch.
-        struct timeval  tval;
-        int             rc;
-        rc = gettimeofday( & tval, NULL );
-        if ( rc == 0 ) {
-            wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec );
-        } else {
-            // TODO: Assert or abort here.
-        }; // if
-    #endif
-    return wtime;
+double __kmps_get_wtime(void) {
+  // Elapsed wall clock time (in seconds) from "sometime in the past".
+  double wtime = 0.0;
+  i;
+#if KMP_OS_WINDOWS
+  if (frequency > 0.0) {
+    LARGE_INTEGER now;
+    BOOL status = QueryPerformanceCounter(&now);
+    if (status) {
+      wtime = double(now.QuadPart) / frequency;
+    }; // if
+  }; // if
+#else
+  // gettimeofday() returns seconds and microseconds since the Epoch.
+  struct timeval tval;
+  int rc;
+  rc = gettimeofday(&tval, NULL);
+  if (rc == 0) {
+    wtime = (double)(tval.tv_sec) + 1.0E-06 * (double)(tval.tv_usec);
+  } else {
+    // TODO: Assert or abort here.
+  }; // if
+#endif
+  return wtime;
 }; // __kmps_get_wtime
 
-double __kmps_get_wtick( void ) {
-    // Number of seconds between successive clock ticks.
-    double wtick = 0.0;
-    i;
-    #if KMP_OS_WINDOWS
-        {
-            DWORD increment;
-            DWORD adjustment;
-            BOOL  disabled;
-            BOOL  rc;
-            rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled );
-            if ( rc ) {
-                wtick = 1.0E-07 * (double)( disabled ? increment : adjustment );
-            } else {
-                // TODO: Assert or abort here.
-                wtick = 1.0E-03;
-            }; // if
-        }
-    #else
-        // TODO: gettimeofday() returns in microseconds, but what the precision?
-        wtick = 1.0E-06;
-    #endif
-    return wtick;
+double __kmps_get_wtick(void) {
+  // Number of seconds between successive clock ticks.
+  double wtick = 0.0;
+  i;
+#if KMP_OS_WINDOWS
+  {
+    DWORD increment;
+    DWORD adjustment;
+    BOOL disabled;
+    BOOL rc;
+    rc = GetSystemTimeAdjustment(&adjustment, &increment, &disabled);
+    if (rc) {
+      wtick = 1.0E-07 * (double)(disabled ? increment : adjustment);
+    } else {
+      // TODO: Assert or abort here.
+      wtick = 1.0E-03;
+    }; // if
+  }
+#else
+  // TODO: gettimeofday() returns in microseconds, but what is the precision?
+  wtick = 1.0E-06;
+#endif
+  return wtick;
 }; // __kmps_get_wtick
 
 // end of file //
-
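The stub timer above falls back to gettimeofday() on non-Windows systems. The
following standalone sketch (POSIX only, not part of the patch) shows the same
pattern in isolation:

  #include <cstdio>
  #include <sys/time.h>

  static double stub_wtime(void) {
    struct timeval tval;
    if (gettimeofday(&tval, NULL) == 0)
      return (double)tval.tv_sec + 1.0e-06 * (double)tval.tv_usec;
    return 0.0; // like the stub, report 0.0 if the call fails
  }

  int main() {
    double t0 = stub_wtime();
    // ... do some work ...
    std::printf("elapsed: %f s\n", stub_wtime() - t0);
    return 0;
  }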

Modified: openmp/trunk/runtime/src/kmp_stub.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_stub.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_stub.h (original)
+++ openmp/trunk/runtime/src/kmp_stub.h Fri May 12 13:01:32 2017
@@ -17,43 +17,43 @@
 #define KMP_STUB_H
 
 #ifdef __cplusplus
-    extern "C" {
+extern "C" {
 #endif // __cplusplus
 
-void __kmps_set_blocktime( int arg );
-int  __kmps_get_blocktime( void );
-void __kmps_set_dynamic( int arg );
-int  __kmps_get_dynamic( void );
-void __kmps_set_library( int arg );
-int  __kmps_get_library( void );
-void __kmps_set_nested( int arg );
-int  __kmps_get_nested( void );
-void __kmps_set_stacksize( int arg );
-int  __kmps_get_stacksize();
+void __kmps_set_blocktime(int arg);
+int __kmps_get_blocktime(void);
+void __kmps_set_dynamic(int arg);
+int __kmps_get_dynamic(void);
+void __kmps_set_library(int arg);
+int __kmps_get_library(void);
+void __kmps_set_nested(int arg);
+int __kmps_get_nested(void);
+void __kmps_set_stacksize(int arg);
+int __kmps_get_stacksize();
 
 #ifndef KMP_SCHED_TYPE_DEFINED
 #define KMP_SCHED_TYPE_DEFINED
 typedef enum kmp_sched {
-    kmp_sched_static            = 1, // mapped to kmp_sch_static_chunked           (33)
-    kmp_sched_dynamic           = 2, // mapped to kmp_sch_dynamic_chunked          (35)
-    kmp_sched_guided            = 3, // mapped to kmp_sch_guided_chunked           (36)
-    kmp_sched_auto              = 4, // mapped to kmp_sch_auto                     (38)
-    kmp_sched_default = kmp_sched_static   // default scheduling
+  kmp_sched_static = 1, // mapped to kmp_sch_static_chunked           (33)
+  kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked          (35)
+  kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked           (36)
+  kmp_sched_auto = 4, // mapped to kmp_sch_auto                     (38)
+  kmp_sched_default = kmp_sched_static // default scheduling
 } kmp_sched_t;
 #endif
-void __kmps_set_schedule( kmp_sched_t kind, int modifier );
-void __kmps_get_schedule( kmp_sched_t *kind, int *modifier );
+void __kmps_set_schedule(kmp_sched_t kind, int modifier);
+void __kmps_get_schedule(kmp_sched_t *kind, int *modifier);
 
 #if OMP_40_ENABLED
-void __kmps_set_proc_bind( kmp_proc_bind_t arg );
-kmp_proc_bind_t __kmps_get_proc_bind( void );
+void __kmps_set_proc_bind(kmp_proc_bind_t arg);
+kmp_proc_bind_t __kmps_get_proc_bind(void);
 #endif /* OMP_40_ENABLED */
 
 double __kmps_get_wtime();
 double __kmps_get_wtick();
 
 #ifdef __cplusplus
-    } // extern "C"
+} // extern "C"
 #endif // __cplusplus
 
 #endif // KMP_STUB_H
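For context, the schedule setter/getter declared here are pure stubs that only
record their arguments (see kmp_stub.cpp above). An illustrative round trip,
assuming the stub library is linked in:

  kmp_sched_t kind;
  int chunk;
  __kmps_set_schedule(kmp_sched_dynamic, 4); // stub stores kind and modifier
  __kmps_get_schedule(&kind, &chunk); // kind == kmp_sched_dynamic, chunk == 4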

Modified: openmp/trunk/runtime/src/kmp_taskdeps.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_taskdeps.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_taskdeps.cpp (original)
+++ openmp/trunk/runtime/src/kmp_taskdeps.cpp Fri May 12 13:01:32 2017
@@ -21,511 +21,543 @@
 
 #if OMP_40_ENABLED
 
-//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use list finished list entries?
-//TODO: don't use atomic ref counters for stack-allocated nodes.
-//TODO: find an alternate to atomic refs for heap-allocated nodes?
-//TODO: Finish graph output support
-//TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other runtime locks
-//TODO: Any ITT support needed?
+// TODO: Improve memory allocation? keep a list of pre-allocated structures?
+// allocate in blocks? re-use list finished list entries?
+// TODO: don't use atomic ref counters for stack-allocated nodes.
+// TODO: find an alternate to atomic refs for heap-allocated nodes?
+// TODO: Finish graph output support
+// TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check
+// other runtime locks
+// TODO: Any ITT support needed?
 
 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
 static kmp_int32 kmp_node_id_seed = 0;
 #endif
 
-static void
-__kmp_init_node ( kmp_depnode_t *node )
-{
-    node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed
-    node->dn.successors = NULL;
-    __kmp_init_lock(&node->dn.lock);
-    node->dn.nrefs = 1; // init creates the first reference to the node
+static void __kmp_init_node(kmp_depnode_t *node) {
+  node->dn.task = NULL; // set to NULL initially; it will point to the right
+  // task once dependences have been processed
+  node->dn.successors = NULL;
+  __kmp_init_lock(&node->dn.lock);
+  node->dn.nrefs = 1; // init creates the first reference to the node
 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
-    node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
+  node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
 #endif
 }
 
-static inline kmp_depnode_t *
-__kmp_node_ref ( kmp_depnode_t *node )
-{
-    KMP_TEST_THEN_INC32(&node->dn.nrefs);
-    return node;
+static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
+  KMP_TEST_THEN_INC32(&node->dn.nrefs);
+  return node;
 }
 
-static inline void
-__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node )
-{
-    if (!node) return;
+static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) {
+  if (!node)
+    return;
 
-    kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
-    if ( n == 0 ) {
-        KMP_ASSERT(node->dn.nrefs == 0);
+  kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1;
+  if (n == 0) {
+    KMP_ASSERT(node->dn.nrefs == 0);
 #if USE_FAST_MEMORY
-        __kmp_fast_free(thread,node);
+    __kmp_fast_free(thread, node);
 #else
-        __kmp_thread_free(thread,node);
+    __kmp_thread_free(thread, node);
 #endif
-    }
+  }
 }
 
-#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid))
-#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid))
+#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid))
+#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid))
 
-static void
-__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list );
+static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list);
 
-enum {
-    KMP_DEPHASH_OTHER_SIZE = 97,
-    KMP_DEPHASH_MASTER_SIZE = 997
-};
+enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
 
-static inline kmp_int32
-__kmp_dephash_hash ( kmp_intptr_t addr, size_t hsize )
-{
-    //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets );
-    return ((addr >> 6) ^ (addr >> 2)) % hsize;
-}
-
-static kmp_dephash_t *
-__kmp_dephash_create ( kmp_info_t *thread, kmp_taskdata_t *current_task )
-{
-    kmp_dephash_t *h;
-
-    size_t h_size;
-
-    if ( current_task->td_flags.tasktype == TASK_IMPLICIT )
-        h_size = KMP_DEPHASH_MASTER_SIZE;
-    else
-        h_size = KMP_DEPHASH_OTHER_SIZE;
+static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
+  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
+  // m_num_sets );
+  return ((addr >> 6) ^ (addr >> 2)) % hsize;
+}
+
+static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
+                                           kmp_taskdata_t *current_task) {
+  kmp_dephash_t *h;
 
-    kmp_int32 size =
-        h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
+  size_t h_size;
+
+  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
+    h_size = KMP_DEPHASH_MASTER_SIZE;
+  else
+    h_size = KMP_DEPHASH_OTHER_SIZE;
+
+  kmp_int32 size =
+      h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
 
 #if USE_FAST_MEMORY
-    h = (kmp_dephash_t *) __kmp_fast_allocate( thread, size );
+  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
 #else
-    h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size );
+  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
 #endif
-    h->size = h_size;
+  h->size = h_size;
 
 #ifdef KMP_DEBUG
-    h->nelements = 0;
-    h->nconflicts = 0;
+  h->nelements = 0;
+  h->nconflicts = 0;
 #endif
-    h->buckets = (kmp_dephash_entry **)(h+1);
+  h->buckets = (kmp_dephash_entry **)(h + 1);
 
-    for ( size_t i = 0; i < h_size; i++ )
-        h->buckets[i] = 0;
+  for (size_t i = 0; i < h_size; i++)
+    h->buckets[i] = 0;
 
-    return h;
-}
-
-void
-__kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h)
-{
-    for (size_t i = 0; i < h->size; i++) {
-        if (h->buckets[i]) {
-            kmp_dephash_entry_t *next;
-            for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
-                next = entry->next_in_bucket;
-                __kmp_depnode_list_free(thread,entry->last_ins);
-                __kmp_node_deref(thread,entry->last_out);
+  return h;
+}
+
+void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) {
+  for (size_t i = 0; i < h->size; i++) {
+    if (h->buckets[i]) {
+      kmp_dephash_entry_t *next;
+      for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) {
+        next = entry->next_in_bucket;
+        __kmp_depnode_list_free(thread, entry->last_ins);
+        __kmp_node_deref(thread, entry->last_out);
 #if USE_FAST_MEMORY
-                __kmp_fast_free(thread,entry);
+        __kmp_fast_free(thread, entry);
 #else
-                __kmp_thread_free(thread,entry);
+        __kmp_thread_free(thread, entry);
 #endif
-            }
-            h->buckets[i] = 0;
-        }
+      }
+      h->buckets[i] = 0;
     }
+  }
 }
 
-void
-__kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h)
-{
-    __kmp_dephash_free_entries(thread, h);
+void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
+  __kmp_dephash_free_entries(thread, h);
 #if USE_FAST_MEMORY
-    __kmp_fast_free(thread,h);
+  __kmp_fast_free(thread, h);
 #else
-    __kmp_thread_free(thread,h);
+  __kmp_thread_free(thread, h);
 #endif
 }
 
 static kmp_dephash_entry *
-__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr )
-{
-    kmp_int32 bucket = __kmp_dephash_hash(addr,h->size);
-
-    kmp_dephash_entry_t *entry;
-    for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket )
-        if ( entry->addr == addr ) break;
+__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) {
+  kmp_int32 bucket = __kmp_dephash_hash(addr, h->size);
+
+  kmp_dephash_entry_t *entry;
+  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
+    if (entry->addr == addr)
+      break;
 
-    if ( entry == NULL ) {
-        // create entry. This is only done by one thread so no locking required
+  if (entry == NULL) {
+// create entry. This is only done by one thread so no locking required
 #if USE_FAST_MEMORY
-        entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) );
+    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
+        thread, sizeof(kmp_dephash_entry_t));
 #else
-        entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) );
+    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
+        thread, sizeof(kmp_dephash_entry_t));
 #endif
-        entry->addr = addr;
-        entry->last_out = NULL;
-        entry->last_ins = NULL;
-        entry->next_in_bucket = h->buckets[bucket];
-        h->buckets[bucket] = entry;
+    entry->addr = addr;
+    entry->last_out = NULL;
+    entry->last_ins = NULL;
+    entry->next_in_bucket = h->buckets[bucket];
+    h->buckets[bucket] = entry;
 #ifdef KMP_DEBUG
-        h->nelements++;
-        if ( entry->next_in_bucket ) h->nconflicts++;
-#endif
-    }
-    return entry;
+    h->nelements++;
+    if (entry->next_in_bucket)
+      h->nconflicts++;
+#endif
+  }
+  return entry;
 }
 
-static kmp_depnode_list_t *
-__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node )
-{
-    kmp_depnode_list_t *new_head;
+static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
+                                          kmp_depnode_list_t *list,
+                                          kmp_depnode_t *node) {
+  kmp_depnode_list_t *new_head;
 
 #if USE_FAST_MEMORY
-    new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
+  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
+      thread, sizeof(kmp_depnode_list_t));
 #else
-    new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
+  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
+      thread, sizeof(kmp_depnode_list_t));
 #endif
 
-    new_head->node = __kmp_node_ref(node);
-    new_head->next = list;
+  new_head->node = __kmp_node_ref(node);
+  new_head->next = list;
 
-    return new_head;
+  return new_head;
 }
 
-static void
-__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list )
-{
-    kmp_depnode_list *next;
+static void __kmp_depnode_list_free(kmp_info_t *thread,
+                                    kmp_depnode_list *list) {
+  kmp_depnode_list *next;
 
-    for ( ; list ; list = next ) {
-        next = list->next;
+  for (; list; list = next) {
+    next = list->next;
 
-        __kmp_node_deref(thread,list->node);
+    __kmp_node_deref(thread, list->node);
 #if USE_FAST_MEMORY
-        __kmp_fast_free(thread,list);
+    __kmp_fast_free(thread, list);
 #else
-        __kmp_thread_free(thread,list);
+    __kmp_thread_free(thread, list);
 #endif
-    }
+  }
 }
 
-static inline void
-__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink,
-                         kmp_task_t *sink_task )
-{
+static inline void __kmp_track_dependence(kmp_depnode_t *source,
+                                          kmp_depnode_t *sink,
+                                          kmp_task_t *sink_task) {
 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
-    kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
-    // do not use sink->dn.task as that is only filled after the dependencies
-    // are already processed!
-    kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task);
-
-    __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource);
+  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
+  // do not use sink->dn.task as that is only filled after the dependencies
+  // are already processed!
+  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
+
+  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
+               task_source->td_ident->psource, sink->dn.id,
+               task_sink->td_ident->psource);
 #endif
 #if OMPT_SUPPORT && OMPT_TRACE
-    /* OMPT tracks dependences between task (a=source, b=sink) in which
-       task a blocks the execution of b through the ompt_new_dependence_callback */
-    if (ompt_enabled &&
-        ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair))
-    {
-        kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
-        kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task);
-
-        ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)(
-          task_source->ompt_task_info.task_id,
-          task_sink->ompt_task_info.task_id);
-    }
+  // OMPT tracks dependences between task (a=source, b=sink) in which
+  // task a blocks the execution of b through the ompt_new_dependence_callback
+  if (ompt_enabled &&
+      ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) {
+    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
+    kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
+
+    ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)(
+        task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id);
+  }
 #endif /* OMPT_SUPPORT && OMPT_TRACE */
 }
 
-template< bool filter >
+template <bool filter>
 static inline kmp_int32
-__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
-                     bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list,
-                     kmp_task_t *task )
-{
-    KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) );
-
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_int32 npredecessors=0;
-    for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
-        const kmp_depend_info_t * dep = &dep_list[i];
-
-        KMP_DEBUG_ASSERT(dep->flags.in);
-
-        if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries
-
-        kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
-        kmp_depnode_t *last_out = info->last_out;
-
-        if ( dep->flags.out && info->last_ins ) {
-            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
-                kmp_depnode_t * indep = p->node;
-                if ( indep->dn.task ) {
-                    KMP_ACQUIRE_DEPNODE(gtid,indep);
-                    if ( indep->dn.task ) {
-                        __kmp_track_dependence(indep,node,task);
-                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node);
-                        KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
-                                 filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(task)));
-                        npredecessors++;
-                    }
-                    KMP_RELEASE_DEPNODE(gtid,indep);
-                }
-            }
-
-            __kmp_depnode_list_free(thread,info->last_ins);
-            info->last_ins = NULL;
-
-        } else if ( last_out && last_out->dn.task ) {
-            KMP_ACQUIRE_DEPNODE(gtid,last_out);
-            if ( last_out->dn.task ) {
-                __kmp_track_dependence(last_out,node,task);
-                last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node);
-                KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
-                             filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(task)));
-
-                npredecessors++;
-            }
-            KMP_RELEASE_DEPNODE(gtid,last_out);
+__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
+                   bool dep_barrier, kmp_int32 ndeps,
+                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
+  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : "
+                "dep_barrier = %d\n",
+                filter, gtid, ndeps, dep_barrier));
+
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 npredecessors = 0;
+  for (kmp_int32 i = 0; i < ndeps; i++) {
+    const kmp_depend_info_t *dep = &dep_list[i];
+
+    KMP_DEBUG_ASSERT(dep->flags.in);
+
+    if (filter && dep->base_addr == 0)
+      continue; // skip filtered entries
+
+    kmp_dephash_entry_t *info =
+        __kmp_dephash_find(thread, hash, dep->base_addr);
+    kmp_depnode_t *last_out = info->last_out;
+
+    if (dep->flags.out && info->last_ins) {
+      for (kmp_depnode_list_t *p = info->last_ins; p; p = p->next) {
+        kmp_depnode_t *indep = p->node;
+        if (indep->dn.task) {
+          KMP_ACQUIRE_DEPNODE(gtid, indep);
+          if (indep->dn.task) {
+            __kmp_track_dependence(indep, node, task);
+            indep->dn.successors =
+                __kmp_add_node(thread, indep->dn.successors, node);
+            KA_TRACE(40, ("__kmp_process_deps<%d>: T#%d adding dependence from "
+                          "%p to %p\n",
+                          filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task),
+                          KMP_TASK_TO_TASKDATA(task)));
+            npredecessors++;
+          }
+          KMP_RELEASE_DEPNODE(gtid, indep);
         }
+      }
 
-        if ( dep_barrier ) {
-            // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
-            // the execution of this task so the previous output nodes can be cleared.
-            __kmp_node_deref(thread,last_out);
-            info->last_out = NULL;
-        } else {
-            if ( dep->flags.out ) {
-                __kmp_node_deref(thread,last_out);
-                info->last_out = __kmp_node_ref(node);
-            } else
-                info->last_ins = __kmp_add_node(thread, info->last_ins, node);
-        }
+      __kmp_depnode_list_free(thread, info->last_ins);
+      info->last_ins = NULL;
 
+    } else if (last_out && last_out->dn.task) {
+      KMP_ACQUIRE_DEPNODE(gtid, last_out);
+      if (last_out->dn.task) {
+        __kmp_track_dependence(last_out, node, task);
+        last_out->dn.successors =
+            __kmp_add_node(thread, last_out->dn.successors, node);
+        KA_TRACE(
+            40,
+            ("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
+             filter, gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task),
+             KMP_TASK_TO_TASKDATA(task)));
+
+        npredecessors++;
+      }
+      KMP_RELEASE_DEPNODE(gtid, last_out);
+    }
+
+    if (dep_barrier) {
+      // if this is a sync point in the serial sequence, then the previous
+      // outputs are guaranteed to be completed after the execution of this
+      // task, so the previous output nodes can be cleared.
+      __kmp_node_deref(thread, last_out);
+      info->last_out = NULL;
+    } else {
+      if (dep->flags.out) {
+        __kmp_node_deref(thread, last_out);
+        info->last_out = __kmp_node_ref(node);
+      } else
+        info->last_ins = __kmp_add_node(thread, info->last_ins, node);
     }
+  }
 
-    KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
+  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
+                gtid, npredecessors));
 
-    return npredecessors;
+  return npredecessors;
 }
 
 #define NO_DEP_BARRIER (false)
 #define DEP_BARRIER (true)
 
 // returns true if the task has any outstanding dependence
-static bool
-__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier,
-                   kmp_int32 ndeps, kmp_depend_info_t *dep_list,
-                   kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
-    int i;
+static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
+                             kmp_task_t *task, kmp_dephash_t *hash,
+                             bool dep_barrier, kmp_int32 ndeps,
+                             kmp_depend_info_t *dep_list,
+                             kmp_int32 ndeps_noalias,
+                             kmp_depend_info_t *noalias_dep_list) {
+  int i;
 
 #if KMP_DEBUG
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
 #endif
-    KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased depedencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) );
-
-    // Filter deps in dep_list
-    // TODO: Different algorithm for large dep_list ( > 10 ? )
-    for ( i = 0; i < ndeps; i ++ ) {
-        if ( dep_list[i].base_addr != 0 )
-            for ( int j = i+1; j < ndeps; j++ )
-                if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
-                    dep_list[i].flags.in |= dep_list[j].flags.in;
-                    dep_list[i].flags.out |= dep_list[j].flags.out;
-                    dep_list[j].base_addr = 0; // Mark j element as void
-                }
-    }
-
-    // doesn't need to be atomic as no other thread is going to be accessing this node just yet
-    // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
-    node->dn.npredecessors = -1;
-
-    // used to pack all npredecessors additions into a single atomic operation at the end
-    int npredecessors;
-
-    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
-      ndeps, dep_list, task);
-    npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier,
-      ndeps_noalias, noalias_dep_list, task);
-
-    node->dn.task = task;
-    KMP_MB();
-
-    // Account for our initial fake value
-    npredecessors++;
-
-    // Update predecessors and obtain current value to check if there are still any outstandig dependences (some tasks may have finished while we processed the dependences)
-    npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors;
-
-    KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) );
-
-    // beyond this point the task could be queued (and executed) by a releasing task...
-    return npredecessors > 0 ? true : false;
-}
-
-void
-__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
-{
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_depnode_t *node = task->td_depnode;
+  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d "
+                "possibly aliased dependencies, %d non-aliased dependencies : "
+                "dep_barrier=%d .\n",
+                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
+
+  // Filter deps in dep_list
+  // TODO: Different algorithm for large dep_list ( > 10 ? )
+  for (i = 0; i < ndeps; i++) {
+    if (dep_list[i].base_addr != 0)
+      for (int j = i + 1; j < ndeps; j++)
+        if (dep_list[i].base_addr == dep_list[j].base_addr) {
+          dep_list[i].flags.in |= dep_list[j].flags.in;
+          dep_list[i].flags.out |= dep_list[j].flags.out;
+          dep_list[j].base_addr = 0; // Mark j element as void
+        }
+  }
 
-    if ( task->td_dephash ) {
-        KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
-        __kmp_dephash_free(thread,task->td_dephash);
-        task->td_dephash = NULL;
+  // doesn't need to be atomic as no other thread is going to be accessing this
+  // node just yet.
+  // npredecessors is set to -1 to ensure that none of the releasing tasks
+  // queues this task before we have finished processing all the dependencies
+  node->dn.npredecessors = -1;
+
+  // used to pack all npredecessors additions into a single atomic operation at
+  // the end
+  int npredecessors;
+
+  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
+                                           dep_list, task);
+  npredecessors += __kmp_process_deps<false>(
+      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
+
+  node->dn.task = task;
+  KMP_MB();
+
+  // Account for our initial fake value
+  npredecessors++;
+
+  // Update predecessors and obtain current value to check if there are still
+  // any outstanding dependences (some tasks may have finished while we processed
+  // the dependences)
+  npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) +
+                  npredecessors;
+
+  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
+                gtid, npredecessors, taskdata));
+
+  // beyond this point the task could be queued (and executed) by a releasing
+  // task...
+  return npredecessors > 0 ? true : false;
+}
+
+void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_depnode_t *node = task->td_depnode;
+
+  if (task->td_dephash) {
+    KA_TRACE(
+        40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n",
+             gtid, task));
+    __kmp_dephash_free(thread, task->td_dephash);
+    task->td_dephash = NULL;
+  }
+
+  if (!node)
+    return;
+
+  KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n",
+                gtid, task));
+
+  KMP_ACQUIRE_DEPNODE(gtid, node);
+  node->dn.task =
+      NULL; // mark this task as finished, so no new dependencies are generated
+  KMP_RELEASE_DEPNODE(gtid, node);
+
+  kmp_depnode_list_t *next;
+  for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) {
+    kmp_depnode_t *successor = p->node;
+    kmp_int32 npredecessors =
+        KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;
+
+    // successor task can be NULL for wait_depends or because deps are still
+    // being processed
+    if (npredecessors == 0) {
+      KMP_MB();
+      if (successor->dn.task) {
+        KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled "
+                      "for execution.\n",
+                      gtid, successor->dn.task, task));
+        __kmp_omp_task(gtid, successor->dn.task, false);
+      }
     }
 
-    if ( !node ) return;
-
-    KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) );
-
-    KMP_ACQUIRE_DEPNODE(gtid,node);
-    node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
-    KMP_RELEASE_DEPNODE(gtid,node);
-
-    kmp_depnode_list_t *next;
-    for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
-        kmp_depnode_t *successor = p->node;
-        kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1;
-
-        // successor task can be NULL for wait_depends or because deps are still being processed
-        if ( npredecessors == 0 ) {
-            KMP_MB();
-            if ( successor->dn.task ) {
-                KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
-                __kmp_omp_task(gtid,successor->dn.task,false);
-            }
-        }
-
-        next = p->next;
-        __kmp_node_deref(thread,p->node);
+    next = p->next;
+    __kmp_node_deref(thread, p->node);
 #if USE_FAST_MEMORY
-        __kmp_fast_free(thread,p);
+    __kmp_fast_free(thread, p);
 #else
-        __kmp_thread_free(thread,p);
+    __kmp_thread_free(thread, p);
 #endif
-    }
+  }
 
-    __kmp_node_deref(thread,node);
+  __kmp_node_deref(thread, node);
 
-    KA_TRACE(20, ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", gtid, task ) );
+  KA_TRACE(
+      20,
+      ("__kmp_release_deps: T#%d all successors of %p notified of completion\n",
+       gtid, task));
 }
 
 /*!
 @ingroup TASKING
 @param loc_ref location of the original task directive
 @param gtid Global Thread ID of encountering thread
-@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
+@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
+task''
 @param ndeps Number of depend items with possible aliasing
 @param dep_list List of depend items with possible aliasing
 @param ndeps_noalias Number of depend items with no aliasing
 @param noalias_dep_list List of depend items with no aliasing
 
-@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
+@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
+suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued
 
 Schedule a non-thread-switchable task with dependences for execution
 */
-kmp_int32
-__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task,
-                            kmp_int32 ndeps, kmp_depend_info_t *dep_list,
-                            kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
-
-    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
-    KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, new_taskdata ) );
+kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
+                                    kmp_task_t *new_task, kmp_int32 ndeps,
+                                    kmp_depend_info_t *dep_list,
+                                    kmp_int32 ndeps_noalias,
+                                    kmp_depend_info_t *noalias_dep_list) {
+
+  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
+                loc_ref, new_taskdata));
 
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_taskdata_t * current_task = thread->th.th_current_task;
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *current_task = thread->th.th_current_task;
 
 #if OMPT_SUPPORT && OMPT_TRACE
-    /* OMPT grab all dependences if requested by the tool */
-    if (ompt_enabled && ndeps+ndeps_noalias > 0 &&
-        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
-	{
-        kmp_int32 i;
-
-        new_taskdata->ompt_task_info.ndeps = ndeps+ndeps_noalias;
-        new_taskdata->ompt_task_info.deps = (ompt_task_dependence_t *)
-          KMP_OMPT_DEPS_ALLOC(thread,
-             (ndeps+ndeps_noalias)*sizeof(ompt_task_dependence_t));
-
-        KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);
-
-        for (i = 0; i < ndeps; i++)
-        {
-            new_taskdata->ompt_task_info.deps[i].variable_addr =
-              (void*) dep_list[i].base_addr;
-            if (dep_list[i].flags.in && dep_list[i].flags.out)
-                new_taskdata->ompt_task_info.deps[i].dependence_flags =
-                  ompt_task_dependence_type_inout;
-            else if (dep_list[i].flags.out)
-                new_taskdata->ompt_task_info.deps[i].dependence_flags =
-                  ompt_task_dependence_type_out;
-            else if (dep_list[i].flags.in)
-                new_taskdata->ompt_task_info.deps[i].dependence_flags =
-                  ompt_task_dependence_type_in;
-        }
-        for (i = 0; i < ndeps_noalias; i++)
-        {
-            new_taskdata->ompt_task_info.deps[ndeps+i].variable_addr =
-              (void*) noalias_dep_list[i].base_addr;
-            if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
-                new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
-                  ompt_task_dependence_type_inout;
-            else if (noalias_dep_list[i].flags.out)
-                new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
-                  ompt_task_dependence_type_out;
-            else if (noalias_dep_list[i].flags.in)
-                new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
-                  ompt_task_dependence_type_in;
-        }
+  /* OMPT grab all dependences if requested by the tool */
+  if (ompt_enabled && ndeps + ndeps_noalias > 0 &&
+      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
+    kmp_int32 i;
+
+    new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias;
+    new_taskdata->ompt_task_info.deps =
+        (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC(
+            thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t));
+
+    KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL);
+
+    for (i = 0; i < ndeps; i++) {
+      new_taskdata->ompt_task_info.deps[i].variable_addr =
+          (void *)dep_list[i].base_addr;
+      if (dep_list[i].flags.in && dep_list[i].flags.out)
+        new_taskdata->ompt_task_info.deps[i].dependence_flags =
+            ompt_task_dependence_type_inout;
+      else if (dep_list[i].flags.out)
+        new_taskdata->ompt_task_info.deps[i].dependence_flags =
+            ompt_task_dependence_type_out;
+      else if (dep_list[i].flags.in)
+        new_taskdata->ompt_task_info.deps[i].dependence_flags =
+            ompt_task_dependence_type_in;
+    }
+    for (i = 0; i < ndeps_noalias; i++) {
+      new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr =
+          (void *)noalias_dep_list[i].base_addr;
+      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
+        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
+            ompt_task_dependence_type_inout;
+      else if (noalias_dep_list[i].flags.out)
+        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
+            ompt_task_dependence_type_out;
+      else if (noalias_dep_list[i].flags.in)
+        new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags =
+            ompt_task_dependence_type_in;
     }
+  }
 #endif /* OMPT_SUPPORT && OMPT_TRACE */
 
-    bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
+  bool serial = current_task->td_flags.team_serial ||
+                current_task->td_flags.tasking_ser ||
+                current_task->td_flags.final;
 #if OMP_45_ENABLED
-    kmp_task_team_t * task_team = thread->th.th_task_team;
-    serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);
+  kmp_task_team_t *task_team = thread->th.th_task_team;
+  serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks);
 #endif
 
-    if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) {
-        /* if no dependencies have been tracked yet, create the dependence hash */
-        if ( current_task->td_dephash == NULL )
-            current_task->td_dephash = __kmp_dephash_create(thread, current_task);
+  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
+    /* if no dependencies have been tracked yet, create the dependence hash */
+    if (current_task->td_dephash == NULL)
+      current_task->td_dephash = __kmp_dephash_create(thread, current_task);
 
 #if USE_FAST_MEMORY
-        kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t));
+    kmp_depnode_t *node =
+        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
 #else
-        kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t));
+    kmp_depnode_t *node =
+        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
 #endif
 
-        __kmp_init_node(node);
-        new_taskdata->td_depnode = node;
+    __kmp_init_node(node);
+    new_taskdata->td_depnode = node;
 
-        if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER,
-                               ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) {
-            KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: "
-                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
-                  new_taskdata ) );
-            return TASK_CURRENT_NOT_QUEUED;
-        }
-    } else {
-        KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)"
-                      "loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) );
-    }
-
-    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : "
-                  "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref,
-                  new_taskdata ) );
+    if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash,
+                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
+                         noalias_dep_list)) {
+      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
+                    "dependencies: "
+                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
+                    gtid, loc_ref, new_taskdata));
+      return TASK_CURRENT_NOT_QUEUED;
+    }
+  } else {
+    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies "
+                  "for task (serialized) "
+                  "loc=%p task=%p\n",
+                  gtid, loc_ref, new_taskdata));
+  }
+
+  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
+                "dependencies : "
+                "loc=%p task=%p, transferring to __kmpc_omp_task\n",
+                gtid, loc_ref, new_taskdata));
 
-    return __kmpc_omp_task(loc_ref,gtid,new_task);
+  return __kmpc_omp_task(loc_ref, gtid, new_task);
 }
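
For reference, a minimal user-level sketch that exercises this entry point. It assumes the usual clang lowering, in which each depend clause becomes a kmp_depend_info_t entry passed to __kmpc_omp_task_with_deps; build with an OpenMP-enabled compiler (e.g. clang -fopenmp).

#include <stdio.h>

int main(void) {
  int x = 0;
#pragma omp parallel
#pragma omp single
  {
/* Producer: depend(out: x) registers an "out" dependence on &x. */
#pragma omp task depend(out : x) shared(x)
    x = 42;

/* Consumer: depend(in: x) makes this task a successor of the producer;
   the runtime defers it until the producer's depnode releases it. */
#pragma omp task depend(in : x) shared(x)
    printf("x = %d\n", x);
  }
  return 0;
}
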
 
 /*!
@@ -539,55 +571,64 @@ __kmpc_omp_task_with_deps( ident_t *loc_
 
 Blocks the current task until all specified dependencies have been fulfilled.
 */
-void
-__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
-                       kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
-    KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) );
-
-    if ( ndeps == 0 && ndeps_noalias == 0 ) {
-        KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) );
-        return;
-    }
-
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_taskdata_t * current_task = thread->th.th_current_task;
-
-    // We can return immediately as:
-    //   - dependences are not computed in serial teams (except if we have proxy tasks)
-    //   - if the dephash is not yet created it means we have nothing to wait for
-    bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final;
+void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
+                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
+                          kmp_depend_info_t *noalias_dep_list) {
+  KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref));
+
+  if (ndeps == 0 && ndeps_noalias == 0) {
+    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to "
+                  "wait upon : loc=%p\n",
+                  gtid, loc_ref));
+    return;
+  }
+
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *current_task = thread->th.th_current_task;
+
+  // We can return immediately as:
+  // - dependences are not computed in serial teams (except with proxy tasks)
+  // - if the dephash is not yet created it means we have nothing to wait for
+  bool ignore = current_task->td_flags.team_serial ||
+                current_task->td_flags.tasking_ser ||
+                current_task->td_flags.final;
 #if OMP_45_ENABLED
-    ignore = ignore && thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
+  ignore = ignore && thread->th.th_task_team != NULL &&
+           thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE;
 #endif
-    ignore = ignore || current_task->td_dephash == NULL;
-
-    if ( ignore ) {
-        KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
-        return;
-    }
-
-    kmp_depnode_t node;
-    __kmp_init_node(&node);
-
-    if (!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER,
-                           ndeps, dep_list, ndeps_noalias, noalias_dep_list )) {
-        KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) );
-        return;
-    }
+  ignore = ignore || current_task->td_dephash == NULL;
 
-    int thread_finished = FALSE;
-    kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
-    while ( node.dn.npredecessors > 0 ) {
-        flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
+  if (ignore) {
+    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
+                  "dependencies : loc=%p\n",
+                  gtid, loc_ref));
+    return;
+  }
+
+  kmp_depnode_t node;
+  __kmp_init_node(&node);
+
+  if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash,
+                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
+                        noalias_dep_list)) {
+    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking "
+                  "dependencies : loc=%p\n",
+                  gtid, loc_ref));
+    return;
+  }
+
+  int thread_finished = FALSE;
+  kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U);
+  while (node.dn.npredecessors > 0) {
+    flag.execute_tasks(thread, gtid, FALSE, &thread_finished,
 #if USE_ITT_BUILD
-                           NULL,
+                       NULL,
 #endif
-                           __kmp_task_stealing_constraint );
-    }
+                       __kmp_task_stealing_constraint);
+  }
 
-    KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) );
+  KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n",
+                gtid, loc_ref));
 }
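
And a sketch of user code that reaches this wait path, assuming the common lowering in which an undeferred task with depend clauses is emitted as a __kmpc_omp_wait_deps call before the body runs inline:

#include <stdio.h>

int main(void) {
  int x = 0;
#pragma omp parallel
#pragma omp single
  {
#pragma omp task depend(out : x) shared(x)
    x = 1;

/* Undeferred (if(0)) task with a depend clause: the encountering thread
   waits for the "out: x" producer to finish, then runs the body
   immediately. */
#pragma omp task if(0) depend(in : x) shared(x)
    printf("x = %d\n", x);
  }
  return 0;
}
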
 
 #endif /* OMP_40_ENABLED */
-

Modified: openmp/trunk/runtime/src/kmp_tasking.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_tasking.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_tasking.cpp (original)
+++ openmp/trunk/runtime/src/kmp_tasking.cpp Fri May 12 13:01:32 2017
@@ -16,8 +16,8 @@
 #include "kmp.h"
 #include "kmp_i18n.h"
 #include "kmp_itt.h"
-#include "kmp_wait_release.h"
 #include "kmp_stats.h"
+#include "kmp_wait_release.h"
 
 #if OMPT_SUPPORT
 #include "ompt-specific.h"
@@ -25,1608 +25,1625 @@
 
 #include "tsan_annotations.h"
 
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-
 /* forward declaration */
-static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
-static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
-static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
+static void __kmp_enable_tasking(kmp_task_team_t *task_team,
+                                 kmp_info_t *this_thr);
+static void __kmp_alloc_task_deque(kmp_info_t *thread,
+                                   kmp_thread_data_t *thread_data);
+static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
+                                           kmp_task_team_t *task_team);
 
 #ifdef OMP_45_ENABLED
-static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask );
+static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
 #endif
 
 #ifdef BUILD_TIED_TASK_STACK
 
-//---------------------------------------------------------------------------
 //  __kmp_trace_task_stack: print the tied tasks from the task stack in order
-//     from top to bottom
+//  from top to bottom
 //
 //  gtid: global thread identifier for thread containing stack
 //  thread_data: thread data for task team thread containing stack
 //  threshold: value above which the trace statement triggers
 //  location: string identifying call site of this function (for trace)
+static void __kmp_trace_task_stack(kmp_int32 gtid,
+                                   kmp_thread_data_t *thread_data,
+                                   int threshold, char *location) {
+  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+  kmp_taskdata_t **stack_top = task_stack->ts_top;
+  kmp_int32 entries = task_stack->ts_entries;
+  kmp_taskdata_t *tied_task;
+
+  KA_TRACE(
+      threshold,
+      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
+       "first_block = %p, stack_top = %p \n",
+       location, gtid, entries, task_stack->ts_first_block, stack_top));
 
-static void
-__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
-{
-    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
-    kmp_taskdata_t **stack_top = task_stack -> ts_top;
-    kmp_int32 entries = task_stack -> ts_entries;
-    kmp_taskdata_t *tied_task;
-
-    KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
-                         "first_block = %p, stack_top = %p \n",
-                         location, gtid, entries, task_stack->ts_first_block, stack_top ) );
-
-    KMP_DEBUG_ASSERT( stack_top != NULL );
-    KMP_DEBUG_ASSERT( entries > 0 );
-
-    while ( entries != 0 )
-    {
-        KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
-        // fix up ts_top if we need to pop from previous block
-        if ( entries & TASK_STACK_INDEX_MASK == 0 )
-        {
-            kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
+  KMP_DEBUG_ASSERT(stack_top != NULL);
+  KMP_DEBUG_ASSERT(entries > 0);
 
-            stack_block = stack_block -> sb_prev;
-            stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
-        }
+  while (entries != 0) {
+    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
+    // fix up ts_top if we need to pop from previous block
+    if (entries & TASK_STACK_INDEX_MASK == 0) {
+      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);
 
-        // finish bookkeeping
-        stack_top--;
-        entries--;
+      stack_block = stack_block->sb_prev;
+      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
+    }
 
-        tied_task = * stack_top;
+    // finish bookkeeping
+    stack_top--;
+    entries--;
 
-        KMP_DEBUG_ASSERT( tied_task != NULL );
-        KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
+    tied_task = *stack_top;
 
-        KA_TRACE(threshold, ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
-                             "stack_top=%p, tied_task=%p\n",
-                             location, gtid, entries, stack_top, tied_task ) );
-    }
-    KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
+    KMP_DEBUG_ASSERT(tied_task != NULL);
+    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
 
-    KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
-                         location, gtid ) );
+    KA_TRACE(threshold,
+             ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
+              "stack_top=%p, tied_task=%p\n",
+              location, gtid, entries, stack_top, tied_task));
+  }
+  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);
+
+  KA_TRACE(threshold,
+           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
+            location, gtid));
 }
 
-//---------------------------------------------------------------------------
 //  __kmp_init_task_stack: initialize the task stack for the first time
-//    after a thread_data structure is created.
-//    It should not be necessary to do this again (assuming the stack works).
+//  after a thread_data structure is created.
+//  It should not be necessary to do this again (assuming the stack works).
 //
 //  gtid: global thread identifier of calling thread
 //  thread_data: thread data for task team thread containing stack
-
-static void
-__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
-{
-    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
-    kmp_stack_block_t *first_block;
-
-    // set up the first block of the stack
-    first_block = & task_stack -> ts_first_block;
-    task_stack -> ts_top = (kmp_taskdata_t **) first_block;
-    memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
-
-    // initialize the stack to be empty
-    task_stack  -> ts_entries = TASK_STACK_EMPTY;
-    first_block -> sb_next = NULL;
-    first_block -> sb_prev = NULL;
+static void __kmp_init_task_stack(kmp_int32 gtid,
+                                  kmp_thread_data_t *thread_data) {
+  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+  kmp_stack_block_t *first_block;
+
+  // set up the first block of the stack
+  first_block = &task_stack->ts_first_block;
+  task_stack->ts_top = (kmp_taskdata_t **)first_block;
+  memset((void *)first_block, '\0',
+         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
+
+  // initialize the stack to be empty
+  task_stack->ts_entries = TASK_STACK_EMPTY;
+  first_block->sb_next = NULL;
+  first_block->sb_prev = NULL;
 }
 
-
-//---------------------------------------------------------------------------
 //  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
 //
 //  gtid: global thread identifier for calling thread
 //  thread_data: thread info for thread containing stack
-
-static void
-__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
-{
-    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
-    kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
-
-    KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
-    // free from the second block of the stack
-    while ( stack_block != NULL ) {
-        kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
-
-        stack_block -> sb_next = NULL;
-        stack_block -> sb_prev = NULL;
-        if (stack_block != & task_stack -> ts_first_block) {
-            __kmp_thread_free( thread, stack_block );  // free the block, if not the first
-        }
-        stack_block = next_block;
-    }
-    // initialize the stack to be empty
-    task_stack -> ts_entries = 0;
-    task_stack -> ts_top = NULL;
+static void __kmp_free_task_stack(kmp_int32 gtid,
+                                  kmp_thread_data_t *thread_data) {
+  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
+
+  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
+  // free from the second block of the stack
+  while (stack_block != NULL) {
+    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;
+
+    stack_block->sb_next = NULL;
+    stack_block->sb_prev = NULL;
+    if (stack_block != &task_stack->ts_first_block) {
+      __kmp_thread_free(thread,
+                        stack_block); // free the block, if not the first
+    }
+    stack_block = next_block;
+  }
+  // initialize the stack to be empty
+  task_stack->ts_entries = 0;
+  task_stack->ts_top = NULL;
 }
 
-
-//---------------------------------------------------------------------------
 //  __kmp_push_task_stack: Push the tied task onto the task stack.
 //     Grow the stack if necessary by allocating another block.
 //
 //  gtid: global thread identifier for calling thread
 //  thread: thread info for thread containing stack
 //  tied_task: the task to push on the stack
-
-static void
-__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
-{
-    // GEH - need to consider what to do if tt_threads_data not allocated yet
-    kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
-                                        tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
-    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
-
-    if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
-        return;  // Don't push anything on stack if team or team tasks are serialized
-    }
-
-    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
-    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
-
-    KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
-                  gtid, thread, tied_task ) );
-    // Store entry
-    * (task_stack -> ts_top) = tied_task;
-
-    // Do bookkeeping for next push
-    task_stack -> ts_top++;
-    task_stack -> ts_entries++;
-
-    if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
-    {
-        // Find beginning of this task block
-        kmp_stack_block_t *stack_block =
-             (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
-
-        // Check if we already have a block
-        if ( stack_block -> sb_next != NULL )
-        {    // reset ts_top to beginning of next block
-            task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
-        }
-        else
-        {   // Alloc new block and link it up
-            kmp_stack_block_t *new_block = (kmp_stack_block_t *)
-              __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
-
-            task_stack -> ts_top  = & new_block -> sb_block[0];
-            stack_block -> sb_next = new_block;
-            new_block  -> sb_prev = stack_block;
-            new_block  -> sb_next = NULL;
-
-            KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
-                          gtid, tied_task, new_block ) );
-        }
-    }
-    KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
+static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
+                                  kmp_taskdata_t *tied_task) {
+  // GEH - need to consider what to do if tt_threads_data not allocated yet
+  kmp_thread_data_t *thread_data =
+      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
+  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+
+  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
+    return; // Don't push anything on stack if team or team tasks are serialized
+  }
+
+  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
+  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
+
+  KA_TRACE(20,
+           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
+            gtid, thread, tied_task));
+  // Store entry
+  *(task_stack->ts_top) = tied_task;
+
+  // Do bookkeeping for next push
+  task_stack->ts_top++;
+  task_stack->ts_entries++;
+
+  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
+    // Find beginning of this task block
+    kmp_stack_block_t *stack_block =
+        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);
+
+    // Check if we already have a block
+    if (stack_block->sb_next !=
+        NULL) { // reset ts_top to beginning of next block
+      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
+    } else { // Alloc new block and link it up
+      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
+          thread, sizeof(kmp_stack_block_t));
+
+      task_stack->ts_top = &new_block->sb_block[0];
+      stack_block->sb_next = new_block;
+      new_block->sb_prev = stack_block;
+      new_block->sb_next = NULL;
+
+      KA_TRACE(
+          30,
+          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
+           gtid, tied_task, new_block));
+    }
+  }
+  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
+                tied_task));
 }
 
-//---------------------------------------------------------------------------
 //  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
-//     the task, just check to make sure it matches the ending task passed in.
+//  the task, just check to make sure it matches the ending task passed in.
 //
 //  gtid: global thread identifier for the calling thread
 //  thread: thread info structure containing stack
 //  tied_task: the task popped off the stack
 //  ending_task: the task that is ending (should match popped task)
+static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
+                                 kmp_taskdata_t *ending_task) {
+  // GEH - need to consider what to do if tt_threads_data not allocated yet
+  kmp_thread_data_t *thread_data =
+      &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)];
+  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
+  kmp_taskdata_t *tied_task;
 
-static void
-__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
-{
-    // GEH - need to consider what to do if tt_threads_data not allocated yet
-    kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
-    kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
-    kmp_taskdata_t *tied_task;
-
-    if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
-        return;  // Don't pop anything from stack if team or team tasks are serialized
-    }
+  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
+    // Don't pop anything from stack if team or team tasks are serialized
+    return;
+  }
 
-    KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
-    KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
+  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
+  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);
 
-    KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
+  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
+                thread));
 
-    // fix up ts_top if we need to pop from previous block
-    if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
-    {
-        kmp_stack_block_t *stack_block =
-           (kmp_stack_block_t *) (task_stack -> ts_top) ;
+  // fix up ts_top if we need to pop from previous block
+  if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) {
+    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);
 
-        stack_block = stack_block -> sb_prev;
-        task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
-    }
+    stack_block = stack_block->sb_prev;
+    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
+  }
 
-    // finish bookkeeping
-    task_stack -> ts_top--;
-    task_stack -> ts_entries--;
+  // finish bookkeeping
+  task_stack->ts_top--;
+  task_stack->ts_entries--;
 
-    tied_task = * (task_stack -> ts_top );
+  tied_task = *(task_stack->ts_top);
 
-    KMP_DEBUG_ASSERT( tied_task != NULL );
-    KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
-    KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly
+  KMP_DEBUG_ASSERT(tied_task != NULL);
+  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
+  KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly
 
-    KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
-    return;
+  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
+                tied_task));
+  return;
 }
 #endif /* BUILD_TIED_TASK_STACK */
 
-//---------------------------------------------------
 //  __kmp_push_task: Add a task to the thread's deque
-
-static kmp_int32
-__kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
-{
-    kmp_info_t *        thread = __kmp_threads[ gtid ];
-    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
-    kmp_task_team_t *   task_team = thread->th.th_task_team;
-    kmp_int32           tid = __kmp_tid_from_gtid( gtid );
-    kmp_thread_data_t * thread_data;
-
-    KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
-
-    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
-        // untied task needs to increment counter so that the task structure is not freed prematurely
-        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
-        KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
-                       gtid, counter, taskdata ) );
-    }
-
-    // The first check avoids building task_team thread data if serialized
-    if ( taskdata->td_flags.task_serial ) {
-        KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
-                       gtid, taskdata ) );
-        return TASK_NOT_PUSHED;
-    }
-
-    // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    if ( ! KMP_TASKING_ENABLED(task_team) ) {
-         __kmp_enable_tasking( task_team, thread );
-    }
-    KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
-    KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
-
-    // Find tasking deque specific to encountering thread
-    thread_data = & task_team -> tt.tt_threads_data[ tid ];
-
-    // No lock needed since only owner can allocate
-    if (thread_data -> td.td_deque == NULL ) {
-        __kmp_alloc_task_deque( thread, thread_data );
-    }
-
-    // Check if deque is full
-    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
-    {
-        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
-                       gtid, taskdata ) );
-        return TASK_NOT_PUSHED;
-    }
-
-    // Lock the deque for the task push operation
-    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
-#if OMP_45_ENABLED
-    // Need to recheck as we can get a proxy task from a thread outside of OpenMP
-    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
-    {
-        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
-        KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n",
-                       gtid, taskdata ) );
-        return TASK_NOT_PUSHED;
-    }
+static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_task_team_t *task_team = thread->th.th_task_team;
+  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
+  kmp_thread_data_t *thread_data;
+
+  KA_TRACE(20,
+           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));
+
+  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+    // untied task needs to increment counter so that the task structure is not
+    // freed prematurely
+    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
+    KA_TRACE(
+        20,
+        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
+         gtid, counter, taskdata));
+  }
+
+  // The first check avoids building task_team thread data if serialized
+  if (taskdata->td_flags.task_serial) {
+    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
+                  "TASK_NOT_PUSHED for task %p\n",
+                  gtid, taskdata));
+    return TASK_NOT_PUSHED;
+  }
+
+  // Now that serialized tasks have returned, we can assume that we are not in
+  // immediate exec mode
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+  if (!KMP_TASKING_ENABLED(task_team)) {
+    __kmp_enable_tasking(task_team, thread);
+  }
+  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
+  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);
+
+  // Find tasking deque specific to encountering thread
+  thread_data = &task_team->tt.tt_threads_data[tid];
+
+  // No lock needed since only owner can allocate
+  if (thread_data->td.td_deque == NULL) {
+    __kmp_alloc_task_deque(thread, thread_data);
+  }
+
+  // Check if deque is full
+  if (TCR_4(thread_data->td.td_deque_ntasks) >=
+      TASK_DEQUE_SIZE(thread_data->td)) {
+    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
+                  "TASK_NOT_PUSHED for task %p\n",
+                  gtid, taskdata));
+    return TASK_NOT_PUSHED;
+  }
+
+  // Lock the deque for the task push operation
+  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+#if OMP_45_ENABLED
+  // Need to recheck as we can get a proxy task from a thread outside of OpenMP
+  if (TCR_4(thread_data->td.td_deque_ntasks) >=
+      TASK_DEQUE_SIZE(thread_data->td)) {
+    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+    KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning "
+                  "TASK_NOT_PUSHED for task %p\n",
+                  gtid, taskdata));
+    return TASK_NOT_PUSHED;
+  }
 #else
-    // Must have room since no thread can add tasks but calling thread
-    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) );
+  // Must have room since only the calling thread can add tasks
+  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
+                   TASK_DEQUE_SIZE(thread_data->td));
 #endif
 
-    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
-    // Wrap index.
-    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
-    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);             // Adjust task count
+  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
+      taskdata; // Push taskdata
+  // Wrap index.
+  thread_data->td.td_deque_tail =
+      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
+  TCW_4(thread_data->td.td_deque_ntasks,
+        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
 
-    KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
-          "task=%p ntasks=%d head=%u tail=%u\n",
-          gtid, taskdata, thread_data->td.td_deque_ntasks,
-          thread_data->td.td_deque_head, thread_data->td.td_deque_tail) );
+  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
+                "task=%p ntasks=%d head=%u tail=%u\n",
+                gtid, taskdata, thread_data->td.td_deque_ntasks,
+                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
 
-    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
+  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 
-    return TASK_SUCCESSFULLY_PUSHED;
+  return TASK_SUCCESSFULLY_PUSHED;
 }
 
-
-//-----------------------------------------------------------------------------------------
-// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
+// __kmp_pop_current_task_from_thread: set up current task from called thread
+// when team ends
+//
 // this_thr: thread structure to set current_task in.
-
-void
-__kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
-{
-    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
-                   "curtask_parent=%p\n",
-                   0, this_thr, this_thr -> th.th_current_task,
-                   this_thr -> th.th_current_task -> td_parent ) );
-
-    this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
-
-    KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
-                   "curtask_parent=%p\n",
-                   0, this_thr, this_thr -> th.th_current_task,
-                   this_thr -> th.th_current_task -> td_parent ) );
+void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
+  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
+                "this_thread=%p, curtask=%p, "
+                "curtask_parent=%p\n",
+                0, this_thr, this_thr->th.th_current_task,
+                this_thr->th.th_current_task->td_parent));
+
+  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;
+
+  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
+                "this_thread=%p, curtask=%p, "
+                "curtask_parent=%p\n",
+                0, this_thr, this_thr->th.th_current_task,
+                this_thr->th.th_current_task->td_parent));
 }
 
-
-//---------------------------------------------------------------------------------------
-// __kmp_push_current_task_to_thread: set up current task in called thread for a new team
+// __kmp_push_current_task_to_thread: set up current task in called thread for a
+// new team
+//
 // this_thr: thread structure to set up
 // team: team for implicit task data
 // tid: thread within team to set up
-
-void
-__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
-{
-    // current task of the thread is a parent of the new just created implicit tasks of new team
-    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
-                    "parent_task=%p\n",
-                    tid, this_thr, this_thr->th.th_current_task,
-                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
-
-    KMP_DEBUG_ASSERT (this_thr != NULL);
-
-    if( tid == 0 ) {
-        if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
-            team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
-            this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
-        }
-    } else {
-        team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
-        this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
-    }
-
-    KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
-                    "parent_task=%p\n",
-                    tid, this_thr, this_thr->th.th_current_task,
-                    team->t.t_implicit_task_taskdata[tid].td_parent ) );
+void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
+                                       int tid) {
+  // current task of the thread is a parent of the newly created implicit
+  // tasks of the new team
+  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
+                "curtask=%p "
+                "parent_task=%p\n",
+                tid, this_thr, this_thr->th.th_current_task,
+                team->t.t_implicit_task_taskdata[tid].td_parent));
+
+  KMP_DEBUG_ASSERT(this_thr != NULL);
+
+  if (tid == 0) {
+    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
+      team->t.t_implicit_task_taskdata[0].td_parent =
+          this_thr->th.th_current_task;
+      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
+    }
+  } else {
+    team->t.t_implicit_task_taskdata[tid].td_parent =
+        team->t.t_implicit_task_taskdata[0].td_parent;
+    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
+  }
+
+  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
+                "curtask=%p "
+                "parent_task=%p\n",
+                tid, this_thr, this_thr->th.th_current_task,
+                team->t.t_implicit_task_taskdata[tid].td_parent));
 }
 
-
-//----------------------------------------------------------------------
 // __kmp_task_start: bookkeeping for a task starting execution
+//
 // GTID: global thread id of calling thread
 // task: task starting execution
 // current_task: task suspending
+static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
+                             kmp_taskdata_t *current_task) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_info_t *thread = __kmp_threads[gtid];
+
+  KA_TRACE(10,
+           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
+            gtid, taskdata, current_task));
+
+  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+
+  // mark currently executing task as suspended
+  // TODO: GEH - make sure root team implicit task is initialized properly.
+  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
+  current_task->td_flags.executing = 0;
 
-static void
-__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
-{
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-    kmp_info_t * thread = __kmp_threads[ gtid ];
-
-    KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
-                  gtid, taskdata, current_task) );
-
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
-
-    // mark currently executing task as suspended
-    // TODO: GEH - make sure root team implicit task is initialized properly.
-    // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
-    current_task -> td_flags.executing = 0;
-
-    // Add task to stack if tied
+// Add task to stack if tied
 #ifdef BUILD_TIED_TASK_STACK
-    if ( taskdata -> td_flags.tiedness == TASK_TIED )
-    {
-        __kmp_push_task_stack( gtid, thread, taskdata );
-    }
+  if (taskdata->td_flags.tiedness == TASK_TIED) {
+    __kmp_push_task_stack(gtid, thread, taskdata);
+  }
 #endif /* BUILD_TIED_TASK_STACK */
 
-    // mark starting task as executing and as current task
-    thread -> th.th_current_task = taskdata;
+  // mark starting task as executing and as current task
+  thread->th.th_current_task = taskdata;
 
-    KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
-    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED );
-    taskdata -> td_flags.started = 1;
-    taskdata -> td_flags.executing = 1;
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
-    // GEH TODO: shouldn't we pass some sort of location identifier here?
-    // APT: yes, we will pass location here.
-    // need to store current thread state (in a thread or taskdata structure)
-    // before setting work_state, otherwise wrong state is set after end of task
+  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
+                   taskdata->td_flags.tiedness == TASK_UNTIED);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
+                   taskdata->td_flags.tiedness == TASK_UNTIED);
+  taskdata->td_flags.started = 1;
+  taskdata->td_flags.executing = 1;
+  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
+  // GEH TODO: shouldn't we pass some sort of location identifier here?
+  // APT: yes, we will pass location here.
+  // need to store current thread state (in a thread or taskdata structure)
+  // before setting work_state, otherwise wrong state is set after end of task
 
-    KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
-                  gtid, taskdata ) );
+  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
 
 #if OMPT_SUPPORT
-    if (ompt_enabled &&
-        ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
-        kmp_taskdata_t *parent = taskdata->td_parent;
-        ompt_callbacks.ompt_callback(ompt_event_task_begin)(
-            parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
-            parent ? &(parent->ompt_task_info.frame) : NULL,
-            taskdata->ompt_task_info.task_id,
-            taskdata->ompt_task_info.function);
-    }
+  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) {
+    kmp_taskdata_t *parent = taskdata->td_parent;
+    ompt_callbacks.ompt_callback(ompt_event_task_begin)(
+        parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
+        parent ? &(parent->ompt_task_info.frame) : NULL,
+        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function);
+  }
 #endif
 #if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
-    /* OMPT emit all dependences if requested by the tool */
-    if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
-        ompt_callbacks.ompt_callback(ompt_event_task_dependences))
-	{
-        ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
-            taskdata->ompt_task_info.task_id,
-            taskdata->ompt_task_info.deps,
-            taskdata->ompt_task_info.ndeps
-        );
-		/* We can now free the allocated memory for the dependencies */
-		KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps);
-        taskdata->ompt_task_info.deps = NULL;
-        taskdata->ompt_task_info.ndeps = 0;
-    }
+  /* OMPT emit all dependences if requested by the tool */
+  if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 &&
+      ompt_callbacks.ompt_callback(ompt_event_task_dependences)) {
+    ompt_callbacks.ompt_callback(ompt_event_task_dependences)(
+        taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps,
+        taskdata->ompt_task_info.ndeps);
+    /* We can now free the allocated memory for the dependencies */
+    KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps);
+    taskdata->ompt_task_info.deps = NULL;
+    taskdata->ompt_task_info.ndeps = 0;
+  }
 #endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
 
-    return;
+  return;
 }
 
-
-//----------------------------------------------------------------------
-// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
+// __kmpc_omp_task_begin_if0: report that a given serialized task has started
+// execution
+//
 // loc_ref: source location information; points to beginning of task block.
 // gtid: global thread number.
 // task: task thunk for the started task.
+void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
+                               kmp_task_t *task) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
+
+  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
+                "current_task=%p\n",
+                gtid, loc_ref, taskdata, current_task));
+
+  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+    // untied task needs to increment counter so that the task structure is not
+    // freed prematurely
+    kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
+    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
+                  "incremented for task %p\n",
+                  gtid, counter, taskdata));
+  }
+
+  taskdata->td_flags.task_serial =
+      1; // Execute this task immediately, not deferred.
+  __kmp_task_start(gtid, task, current_task);
 
-void
-__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
-{
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-
-    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
-                  gtid, loc_ref, taskdata, current_task ) );
+  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
+                loc_ref, taskdata));
 
-    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
-        // untied task needs to increment counter so that the task structure is not freed prematurely
-        kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count);
-        KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n",
-                       gtid, counter, taskdata ) );
-    }
-
-    taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
-    __kmp_task_start( gtid, task, current_task );
-
-    KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
-                  gtid, loc_ref, taskdata ) );
-
-    return;
+  return;
 }
 
 #ifdef TASK_UNUSED
-//----------------------------------------------------------------------
 // __kmpc_omp_task_begin: report that a given task has started execution
 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
+void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
+  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
 
-void
-__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
-{
-    kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-
-    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
-                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
-
-    __kmp_task_start( gtid, task, current_task );
-
-    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
-                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
-    return;
+  KA_TRACE(
+      10,
+      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
+       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));
+
+  __kmp_task_start(gtid, task, current_task);
+
+  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
+                loc_ref, KMP_TASK_TO_TASKDATA(task)));
+  return;
 }
 #endif // TASK_UNUSED
 
-
-//-------------------------------------------------------------------------------------
 // __kmp_free_task: free the current task space and the space for shareds
+//
 // gtid: Global thread ID of calling thread
 // taskdata: task to free
 // thread: thread data structure of caller
+static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
+                            kmp_info_t *thread) {
+  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
+                taskdata));
+
+  // Check to make sure all flags and counters have the correct values
+  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 ||
+                   taskdata->td_flags.task_serial == 1);
+  KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0);
+
+  taskdata->td_flags.freed = 1;
+  ANNOTATE_HAPPENS_BEFORE(taskdata);
+// deallocate the taskdata and shared variable blocks associated with this task
+#if USE_FAST_MEMORY
+  __kmp_fast_free(thread, taskdata);
+#else /* ! USE_FAST_MEMORY */
+  __kmp_thread_free(thread, taskdata);
+#endif
 
-static void
-__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
-{
-    KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
-                  gtid, taskdata) );
-
-    // Check to make sure all flags and counters have the correct values
-    KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
-    KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
-    KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
-    KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
-    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0  || taskdata->td_flags.task_serial == 1);
-    KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
-
-    taskdata->td_flags.freed = 1;
-    ANNOTATE_HAPPENS_BEFORE(taskdata);
-    // deallocate the taskdata and shared variable blocks associated with this task
-    #if USE_FAST_MEMORY
-        __kmp_fast_free( thread, taskdata );
-    #else /* ! USE_FAST_MEMORY */
-        __kmp_thread_free( thread, taskdata );
-    #endif
-
-    KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
-                  gtid, taskdata) );
+  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
 }
 
-//-------------------------------------------------------------------------------------
-// __kmp_free_task_and_ancestors: free the current task and ancestors without children
+// __kmp_free_task_and_ancestors: free the current task and ancestors without
+// children
 //
 // gtid: Global thread ID of calling thread
 // taskdata: task to free
 // thread: thread data structure of caller
-
-static void
-__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
-{
-#if OMP_45_ENABLED
-    // Proxy tasks must always be allowed to free their parents
-    // because they can be run in background even in serial mode.
-    kmp_int32 team_serial = ( taskdata->td_flags.team_serial ||
-        taskdata->td_flags.tasking_ser ) && !taskdata->td_flags.proxy;
+static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
+                                          kmp_taskdata_t *taskdata,
+                                          kmp_info_t *thread) {
+#if OMP_45_ENABLED
+  // Proxy tasks must always be allowed to free their parents
+  // because they can be run in background even in serial mode.
+  kmp_int32 team_serial =
+      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
+      !taskdata->td_flags.proxy;
 #else
-    kmp_int32 team_serial = taskdata->td_flags.team_serial ||
-        taskdata->td_flags.tasking_ser;
+  kmp_int32 team_serial =
+      taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser;
 #endif
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
 
-    kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
-    KMP_DEBUG_ASSERT( children >= 0 );
+  kmp_int32 children =
+      KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_allocated_child_tasks)) -
+      1;
+  KMP_DEBUG_ASSERT(children >= 0);
 
-    // Now, go up the ancestor tree to see if any ancestors can now be freed.
-    while ( children == 0 )
-    {
-        kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
-
-        KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
-                      "and freeing itself\n", gtid, taskdata) );
+  // Now, go up the ancestor tree to see if any ancestors can now be freed.
+  while (children == 0) {
+    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;
 
-        // --- Deallocate my ancestor task ---
-        __kmp_free_task( gtid, taskdata, thread );
+    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
+                  "and freeing itself\n",
+                  gtid, taskdata));
 
-        taskdata = parent_taskdata;
+    // --- Deallocate my ancestor task ---
+    __kmp_free_task(gtid, taskdata, thread);
 
-        // Stop checking ancestors at implicit task
-        // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
-        if ( team_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
-            return;
+    taskdata = parent_taskdata;
 
-        // Predecrement simulated by "- 1" calculation
-        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
-        KMP_DEBUG_ASSERT( children >= 0 );
-    }
+    // Stop checking ancestors at implicit task instead of walking up ancestor
+    // tree to avoid premature deallocation of ancestors.
+    if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT)
+      return;
 
-    KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
-                  "not freeing it yet\n", gtid, taskdata, children) );
+    // Predecrement simulated by "- 1" calculation
+    children = KMP_TEST_THEN_DEC32(
+                   (kmp_int32 *)(&taskdata->td_allocated_child_tasks)) -
+               1;
+    KMP_DEBUG_ASSERT(children >= 0);
+  }
+
+  KA_TRACE(
+      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
+           "not freeing it yet\n",
+           gtid, taskdata, children));
 }
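
For illustration, the walk above can be read as a reference count on allocated
children that cascades frees up the parent chain.  A simplified, single-threaded
C sketch (node, release and alloc_children are placeholder names; plain
decrements stand in for KMP_TEST_THEN_DEC32, and the implicit root task is
modeled as the node with no parent, which is never freed here):

  #include <stdlib.h>

  struct node {
    struct node *parent;
    int alloc_children; /* counts this task plus its still-allocated children */
  };

  static void release(struct node *n) {
    /* Predecrement simulated by "- 1", as in the code above. */
    int remaining = --n->alloc_children;
    /* Walk up the ancestor tree, freeing every task whose count hits zero;
       the parentless (implicit) root is left alone, matching the early
       return above. */
    while (remaining == 0 && n->parent != NULL) {
      struct node *parent = n->parent;
      free(n);
      n = parent;
      remaining = --n->alloc_children;
    }
  }

  int main(void) {
    struct node root = {NULL, 1};   /* stands in for the implicit task */
    struct node *child = malloc(sizeof *child);
    if (!child)
      return 1;
    child->parent = &root;
    child->alloc_children = 1;      /* "start at one": counts itself */
    release(child);                 /* frees child, decrements root to 0 */
    return 0;
  }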
 
-//---------------------------------------------------------------------
 // __kmp_task_finish: bookkeeping to do when a task finishes execution
+//
 // gtid: global thread ID for calling thread
 // task: task to be finished
 // resumed_task: task to be resumed.  (may be NULL if task is serialized)
-
-static void
-__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
-{
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-    kmp_info_t * thread = __kmp_threads[ gtid ];
-    kmp_task_team_t * task_team = thread->th.th_task_team; // might be NULL for serial teams...
-    kmp_int32 children = 0;
+static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
+                              kmp_taskdata_t *resumed_task) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_task_team_t *task_team =
+      thread->th.th_task_team; // might be NULL for serial teams...
+  kmp_int32 children = 0;
 
 #if OMPT_SUPPORT
-    if (ompt_enabled &&
-        ompt_callbacks.ompt_callback(ompt_event_task_end)) {
-        kmp_taskdata_t *parent = taskdata->td_parent;
-        ompt_callbacks.ompt_callback(ompt_event_task_end)(
-            taskdata->ompt_task_info.task_id);
-    }
+  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) {
+    kmp_taskdata_t *parent = taskdata->td_parent;
+    ompt_callbacks.ompt_callback(ompt_event_task_end)(
+        taskdata->ompt_task_info.task_id);
+  }
 #endif
 
-    KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
-                  gtid, taskdata, resumed_task) );
+  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
+                "task %p\n",
+                gtid, taskdata, resumed_task));
 
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
+  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
 
-    // Pop task from stack if tied
+// Pop task from stack if tied
 #ifdef BUILD_TIED_TASK_STACK
-    if ( taskdata -> td_flags.tiedness == TASK_TIED )
-    {
-        __kmp_pop_task_stack( gtid, thread, taskdata );
-    }
+  if (taskdata->td_flags.tiedness == TASK_TIED) {
+    __kmp_pop_task_stack(gtid, thread, taskdata);
+  }
 #endif /* BUILD_TIED_TASK_STACK */
 
-    if ( taskdata->td_flags.tiedness == TASK_UNTIED ) {
-        // untied task needs to check the counter so that the task structure is not freed prematurely
-        kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
-        KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
-                       gtid, counter, taskdata ) );
-        if ( counter > 0 ) {
-            // untied task is not done, to be continued possibly by other thread, do not free it now
-            if (resumed_task == NULL) {
-                KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial );
-                resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
-            }
-            thread->th.th_current_task = resumed_task; // restore current_task
-            resumed_task->td_flags.executing = 1;  // resume previous task
-            KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n",
-                          gtid, taskdata, resumed_task) );
-            return;
-        }
-    }
-
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
-    taskdata -> td_flags.complete = 1;   // mark the task as completed
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
-    // Only need to keep track of count if team parallel and tasking not serialized
-    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
-        // Predecrement simulated by "- 1" calculation
-        children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
-        KMP_DEBUG_ASSERT( children >= 0 );
+  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+    // untied task needs to check the counter so that the task structure is not
+    // freed prematurely
+    kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1;
+    KA_TRACE(
+        20,
+        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
+         gtid, counter, taskdata));
+    if (counter > 0) {
+      // untied task is not done, to be continued possibly by other thread, do
+      // not free it now
+      if (resumed_task == NULL) {
+        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
+        resumed_task = taskdata->td_parent; // In a serialized task, the resumed
+        // task is the parent
+      }
+      thread->th.th_current_task = resumed_task; // restore current_task
+      resumed_task->td_flags.executing = 1; // resume previous task
+      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
+                    "resuming task %p\n",
+                    gtid, taskdata, resumed_task));
+      return;
+    }
+  }
+
+  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+  taskdata->td_flags.complete = 1; // mark the task as completed
+  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
+  // Only need to keep track of count if team parallel and tasking not
+  // serialized
+  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+    // Predecrement simulated by "- 1" calculation
+    children =
+        KMP_TEST_THEN_DEC32(
+            (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) -
+        1;
+    KMP_DEBUG_ASSERT(children >= 0);
 #if OMP_40_ENABLED
-        if ( taskdata->td_taskgroup )
-            KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
+    if (taskdata->td_taskgroup)
+      KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
 #if OMP_45_ENABLED
-    }
-    // if we found proxy tasks there could exist a dependency chain
-    // with the proxy task as origin
-    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) {
-#endif
-        __kmp_release_deps(gtid,taskdata);
-#endif
-    }
-
-    // td_flags.executing  must be marked as 0 after __kmp_release_deps has been called
-    // Othertwise, if a task is executed immediately from the release_deps code
-    // the flag will be reset to 1 again by this same function
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
-    taskdata -> td_flags.executing = 0;  // suspend the finishing task
-
-    KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
-                  gtid, taskdata, children) );
+  }
+  // if we found proxy tasks there could exist a dependency chain
+  // with the proxy task as origin
+  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) ||
+      (task_team && task_team->tt.tt_found_proxy_tasks)) {
+#endif
+    __kmp_release_deps(gtid, taskdata);
+#endif
+  }
+
+  // td_flags.executing must be marked as 0 after __kmp_release_deps has been
+  // called. Otherwise, if a task is executed immediately from the release_deps
+  // code, the flag will be reset to 1 again by this same function
+  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
+  taskdata->td_flags.executing = 0; // suspend the finishing task
+
+  KA_TRACE(
+      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
+           gtid, taskdata, children));
 
 #if OMP_40_ENABLED
-    /* If the tasks' destructor thunk flag has been set, we need to invoke the
-       destructor thunk that has been generated by the compiler.
-       The code is placed here, since at this point other tasks might have been released
-       hence overlapping the destructor invokations with some other work in the
-       released tasks.  The OpenMP spec is not specific on when the destructors are
-       invoked, so we should be free to choose.
-    */
-    if (taskdata->td_flags.destructors_thunk) {
-        kmp_routine_entry_t destr_thunk = task->data1.destructors;
-        KMP_ASSERT(destr_thunk);
-        destr_thunk(gtid, task);
-    }
+  /* If the task's destructor thunk flag has been set, we need to invoke the
+     destructor thunk that has been generated by the compiler. The code is
+     placed here, since at this point other tasks might have been released,
+     hence overlapping the destructor invocations with some other work in the
+     released tasks.  The OpenMP spec is not specific on when the destructors
+     are invoked, so we should be free to choose. */
+  if (taskdata->td_flags.destructors_thunk) {
+    kmp_routine_entry_t destr_thunk = task->data1.destructors;
+    KMP_ASSERT(destr_thunk);
+    destr_thunk(gtid, task);
+  }
 #endif // OMP_40_ENABLED
 
-    // bookkeeping for resuming task:
-    // GEH - note tasking_ser => task_serial
-    KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
-                       taskdata->td_flags.task_serial);
-    if ( taskdata->td_flags.task_serial )
-    {
-        if (resumed_task == NULL) {
-            resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
-        }
-        else
+  // bookkeeping for resuming task:
+  // GEH - note tasking_ser => task_serial
+  KMP_DEBUG_ASSERT(
+      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
+      taskdata->td_flags.task_serial);
+  if (taskdata->td_flags.task_serial) {
+    if (resumed_task == NULL) {
+      resumed_task = taskdata->td_parent; // In a serialized task, the resumed
+      // task is the parent
+    } else
 #if OMP_45_ENABLED
-             if ( !(task_team && task_team->tt.tt_found_proxy_tasks) )
+        if (!(task_team && task_team->tt.tt_found_proxy_tasks))
 #endif
-        {
-            // verify resumed task passed in points to parent
-            KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
-        }
-    }
-    else {
-        KMP_DEBUG_ASSERT( resumed_task != NULL );        // verify that resumed task is passed as arguemnt
+    {
+      // verify resumed task passed in points to parent
+      KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent);
     }
+  } else {
+    KMP_DEBUG_ASSERT(resumed_task !=
+                     NULL); // verify that resumed task is passed as argument
+  }
+
+  // Free this task and then ancestor tasks if they have no children.
+  // Restore th_current_task first as suggested by John:
+  // johnmc: if an asynchronous inquiry peers into the runtime system
+  // it doesn't see the freed task as the current task.
+  thread->th.th_current_task = resumed_task;
+  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
+
+  // TODO: GEH - make sure root team implicit task is initialized properly.
+  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
+  resumed_task->td_flags.executing = 1; // resume previous task
+
+  KA_TRACE(
+      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
+           gtid, taskdata, resumed_task));
 
-    // Free this task and then ancestor tasks if they have no children.
-    // Restore th_current_task first as suggested by John:
-    // johnmc: if an asynchronous inquiry peers into the runtime system
-    // it doesn't see the freed task as the current task.
-    thread->th.th_current_task = resumed_task;
-    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
-
-    // TODO: GEH - make sure root team implicit task is initialized properly.
-    // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
-    resumed_task->td_flags.executing = 1;  // resume previous task
-
-    KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
-                  gtid, taskdata, resumed_task) );
-
-    return;
+  return;
 }
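
The untied-count bookkeeping above (incremented when a part of an untied task
starts, decremented when it finishes, with the structure released only once the
count drains) is essentially an atomic reference count.  A minimal C11 sketch
of the same protocol, with illustrative names (task_desc, part_begin,
part_finish) rather than the runtime's:

  #include <stdatomic.h>
  #include <stdlib.h>

  typedef struct {
    atomic_int untied_count; /* cf. td_untied_count */
  } task_desc;

  /* A part of the untied task starts executing; cf. KMP_TEST_THEN_INC32. */
  static void part_begin(task_desc *t) {
    atomic_fetch_add(&t->untied_count, 1);
  }

  /* A part finishes; only the last finisher may release the descriptor. */
  static void part_finish(task_desc *t) {
    int counter = atomic_fetch_sub(&t->untied_count, 1) - 1;
    if (counter > 0)
      return; /* task not done yet; some other thread may resume it later */
    free(t);  /* last reference gone: the task structure can be reclaimed */
  }

  int main(void) {
    task_desc *t = malloc(sizeof *t);
    if (!t)
      return 1;
    atomic_init(&t->untied_count, 0);
    part_begin(t);
    part_finish(t); /* count drops to 0, descriptor freed */
    return 0;
  }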
 
-//---------------------------------------------------------------------
 // __kmpc_omp_task_complete_if0: report that a task has completed execution
+//
 // loc_ref: source location information; points to end of task block.
 // gtid: global thread number.
 // task: task thunk for the completed task.
-
-void
-__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
-{
-    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
-    __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume
-
-    KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
-    return;
+void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_task_t *task) {
+  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
+                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
+  // this routine will provide task to resume
+  __kmp_task_finish(gtid, task, NULL);
+
+  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
+                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
+  return;
 }
 
 #ifdef TASK_UNUSED
-//---------------------------------------------------------------------
 // __kmpc_omp_task_complete: report that a task has completed execution
 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
-
-void
-__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
-{
-    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-
-    __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume
-
-    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
-    return;
+void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
+                              kmp_task_t *task) {
+  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
+                loc_ref, KMP_TASK_TO_TASKDATA(task)));
+
+  __kmp_task_finish(gtid, task, NULL); // Not sure how to find task to resume
+
+  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
+                loc_ref, KMP_TASK_TO_TASKDATA(task)));
+  return;
 }
 #endif // TASK_UNUSED
 
-
 #if OMPT_SUPPORT
-//----------------------------------------------------------------------------------------------------
-// __kmp_task_init_ompt:
-//   Initialize OMPT fields maintained by a task. This will only be called after
-//   ompt_tool, so we already know whether ompt is enabled or not.
-
-static inline void
-__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function )
-{
-    if (ompt_enabled) {
-        task->ompt_task_info.task_id = __ompt_task_id_new(tid);
-        task->ompt_task_info.function = function;
-        task->ompt_task_info.frame.exit_runtime_frame = NULL;
-        task->ompt_task_info.frame.reenter_runtime_frame = NULL;
+// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will
+//  only be called after ompt_tool, so we already know whether ompt is enabled
+// or not.
+static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid,
+                                        void *function) {
+  if (ompt_enabled) {
+    task->ompt_task_info.task_id = __ompt_task_id_new(tid);
+    task->ompt_task_info.function = function;
+    task->ompt_task_info.frame.exit_runtime_frame = NULL;
+    task->ompt_task_info.frame.reenter_runtime_frame = NULL;
 #if OMP_40_ENABLED
-        task->ompt_task_info.ndeps = 0;
-        task->ompt_task_info.deps = NULL;
+    task->ompt_task_info.ndeps = 0;
+    task->ompt_task_info.deps = NULL;
 #endif /* OMP_40_ENABLED */
-    }
+  }
 }
 #endif
 
-
-//----------------------------------------------------------------------------------------------------
-// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
+// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit
+// task for a given thread
 //
 // loc_ref:  reference to source location of parallel region
 // this_thr:  thread data structure corresponding to implicit task
 // team: team for this_thr
 // tid: thread id of given thread within team
 // set_curr_task: TRUE if need to push current task to thread
-// NOTE: Routine does not set up the implicit task ICVS.  This is assumed to have already been done elsewhere.
+// NOTE: Routine does not set up the implicit task ICVs.  This is assumed to
+// have already been done elsewhere.
 // TODO: Get better loc_ref.  Value passed in may be NULL
-
-void
-__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
-{
-    kmp_taskdata_t * task   = & team->t.t_implicit_task_taskdata[ tid ];
-
-    KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
-                  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
-
-    task->td_task_id  = KMP_GEN_TASK_ID();
-    task->td_team     = team;
-//    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
-    task->td_ident    = loc_ref;
-    task->td_taskwait_ident   = NULL;
-    task->td_taskwait_counter = 0;
-    task->td_taskwait_thread  = 0;
-
-    task->td_flags.tiedness    = TASK_TIED;
-    task->td_flags.tasktype    = TASK_IMPLICIT;
-#if OMP_45_ENABLED
-    task->td_flags.proxy       = TASK_FULL;
-#endif
-
-    // All implicit tasks are executed immediately, not deferred
-    task->td_flags.task_serial = 1;
-    task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
-    task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
-
-    task->td_flags.started     = 1;
-    task->td_flags.executing   = 1;
-    task->td_flags.complete    = 0;
-    task->td_flags.freed       = 0;
+void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
+                              kmp_team_t *team, int tid, int set_curr_task) {
+  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];
+
+  KF_TRACE(
+      10,
+      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
+       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));
+
+  task->td_task_id = KMP_GEN_TASK_ID();
+  task->td_team = team;
+  //    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info
+  //    in debugger)
+  task->td_ident = loc_ref;
+  task->td_taskwait_ident = NULL;
+  task->td_taskwait_counter = 0;
+  task->td_taskwait_thread = 0;
+
+  task->td_flags.tiedness = TASK_TIED;
+  task->td_flags.tasktype = TASK_IMPLICIT;
+#if OMP_45_ENABLED
+  task->td_flags.proxy = TASK_FULL;
+#endif
+
+  // All implicit tasks are executed immediately, not deferred
+  task->td_flags.task_serial = 1;
+  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
+  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
+
+  task->td_flags.started = 1;
+  task->td_flags.executing = 1;
+  task->td_flags.complete = 0;
+  task->td_flags.freed = 0;
 
 #if OMP_40_ENABLED
-    task->td_depnode = NULL;
+  task->td_depnode = NULL;
 #endif
 
-    if (set_curr_task) {  // only do this initialization the first time a thread is created
-        task->td_incomplete_child_tasks = 0;
-        task->td_allocated_child_tasks  = 0; // Not used because do not need to deallocate implicit task
+  if (set_curr_task) { // only do this init first time thread is created
+    task->td_incomplete_child_tasks = 0;
+    task->td_allocated_child_tasks = 0; // Not used: don't need to
+// deallocate implicit task
 #if OMP_40_ENABLED
-        task->td_taskgroup = NULL;           // An implicit task does not have taskgroup
-        task->td_dephash = NULL;
+    task->td_taskgroup = NULL; // An implicit task does not have taskgroup
+    task->td_dephash = NULL;
 #endif
-        __kmp_push_current_task_to_thread( this_thr, team, tid );
-    } else {
-        KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
-        KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
-    }
+    __kmp_push_current_task_to_thread(this_thr, team, tid);
+  } else {
+    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
+    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
+  }
 
 #if OMPT_SUPPORT
-    __kmp_task_init_ompt(task, tid, NULL);
+  __kmp_task_init_ompt(task, tid, NULL);
 #endif
 
-    KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
-                  tid, team, task ) );
+  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
+                team, task));
 }
 
-
-//-----------------------------------------------------------------------------
-//// __kmp_finish_implicit_task: Release resources associated to implicit tasks
-//// at the end of parallel regions. Some resources are kept for reuse in the
-//// next parallel region.
-////
-//// thread:  thread data structure corresponding to implicit task
+// __kmp_finish_implicit_task: Release resources associated with implicit tasks
+// at the end of parallel regions. Some resources are kept for reuse in the next
+// parallel region.
 //
-void
-__kmp_finish_implicit_task(kmp_info_t *thread)
-{
-    kmp_taskdata_t *task = thread->th.th_current_task;
-    if (task->td_dephash)
-        __kmp_dephash_free_entries(thread, task->td_dephash);
+// thread:  thread data structure corresponding to implicit task
+void __kmp_finish_implicit_task(kmp_info_t *thread) {
+  kmp_taskdata_t *task = thread->th.th_current_task;
+  if (task->td_dephash)
+    __kmp_dephash_free_entries(thread, task->td_dephash);
 }
 
-
-//-----------------------------------------------------------------------------
-//// __kmp_free_implicit_task: Release resources associated to implicit tasks
-//// when these are destroyed regions
-////
-//// thread:  thread data structure corresponding to implicit task
+// __kmp_free_implicit_task: Release resources associated with implicit tasks
+// when these are destroyed
 //
-void
-__kmp_free_implicit_task(kmp_info_t *thread)
-{
-    kmp_taskdata_t *task = thread->th.th_current_task;
-    if (task->td_dephash)
-        __kmp_dephash_free(thread, task->td_dephash);
-    task->td_dephash = NULL;
+// thread:  thread data structure corresponding to implicit task
+void __kmp_free_implicit_task(kmp_info_t *thread) {
+  kmp_taskdata_t *task = thread->th.th_current_task;
+  if (task->td_dephash)
+    __kmp_dephash_free(thread, task->td_dephash);
+  task->td_dephash = NULL;
 }
 
-
-// Round up a size to a power of two specified by val
-// Used to insert padding between structures co-allocated using a single malloc() call
-static size_t
-__kmp_round_up_to_val( size_t size, size_t val ) {
-    if ( size & ( val - 1 ) ) {
-        size &= ~ ( val - 1 );
-        if ( size <= KMP_SIZE_T_MAX - val ) {
-            size += val;    // Round up if there is no overflow.
-        }; // if
+// Round up a size to a multiple of the power of two specified by val: Used to
+// insert padding between structures co-allocated using a single malloc() call
+static size_t __kmp_round_up_to_val(size_t size, size_t val) {
+  if (size & (val - 1)) {
+    size &= ~(val - 1);
+    if (size <= KMP_SIZE_T_MAX - val) {
+      size += val; // Round up if there is no overflow.
     }; // if
-    return size;
+  }; // if
+  return size;
 } // __kmp_round_up_to_val
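
For reference, the helper above rounds size up to the next multiple of the
power-of-two value val, with an overflow guard.  A standalone C restatement,
with KMP_SIZE_T_MAX written as (size_t)-1 and a hypothetical usage in main:

  #include <stdio.h>
  #include <stddef.h>

  static size_t round_up_to_val(size_t size, size_t val) {
    if (size & (val - 1)) {           /* not already a multiple of val */
      size &= ~(val - 1);             /* round down to the previous multiple */
      if (size <= (size_t)-1 - val)   /* only round up if it cannot overflow */
        size += val;
    }
    return size;
  }

  int main(void) {
    printf("%zu\n", round_up_to_val(13, 8)); /* prints 16 */
    return 0;
  }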
 
-
-//---------------------------------------------------------------------------------
 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
 //
 // loc_ref: source location information
 // gtid: global thread number.
-// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
-//        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
-// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
-// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
+// flags: include tiedness & task type (explicit vs. implicit) of the ''new''
+// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine.
+// sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including
+// private vars accessed in task.
+// sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed
+// in task.
 // task_entry: Pointer to task code entry point generated by compiler.
 // returns: a pointer to the allocated kmp_task_t structure (task).
-
-kmp_task_t *
-__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
-                  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
-                  kmp_routine_entry_t task_entry )
-{
-    kmp_task_t *task;
-    kmp_taskdata_t *taskdata;
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_team_t *team = thread->th.th_team;
-    kmp_taskdata_t *parent_task = thread->th.th_current_task;
-    size_t shareds_offset;
-
-    KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
-                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
-                  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
-                  sizeof_shareds, task_entry) );
-
-    if ( parent_task->td_flags.final ) {
-        if (flags->merged_if0) {
-        }
-        flags->final = 1;
-    }
-
-#if OMP_45_ENABLED
-    if ( flags->proxy == TASK_PROXY ) {
-        flags->tiedness = TASK_UNTIED;
-        flags->merged_if0 = 1;
-
-        /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */
-        if ( (thread->th.th_task_team) == NULL ) {
-            /* This should only happen if the team is serialized
-                setup a task team and propagate it to the thread
-            */
-            KMP_DEBUG_ASSERT(team->t.t_serialized);
-            KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid));
-            __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads
-            thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
-        }
-        kmp_task_team_t * task_team = thread->th.th_task_team;
-
-        /* tasking must be enabled now as the task might not be pushed */
-        if ( !KMP_TASKING_ENABLED( task_team ) ) {
-            KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
-            __kmp_enable_tasking( task_team, thread );
-            kmp_int32 tid = thread->th.th_info.ds.ds_tid;
-            kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
-            // No lock needed since only owner can allocate
-            if (thread_data -> td.td_deque == NULL ) {
-                __kmp_alloc_task_deque( thread, thread_data );
-            }
-        }
-
-        if ( task_team->tt.tt_found_proxy_tasks == FALSE )
-          TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE);
+kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+                             kmp_tasking_flags_t *flags,
+                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
+                             kmp_routine_entry_t task_entry) {
+  kmp_task_t *task;
+  kmp_taskdata_t *taskdata;
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_team_t *team = thread->th.th_team;
+  kmp_taskdata_t *parent_task = thread->th.th_current_task;
+  size_t shareds_offset;
+
+  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
+                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
+                sizeof_shareds, task_entry));
+
+  if (parent_task->td_flags.final) {
+    if (flags->merged_if0) {
+    }
+    flags->final = 1;
+  }
+
+#if OMP_45_ENABLED
+  if (flags->proxy == TASK_PROXY) {
+    flags->tiedness = TASK_UNTIED;
+    flags->merged_if0 = 1;
+
+    /* are we running in a sequential parallel or tskm_immediate_exec... we need
+       tasking support enabled */
+    if ((thread->th.th_task_team) == NULL) {
+      /* This should only happen if the team is serialized;
+          set up a task team and propagate it to the thread */
+      KMP_DEBUG_ASSERT(team->t.t_serialized);
+      KA_TRACE(30,
+               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
+                gtid));
+      __kmp_task_team_setup(
+          thread, team,
+          1); // 1 indicates setup the current team regardless of nthreads
+      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
+    }
+    kmp_task_team_t *task_team = thread->th.th_task_team;
+
+    /* tasking must be enabled now as the task might not be pushed */
+    if (!KMP_TASKING_ENABLED(task_team)) {
+      KA_TRACE(
+          30,
+          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
+      __kmp_enable_tasking(task_team, thread);
+      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
+      // No lock needed since only owner can allocate
+      if (thread_data->td.td_deque == NULL) {
+        __kmp_alloc_task_deque(thread, thread_data);
+      }
     }
-#endif
-
-    // Calculate shared structure offset including padding after kmp_task_t struct
-    // to align pointers in shared struct
-    shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
-    shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
 
-    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
-    KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
-                  gtid, shareds_offset) );
-    KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
-                  gtid, sizeof_shareds) );
+    if (task_team->tt.tt_found_proxy_tasks == FALSE)
+      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
+  }
+#endif
+
+  // Calculate shared structure offset including padding after kmp_task_t struct
+  // to align pointers in shared struct
+  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
+  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));
+
+  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
+  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
+                shareds_offset));
+  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
+                sizeof_shareds));
+
+// Avoid double allocation here by combining shareds with taskdata
+#if USE_FAST_MEMORY
+  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
+                                                               sizeof_shareds);
+#else /* ! USE_FAST_MEMORY */
+  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
+                                                               sizeof_shareds);
+#endif /* USE_FAST_MEMORY */
+  ANNOTATE_HAPPENS_AFTER(taskdata);
 
-    // Avoid double allocation here by combining shareds with taskdata
-    #if USE_FAST_MEMORY
-    taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
-    #else /* ! USE_FAST_MEMORY */
-    taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
-    #endif /* USE_FAST_MEMORY */
-    ANNOTATE_HAPPENS_AFTER(taskdata);
+  task = KMP_TASKDATA_TO_TASK(taskdata);
 
-    task                      = KMP_TASKDATA_TO_TASK(taskdata);
-
-    // Make sure task & taskdata are aligned appropriately
+// Make sure task & taskdata are aligned appropriately
 #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
-    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
-    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
+  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
+  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
 #else
-    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
-    KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
+  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
+  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
 #endif
-    if (sizeof_shareds > 0) {
-        // Avoid double allocation here by combining shareds with taskdata
-        task->shareds         = & ((char *) taskdata)[ shareds_offset ];
-        // Make sure shareds struct is aligned to pointer size
-        KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
-    } else {
-        task->shareds         = NULL;
-    }
-    task->routine             = task_entry;
-    task->part_id             = 0;      // AC: Always start with 0 part id
-
-    taskdata->td_task_id      = KMP_GEN_TASK_ID();
-    taskdata->td_team         = team;
-    taskdata->td_alloc_thread = thread;
-    taskdata->td_parent       = parent_task;
-    taskdata->td_level        = parent_task->td_level + 1; // increment nesting level
-    taskdata->td_untied_count = 0;
-    taskdata->td_ident        = loc_ref;
-    taskdata->td_taskwait_ident   = NULL;
-    taskdata->td_taskwait_counter = 0;
-    taskdata->td_taskwait_thread  = 0;
-    KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
-#if OMP_45_ENABLED
-    // avoid copying icvs for proxy tasks
-    if ( flags->proxy == TASK_FULL )
-#endif
-       copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
-
-    taskdata->td_flags.tiedness    = flags->tiedness;
-    taskdata->td_flags.final       = flags->final;
-    taskdata->td_flags.merged_if0  = flags->merged_if0;
+  if (sizeof_shareds > 0) {
+    // Avoid double allocation here by combining shareds with taskdata
+    task->shareds = &((char *)taskdata)[shareds_offset];
+    // Make sure shareds struct is aligned to pointer size
+    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
+                     0);
+  } else {
+    task->shareds = NULL;
+  }
+  task->routine = task_entry;
+  task->part_id = 0; // AC: Always start with 0 part id
+
+  taskdata->td_task_id = KMP_GEN_TASK_ID();
+  taskdata->td_team = team;
+  taskdata->td_alloc_thread = thread;
+  taskdata->td_parent = parent_task;
+  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
+  taskdata->td_untied_count = 0;
+  taskdata->td_ident = loc_ref;
+  taskdata->td_taskwait_ident = NULL;
+  taskdata->td_taskwait_counter = 0;
+  taskdata->td_taskwait_thread = 0;
+  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
+#if OMP_45_ENABLED
+  // avoid copying icvs for proxy tasks
+  if (flags->proxy == TASK_FULL)
+#endif
+    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);
+
+  taskdata->td_flags.tiedness = flags->tiedness;
+  taskdata->td_flags.final = flags->final;
+  taskdata->td_flags.merged_if0 = flags->merged_if0;
 #if OMP_40_ENABLED
-    taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
+  taskdata->td_flags.destructors_thunk = flags->destructors_thunk;
 #endif // OMP_40_ENABLED
 #if OMP_45_ENABLED
-    taskdata->td_flags.proxy           = flags->proxy;
-    taskdata->td_task_team         = thread->th.th_task_team;
-    taskdata->td_size_alloc        = shareds_offset + sizeof_shareds;
+  taskdata->td_flags.proxy = flags->proxy;
+  taskdata->td_task_team = thread->th.th_task_team;
+  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
 #endif
-    taskdata->td_flags.tasktype    = TASK_EXPLICIT;
+  taskdata->td_flags.tasktype = TASK_EXPLICIT;
+
+  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
+  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
 
-    // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
-    taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
+  // GEH - TODO: fix this to copy parent task's value of team_serial flag
+  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;
 
-    // GEH - TODO: fix this to copy parent task's value of team_serial flag
-    taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
+  // GEH - Note we serialize the task if the team is serialized to make sure
+  // implicit parallel region tasks are not left until program termination to
+  // execute. Also, it helps locality to execute immediately.
 
-    // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
-    //       tasks are not left until program termination to execute.  Also, it helps locality to execute
-    //       immediately.
-    taskdata->td_flags.task_serial = ( parent_task->td_flags.final
-      || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
+  taskdata->td_flags.task_serial =
+      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
+       taskdata->td_flags.tasking_ser);
 
-    taskdata->td_flags.started     = 0;
-    taskdata->td_flags.executing   = 0;
-    taskdata->td_flags.complete    = 0;
-    taskdata->td_flags.freed       = 0;
+  taskdata->td_flags.started = 0;
+  taskdata->td_flags.executing = 0;
+  taskdata->td_flags.complete = 0;
+  taskdata->td_flags.freed = 0;
 
-    taskdata->td_flags.native      = flags->native;
+  taskdata->td_flags.native = flags->native;
 
-    taskdata->td_incomplete_child_tasks = 0;
-    taskdata->td_allocated_child_tasks  = 1; // start at one because counts current task and children
+  taskdata->td_incomplete_child_tasks = 0;
+  taskdata->td_allocated_child_tasks = 1; // start at one because counts current
+// task and children
 #if OMP_40_ENABLED
-    taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
-    taskdata->td_dephash = NULL;
-    taskdata->td_depnode = NULL;
+  taskdata->td_taskgroup =
+      parent_task->td_taskgroup; // task inherits taskgroup from the parent task
+  taskdata->td_dephash = NULL;
+  taskdata->td_depnode = NULL;
 #endif
 
-    // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task
+// Only need to keep track of child task counts if team parallel and tasking not
+// serialized or if it is a proxy task
 #if OMP_45_ENABLED
-    if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
+  if (flags->proxy == TASK_PROXY ||
+      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
 #else
-    if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) )
+  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser))
 #endif
-    {
-        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
+  {
+    KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks));
 #if OMP_40_ENABLED
-        if ( parent_task->td_taskgroup )
-            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
+    if (parent_task->td_taskgroup)
+      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
 #endif
-        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
-        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
-            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
-        }
-    }
-
-    KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
-                  gtid, taskdata, taskdata->td_parent) );
-    ANNOTATE_HAPPENS_BEFORE(task);
+    // Only need to keep track of allocated child tasks for explicit tasks since
+    // implicit not deallocated
+    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
+      KMP_TEST_THEN_INC32(
+          (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks));
+    }
+  }
+
+  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
+                gtid, taskdata, taskdata->td_parent));
+  ANNOTATE_HAPPENS_BEFORE(task);
 
 #if OMPT_SUPPORT
-    __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry);
+  __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry);
 #endif
 
-    return task;
+  return task;
 }
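
The allocation above packs everything into one block: the kmp_taskdata_t
header, then the kmp_task_t (plus its private variables), then padding up to
pointer alignment, then the shareds area.  A simplified, self-contained C
sketch of that layout (taskdata_t, alloc_task_block and round_up are
placeholder names, and the real code additionally checks the double/_Quad
alignment asserted above):

  #include <stdlib.h>

  typedef struct { long header[8]; } taskdata_t; /* stand-in for kmp_taskdata_t */

  static size_t round_up(size_t size, size_t val) {
    return (size + val - 1) & ~(val - 1); /* val must be a power of two */
  }

  /* Returns the task part of the block; *shareds_out points into the same
     block (or is NULL when sizeof_shareds == 0). */
  static void *alloc_task_block(size_t sizeof_task, size_t sizeof_shareds,
                                void **shareds_out) {
    size_t shareds_offset =
        round_up(sizeof(taskdata_t) + sizeof_task, sizeof(void *));
    char *block = malloc(shareds_offset + sizeof_shareds); /* one malloc, not two */
    if (block == NULL)
      return NULL;
    *shareds_out =
        sizeof_shareds > 0 ? (void *)(block + shareds_offset) : NULL;
    return block + sizeof(taskdata_t); /* cf. KMP_TASKDATA_TO_TASK */
  }

  int main(void) {
    void *shareds;
    void *task = alloc_task_block(64, 2 * sizeof(void *), &shareds);
    if (task != NULL) /* whole block is freed via the taskdata base address */
      free((char *)task - sizeof(taskdata_t));
    return 0;
  }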
 
+kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
+                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
+                                  size_t sizeof_shareds,
+                                  kmp_routine_entry_t task_entry) {
+  kmp_task_t *retval;
+  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
 
-kmp_task_t *
-__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
-                       size_t sizeof_kmp_task_t, size_t sizeof_shareds,
-                       kmp_routine_entry_t task_entry )
-{
-    kmp_task_t *retval;
-    kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
-
-    input_flags->native = FALSE;
-    // __kmp_task_alloc() sets up all other runtime flags
+  input_flags->native = FALSE;
+// __kmp_task_alloc() sets up all other runtime flags
 
 #if OMP_45_ENABLED
-    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
-                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
-                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
-                  input_flags->proxy ? "proxy" : "",
-                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
+  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) "
+                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
+                input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t,
+                sizeof_shareds, task_entry));
 #else
-    KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
-                  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
-                  gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
-                  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
+  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
+                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
+                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
+                sizeof_kmp_task_t, sizeof_shareds, task_entry));
 #endif
 
-    retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
-                               sizeof_shareds, task_entry );
+  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
+                            sizeof_shareds, task_entry);
 
-    KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
+  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));
 
-    return retval;
+  return retval;
 }
 
-//-----------------------------------------------------------
 //  __kmp_invoke_task: invoke the specified task
 //
 // gtid: global thread ID of caller
 // task: the task to invoke
 // current_task: the task to resume after task invocation
-
-static void
-__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
-{
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-    kmp_uint64 cur_time;
+static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
+                              kmp_taskdata_t *current_task) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_uint64 cur_time;
 #if OMP_40_ENABLED
-    int discard = 0 /* false */;
+  int discard = 0 /* false */;
 #endif
-    KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
-                  gtid, taskdata, current_task) );
-    KMP_DEBUG_ASSERT(task);
-#if OMP_45_ENABLED
-    if ( taskdata->td_flags.proxy == TASK_PROXY &&
-         taskdata->td_flags.complete == 1)
-         {
-            // This is a proxy task that was already completed but it needs to run
-            // its bottom-half finish
-            KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
-                  gtid, taskdata) );
+  KA_TRACE(
+      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
+           gtid, taskdata, current_task));
+  KMP_DEBUG_ASSERT(task);
+#if OMP_45_ENABLED
+  if (taskdata->td_flags.proxy == TASK_PROXY &&
+      taskdata->td_flags.complete == 1) {
+    // This is a proxy task that was already completed but it needs to run
+    // its bottom-half finish
+    KA_TRACE(
+        30,
+        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
+         gtid, taskdata));
+
+    __kmp_bottom_half_finish_proxy(gtid, task);
+
+    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
+                  "proxy task %p, resuming task %p\n",
+                  gtid, taskdata, current_task));
 
-            __kmp_bottom_half_finish_proxy(gtid,task);
-
-            KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) );
-
-            return;
-         }
+    return;
+  }
 #endif
 
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
-    if(__kmp_forkjoin_frames_mode == 3) {
-        // Get the current time stamp to measure task execution time to correct barrier imbalance time
-        cur_time = __itt_get_timestamp();
-    }
+  if (__kmp_forkjoin_frames_mode == 3) {
+    // Get the current time stamp to measure task execution time to correct
+    // barrier imbalance time
+    cur_time = __itt_get_timestamp();
+  }
 #endif
 
 #if OMP_45_ENABLED
-    // Proxy tasks are not handled by the runtime
-    if ( taskdata->td_flags.proxy != TASK_PROXY ) {
+  // Proxy tasks are not handled by the runtime
+  if (taskdata->td_flags.proxy != TASK_PROXY) {
 #endif
-      ANNOTATE_HAPPENS_AFTER(task);
-      __kmp_task_start( gtid, task, current_task );
+    ANNOTATE_HAPPENS_AFTER(task);
+    __kmp_task_start(gtid, task, current_task);
 #if OMP_45_ENABLED
-    }
+  }
 #endif
 
 #if OMPT_SUPPORT
-    ompt_thread_info_t oldInfo;
-    kmp_info_t * thread;
-    if (ompt_enabled) {
-        // Store the threads states and restore them after the task
-        thread = __kmp_threads[ gtid ];
-        oldInfo = thread->th.ompt_thread_info;
-        thread->th.ompt_thread_info.wait_id = 0;
-        thread->th.ompt_thread_info.state = ompt_state_work_parallel;
-        taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0);
-    }
+  ompt_thread_info_t oldInfo;
+  kmp_info_t *thread;
+  if (ompt_enabled) {
+    // Store the threads states and restore them after the task
+    thread = __kmp_threads[gtid];
+    oldInfo = thread->th.ompt_thread_info;
+    thread->th.ompt_thread_info.wait_id = 0;
+    thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+    taskdata->ompt_task_info.frame.exit_runtime_frame =
+        __builtin_frame_address(0);
+  }
 #endif
 
 #if OMP_40_ENABLED
-    // TODO: cancel tasks if the parallel region has also been cancelled
-    // TODO: check if this sequence can be hoisted above __kmp_task_start
-    // if cancellation has been enabled for this run ...
-    if (__kmp_omp_cancellation) {
-        kmp_info_t *this_thr = __kmp_threads [ gtid ];
-        kmp_team_t * this_team = this_thr->th.th_team;
-        kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
-        if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) {
-            KMP_COUNT_BLOCK(TASK_cancelled);
-            // this task belongs to a task group and we need to cancel it
-            discard = 1 /* true */;
-        }
-    }
-
-    //
-    // Invoke the task routine and pass in relevant data.
-    // Thunks generated by gcc take a different argument list.
-    //
-    if (!discard) {
+  // TODO: cancel tasks if the parallel region has also been cancelled
+  // TODO: check if this sequence can be hoisted above __kmp_task_start
+  // if cancellation has been enabled for this run ...
+  if (__kmp_omp_cancellation) {
+    kmp_info_t *this_thr = __kmp_threads[gtid];
+    kmp_team_t *this_team = this_thr->th.th_team;
+    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
+    if ((taskgroup && taskgroup->cancel_request) ||
+        (this_team->t.t_cancel_request == cancel_parallel)) {
+      KMP_COUNT_BLOCK(TASK_cancelled);
+      // this task belongs to a task group and we need to cancel it
+      discard = 1 /* true */;
+    }
+  }
+
+  // Invoke the task routine and pass in relevant data.
+  // Thunks generated by gcc take a different argument list.
+  if (!discard) {
 #if KMP_STATS_ENABLED
-        KMP_COUNT_BLOCK(TASK_executed);
-        switch(KMP_GET_THREAD_STATE()) {
-         case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break;
-         case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break;
-         case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break;
-         case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break;
-         case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break;
-         default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break;
-        }
+    KMP_COUNT_BLOCK(TASK_executed);
+    switch (KMP_GET_THREAD_STATE()) {
+    case FORK_JOIN_BARRIER:
+      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
+      break;
+    case PLAIN_BARRIER:
+      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
+      break;
+    case TASKYIELD:
+      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
+      break;
+    case TASKWAIT:
+      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
+      break;
+    case TASKGROUP:
+      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
+      break;
+    default:
+      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
+      break;
+    }
 #endif // KMP_STATS_ENABLED
 #endif // OMP_40_ENABLED
 
 #if OMPT_SUPPORT && OMPT_TRACE
-        /* let OMPT know that we're about to run this task */
-        if (ompt_enabled &&
-             ompt_callbacks.ompt_callback(ompt_event_task_switch))
-        {
-          ompt_callbacks.ompt_callback(ompt_event_task_switch)(
-            current_task->ompt_task_info.task_id,
-            taskdata->ompt_task_info.task_id);
-        }
+    /* let OMPT know that we're about to run this task */
+    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
+      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
+          current_task->ompt_task_info.task_id,
+          taskdata->ompt_task_info.task_id);
+    }
 #endif
 
 #ifdef KMP_GOMP_COMPAT
-        if (taskdata->td_flags.native) {
-            ((void (*)(void *))(*(task->routine)))(task->shareds);
-        }
-        else
+    if (taskdata->td_flags.native) {
+      ((void (*)(void *))(*(task->routine)))(task->shareds);
+    } else
 #endif /* KMP_GOMP_COMPAT */
-        {
-            (*(task->routine))(gtid, task);
-        }
-        KMP_POP_PARTITIONED_TIMER();
+    {
+      (*(task->routine))(gtid, task);
+    }
+    KMP_POP_PARTITIONED_TIMER();
 
 #if OMPT_SUPPORT && OMPT_TRACE
-        /* let OMPT know that we're returning to the callee task */
-        if (ompt_enabled &&
-             ompt_callbacks.ompt_callback(ompt_event_task_switch))
-        {
-          ompt_callbacks.ompt_callback(ompt_event_task_switch)(
-            taskdata->ompt_task_info.task_id,
-            current_task->ompt_task_info.task_id);
-        }
+    /* let OMPT know that we're returning to the callee task */
+    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) {
+      ompt_callbacks.ompt_callback(ompt_event_task_switch)(
+          taskdata->ompt_task_info.task_id,
+          current_task->ompt_task_info.task_id);
+    }
 #endif
 
 #if OMP_40_ENABLED
-    }
+  }
 #endif // OMP_40_ENABLED
 
-
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
-        thread->th.ompt_thread_info = oldInfo;
-        taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
-    }
+  if (ompt_enabled) {
+    thread->th.ompt_thread_info = oldInfo;
+    taskdata->ompt_task_info.frame.exit_runtime_frame = NULL;
+  }
 #endif
 
 #if OMP_45_ENABLED
-    // Proxy tasks are not handled by the runtime
-    if ( taskdata->td_flags.proxy != TASK_PROXY ) {
+  // Proxy tasks are not handled by the runtime
+  if (taskdata->td_flags.proxy != TASK_PROXY) {
 #endif
-      ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
-      __kmp_task_finish( gtid, task, current_task );
+    ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);
+    __kmp_task_finish(gtid, task, current_task);
 #if OMP_45_ENABLED
-    }
+  }
 #endif
 
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
-    // Barrier imbalance - correct arrive time after the task finished
-    if(__kmp_forkjoin_frames_mode == 3) {
-        kmp_info_t *this_thr = __kmp_threads [ gtid ];
-        if(this_thr->th.th_bar_arrive_time) {
-            this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
-        }
-    }
-#endif
-    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
-                  gtid, taskdata, current_task) );
-    return;
+  // Barrier imbalance - correct arrive time after the task finished
+  if (__kmp_forkjoin_frames_mode == 3) {
+    kmp_info_t *this_thr = __kmp_threads[gtid];
+    if (this_thr->th.th_bar_arrive_time) {
+      this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
+    }
+  }
+#endif
+  KA_TRACE(
+      30,
+      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
+       gtid, taskdata, current_task));
+  return;
 }
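
For readers unfamiliar with the two calling conventions dispatched above, here is a
minimal, self-contained sketch of the dispatch in __kmp_invoke_task. The type and
function names (toy_task, kmp_entry, gomp_entry, invoke) are illustrative only and
are not part of the runtime: a native (GOMP-compatible) thunk receives just the
shareds pointer, while a KMP task routine receives the global thread id and the
task descriptor.

    #include <cstdio>

    struct toy_task; // stand-in for the user-visible part of kmp_task_t
    typedef int (*kmp_style_entry)(int gtid, toy_task *task);
    typedef void (*gomp_style_entry)(void *shareds);

    struct toy_task {
      void *shareds;           // block of shared variables captured for the task
      kmp_style_entry routine; // stored in one convention, cast when native
      bool native;             // set for GOMP-style thunks (td_flags.native)
    };

    static int kmp_entry(int gtid, toy_task *task) {
      std::printf("kmp-style entry: gtid=%d shareds=%p\n", gtid, task->shareds);
      return 0;
    }

    static void gomp_entry(void *shareds) {
      std::printf("gomp-style entry: shareds=%p\n", shareds);
    }

    // Mirrors the dispatch above: a native thunk receives only the shareds
    // pointer; a KMP routine receives (gtid, task).
    static void invoke(int gtid, toy_task *task) {
      if (task->native)
        ((gomp_style_entry)task->routine)(task->shareds);
      else
        (*(task->routine))(gtid, task);
    }

    int main() {
      int shared_value = 42;
      toy_task a = {&shared_value, kmp_entry, false};
      toy_task b = {&shared_value, (kmp_style_entry)&gomp_entry, true};
      invoke(0, &a);
      invoke(0, &b);
    }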
 
-//-----------------------------------------------------------------------
 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
 //
 // loc_ref: location of original task pragma (ignored)
 // gtid: Global Thread ID of encountering thread
 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
 // Returns:
-//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-
-kmp_int32
-__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
-{
-    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
-
-    KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, new_taskdata ) );
-
-    /* Should we execute the new task or queue it?   For now, let's just always try to
-       queue it.  If the queue fills up, then we'll execute it.  */
-
-    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
-    {                                                           // Execute this task immediately
-        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-        new_taskdata->td_flags.task_serial = 1;
-        __kmp_invoke_task( gtid, new_task, current_task );
-    }
+//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+//    be resumed later.
+//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+//    resumed later.
+kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
+                                kmp_task_t *new_task) {
+  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+
+  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
+                loc_ref, new_taskdata));
+
+  /* Should we execute the new task or queue it? For now, let's just always try
+     to queue it.  If the queue fills up, then we'll execute it.  */
+
+  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
+  { // Execute this task immediately
+    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
+    new_taskdata->td_flags.task_serial = 1;
+    __kmp_invoke_task(gtid, new_task, current_task);
+  }
+
+  KA_TRACE(
+      10,
+      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
+       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
+       gtid, loc_ref, new_taskdata));
 
-    KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
-                  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
-                  new_taskdata ) );
-
-    ANNOTATE_HAPPENS_BEFORE(new_task);
-    return TASK_CURRENT_NOT_QUEUED;
+  ANNOTATE_HAPPENS_BEFORE(new_task);
+  return TASK_CURRENT_NOT_QUEUED;
 }
 
-//---------------------------------------------------------------------
 // __kmp_omp_task: Schedule a non-thread-switchable task for execution
+//
 // gtid: Global Thread ID of encountering thread
-// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
-// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized
-// returns:
-//
-//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-kmp_int32
-__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate )
-{
-    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+// new_task: non-thread-switchable task thunk allocated by
+// __kmp_omp_task_alloc()
+// serialize_immediate: if TRUE then if the task is executed immediately its
+// execution will be serialized
+// Returns:
+//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+//    be resumed later.
+//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+//    resumed later.
+kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
+                         bool serialize_immediate) {
+  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
-        new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
-            __builtin_frame_address(1);
-    }
+  if (ompt_enabled) {
+    new_taskdata->ompt_task_info.frame.reenter_runtime_frame =
+        __builtin_frame_address(1);
+  }
 #endif
 
-    /* Should we execute the new task or queue it?   For now, let's just always try to
-       queue it.  If the queue fills up, then we'll execute it.  */
+/* Should we execute the new task or queue it? For now, let's just always try to
+   queue it.  If the queue fills up, then we'll execute it.  */
 #if OMP_45_ENABLED
-    if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
+      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
 #else
-    if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
+  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
 #endif
-    {                                                           // Execute this task immediately
-        kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
-        if ( serialize_immediate )
-          new_taskdata -> td_flags.task_serial = 1;
-        __kmp_invoke_task( gtid, new_task, current_task );
-    }
+  { // Execute this task immediately
+    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
+    if (serialize_immediate)
+      new_taskdata->td_flags.task_serial = 1;
+    __kmp_invoke_task(gtid, new_task, current_task);
+  }
 
 #if OMPT_SUPPORT
-    if (ompt_enabled) {
-        new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
-    }
+  if (ompt_enabled) {
+    new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
+  }
 #endif
 
-    ANNOTATE_HAPPENS_BEFORE(new_task);
-    return TASK_CURRENT_NOT_QUEUED;
+  ANNOTATE_HAPPENS_BEFORE(new_task);
+  return TASK_CURRENT_NOT_QUEUED;
 }
 
-//---------------------------------------------------------------------
-// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from
-// the parent thread only!
+// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a
+// non-thread-switchable task from the parent thread only!
+//
 // loc_ref: location of original task pragma (ignored)
 // gtid: Global Thread ID of encountering thread
-// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
-// returns:
-//
-//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
-//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
-
-kmp_int32
-__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
-{
-    kmp_int32 res;
-    KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
+// new_task: non-thread-switchable task thunk allocated by
+// __kmp_omp_task_alloc()
+// Returns:
+//    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to
+//    be resumed later.
+//    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be
+//    resumed later.
+kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
+                          kmp_task_t *new_task) {
+  kmp_int32 res;
+  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);
 
 #if KMP_DEBUG
-    kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
+  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
 #endif
-    KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
-                  gtid, loc_ref, new_taskdata ) );
+  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
+                new_taskdata));
 
-    res =  __kmp_omp_task(gtid,new_task,true);
+  res = __kmp_omp_task(gtid, new_task, true);
 
-    KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
-                  gtid, loc_ref, new_taskdata ) );
-    return res;
+  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
+                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
+                gtid, loc_ref, new_taskdata));
+  return res;
 }
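
For context, a user-level fragment of the kind that reaches __kmpc_omp_task; with
clang, each '#pragma omp task' is lowered to a task-allocation call followed by
__kmpc_omp_task. The exact lowering varies by compiler version and is not part of
this change.

    // Build with an OpenMP-enabled compiler, e.g. clang++ -fopenmp tasks.cpp
    #include <cstdio>
    #include <omp.h>

    int main() {
      #pragma omp parallel
      #pragma omp single
      {
        for (int i = 0; i < 4; ++i) {
          #pragma omp task firstprivate(i) // deferred unless the deque is full
          std::printf("task %d ran on thread %d\n", i, omp_get_thread_num());
        }
      } // tasks are guaranteed complete at the barrier after single
    }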
 
-//-------------------------------------------------------------------------------------
-// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
+// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are
+// complete
+kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
+  kmp_taskdata_t *taskdata;
+  kmp_info_t *thread;
+  int thread_finished = FALSE;
+  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
 
-kmp_int32
-__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
-{
-    kmp_taskdata_t * taskdata;
-    kmp_info_t * thread;
-    int thread_finished = FALSE;
-    KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);
-
-    KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) );
+  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
 
-    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
-        // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
+  if (__kmp_tasking_mode != tskm_immediate_exec) {
+    // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+    // begin wait?
 
-        thread = __kmp_threads[ gtid ];
-        taskdata = thread -> th.th_current_task;
+    thread = __kmp_threads[gtid];
+    taskdata = thread->th.th_current_task;
 
 #if OMPT_SUPPORT && OMPT_TRACE
-        ompt_task_id_t my_task_id;
-        ompt_parallel_id_t my_parallel_id;
+    ompt_task_id_t my_task_id;
+    ompt_parallel_id_t my_parallel_id;
 
-        if (ompt_enabled) {
-            kmp_team_t *team = thread->th.th_team;
-            my_task_id = taskdata->ompt_task_info.task_id;
-            my_parallel_id = team->t.ompt_team_info.parallel_id;
-
-            taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1);
-            if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
-                ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(
-                                my_parallel_id, my_task_id);
-            }
-        }
+    if (ompt_enabled) {
+      kmp_team_t *team = thread->th.th_team;
+      my_task_id = taskdata->ompt_task_info.task_id;
+      my_parallel_id = team->t.ompt_team_info.parallel_id;
+
+      taskdata->ompt_task_info.frame.reenter_runtime_frame =
+          __builtin_frame_address(1);
+      if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) {
+        ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id,
+                                                                my_task_id);
+      }
+    }
 #endif
 
-        // Debugger: The taskwait is active. Store location and thread encountered the taskwait.
+// Debugger: The taskwait is active. Store the location and the thread that
+// encountered the taskwait.
 #if USE_ITT_BUILD
-        // Note: These values are used by ITT events as well.
+// Note: These values are used by ITT events as well.
 #endif /* USE_ITT_BUILD */
-        taskdata->td_taskwait_counter += 1;
-        taskdata->td_taskwait_ident    = loc_ref;
-        taskdata->td_taskwait_thread   = gtid + 1;
+    taskdata->td_taskwait_counter += 1;
+    taskdata->td_taskwait_ident = loc_ref;
+    taskdata->td_taskwait_thread = gtid + 1;
 
 #if USE_ITT_BUILD
-        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
+    if (itt_sync_obj != NULL)
+      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
 
-        bool must_wait = ! taskdata->td_flags.team_serial && ! taskdata->td_flags.final;
+    bool must_wait =
+        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;
 
 #if OMP_45_ENABLED
-        must_wait = must_wait || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks);
+    must_wait = must_wait || (thread->th.th_task_team != NULL &&
+                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
 #endif
-        if (must_wait)
-        {
-            kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
-            while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
-                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
-                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
-            }
-        }
+    if (must_wait) {
+      kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U);
+      while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) {
+        flag.execute_tasks(thread, gtid, FALSE,
+                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
+                           __kmp_task_stealing_constraint);
+      }
+    }
 #if USE_ITT_BUILD
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+    if (itt_sync_obj != NULL)
+      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
 
-        // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
-        // Debugger:  The taskwait is completed. Location remains, but thread is negated.
-        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
+    // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+    // end of wait?
+    // Debugger:  The taskwait is completed. Location remains, but thread is
+    // negated.
+    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
 
 #if OMPT_SUPPORT && OMPT_TRACE
-        if (ompt_enabled) {
-            if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
-                ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(
-                                my_parallel_id, my_task_id);
-            }
-            taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
-        }
-#endif
-        ANNOTATE_HAPPENS_AFTER(taskdata);
+    if (ompt_enabled) {
+      if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) {
+        ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id,
+                                                              my_task_id);
+      }
+      taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL;
     }
+#endif
+    ANNOTATE_HAPPENS_AFTER(taskdata);
+  }
 
-    KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
-                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
+  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
+                "returning TASK_CURRENT_NOT_QUEUED\n",
+                gtid, taskdata));
 
-    return TASK_CURRENT_NOT_QUEUED;
+  return TASK_CURRENT_NOT_QUEUED;
 }
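
A small user-level illustration of the semantics implemented above (not part of
this change): taskwait blocks only until the direct children of the current task
complete, which is what the spin on td_incomplete_child_tasks enforces.

    #include <cstdio>

    int main() {
      int x = 0, y = 0;
      #pragma omp parallel
      #pragma omp single
      {
        #pragma omp task shared(x)
        x = 1;
        #pragma omp task shared(y)
        y = 2;
        #pragma omp taskwait // reaches __kmpc_omp_taskwait
        std::printf("x=%d y=%d\n", x, y); // both children are complete here
      }
    }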
 
-
-//-------------------------------------------------
 // __kmpc_omp_taskyield: switch to a different task
-
-kmp_int32
-__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
-{
-    kmp_taskdata_t * taskdata;
-    kmp_info_t * thread;
-    int thread_finished = FALSE;
-
-    KMP_COUNT_BLOCK(OMP_TASKYIELD);
-    KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
-
-    KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
-                  gtid, loc_ref, end_part) );
-
-    if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) {
-        // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
-
-        thread = __kmp_threads[ gtid ];
-        taskdata = thread -> th.th_current_task;
-        // Should we model this as a task wait or not?
-        // Debugger: The taskwait is active. Store location and thread encountered the taskwait.
+kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
+  kmp_taskdata_t *taskdata;
+  kmp_info_t *thread;
+  int thread_finished = FALSE;
+
+  KMP_COUNT_BLOCK(OMP_TASKYIELD);
+  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);
+
+  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
+                gtid, loc_ref, end_part));
+
+  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
+    // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+    // begin wait?
+
+    thread = __kmp_threads[gtid];
+    taskdata = thread->th.th_current_task;
+// Should we model this as a task wait or not?
+// Debugger: The taskwait is active. Store the location and the thread that
+// encountered the taskwait.
 #if USE_ITT_BUILD
-        // Note: These values are used by ITT events as well.
+// Note: These values are used by ITT events as well.
 #endif /* USE_ITT_BUILD */
-        taskdata->td_taskwait_counter += 1;
-        taskdata->td_taskwait_ident    = loc_ref;
-        taskdata->td_taskwait_thread   = gtid + 1;
+    taskdata->td_taskwait_counter += 1;
+    taskdata->td_taskwait_ident = loc_ref;
+    taskdata->td_taskwait_thread = gtid + 1;
 
 #if USE_ITT_BUILD
-        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
+    if (itt_sync_obj != NULL)
+      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
-        if ( ! taskdata->td_flags.team_serial ) {
-            kmp_task_team_t * task_team = thread->th.th_task_team;
-            if (task_team != NULL) {
-                if (KMP_TASKING_ENABLED(task_team)) {
-                    __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished
-                                            USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
-                }
-            }
+    if (!taskdata->td_flags.team_serial) {
+      kmp_task_team_t *task_team = thread->th.th_task_team;
+      if (task_team != NULL) {
+        if (KMP_TASKING_ENABLED(task_team)) {
+          __kmp_execute_tasks_32(
+              thread, gtid, NULL, FALSE,
+              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
+              __kmp_task_stealing_constraint);
         }
+      }
+    }
 #if USE_ITT_BUILD
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+    if (itt_sync_obj != NULL)
+      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
 
-        // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
-        // Debugger:  The taskwait is completed. Location remains, but thread is negated.
-        taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
-    }
+    // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark
+    // end of wait?
+    // Debugger:  The taskwait is completed. Location remains, but thread is
+    // negated.
+    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
+  }
+
+  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
+                "returning TASK_CURRENT_NOT_QUEUED\n",
+                gtid, taskdata));
 
-    KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
-                  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
-
-    return TASK_CURRENT_NOT_QUEUED;
+  return TASK_CURRENT_NOT_QUEUED;
 }
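
A user-level illustration of where taskyield helps, adapted from the usual OpenMP
examples (not part of this change): taskyield is only a hint, and, as the code
above shows, the thread simply continues if tasking is not initialized or the team
is serialized.

    #include <omp.h>

    int main() {
      omp_lock_t lock;
      omp_init_lock(&lock);
      #pragma omp parallel
      #pragma omp single
      for (int i = 0; i < 8; ++i) {
        #pragma omp task
        {
          while (!omp_test_lock(&lock)) {
            #pragma omp taskyield // give the thread a chance to run other tasks
          }
          // ... critical work while holding the lock ...
          omp_unset_lock(&lock);
        }
      }
      omp_destroy_lock(&lock);
    }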
 
 // TODO: change to OMP_50_ENABLED, need to change build tools for this to work
 #if OMP_45_ENABLED
-//
 // Task Reduction implementation
-//
 
 typedef struct kmp_task_red_flags {
-    unsigned  lazy_priv : 1;  // hint: (1) use lazy allocation (big objects)
-    unsigned  reserved31 : 31;
+  unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects)
+  unsigned reserved31 : 31;
 } kmp_task_red_flags_t;
 
 // internal structure for reduction data item related info
 typedef struct kmp_task_red_data {
-    void       *reduce_shar; // shared reduction item
-    size_t      reduce_size; // size of data item
-    void       *reduce_priv; // thread specific data
-    void       *reduce_pend; // end of private data for comparison op
-    void       *reduce_init; // data initialization routine
-    void       *reduce_fini; // data finalization routine
-    void       *reduce_comb; // data combiner routine
-    kmp_task_red_flags_t flags; // flags for additional info from compiler
+  void *reduce_shar; // shared reduction item
+  size_t reduce_size; // size of data item
+  void *reduce_priv; // thread specific data
+  void *reduce_pend; // end of private data for comparison op
+  void *reduce_init; // data initialization routine
+  void *reduce_fini; // data finalization routine
+  void *reduce_comb; // data combiner routine
+  kmp_task_red_flags_t flags; // flags for additional info from compiler
 } kmp_task_red_data_t;
 
 // structure sent us by compiler - one per reduction item
 typedef struct kmp_task_red_input {
-    void       *reduce_shar; // shared reduction item
-    size_t      reduce_size; // size of data item
-    void       *reduce_init; // data initialization routine
-    void       *reduce_fini; // data finalization routine
-    void       *reduce_comb; // data combiner routine
-    kmp_task_red_flags_t flags; // flags for additional info from compiler
+  void *reduce_shar; // shared reduction item
+  size_t reduce_size; // size of data item
+  void *reduce_init; // data initialization routine
+  void *reduce_fini; // data finalization routine
+  void *reduce_comb; // data combiner routine
+  kmp_task_red_flags_t flags; // flags for additional info from compiler
 } kmp_task_red_input_t;
 
 /*!
@@ -1638,58 +1655,57 @@ typedef struct kmp_task_red_input {
 
 Initialize task reduction for the taskgroup.
 */
-void*
-__kmpc_task_reduction_init(int gtid, int num, void *data)
-{
-    kmp_info_t * thread = __kmp_threads[gtid];
-    kmp_taskgroup_t * tg = thread->th.th_current_task->td_taskgroup;
-    kmp_int32 nth = thread->th.th_team_nproc;
-    kmp_task_red_input_t *input = (kmp_task_red_input_t*)data;
-    kmp_task_red_data_t *arr;
-
-    // check input data just in case
-    KMP_ASSERT(tg != NULL);
-    KMP_ASSERT(data != NULL);
-    KMP_ASSERT(num > 0);
-    if (nth == 1) {
-        KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
-                gtid, tg));
-        return (void*)tg;
-    }
-    KA_TRACE(10,("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
-                 gtid, tg, num));
-    arr = (kmp_task_red_data_t*)__kmp_thread_malloc(thread, num * sizeof(kmp_task_red_data_t));
-    for (int i = 0; i < num; ++i) {
-        void(*f_init)(void*) = (void(*)(void*))(input[i].reduce_init);
-        size_t size = input[i].reduce_size - 1;
-        // round the size up to cache line per thread-specific item
-        size += CACHE_LINE - size % CACHE_LINE;
-        KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
-        arr[i].reduce_shar = input[i].reduce_shar;
-        arr[i].reduce_size = size;
-        arr[i].reduce_init = input[i].reduce_init;
-        arr[i].reduce_fini = input[i].reduce_fini;
-        arr[i].reduce_comb = input[i].reduce_comb;
-        arr[i].flags       = input[i].flags;
-        if (!input[i].flags.lazy_priv) {
-            // allocate cache-line aligned block and fill it with zeros
-            arr[i].reduce_priv = __kmp_allocate(nth * size);
-            arr[i].reduce_pend = (char*)(arr[i].reduce_priv) + nth * size;
-            if (f_init != NULL) {
-                // initialize thread-specific items
-                for (int j = 0; j < nth; ++j) {
-                    f_init((char*)(arr[i].reduce_priv) + j * size);
-                }
-            }
-        } else {
-            // only allocate space for pointers now,
-            // objects will be lazily allocated/initialized once requested
-            arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void*));
+void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
+  kmp_int32 nth = thread->th.th_team_nproc;
+  kmp_task_red_input_t *input = (kmp_task_red_input_t *)data;
+  kmp_task_red_data_t *arr;
+
+  // check input data just in case
+  KMP_ASSERT(tg != NULL);
+  KMP_ASSERT(data != NULL);
+  KMP_ASSERT(num > 0);
+  if (nth == 1) {
+    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
+                  gtid, tg));
+    return (void *)tg;
+  }
+  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
+                gtid, tg, num));
+  arr = (kmp_task_red_data_t *)__kmp_thread_malloc(
+      thread, num * sizeof(kmp_task_red_data_t));
+  for (int i = 0; i < num; ++i) {
+    void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init);
+    size_t size = input[i].reduce_size - 1;
+    // round the size up to cache line per thread-specific item
+    size += CACHE_LINE - size % CACHE_LINE;
+    KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory
+    arr[i].reduce_shar = input[i].reduce_shar;
+    arr[i].reduce_size = size;
+    arr[i].reduce_init = input[i].reduce_init;
+    arr[i].reduce_fini = input[i].reduce_fini;
+    arr[i].reduce_comb = input[i].reduce_comb;
+    arr[i].flags = input[i].flags;
+    if (!input[i].flags.lazy_priv) {
+      // allocate cache-line aligned block and fill it with zeros
+      arr[i].reduce_priv = __kmp_allocate(nth * size);
+      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
+      if (f_init != NULL) {
+        // initialize thread-specific items
+        for (int j = 0; j < nth; ++j) {
+          f_init((char *)(arr[i].reduce_priv) + j * size);
         }
-    }
-    tg->reduce_data = (void*)arr;
-    tg->reduce_num_data = num;
-    return (void*)tg;
+      }
+    } else {
+      // only allocate space for pointers now,
+      // objects will be lazily allocated/initialized once requested
+      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
+    }
+  }
+  tg->reduce_data = (void *)arr;
+  tg->reduce_num_data = num;
+  return (void *)tg;
 }
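
The size rounding above can be easy to misread; the following standalone helper
(illustrative only, not part of the runtime) reproduces the arithmetic. Starting
from reduce_size - 1 keeps sizes that are already a multiple of CACHE_LINE
unchanged.

    #include <cstddef>
    #include <cstdio>

    static size_t round_to_cache_line(size_t reduce_size, size_t cache_line) {
      size_t size = reduce_size - 1;
      size += cache_line - size % cache_line; // same arithmetic as above
      return size;
    }

    int main() {
      std::printf("%zu %zu %zu\n",
                  round_to_cache_line(64, 64),   // 64  (already aligned)
                  round_to_cache_line(65, 64),   // 128
                  round_to_cache_line(100, 64)); // 128
    }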
 
 /*!
@@ -1701,370 +1717,386 @@ __kmpc_task_reduction_init(int gtid, int
 
 Get thread-specific location of data item
 */
-void*
-__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data)
-{
-    kmp_info_t * thread = __kmp_threads[gtid];
-    kmp_int32 nth = thread->th.th_team_nproc;
-    if (nth == 1)
-        return data; // nothing to do
-
-    kmp_taskgroup_t *tg = (kmp_taskgroup_t*)tskgrp;
-    if (tg == NULL)
-        tg = thread->th.th_current_task->td_taskgroup;
-    KMP_ASSERT(tg != NULL);
-    kmp_task_red_data_t *arr = (kmp_task_red_data_t*)(tg->reduce_data);
-    kmp_int32 num = tg->reduce_num_data;
-    kmp_int32 tid = thread->th.th_info.ds.ds_tid;
-
-    KMP_ASSERT(data != NULL);
-    while (tg != NULL) {
-      for (int i = 0; i < num; ++i) {
-        if (!arr[i].flags.lazy_priv) {
-          if (data == arr[i].reduce_shar ||
-             (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
-            return (char*)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
-        } else {
-          // check shared location first
-          void **p_priv = (void**)(arr[i].reduce_priv);
-          if (data == arr[i].reduce_shar)
+void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 nth = thread->th.th_team_nproc;
+  if (nth == 1)
+    return data; // nothing to do
+
+  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
+  if (tg == NULL)
+    tg = thread->th.th_current_task->td_taskgroup;
+  KMP_ASSERT(tg != NULL);
+  kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data);
+  kmp_int32 num = tg->reduce_num_data;
+  kmp_int32 tid = thread->th.th_info.ds.ds_tid;
+
+  KMP_ASSERT(data != NULL);
+  while (tg != NULL) {
+    for (int i = 0; i < num; ++i) {
+      if (!arr[i].flags.lazy_priv) {
+        if (data == arr[i].reduce_shar ||
+            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
+          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
+      } else {
+        // check shared location first
+        void **p_priv = (void **)(arr[i].reduce_priv);
+        if (data == arr[i].reduce_shar)
+          goto found;
+        // check if we get some thread specific location as parameter
+        for (int j = 0; j < nth; ++j)
+          if (data == p_priv[j])
             goto found;
-          // check if we get some thread specific location as parameter
-          for (int j = 0; j < nth; ++j)
-            if (data == p_priv[j])
-              goto found;
-          continue; // not found, continue search
-        found:
-          if (p_priv[tid] == NULL) {
-            // allocate thread specific object lazily
-            void(*f_init)(void*) = (void(*)(void*))(arr[i].reduce_init);
-            p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
-            if (f_init != NULL) {
-              f_init(p_priv[tid]);
-            }
+        continue; // not found, continue search
+      found:
+        if (p_priv[tid] == NULL) {
+          // allocate thread specific object lazily
+          void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init);
+          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
+          if (f_init != NULL) {
+            f_init(p_priv[tid]);
           }
-          return p_priv[tid];
         }
+        return p_priv[tid];
       }
-      tg = tg->parent;
-      arr = (kmp_task_red_data_t*)(tg->reduce_data);
-      num = tg->reduce_num_data;
     }
-    KMP_ASSERT2(0, "Unknown task reduction item");
-    return NULL; // ERROR, this line never executed
+    tg = tg->parent;
+    arr = (kmp_task_red_data_t *)(tg->reduce_data);
+    num = tg->reduce_num_data;
+  }
+  KMP_ASSERT2(0, "Unknown task reduction item");
+  return NULL; // ERROR, this line never executed
 }
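
For reference, the user-level form this experimental code path serves is the
OpenMP 5.0 task-reduction syntax sketched below (not part of this change; compiler
support for these pragmas varies). Inside each task, the compiler-generated body
obtains its thread-specific copy of the reduction item through a lookup equivalent
to __kmpc_task_reduction_get_th_data.

    #include <cstdio>

    int main() {
      int sum = 0;
      #pragma omp parallel
      #pragma omp single
      {
        #pragma omp taskgroup task_reduction(+ : sum)
        {
          for (int i = 1; i <= 100; ++i) {
            #pragma omp task in_reduction(+ : sum) firstprivate(i)
            sum += i; // each task updates its thread-specific copy
          }
        } // copies are combined into 'sum' when the taskgroup ends
      }
      std::printf("sum=%d\n", sum); // 5050
    }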
 
 // Finalize task reduction.
 // Called from __kmpc_end_taskgroup()
-static void
-__kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg)
-{
-    kmp_int32 nth = th->th.th_team_nproc;
-    KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
-    kmp_task_red_data_t *arr = (kmp_task_red_data_t*)tg->reduce_data;
-    kmp_int32 num = tg->reduce_num_data;
-    for (int i = 0; i < num; ++i) {
-        void *sh_data = arr[i].reduce_shar;
-        void(*f_fini)(void*) = (void(*)(void*))(arr[i].reduce_fini);
-        void(*f_comb)(void*,void*) = (void(*)(void*,void*))(arr[i].reduce_comb);
-        if (!arr[i].flags.lazy_priv) {
-            void *pr_data = arr[i].reduce_priv;
-            size_t size = arr[i].reduce_size;
-            for (int j = 0; j < nth; ++j) {
-                void * priv_data = (char*)pr_data + j * size;
-                f_comb(sh_data, priv_data); // combine results
-                if (f_fini)
-                    f_fini(priv_data); // finalize if needed
-            }
-        } else {
-            void **pr_data = (void**)(arr[i].reduce_priv);
-            for (int j = 0; j < nth; ++j) {
-                if (pr_data[j] != NULL) {
-                    f_comb(sh_data, pr_data[j]); // combine results
-                    if (f_fini)
-                        f_fini(pr_data[j]); // finalize if needed
-                    __kmp_free(pr_data[j]);
-                }
-            }
+static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
+  kmp_int32 nth = th->th.th_team_nproc;
+  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
+  kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data;
+  kmp_int32 num = tg->reduce_num_data;
+  for (int i = 0; i < num; ++i) {
+    void *sh_data = arr[i].reduce_shar;
+    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
+    void (*f_comb)(void *, void *) =
+        (void (*)(void *, void *))(arr[i].reduce_comb);
+    if (!arr[i].flags.lazy_priv) {
+      void *pr_data = arr[i].reduce_priv;
+      size_t size = arr[i].reduce_size;
+      for (int j = 0; j < nth; ++j) {
+        void *priv_data = (char *)pr_data + j * size;
+        f_comb(sh_data, priv_data); // combine results
+        if (f_fini)
+          f_fini(priv_data); // finalize if needed
+      }
+    } else {
+      void **pr_data = (void **)(arr[i].reduce_priv);
+      for (int j = 0; j < nth; ++j) {
+        if (pr_data[j] != NULL) {
+          f_comb(sh_data, pr_data[j]); // combine results
+          if (f_fini)
+            f_fini(pr_data[j]); // finalize if needed
+          __kmp_free(pr_data[j]);
         }
-        __kmp_free(arr[i].reduce_priv);
+      }
     }
-    __kmp_thread_free(th, arr);
-    tg->reduce_data = NULL;
-    tg->reduce_num_data = 0;
+    __kmp_free(arr[i].reduce_priv);
+  }
+  __kmp_thread_free(th, arr);
+  tg->reduce_data = NULL;
+  tg->reduce_num_data = 0;
 }
 #endif
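
The reduce_init and reduce_comb callbacks consumed above are supplied by the
compiler; the sketch below shows their expected shape for a "+ : int" reduction
item. Names are illustrative, and reduce_fini may be left null when private copies
need no destruction.

    #include <cstdio>

    // reduce_init: zero-initialize one private copy.
    static void red_init(void *priv) { *(int *)priv = 0; }

    // reduce_comb: fold a private copy into the shared item; the shared pointer
    // comes first, matching the f_comb(sh_data, priv_data) call above.
    static void red_comb(void *shar, void *priv) { *(int *)shar += *(int *)priv; }

    int main() {
      int shared_sum = 0, priv;
      red_init(&priv);
      priv += 42;
      red_comb(&shared_sum, &priv);
      std::printf("%d\n", shared_sum); // 42
    }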
 
 #if OMP_40_ENABLED
-//-------------------------------------------------------------------------------------
 // __kmpc_taskgroup: Start a new taskgroup
-
-void
-__kmpc_taskgroup( ident_t* loc, int gtid )
-{
-    kmp_info_t      * thread = __kmp_threads[ gtid ];
-    kmp_taskdata_t  * taskdata = thread->th.th_current_task;
-    kmp_taskgroup_t * tg_new =
-        (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
-    KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
-    tg_new->count = 0;
-    tg_new->cancel_request = cancel_noreq;
-    tg_new->parent = taskdata->td_taskgroup;
+void __kmpc_taskgroup(ident_t *loc, int gtid) {
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *taskdata = thread->th.th_current_task;
+  kmp_taskgroup_t *tg_new =
+      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
+  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
+  tg_new->count = 0;
+  tg_new->cancel_request = cancel_noreq;
+  tg_new->parent = taskdata->td_taskgroup;
 // TODO: change to OMP_50_ENABLED, need to change build tools for this to work
 #if OMP_45_ENABLED
-    tg_new->reduce_data = NULL;
-    tg_new->reduce_num_data = 0;
+  tg_new->reduce_data = NULL;
+  tg_new->reduce_num_data = 0;
 #endif
-    taskdata->td_taskgroup = tg_new;
+  taskdata->td_taskgroup = tg_new;
 }
 
-
-//-------------------------------------------------------------------------------------
 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
 //                       and its descendants are complete
+void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *taskdata = thread->th.th_current_task;
+  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
+  int thread_finished = FALSE;
+
+  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
+  KMP_DEBUG_ASSERT(taskgroup != NULL);
+  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
 
-void
-__kmpc_end_taskgroup( ident_t* loc, int gtid )
-{
-    kmp_info_t      * thread = __kmp_threads[ gtid ];
-    kmp_taskdata_t  * taskdata = thread->th.th_current_task;
-    kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
-    int thread_finished = FALSE;
-
-    KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
-    KMP_DEBUG_ASSERT( taskgroup != NULL );
-    KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);
-
-    if ( __kmp_tasking_mode != tskm_immediate_exec ) {
+  if (__kmp_tasking_mode != tskm_immediate_exec) {
 #if USE_ITT_BUILD
-        // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
-        void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
+    // For ITT the taskgroup wait is similar to taskwait until we need to
+    // distinguish them
+    void *itt_sync_obj = __kmp_itt_taskwait_object(gtid);
+    if (itt_sync_obj != NULL)
+      __kmp_itt_taskwait_starting(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
 
 #if OMP_45_ENABLED
-        if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) )
+    if (!taskdata->td_flags.team_serial ||
+        (thread->th.th_task_team != NULL &&
+         thread->th.th_task_team->tt.tt_found_proxy_tasks))
 #else
-        if ( ! taskdata->td_flags.team_serial )
+    if (!taskdata->td_flags.team_serial)
 #endif
-        {
-            kmp_flag_32 flag(&(taskgroup->count), 0U);
-            while ( TCR_4(taskgroup->count) != 0 ) {
-                flag.execute_tasks(thread, gtid, FALSE, &thread_finished
-                                   USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint );
-            }
-        }
+    {
+      kmp_flag_32 flag(&(taskgroup->count), 0U);
+      while (TCR_4(taskgroup->count) != 0) {
+        flag.execute_tasks(thread, gtid, FALSE,
+                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
+                           __kmp_task_stealing_constraint);
+      }
+    }
 
 #if USE_ITT_BUILD
-        if ( itt_sync_obj != NULL )
-            __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
+    if (itt_sync_obj != NULL)
+      __kmp_itt_taskwait_finished(gtid, itt_sync_obj);
 #endif /* USE_ITT_BUILD */
-    }
-    KMP_DEBUG_ASSERT( taskgroup->count == 0 );
+  }
+  KMP_DEBUG_ASSERT(taskgroup->count == 0);
 
 // TODO: change to OMP_50_ENABLED, need to change build tools for this to work
 #if OMP_45_ENABLED
-    if( taskgroup->reduce_data != NULL ) // need to reduce?
-        __kmp_task_reduction_fini(thread, taskgroup);
+  if (taskgroup->reduce_data != NULL) // need to reduce?
+    __kmp_task_reduction_fini(thread, taskgroup);
 #endif
-    // Restore parent taskgroup for the current task
-    taskdata->td_taskgroup = taskgroup->parent;
-    __kmp_thread_free( thread, taskgroup );
-
-    KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
-    ANNOTATE_HAPPENS_AFTER(taskdata);
+  // Restore parent taskgroup for the current task
+  taskdata->td_taskgroup = taskgroup->parent;
+  __kmp_thread_free(thread, taskgroup);
+
+  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
+                gtid, taskdata));
+  ANNOTATE_HAPPENS_AFTER(taskdata);
 }
 #endif
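
A user-level illustration of the taskgroup semantics implemented above (not part
of this change): unlike taskwait, the wait in __kmpc_end_taskgroup covers
descendants as well, because every task created in the region increments the
taskgroup's count.

    #include <cstdio>

    int main() {
      int done = 0;
      #pragma omp parallel
      #pragma omp single
      {
        #pragma omp taskgroup // __kmpc_taskgroup
        {
          #pragma omp task shared(done)
          {
            #pragma omp task shared(done) // grandchild is also waited for
            done = 1;
          }
        } // __kmpc_end_taskgroup: waits for the task *and* its child
        std::printf("done=%d\n", done); // prints 1
      }
    }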
 
-
-//------------------------------------------------------
 // __kmp_remove_my_task: remove a task from my own deque
+static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
+                                        kmp_task_team_t *task_team,
+                                        kmp_int32 is_constrained) {
+  kmp_task_t *task;
+  kmp_taskdata_t *taskdata;
+  kmp_thread_data_t *thread_data;
+  kmp_uint32 tail;
+
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
+                   NULL); // Caller should check this condition
+
+  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
+
+  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
+                gtid, thread_data->td.td_deque_ntasks,
+                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+
+  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
+    KA_TRACE(10,
+             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
+              "ntasks=%d head=%u tail=%u\n",
+              gtid, thread_data->td.td_deque_ntasks,
+              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+    return NULL;
+  }
+
+  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
+    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+    KA_TRACE(10,
+             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
+              "ntasks=%d head=%u tail=%u\n",
+              gtid, thread_data->td.td_deque_ntasks,
+              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+    return NULL;
+  }
+
+  tail = (thread_data->td.td_deque_tail - 1) &
+         TASK_DEQUE_MASK(thread_data->td); // Wrap index.
+  taskdata = thread_data->td.td_deque[tail];
+
+  if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
+    // we need to check if the candidate obeys task scheduling constraint:
+    // only child of current task can be scheduled
+    kmp_taskdata_t *current = thread->th.th_current_task;
+    kmp_int32 level = current->td_level;
+    kmp_taskdata_t *parent = taskdata->td_parent;
+    while (parent != current && parent->td_level > level) {
+      parent = parent->td_parent; // check generation up to the level of the
+      // current task
+      KMP_DEBUG_ASSERT(parent != NULL);
+    }
+    if (parent != current) {
+      // If the tail task is not a child, then no other child can appear in the
+      // deque.
+      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+      KA_TRACE(10,
+               ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
+                "ntasks=%d head=%u tail=%u\n",
+                gtid, thread_data->td.td_deque_ntasks,
+                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
+      return NULL;
+    }
+  }
+
+  thread_data->td.td_deque_tail = tail;
+  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);
+
+  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: "
+                "ntasks=%d head=%u tail=%u\n",
+                gtid, taskdata, thread_data->td.td_deque_ntasks,
+                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
 
-static kmp_task_t *
-__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
-                      kmp_int32 is_constrained )
-{
-    kmp_task_t * task;
-    kmp_taskdata_t * taskdata;
-    kmp_thread_data_t *thread_data;
-    kmp_uint32 tail;
-
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
-
-        thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
-
-    KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
-                  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
-                  thread_data->td.td_deque_tail) );
-
-    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
-        KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
-                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
-                      thread_data->td.td_deque_tail) );
-        return NULL;
-    }
-
-    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
-    if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
-        __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
-        KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
-                      gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
-                      thread_data->td.td_deque_tail) );
-        return NULL;
-    }
-
-    tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td);  // Wrap index.
-    taskdata = thread_data -> td.td_deque[ tail ];
-
-    if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) {
-        // we need to check if the candidate obeys task scheduling constraint:
-        // only child of current task can be scheduled
-        kmp_taskdata_t * current = thread->th.th_current_task;
-        kmp_int32        level = current->td_level;
-        kmp_taskdata_t * parent = taskdata->td_parent;
-        while ( parent != current && parent->td_level > level ) {
-            parent = parent->td_parent;  // check generation up to the level of the current task
-            KMP_DEBUG_ASSERT(parent != NULL);
-        }
-        if ( parent != current ) {
-            // If the tail task is not a child, then no other child can appear in the deque.
-            __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
-            KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
-                          gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
-                          thread_data->td.td_deque_tail) );
-            return NULL;
-        }
-    }
-
-    thread_data -> td.td_deque_tail = tail;
-    TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
-
-    __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
-
-    KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
-                  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
-                  thread_data->td.td_deque_tail) );
-
-    task = KMP_TASKDATA_TO_TASK( taskdata );
-    return task;
+  task = KMP_TASKDATA_TO_TASK(taskdata);
+  return task;
 }
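
A simplified model of the tail pop above, kept only to make the task scheduling
constraint easier to follow (types and names are illustrative, not the runtime's):
when constrained, the tied task at the tail is taken only if walking its td_parent
chain, stopping once the level drops to the current task's level, reaches the
current task.

    #include <cstdint>
    #include <vector>

    struct toy_taskdata {
      toy_taskdata *td_parent; // immediate generator of this task
      int32_t td_level;        // nesting depth
      bool tied;               // the constraint only applies to tied tasks
    };

    // True if walking candidate's ancestors (stopping once the level is no
    // longer deeper than current's) reaches current, i.e. the candidate obeys
    // the scheduling constraint.
    static bool obeys_constraint(const toy_taskdata *candidate,
                                 const toy_taskdata *current) {
      const toy_taskdata *parent = candidate->td_parent;
      while (parent != nullptr && parent != current &&
             parent->td_level > current->td_level)
        parent = parent->td_parent;
      return parent == current;
    }

    // Pop from the tail (LIFO for the owning thread); return nullptr if the
    // deque is empty or the constraint rejects the tail task.
    static toy_taskdata *remove_my_task(std::vector<toy_taskdata *> &deque,
                                        const toy_taskdata *current,
                                        bool is_constrained) {
      if (deque.empty())
        return nullptr;
      toy_taskdata *tail = deque.back();
      if (is_constrained && tail->tied && !obeys_constraint(tail, current))
        return nullptr; // if the tail is not a child, no other child is deeper
      deque.pop_back();
      return tail;
    }

    int main() {
      toy_taskdata root = {nullptr, 0, true};
      toy_taskdata child = {&root, 1, true};
      std::vector<toy_taskdata *> deque = {&child};
      return remove_my_task(deque, &root, true) == &child ? 0 : 1;
    }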
 
-
-//-----------------------------------------------------------
 // __kmp_steal_task: remove a task from another thread's deque
 // Assume that calling thread has already checked existence of
 // task_team thread_data before calling this routine.
-
 static kmp_task_t *
-__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
-                  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
-                  kmp_int32 is_constrained )
-{
-    kmp_task_t * task;
-    kmp_taskdata_t * taskdata;
-    kmp_thread_data_t *victim_td, *threads_data;
-    kmp_int32 victim_tid;
-
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
-    threads_data = task_team -> tt.tt_threads_data;
-    KMP_DEBUG_ASSERT( threads_data != NULL );  // Caller should check this condition
-
-    victim_tid = victim->th.th_info.ds.ds_tid;
-    victim_td = & threads_data[ victim_tid ];
-
-    KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
-                  "head=%u tail=%u\n",
-                  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
-                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
-
-    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
-         (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
-    {
-        KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
-                      "ntasks=%d head=%u tail=%u\n",
-                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
-                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
-        return NULL;
-    }
-
-    __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
-
-    // Check again after we acquire the lock
-    if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
-         (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
-    {
-        __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
-        KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
-                      "ntasks=%d head=%u tail=%u\n",
-                      gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
-                      victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
-        return NULL;
-    }
-
-    KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
-
-    taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
-    if ( is_constrained ) {
-        // we need to check if the candidate obeys task scheduling constraint:
-        // only descendant of current task can be scheduled
-        kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
-        kmp_int32        level = current->td_level;
-        kmp_taskdata_t * parent = taskdata->td_parent;
-        while ( parent != current && parent->td_level > level ) {
-            parent = parent->td_parent;  // check generation up to the level of the current task
-            KMP_DEBUG_ASSERT(parent != NULL);
-        }
-        if ( parent != current ) {
-            // If the head task is not a descendant of the current task then do not
-            // steal it. No other task in victim's deque can be a descendant of the
-            // current task.
-            __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
-            KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
-                          "ntasks=%d head=%u tail=%u\n",
-                          gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
-                          task_team, victim_td->td.td_deque_ntasks,
-                          victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
-            return NULL;
-        }
-    }
-    // Bump head pointer and Wrap.
-    victim_td->td.td_deque_head = (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
-    if (*thread_finished) {
-        // We need to un-mark this victim as a finished victim.  This must be done before
-        // releasing the lock, or else other threads (starting with the master victim)
-        // might be prematurely released from the barrier!!!
-        kmp_uint32 count;
-
-        count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
-
-        KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
-                      gtid, count + 1, task_team) );
-
-        *thread_finished = FALSE;
-    }
-    TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
-
-    __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
-
-    KMP_COUNT_BLOCK(TASK_stolen);
-    KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
+__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
+                 volatile kmp_uint32 *unfinished_threads, int *thread_finished,
+                 kmp_int32 is_constrained)
+{
+  kmp_task_t *task;
+  kmp_taskdata_t *taskdata;
+  kmp_thread_data_t *victim_td, *threads_data;
+  kmp_int32 victim_tid;
+
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+
+  threads_data = task_team->tt.tt_threads_data;
+  KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
+
+  victim_tid = victim->th.th_info.ds.ds_tid;
+  victim_td = &threads_data[victim_tid];
+
+  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
+                "task_team=%p ntasks=%d "
+                "head=%u tail=%u\n",
+                gtid, __kmp_gtid_from_thread(victim), task_team,
+                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+                victim_td->td.td_deque_tail));
+
+  if ((TCR_4(victim_td->td.td_deque_ntasks) ==
+       0) || // Caller should not check this condition
+      (TCR_PTR(victim->th.th_task_team) !=
+       task_team)) // GEH: why would this happen?
+  {
+    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
+                  "task_team=%p "
                   "ntasks=%d head=%u tail=%u\n",
-                  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
+                  gtid, __kmp_gtid_from_thread(victim), task_team,
                   victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
-                  victim_td->td.td_deque_tail) );
+                  victim_td->td.td_deque_tail));
+    return NULL;
+  }
+
+  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);
+
+  // Check again after we acquire the lock
+  if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) ||
+      (TCR_PTR(victim->th.th_task_team) !=
+       task_team)) // GEH: why would this happen?
+  {
+    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
+                  "task_team=%p "
+                  "ntasks=%d head=%u tail=%u\n",
+                  gtid, __kmp_gtid_from_thread(victim), task_team,
+                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+                  victim_td->td.td_deque_tail));
+    return NULL;
+  }
+
+  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
+
+  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
+  if (is_constrained) {
+    // we need to check if the candidate obeys task scheduling constraint:
+    // only descendant of current task can be scheduled
+    kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task;
+    kmp_int32 level = current->td_level;
+    kmp_taskdata_t *parent = taskdata->td_parent;
+    while (parent != current && parent->td_level > level) {
+      parent = parent->td_parent; // check generation up to the level of the
+      // current task
+      KMP_DEBUG_ASSERT(parent != NULL);
+    }
+    if (parent != current) {
+      // If the head task is not a descendant of the current task then do not
+      // steal it. No other task in victim's deque can be a descendant of the
+      // current task.
+      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+      KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from "
+                    "T#%d: task_team=%p "
+                    "ntasks=%d head=%u tail=%u\n",
+                    gtid,
+                    __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr),
+                    task_team, victim_td->td.td_deque_ntasks,
+                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
+      return NULL;
+    }
+  }
+  // Bump head pointer and wrap.
+  victim_td->td.td_deque_head =
+      (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
+  if (*thread_finished) {
+    // We need to un-mark this victim as a finished victim.  This must be done
+    // before releasing the lock, or else other threads (starting with the
+    // master victim) might be prematurely released from the barrier!!!
+    kmp_uint32 count;
+
+    count = KMP_TEST_THEN_INC32((kmp_int32 *)unfinished_threads);
+
+    KA_TRACE(
+        20,
+        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
+         gtid, count + 1, task_team));
+
+    *thread_finished = FALSE;
+  }
+  TCW_4(victim_td->td.td_deque_ntasks,
+        TCR_4(victim_td->td.td_deque_ntasks) - 1);
+
+  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
+
+  KMP_COUNT_BLOCK(TASK_stolen);
+  KA_TRACE(
+      10,
+      ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
+       "ntasks=%d head=%u tail=%u\n",
+       gtid, taskdata, __kmp_gtid_from_thread(victim), task_team,
+       victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
+       victim_td->td.td_deque_tail));
 
-    task = KMP_TASKDATA_TO_TASK( taskdata );
-    return task;
+  task = KMP_TASKDATA_TO_TASK(taskdata);
+  return task;
 }
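
For reference, the constrained-stealing branch above only takes the task at the
victim's head if it is a descendant of the stealing thread's current task. A
minimal standalone sketch of that test, using a hypothetical Task struct in
place of kmp_taskdata_t (parent/level standing in for td_parent/td_level):

// Hypothetical stand-in for kmp_taskdata_t: just the fields the check needs.
struct Task {
  Task *parent; // corresponds to td_parent
  int level;    // corresponds to td_level (nesting depth)
};

// True if 'candidate' hangs off 'current' somewhere in its ancestor chain:
// climb parents until we either reach 'current' or rise above its level.
static bool is_descendant(const Task *candidate, const Task *current) {
  const Task *p = candidate->parent;
  while (p != current && p->level > current->level)
    p = p->parent;
  return p == current;
}

If the head task fails this test, no other task in the victim's deque can pass
it either, which is why the code above gives up rather than scanning further.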
 
 
-//-----------------------------------------------------------------------------
-// __kmp_execute_tasks_template: Choose and execute tasks until either the condition
-// is satisfied (return true) or there are none left (return false).
+// __kmp_execute_tasks_template: Choose and execute tasks until either the
+// condition is satisfied (return true) or there are none left (return false).
+//
 // final_spin is TRUE if this is the spin at the release barrier.
 // thread_finished indicates whether the thread is finished executing all
 // the tasks it has on its deque, and is at the release barrier.
@@ -2072,289 +2104,318 @@ __kmp_steal_task( kmp_info_t *victim, km
 // spinner == NULL means only execute a single task and return.
 // checker is the value to check to terminate the spin.
 template <class C>
-static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
-                                               int *thread_finished
-                                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
-    kmp_task_team_t *     task_team = thread->th.th_task_team;
-    kmp_thread_data_t *   threads_data;
-    kmp_task_t *          task;
-    kmp_info_t *          other_thread;
-    kmp_taskdata_t *      current_task = thread -> th.th_current_task;
-    volatile kmp_uint32 * unfinished_threads;
-    kmp_int32             nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid;
-
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
-
-    if (task_team == NULL) return FALSE;
-
-    KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n",
-                  gtid, final_spin, *thread_finished) );
-
-    thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
-    threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
-    KMP_DEBUG_ASSERT( threads_data != NULL );
+static inline int __kmp_execute_tasks_template(
+    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
+    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+    kmp_int32 is_constrained) {
+  kmp_task_team_t *task_team = thread->th.th_task_team;
+  kmp_thread_data_t *threads_data;
+  kmp_task_t *task;
+  kmp_info_t *other_thread;
+  kmp_taskdata_t *current_task = thread->th.th_current_task;
+  volatile kmp_uint32 *unfinished_threads;
+  kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0,
+                      tid = thread->th.th_info.ds.ds_tid;
+
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
+
+  if (task_team == NULL)
+    return FALSE;
+
+  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
+                "*thread_finished=%d\n",
+                gtid, final_spin, *thread_finished));
+
+  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
+  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
+  KMP_DEBUG_ASSERT(threads_data != NULL);
 
-    nthreads = task_team -> tt.tt_nproc;
-    unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
+  nthreads = task_team->tt.tt_nproc;
+  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
 #if OMP_45_ENABLED
-    KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
+  KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks);
 #else
-    KMP_DEBUG_ASSERT( nthreads > 1 );
+  KMP_DEBUG_ASSERT(nthreads > 1);
 #endif
-    KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 );
+  KMP_DEBUG_ASSERT((int)(TCR_4(*unfinished_threads)) >= 0);
 
-    while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs
-        while (1) { // Inner loop to find a task and execute it
-            task = NULL;
-            if (use_own_tasks) { // check on own queue first
-                task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained );
-            }
-            if ((task == NULL) && (nthreads > 1)) { // Steal a task
-                int asleep = 1;
-                use_own_tasks = 0;
-                // Try to steal from the last place I stole from successfully.
-                if (victim == -2) { // haven't stolen anything yet
-                    victim = threads_data[tid].td.td_deque_last_stolen;
-                    if (victim != -1) // if we have a last stolen from victim, get the thread
-                        other_thread = threads_data[victim].td.td_thr;
-                }
-                if (victim != -1) { // found last victim
-                    asleep = 0;
-                }
-                else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread
-                    do { // Find a different thread to steal work from.
-                        // Pick a random thread. Initial plan was to cycle through all the threads, and only return if
-                        // we tried to steal from every thread, and failed.  Arch says that's not such a great idea.
-                        victim = __kmp_get_random(thread) % (nthreads - 1);
-                        if (victim >= tid) {
-                            ++victim;  // Adjusts random distribution to exclude self
-                        }
-                        // Found a potential victim
-                        other_thread = threads_data[victim].td.td_thr;
-                        // There is a slight chance that __kmp_enable_tasking() did not wake up all threads
-                        // waiting at the barrier.  If victim is sleeping, then wake it up.  Since we were going to
-                        // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway,
-                        // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks
-                        // do not sleep at the separate tasking barrier, so this isn't a problem.
-                        asleep = 0;
-                        if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
-                             (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
-                             (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
-                            asleep = 1;
-                            __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc);
-                            // A sleeping thread should not have any tasks on its queue. There is a slight
-                            // possibility that it resumes, steals a task from another thread, which spawns more
-                            // tasks, all in the time that it takes this thread to check => don't write an assertion
-                            // that the victim's queue is empty.  Try stealing from a different thread.
-                        }
-                    } while (asleep);
-                }
-
-                if (!asleep) {
-                    // We have a victim to try to steal from
-                    task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained);
-                }
-                if (task != NULL) { // set last stolen to victim
-                    if (threads_data[tid].td.td_deque_last_stolen != victim) {
-                        threads_data[tid].td.td_deque_last_stolen = victim;
-                        // The pre-refactored code did not try more than 1 successful new victim,
-                        // unless the last one generated more local tasks; new_victim keeps track of this
-                        new_victim = 1;
-                    }
-                }
-                else { // No tasks found; unset last_stolen
-                    KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
-                    victim = -2; // no successful victim found
-                }
-            }
+  while (1) { // Outer loop keeps trying to find tasks in case of single thread
+    // getting tasks from target constructs
+    while (1) { // Inner loop to find a task and execute it
+      task = NULL;
+      if (use_own_tasks) { // check on own queue first
+        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
+      }
+      if ((task == NULL) && (nthreads > 1)) { // Steal a task
+        int asleep = 1;
+        use_own_tasks = 0;
+        // Try to steal from the last place I stole from successfully.
+        if (victim == -2) { // haven't stolen anything yet
+          victim = threads_data[tid].td.td_deque_last_stolen;
+          if (victim !=
+              -1) // if we have a last stolen from victim, get the thread
+            other_thread = threads_data[victim].td.td_thr;
+        }
+        if (victim != -1) { // found last victim
+          asleep = 0;
+        } else if (!new_victim) { // no recent steals and we haven't already
+          // used a new victim; select a random thread
+          do { // Find a different thread to steal work from.
+            // Pick a random thread. Initial plan was to cycle through all the
+            // threads, and only return if we tried to steal from every thread,
+            // and failed.  Arch says that's not such a great idea.
+            victim = __kmp_get_random(thread) % (nthreads - 1);
+            if (victim >= tid) {
+              ++victim; // Adjusts random distribution to exclude self
+            }
+            // Found a potential victim
+            other_thread = threads_data[victim].td.td_thr;
+            // There is a slight chance that __kmp_enable_tasking() did not wake
+            // up all threads waiting at the barrier.  If victim is sleeping,
+            // then wake it up. Since we were going to pay the cache miss
+            // penalty for referencing another thread's kmp_info_t struct
+            // anyway, the check shouldn't cost too much performance at this
+            // point. In extra barrier mode, tasks do not sleep at the
+            // separate tasking barrier, so this isn't a problem.
+            asleep = 0;
+            if ((__kmp_tasking_mode == tskm_task_teams) &&
+                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
+                (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) {
+              asleep = 1;
+              __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread),
+                                        other_thread->th.th_sleep_loc);
+              // A sleeping thread should not have any tasks on its queue.
+              // There is a slight possibility that it resumes, steals a task
+              // from another thread, which spawns more tasks, all in the time
+              // that it takes this thread to check => don't write an assertion
+              // that the victim's queue is empty.  Try stealing from a
+              // different thread.
+            }
+          } while (asleep);
+        }
+
+        if (!asleep) {
+          // We have a victim to try to steal from
+          task = __kmp_steal_task(other_thread, gtid, task_team,
+                                  unfinished_threads, thread_finished,
+                                  is_constrained);
+        }
+        if (task != NULL) { // set last stolen to victim
+          if (threads_data[tid].td.td_deque_last_stolen != victim) {
+            threads_data[tid].td.td_deque_last_stolen = victim;
+            // The pre-refactored code did not try more than 1 successful new
+            // victim, unless the last one generated more local tasks;
+            // new_victim keeps track of this
+            new_victim = 1;
+          }
+        } else { // No tasks found; unset last_stolen
+          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
+          victim = -2; // no successful victim found
+        }
+      }
 
-            if (task == NULL) // break out of tasking loop
-                break;
+      if (task == NULL) // break out of tasking loop
+        break;
 
-            // Found a task; execute it
+// Found a task; execute it
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
-            if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
-                if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably
-                    itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
-                }
-                __kmp_itt_task_starting( itt_sync_obj );
-            }
+      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
+        if (itt_sync_obj == NULL) { // we are at fork barrier where we could not
+          // get the object reliably
+          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
+        }
+        __kmp_itt_task_starting(itt_sync_obj);
+      }
 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
-            __kmp_invoke_task( gtid, task, current_task );
+      __kmp_invoke_task(gtid, task, current_task);
 #if USE_ITT_BUILD
-            if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj );
+      if (itt_sync_obj != NULL)
+        __kmp_itt_task_finished(itt_sync_obj);
 #endif /* USE_ITT_BUILD */
-            // If this thread is only partway through the barrier and the condition is met, then return now,
-            // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop
-            // in the barrier, waiting to be released, we know that the termination condition will not be
-            // satisfied, so don't waste any cycles checking it.
-            if (flag == NULL || (!final_spin && flag->done_check())) {
-                KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
-                return TRUE;
-            }
-            if (thread->th.th_task_team == NULL) {
-                break;
-            }
-            KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
-            // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks
-            if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
-                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid));
-                use_own_tasks = 1;
-                new_victim = 0;
-            }
-        }
+      // If this thread is only partway through the barrier and the condition is
+      // met, then return now, so that the barrier gather/release pattern can
+      // proceed. If this thread is in the last spin loop in the barrier,
+      // waiting to be released, we know that the termination condition will not
+      // be satisfied, so don't waste any cycles checking it.
+      if (flag == NULL || (!final_spin && flag->done_check())) {
+        KA_TRACE(
+            15,
+            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
+             gtid));
+        return TRUE;
+      }
+      if (thread->th.th_task_team == NULL) {
+        break;
+      }
+      // Yield before executing next task
+      KMP_YIELD(__kmp_library == library_throughput);
+      // If execution of a stolen task results in more tasks being placed on our
+      // run queue, reset use_own_tasks
+      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
+        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
+                      "other tasks, restart\n",
+                      gtid));
+        use_own_tasks = 1;
+        new_victim = 0;
+      }
+    }
 
-        // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied.
+// The task source has been exhausted. If in final spin loop of barrier, check
+// if termination condition is satisfied.
 #if OMP_45_ENABLED
-        // The work queue may be empty but there might be proxy tasks still executing
-        if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
+    // The work queue may be empty but there might be proxy tasks still
+    // executing
+    if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0)
 #else
-        if (final_spin)
+    if (final_spin)
 #endif
-        {
-            // First, decrement the #unfinished threads, if that has not already been done.  This decrement
-            // might be to the spin location, and result in the termination condition being satisfied.
-            if (! *thread_finished) {
-                kmp_uint32 count;
-
-                count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
-                KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n",
-                              gtid, count, task_team) );
-                *thread_finished = TRUE;
-            }
-
-            // It is now unsafe to reference thread->th.th_team !!!
-            // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through
-            // the barrier, where it might reset each thread's th.th_team field for the next parallel region.
-            // If we can steal more work, we know that this has not happened yet.
-            if (flag != NULL && flag->done_check()) {
-                KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) );
-                return TRUE;
-            }
-        }
+    {
+      // First, decrement the #unfinished threads, if that has not already been
+      // done.  This decrement might be to the spin location, and result in the
+      // termination condition being satisfied.
+      if (!*thread_finished) {
+        kmp_uint32 count;
 
-        // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out
-        if (thread->th.th_task_team == NULL) {
-            KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) );
-            return FALSE;
-        }
+        count = KMP_TEST_THEN_DEC32((kmp_int32 *)unfinished_threads) - 1;
+        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
+                      "unfinished_threads to %d task_team=%p\n",
+                      gtid, count, task_team));
+        *thread_finished = TRUE;
+      }
 
-#if OMP_45_ENABLED
-        // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute
-        // tasks from own queue
-        if (nthreads == 1)
-            use_own_tasks = 1;
-        else
-#endif
-        {
-            KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) );
-            return FALSE;
-        }
+      // It is now unsafe to reference thread->th.th_team !!!
+      // Decrementing task_team->tt.tt_unfinished_threads can allow the master
+      // thread to pass through the barrier, where it might reset each thread's
+      // th.th_team field for the next parallel region. If we can steal more
+      // work, we know that this has not happened yet.
+      if (flag != NULL && flag->done_check()) {
+        KA_TRACE(
+            15,
+            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
+             gtid));
+        return TRUE;
+      }
     }
-}
-
-int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
-                           int *thread_finished
-                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
-    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
-                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
 
-int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
-                           int *thread_finished
-                           USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
-    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
-                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-}
+    // If this thread's task team is NULL, master has recognized that there are
+    // no more tasks; bail out
+    if (thread->th.th_task_team == NULL) {
+      KA_TRACE(15,
+               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
+      return FALSE;
+    }
 
-int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
-                               int *thread_finished
-                               USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained)
-{
-    return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished
-                                        USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+#if OMP_45_ENABLED
+    // We could be getting tasks from target constructs; if this is the only
+    // thread, keep trying to execute tasks from own queue
+    if (nthreads == 1)
+      use_own_tasks = 1;
+    else
+#endif
+    {
+      KA_TRACE(15,
+               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
+      return FALSE;
+    }
+  }
+}
+
+int __kmp_execute_tasks_32(
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin,
+    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+    kmp_int32 is_constrained) {
+  return __kmp_execute_tasks_template(
+      thread, gtid, flag, final_spin,
+      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
+int __kmp_execute_tasks_64(
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin,
+    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+    kmp_int32 is_constrained) {
+  return __kmp_execute_tasks_template(
+      thread, gtid, flag, final_spin,
+      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+}
+
+int __kmp_execute_tasks_oncore(
+    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
+    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+    kmp_int32 is_constrained) {
+  return __kmp_execute_tasks_template(
+      thread, gtid, flag, final_spin,
+      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
 }
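
The random victim selection in __kmp_execute_tasks_template above draws from
nthreads - 1 values and shifts anything at or past the caller's own tid up by
one, so the caller is never chosen and every other thread stays equally likely.
A small sketch of just that step, with std::rand() standing in for the
runtime's __kmp_get_random(thread):

#include <cstdlib>

// Requires nthreads > 1 (asserted in the routine above).
static int pick_victim(int my_tid, int nthreads) {
  int victim = std::rand() % (nthreads - 1); // uniform over 0 .. nthreads-2
  if (victim >= my_tid)
    ++victim; // skip our own slot, mapping the range onto the other threads
  return victim;
}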
 
-
-
-//-----------------------------------------------------------------------------
 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
 // next barrier so they can assist in executing enqueued tasks.
 // First thread in allocates the task team atomically.
+static void __kmp_enable_tasking(kmp_task_team_t *task_team,
+                                 kmp_info_t *this_thr) {
+  kmp_thread_data_t *threads_data;
+  int nthreads, i, is_init_thread;
+
+  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
+                __kmp_gtid_from_thread(this_thr)));
+
+  KMP_DEBUG_ASSERT(task_team != NULL);
+  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
+
+  nthreads = task_team->tt.tt_nproc;
+  KMP_DEBUG_ASSERT(nthreads > 0);
+  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
+
+  // Allocate or increase the size of threads_data if necessary
+  is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
+
+  if (!is_init_thread) {
+    // Some other thread already set up the array.
+    KA_TRACE(
+        20,
+        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
+         __kmp_gtid_from_thread(this_thr)));
+    return;
+  }
+  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
+  KMP_DEBUG_ASSERT(threads_data != NULL);
+
+  if ((__kmp_tasking_mode == tskm_task_teams) &&
+      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
+    // Release any threads sleeping at the barrier, so that they can steal
+    // tasks and execute them.  In extra barrier mode, tasks do not sleep
+    // at the separate tasking barrier, so this isn't a problem.
+    for (i = 0; i < nthreads; i++) {
+      volatile void *sleep_loc;
+      kmp_info_t *thread = threads_data[i].td.td_thr;
 
-static void
-__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
-{
-    kmp_thread_data_t *threads_data;
-    int nthreads, i, is_init_thread;
-
-    KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
-                    __kmp_gtid_from_thread( this_thr ) ) );
-
-    KMP_DEBUG_ASSERT(task_team != NULL);
-    KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
-
-    nthreads = task_team->tt.tt_nproc;
-    KMP_DEBUG_ASSERT(nthreads > 0);
-    KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
-
-    // Allocate or increase the size of threads_data if necessary
-    is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
-
-    if (!is_init_thread) {
-        // Some other thread already set up the array.
-        KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
-                        __kmp_gtid_from_thread( this_thr ) ) );
-        return;
-    }
-    threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
-    KMP_DEBUG_ASSERT( threads_data != NULL );
-
-    if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
-         ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
-    {
-        // Release any threads sleeping at the barrier, so that they can steal
-        // tasks and execute them.  In extra barrier mode, tasks do not sleep
-        // at the separate tasking barrier, so this isn't a problem.
-        for (i = 0; i < nthreads; i++) {
-            volatile void *sleep_loc;
-            kmp_info_t *thread = threads_data[i].td.td_thr;
-
-            if (i == this_thr->th.th_info.ds.ds_tid) {
-                continue;
-            }
-            // Since we haven't locked the thread's suspend mutex lock at this
-            // point, there is a small window where a thread might be putting
-            // itself to sleep, but hasn't set the th_sleep_loc field yet.
-            // To work around this, __kmp_execute_tasks_template() periodically checks
-            // to see if other threads are sleeping (using the same random
-            // mechanism that is used for task stealing) and awakens them if
-            // they are.
-            if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
-            {
-                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
-                                 __kmp_gtid_from_thread( this_thr ),
-                                 __kmp_gtid_from_thread( thread ) ) );
-                __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
-            }
-            else {
-                KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
-                                 __kmp_gtid_from_thread( this_thr ),
-                                 __kmp_gtid_from_thread( thread ) ) );
-            }
-        }
+      if (i == this_thr->th.th_info.ds.ds_tid) {
+        continue;
+      }
+      // Since we haven't locked the thread's suspend mutex lock at this
+      // point, there is a small window where a thread might be putting
+      // itself to sleep, but hasn't set the th_sleep_loc field yet.
+      // To work around this, __kmp_execute_tasks_template() periodically checks
+      // to see if other threads are sleeping (using the same random mechanism
+      // that is used for task stealing) and awakens them if they are.
+      if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) {
+        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
+                      __kmp_gtid_from_thread(this_thr),
+                      __kmp_gtid_from_thread(thread)));
+        __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
+      } else {
+        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
+                      __kmp_gtid_from_thread(this_thr),
+                      __kmp_gtid_from_thread(thread)));
+      }
     }
+  }
 
-    KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
-                    __kmp_gtid_from_thread( this_thr ) ) );
+  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
+                __kmp_gtid_from_thread(this_thr)));
 }
 
-
-/* ------------------------------------------------------------------------ */
 /* // TODO: Check the comment consistency
  * Utility routines for "task teams".  A task team (kmp_task_team_t) is kind of
  * like a shadow of the kmp_team_t data struct, with a different lifetime.
@@ -2389,685 +2450,683 @@ __kmp_enable_tasking( kmp_task_team_t *t
  * barriers, when no explicit tasks were spawned (pushed, actually).
  */
 
-
-static kmp_task_team_t *__kmp_free_task_teams = NULL;           // Free list for task_team data structures
+static kmp_task_team_t *__kmp_free_task_teams =
+    NULL; // Free list for task_team data structures
 // Lock for task team data structures
-static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
-
+static kmp_bootstrap_lock_t __kmp_task_team_lock =
+    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
 
-//------------------------------------------------------------------------------
 // __kmp_alloc_task_deque:
 // Allocates a task deque for a particular thread, and initializes the necessary
 // data structures relating to the deque.  This only happens once per thread
-// per task team since task teams are recycled.
-// No lock is needed during allocation since each thread allocates its own
-// deque.
-
-static void
-__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
-{
-    __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
-    KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
-
-    // Initialize last stolen task field to "none"
-    thread_data -> td.td_deque_last_stolen = -1;
-
-    KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
-    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
-    KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
-
-    KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
-                   __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) );
-    // Allocate space for task deque, and zero the deque
-    // Cannot use __kmp_thread_calloc() because threads not around for
-    // kmp_reap_task_team( ).
-    thread_data -> td.td_deque = (kmp_taskdata_t **)
-            __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
-	thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
+// per task team since task teams are recycled. No lock is needed during
+// allocation since each thread allocates its own deque.
+static void __kmp_alloc_task_deque(kmp_info_t *thread,
+                                   kmp_thread_data_t *thread_data) {
+  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
+  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);
+
+  // Initialize last stolen task field to "none"
+  thread_data->td.td_deque_last_stolen = -1;
+
+  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
+  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
+  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);
+
+  KE_TRACE(
+      10,
+      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
+       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
+  // Allocate space for task deque, and zero the deque
+  // Cannot use __kmp_thread_calloc() because threads not around for
+  // kmp_reap_task_team( ).
+  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
+      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
+  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
 }
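
The deque indices above are wrapped with TASK_DEQUE_MASK rather than a modulo.
A minimal sketch of the idea, assuming (as the mask trick requires) that the
deque capacity is a power of two:

#include <cstdint>

static const uint32_t kCapacity = 256;       // must be a power of two
static const uint32_t kMask = kCapacity - 1; // plays the role of TASK_DEQUE_MASK

static uint32_t next_index(uint32_t i) {
  return (i + 1) & kMask; // same result as (i + 1) % kCapacity, no divide needed
}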
 
-//------------------------------------------------------------------------------
 // __kmp_realloc_task_deque:
-// Re-allocates a task deque for a particular thread, copies the content from the old deque
-// and adjusts the necessary data structures relating to the deque.
-// This operation must be done with the deque_lock held.
-
-static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data )
-{
-    kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
-    kmp_int32 new_size = 2 * size;
-
-    KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n",
-                  __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) );
-
-    kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *));
-
-    int i,j;
-    for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ )
-       new_deque[j] = thread_data->td.td_deque[i];
-
-    __kmp_free(thread_data->td.td_deque);
-
-    thread_data -> td.td_deque_head = 0;
-    thread_data -> td.td_deque_tail = size;
-    thread_data -> td.td_deque = new_deque;
-    thread_data -> td.td_deque_size = new_size;
+// Re-allocates a task deque for a particular thread, copies the content from
+// the old deque and adjusts the necessary data structures relating to the
+// deque. This operation must be done with the deque_lock held.
+static void __kmp_realloc_task_deque(kmp_info_t *thread,
+                                     kmp_thread_data_t *thread_data) {
+  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
+  kmp_int32 new_size = 2 * size;
+
+  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
+                "%d] for thread_data %p\n",
+                __kmp_gtid_from_thread(thread), size, new_size, thread_data));
+
+  kmp_taskdata_t **new_deque =
+      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));
+
+  int i, j;
+  for (i = thread_data->td.td_deque_head, j = 0; j < size;
+       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
+    new_deque[j] = thread_data->td.td_deque[i];
+
+  __kmp_free(thread_data->td.td_deque);
+
+  thread_data->td.td_deque_head = 0;
+  thread_data->td.td_deque_tail = size;
+  thread_data->td.td_deque = new_deque;
+  thread_data->td.td_deque_size = new_size;
 }
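
A standalone sketch of the grow-and-copy step performed above, using a
std::vector<int> in place of the kmp_taskdata_t* array and assuming, as the
unconditional copy of 'size' entries and tail = size above imply, that the
routine only runs when the ring is completely full: live entries are copied in
order starting at head, head is reset to 0, and tail becomes the old capacity.

#include <cstdint>
#include <vector>

static void grow_ring(std::vector<int> &buf, uint32_t &head, uint32_t &tail) {
  const uint32_t size = (uint32_t)buf.size(); // old capacity, a power of two
  std::vector<int> bigger(2 * size);
  for (uint32_t i = head, j = 0; j < size; i = (i + 1) & (size - 1), ++j)
    bigger[j] = buf[i]; // oldest entry lands in slot 0
  buf.swap(bigger);
  head = 0;
  tail = size; // all 'size' old entries are still queued
}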
 
-//------------------------------------------------------------------------------
 // __kmp_free_task_deque:
-// Deallocates a task deque for a particular thread.
-// Happens at library deallocation so don't need to reset all thread data fields.
-
-static void
-__kmp_free_task_deque( kmp_thread_data_t *thread_data )
-{
-    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
+// Deallocates a task deque for a particular thread. Happens at library
+// deallocation so don't need to reset all thread data fields.
+static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
+  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
 
-    if ( thread_data -> td.td_deque != NULL ) {
-        TCW_4(thread_data -> td.td_deque_ntasks, 0);
-         __kmp_free( thread_data -> td.td_deque );
-        thread_data -> td.td_deque = NULL;
-    }
-    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+  if (thread_data->td.td_deque != NULL) {
+    TCW_4(thread_data->td.td_deque_ntasks, 0);
+    __kmp_free(thread_data->td.td_deque);
+    thread_data->td.td_deque = NULL;
+  }
+  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 
 #ifdef BUILD_TIED_TASK_STACK
-    // GEH: Figure out what to do here for td_susp_tied_tasks
-    if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
-        __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
-    }
+  // GEH: Figure out what to do here for td_susp_tied_tasks
+  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
+    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
+  }
 #endif // BUILD_TIED_TASK_STACK
 }
 
-
-//------------------------------------------------------------------------------
 // __kmp_realloc_task_threads_data:
-// Allocates a threads_data array for a task team, either by allocating an initial
-// array or enlarging an existing array.  Only the first thread to get the lock
-// allocs or enlarges the array and re-initializes the array elements.
+// Allocates a threads_data array for a task team, either by allocating an
+// initial array or enlarging an existing array.  Only the first thread to get
+// the lock allocs or enlarges the array and re-initializes the array elements.
 // That thread returns "TRUE", the rest return "FALSE".
 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
 // The current size is given by task_team -> tt.tt_max_threads.
+static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
+                                           kmp_task_team_t *task_team) {
+  kmp_thread_data_t **threads_data_p;
+  kmp_int32 nthreads, maxthreads;
+  int is_init_thread = FALSE;
+
+  if (TCR_4(task_team->tt.tt_found_tasks)) {
+    // Already reallocated and initialized.
+    return FALSE;
+  }
+
+  threads_data_p = &task_team->tt.tt_threads_data;
+  nthreads = task_team->tt.tt_nproc;
+  maxthreads = task_team->tt.tt_max_threads;
+
+  // All threads must lock when they encounter the first task of the implicit
+  // task region to make sure threads_data fields are (re)initialized before
+  // use.
+  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
 
-static int
-__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
-{
-    kmp_thread_data_t ** threads_data_p;
-    kmp_int32            nthreads, maxthreads;
-    int                  is_init_thread = FALSE;
-
-    if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
-        // Already reallocated and initialized.
-        return FALSE;
-    }
-
-    threads_data_p = & task_team -> tt.tt_threads_data;
-    nthreads   = task_team -> tt.tt_nproc;
-    maxthreads = task_team -> tt.tt_max_threads;
-
-    // All threads must lock when they encounter the first task of the implicit task
-    // region to make sure threads_data fields are (re)initialized before use.
-    __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-
-    if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
-        // first thread to enable tasking
-        kmp_team_t *team = thread -> th.th_team;
-        int i;
-
-        is_init_thread = TRUE;
-        if ( maxthreads < nthreads ) {
-
-            if ( *threads_data_p != NULL ) {
-                kmp_thread_data_t *old_data = *threads_data_p;
-                kmp_thread_data_t *new_data = NULL;
-
-                KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
-                               "threads data for task_team %p, new_size = %d, old_size = %d\n",
-                               __kmp_gtid_from_thread( thread ), task_team,
-                               nthreads, maxthreads ) );
-                // Reallocate threads_data to have more elements than current array
-                // Cannot use __kmp_thread_realloc() because threads not around for
-                // kmp_reap_task_team( ).  Note all new array entries are initialized
-                // to zero by __kmp_allocate().
-                new_data = (kmp_thread_data_t *)
-                            __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
-                // copy old data to new data
-                KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t),
-                              (void *) old_data,
-                              maxthreads * sizeof(kmp_taskdata_t *) );
+  if (!TCR_4(task_team->tt.tt_found_tasks)) {
+    // first thread to enable tasking
+    kmp_team_t *team = thread->th.th_team;
+    int i;
+
+    is_init_thread = TRUE;
+    if (maxthreads < nthreads) {
+
+      if (*threads_data_p != NULL) {
+        kmp_thread_data_t *old_data = *threads_data_p;
+        kmp_thread_data_t *new_data = NULL;
+
+        KE_TRACE(
+            10,
+            ("__kmp_realloc_task_threads_data: T#%d reallocating "
+             "threads data for task_team %p, new_size = %d, old_size = %d\n",
+             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
+        // Reallocate threads_data to have more elements than current array
+        // Cannot use __kmp_thread_realloc() because threads not around for
+        // kmp_reap_task_team( ).  Note all new array entries are initialized
+        // to zero by __kmp_allocate().
+        new_data = (kmp_thread_data_t *)__kmp_allocate(
+            nthreads * sizeof(kmp_thread_data_t));
+        // copy old data to new data
+        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
+                     (void *)old_data, maxthreads * sizeof(kmp_taskdata_t *));
 
 #ifdef BUILD_TIED_TASK_STACK
-                // GEH: Figure out if this is the right thing to do
-                for (i = maxthreads; i < nthreads; i++) {
-                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
-                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
-                }
+        // GEH: Figure out if this is the right thing to do
+        for (i = maxthreads; i < nthreads; i++) {
+          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
+          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
+        }
 #endif // BUILD_TIED_TASK_STACK
-                // Install the new data and free the old data
-                (*threads_data_p) = new_data;
-                __kmp_free( old_data );
-            }
-            else {
-                KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
-                               "threads data for task_team %p, size = %d\n",
-                               __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
-                // Make the initial allocate for threads_data array, and zero entries
-                // Cannot use __kmp_thread_calloc() because threads not around for
-                // kmp_reap_task_team( ).
-                ANNOTATE_IGNORE_WRITES_BEGIN();
-                *threads_data_p = (kmp_thread_data_t *)
-                                  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
-                ANNOTATE_IGNORE_WRITES_END();
+        // Install the new data and free the old data
+        (*threads_data_p) = new_data;
+        __kmp_free(old_data);
+      } else {
+        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
+                      "threads data for task_team %p, size = %d\n",
+                      __kmp_gtid_from_thread(thread), task_team, nthreads));
+        // Make the initial allocate for threads_data array, and zero entries
+        // Cannot use __kmp_thread_calloc() because threads not around for
+        // kmp_reap_task_team( ).
+        ANNOTATE_IGNORE_WRITES_BEGIN();
+        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
+            nthreads * sizeof(kmp_thread_data_t));
+        ANNOTATE_IGNORE_WRITES_END();
 #ifdef BUILD_TIED_TASK_STACK
-                // GEH: Figure out if this is the right thing to do
-                for (i = 0; i < nthreads; i++) {
-                    kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
-                    __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
-                }
-#endif // BUILD_TIED_TASK_STACK
-            }
-            task_team -> tt.tt_max_threads = nthreads;
-        }
-        else {
-            // If array has (more than) enough elements, go ahead and use it
-            KMP_DEBUG_ASSERT( *threads_data_p != NULL );
-        }
-
-        // initialize threads_data pointers back to thread_info structures
+        // GEH: Figure out if this is the right thing to do
         for (i = 0; i < nthreads; i++) {
-            kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
-            thread_data -> td.td_thr = team -> t.t_threads[i];
-
-            if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
-                // The last stolen field survives across teams / barrier, and the number
-                // of threads may have changed.  It's possible (likely?) that a new
-                // parallel region will exhibit the same behavior as the previous region.
-                thread_data -> td.td_deque_last_stolen = -1;
-            }
+          kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
+          __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
         }
+#endif // BUILD_TIED_TASK_STACK
+      }
+      task_team->tt.tt_max_threads = nthreads;
+    } else {
+      // If array has (more than) enough elements, go ahead and use it
+      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
+    }
 
-        KMP_MB();
-        TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
+    // initialize threads_data pointers back to thread_info structures
+    for (i = 0; i < nthreads; i++) {
+      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
+      thread_data->td.td_thr = team->t.t_threads[i];
+
+      if (thread_data->td.td_deque_last_stolen >= nthreads) {
+        // The last stolen field survives across teams / barrier, and the number
+        // of threads may have changed.  It's possible (likely?) that a new
+        // parallel region will exhibit the same behavior as previous region.
+        thread_data->td.td_deque_last_stolen = -1;
+      }
     }
 
-    __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-    return is_init_thread;
-}
+    KMP_MB();
+    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
+  }
 
+  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
+  return is_init_thread;
+}
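
The routine above uses a check / lock / re-check pattern so that only the first
thread to arrive allocates or enlarges the shared threads_data array and
returns TRUE, while everyone else returns FALSE. A minimal sketch of that
pattern, with std::mutex and a bool standing in for tt_threads_lock and
tt_found_tasks (the runtime's memory-ordering macros such as KMP_MB and
TCW_SYNC_4 are omitted here):

#include <mutex>

static std::mutex g_lock;          // stands in for tt.tt_threads_lock
static bool g_initialized = false; // stands in for tt.tt_found_tasks

static bool init_shared_data_once() {
  if (g_initialized)               // cheap pre-check before taking the lock
    return false;
  std::lock_guard<std::mutex> guard(g_lock);
  if (g_initialized)               // re-check: another thread may have won
    return false;
  // ... allocate or enlarge the shared array here ...
  g_initialized = true;
  return true;                     // only the initializing caller sees true
}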
 
-//------------------------------------------------------------------------------
 // __kmp_free_task_threads_data:
 // Deallocates a threads_data array for a task team, including any attached
 // tasking deques.  Only occurs at library shutdown.
-
-static void
-__kmp_free_task_threads_data( kmp_task_team_t *task_team )
-{
-    __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-    if ( task_team -> tt.tt_threads_data != NULL ) {
-        int i;
-        for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
-            __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
-        }
-        __kmp_free( task_team -> tt.tt_threads_data );
-        task_team -> tt.tt_threads_data = NULL;
-    }
-    __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
+static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
+  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
+  if (task_team->tt.tt_threads_data != NULL) {
+    int i;
+    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
+      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
+    }
+    __kmp_free(task_team->tt.tt_threads_data);
+    task_team->tt.tt_threads_data = NULL;
+  }
+  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
 }
 
-
-//------------------------------------------------------------------------------
 // __kmp_allocate_task_team:
 // Allocates a task team associated with a specific team, taking it from
-// the global task team free list if possible.  Also initializes data structures.
-
-static kmp_task_team_t *
-__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
-{
-    kmp_task_team_t *task_team = NULL;
-    int nthreads;
-
-    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
-                    (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
-
-    if (TCR_PTR(__kmp_free_task_teams) != NULL) {
-        // Take a task team from the task team pool
-        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
-        if (__kmp_free_task_teams != NULL) {
-            task_team = __kmp_free_task_teams;
-            TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
-            task_team -> tt.tt_next = NULL;
-        }
-        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
-    }
-
-    if (task_team == NULL) {
-        KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
-                       "task team for team %p\n",
-                       __kmp_gtid_from_thread( thread ), team ) );
-        // Allocate a new task team if one is not available.
-        // Cannot use __kmp_thread_malloc() because threads not around for
-        // kmp_reap_task_team( ).
-        task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
-        __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
-        //task_team -> tt.tt_threads_data = NULL;   // AC: __kmp_allocate zeroes returned memory
-        //task_team -> tt.tt_max_threads = 0;
-        //task_team -> tt.tt_next = NULL;
-    }
-
-    TCW_4(task_team -> tt.tt_found_tasks, FALSE);
-#if OMP_45_ENABLED
-    TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE);
-#endif
-    task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
-
-    TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
-    TCW_4( task_team -> tt.tt_active, TRUE );
-
-    KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n",
-                    (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) );
-    return task_team;
+// the global task team free list if possible.  Also initializes data
+// structures.
+static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
+                                                 kmp_team_t *team) {
+  kmp_task_team_t *task_team = NULL;
+  int nthreads;
+
+  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
+                (thread ? __kmp_gtid_from_thread(thread) : -1), team));
+
+  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
+    // Take a task team from the task team pool
+    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
+    if (__kmp_free_task_teams != NULL) {
+      task_team = __kmp_free_task_teams;
+      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
+      task_team->tt.tt_next = NULL;
+    }
+    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
+  }
+
+  if (task_team == NULL) {
+    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
+                  "task team for team %p\n",
+                  __kmp_gtid_from_thread(thread), team));
+    // Allocate a new task team if one is not available.
+    // Cannot use __kmp_thread_malloc() because threads not around for
+    // kmp_reap_task_team( ).
+    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
+    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
+    // AC: __kmp_allocate zeroes returned memory
+    // task_team -> tt.tt_threads_data = NULL;
+    // task_team -> tt.tt_max_threads = 0;
+    // task_team -> tt.tt_next = NULL;
+  }
+
+  TCW_4(task_team->tt.tt_found_tasks, FALSE);
+#if OMP_45_ENABLED
+  TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+#endif
+  task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
+
+  TCW_4(task_team->tt.tt_unfinished_threads, nthreads);
+  TCW_4(task_team->tt.tt_active, TRUE);
+
+  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
+                "unfinished_threads init'd to %d\n",
+                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
+                task_team->tt.tt_unfinished_threads));
+  return task_team;
 }
 
-
-//------------------------------------------------------------------------------
 // __kmp_free_task_team:
 // Frees the task team associated with a specific thread, and adds it
 // to the global task team free list.
+void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
+  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
+                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
+
+  // Put task team back on free list
+  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
+
+  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
+  task_team->tt.tt_next = __kmp_free_task_teams;
+  TCW_PTR(__kmp_free_task_teams, task_team);
 
-void
-__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
-{
-    KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
-                    thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
-
-    // Put task team back on free list
-    __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
-
-    KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
-    task_team -> tt.tt_next = __kmp_free_task_teams;
-    TCW_PTR(__kmp_free_task_teams, task_team);
-
-    __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
+  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
 }
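
__kmp_allocate_task_team and __kmp_free_task_team above manage task teams
through a lock-protected intrusive free list, with an unlocked emptiness check
before the pop. A small standalone sketch of that pattern (Node/next standing
in for kmp_task_team_t and tt.tt_next, std::mutex for the bootstrap lock):

#include <mutex>

struct Node {
  Node *next = nullptr; // intrusive link, like tt.tt_next
};

static Node *g_free_list = nullptr; // like __kmp_free_task_teams
static std::mutex g_free_lock;      // like __kmp_task_team_lock

static Node *take_from_free_list() {
  Node *n = nullptr;
  if (g_free_list != nullptr) {     // cheap unlocked check, as above
    std::lock_guard<std::mutex> guard(g_free_lock);
    if (g_free_list != nullptr) {   // re-check once the lock is held
      n = g_free_list;
      g_free_list = n->next;
      n->next = nullptr;
    }
  }
  return n; // NULL means the caller allocates a fresh object instead
}

static void return_to_free_list(Node *n) {
  std::lock_guard<std::mutex> guard(g_free_lock);
  n->next = g_free_list;
  g_free_list = n;
}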
 
-
-//------------------------------------------------------------------------------
 // __kmp_reap_task_teams:
 // Free all the task teams on the task team free list.
 // Should only be done during library shutdown.
-// Cannot do anything that needs a thread structure or gtid since they are already gone.
-
-void
-__kmp_reap_task_teams( void )
-{
-    kmp_task_team_t   *task_team;
-
-    if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
-        // Free all task_teams on the free list
-        __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
-        while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
-            __kmp_free_task_teams = task_team -> tt.tt_next;
-            task_team -> tt.tt_next = NULL;
-
-            // Free threads_data if necessary
-            if ( task_team -> tt.tt_threads_data != NULL ) {
-                __kmp_free_task_threads_data( task_team );
-            }
-            __kmp_free( task_team );
-        }
-        __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
+// Cannot do anything that needs a thread structure or gtid since they are
+// already gone.
+void __kmp_reap_task_teams(void) {
+  kmp_task_team_t *task_team;
+
+  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
+    // Free all task_teams on the free list
+    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
+    while ((task_team = __kmp_free_task_teams) != NULL) {
+      __kmp_free_task_teams = task_team->tt.tt_next;
+      task_team->tt.tt_next = NULL;
+
+      // Free threads_data if necessary
+      if (task_team->tt.tt_threads_data != NULL) {
+        __kmp_free_task_threads_data(task_team);
+      }
+      __kmp_free(task_team);
     }
+    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
+  }
 }
 
-//------------------------------------------------------------------------------
 // __kmp_wait_to_unref_task_teams:
 // Some threads could still be in the fork barrier release code, possibly
 // trying to steal tasks.  Wait for each thread to unreference its task team.
-//
-void
-__kmp_wait_to_unref_task_teams(void)
-{
-    kmp_info_t *thread;
-    kmp_uint32 spins;
-    int done;
-
-    KMP_INIT_YIELD( spins );
-
-    for (;;) {
-        done = TRUE;
-
-        // TODO: GEH - this may be is wrong because some sync would be necessary
-        //             in case threads are added to the pool during the traversal.
-        //             Need to verify that lock for thread pool is held when calling
-        //             this routine.
-        for (thread = (kmp_info_t *)__kmp_thread_pool;
-             thread != NULL;
-             thread = thread->th.th_next_pool)
-        {
+void __kmp_wait_to_unref_task_teams(void) {
+  kmp_info_t *thread;
+  kmp_uint32 spins;
+  int done;
+
+  KMP_INIT_YIELD(spins);
+
+  for (;;) {
+    done = TRUE;
+
+    // TODO: GEH - this may be wrong because some sync would be necessary
+    // in case threads are added to the pool during the traversal. Need to
+    // verify that lock for thread pool is held when calling this routine.
+    for (thread = (kmp_info_t *)__kmp_thread_pool; thread != NULL;
+         thread = thread->th.th_next_pool) {
 #if KMP_OS_WINDOWS
-            DWORD exit_val;
+      DWORD exit_val;
 #endif
-            if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
-                KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
-                               __kmp_gtid_from_thread( thread ) ) );
-                continue;
-            }
+      if (TCR_PTR(thread->th.th_task_team) == NULL) {
+        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
+                      __kmp_gtid_from_thread(thread)));
+        continue;
+      }
 #if KMP_OS_WINDOWS
-            // TODO: GEH - add this check for Linux* OS / OS X* as well?
-            if (!__kmp_is_thread_alive(thread, &exit_val)) {
-                thread->th.th_task_team = NULL;
-                continue;
-            }
+      // TODO: GEH - add this check for Linux* OS / OS X* as well?
+      if (!__kmp_is_thread_alive(thread, &exit_val)) {
+        thread->th.th_task_team = NULL;
+        continue;
+      }
 #endif
 
-            done = FALSE;  // Because th_task_team pointer is not NULL for this thread
-
-            KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
-                           __kmp_gtid_from_thread( thread ) ) );
+      done = FALSE; // Because th_task_team pointer is not NULL for this thread
 
-            if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
-                volatile void *sleep_loc;
-                // If the thread is sleeping, awaken it.
-                if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
-                    KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
-                                    __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
-                    __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
-                }
-            }
+      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
+                    "unreference task_team\n",
+                    __kmp_gtid_from_thread(thread)));
+
+      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+        volatile void *sleep_loc;
+        // If the thread is sleeping, awaken it.
+        if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) {
+          KA_TRACE(
+              10,
+              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
+               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
+          __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc);
         }
-        if (done) {
-            break;
-        }
-
-        // If we are oversubscribed,
-        // or have waited a bit (and library mode is throughput), yield.
-        // Pause is in the following code.
-        KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
-        KMP_YIELD_SPIN( spins );        // Yields only if KMP_LIBRARY=throughput
+      }
+    }
+    if (done) {
+      break;
     }
-}
 
+    // If we are oversubscribed, or have waited a bit (and library mode is
+    // throughput), yield. Pause is in the following code.
+    KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
+    KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput
+  }
+}
 
-//------------------------------------------------------------------------------
 // __kmp_task_team_setup:  Create a task_team for the current team, but use
 // an already created, unused one if it already exists.
-void
-__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always )
-{
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
+void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) {
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
 
-    // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next.
-    // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use.
-    if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) {
-        team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team );
-        KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n",
-                      __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state],
-                      ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
-    }
-
-    // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is
-    // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the
-    // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely
-    // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for
-    // serialized teams.
-    if (team->t.t_nproc > 1) {
-        int other_team = 1 - this_thr->th.th_task_state;
-        if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
-                team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team );
-                KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n",
-                                __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
-                              ((team != NULL) ? team->t.t_id : -1), other_team ));
-        }
-        else { // Leave the old task team struct in place for the upcoming region; adjust as needed
-            kmp_task_team_t *task_team = team->t.t_task_team[other_team];
-            if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) {
-                TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
-                TCW_4(task_team->tt.tt_found_tasks, FALSE);
+  // If this task_team hasn't been created yet, allocate it. It will be used in
+  // the region after the next.
+  // If it exists, it is the current task team and shouldn't be touched yet as
+  // it may still be in use.
+  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
+      (always || team->t.t_nproc > 1)) {
+    team->t.t_task_team[this_thr->th.th_task_state] =
+        __kmp_allocate_task_team(this_thr, team);
+    KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p "
+                  "for team %d at parity=%d\n",
+                  __kmp_gtid_from_thread(this_thr),
+                  team->t.t_task_team[this_thr->th.th_task_state],
+                  ((team != NULL) ? team->t.t_id : -1),
+                  this_thr->th.th_task_state));
+  }
+
+  // After threads exit the release, they will call sync, and then point to this
+  // other task_team; make sure it is allocated and properly initialized. As
+  // threads spin in the barrier release phase, they will continue to use the
+  // previous task_team struct(above), until they receive the signal to stop
+  // checking for tasks (they can't safely reference the kmp_team_t struct,
+  // which could be reallocated by the master thread). No task teams are formed
+  // for serialized teams.
+  if (team->t.t_nproc > 1) {
+    int other_team = 1 - this_thr->th.th_task_state;
+    if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
+      team->t.t_task_team[other_team] =
+          __kmp_allocate_task_team(this_thr, team);
+      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new "
+                    "task_team %p for team %d at parity=%d\n",
+                    __kmp_gtid_from_thread(this_thr),
+                    team->t.t_task_team[other_team],
+                    ((team != NULL) ? team->t.t_id : -1), other_team));
+    } else { // Leave the old task team struct in place for the upcoming region;
+      // adjust as needed
+      kmp_task_team_t *task_team = team->t.t_task_team[other_team];
+      if (!task_team->tt.tt_active ||
+          team->t.t_nproc != task_team->tt.tt_nproc) {
+        TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
+        TCW_4(task_team->tt.tt_found_tasks, FALSE);
 #if OMP_45_ENABLED
-                TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
+        TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
 #endif
-                TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc );
-                TCW_4(task_team->tt.tt_active, TRUE );
-            }
-            // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary
-            KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n",
-                          __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team],
-                          ((team != NULL) ? team->t.t_id : -1), other_team ));
-        }
+        TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc);
+        TCW_4(task_team->tt.tt_active, TRUE);
+      }
+      // if team size has changed, the first thread to enable tasking will
+      // realloc threads_data if necessary
+      KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team "
+                    "%p for team %d at parity=%d\n",
+                    __kmp_gtid_from_thread(this_thr),
+                    team->t.t_task_team[other_team],
+                    ((team != NULL) ? team->t.t_id : -1), other_team));
     }
+  }
 }
 
-
-//------------------------------------------------------------------------------
 // __kmp_task_team_sync: Propagation of task team data from team to threads
 // which happens just after the release phase of a team barrier.  This may be
 // called by any thread, but only for teams with # threads > 1.
+void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
 
-void
-__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
-{
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-
-    // Toggle the th_task_state field, to switch which task_team this thread refers to
-    this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
-    // It is now safe to propagate the task team pointer from the team struct to the current thread.
-    TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]);
-    KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n",
-                  __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team,
-                  ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
-}
-
-
-//--------------------------------------------------------------------------------------------
-// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather
-// phase.  Only called by master thread if #threads in team > 1 or if proxy tasks were created.
-// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0
-// optionally as the last argument. When wait is zero, master thread does not wait for
-// unfinished_threads to reach 0.
-void
-__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team
-                      USE_ITT_BUILD_ARG(void * itt_sync_obj)
-                      , int wait)
-{
-    kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
-
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
-    KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
-
-    if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) {
-        if (wait) {
-            KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n",
-                          __kmp_gtid_from_thread(this_thr), task_team));
-            // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait
-            // here for tasks to complete. To avoid memory contention, only master thread checks termination condition.
-            kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
-            flag.wait(this_thr, TRUE
-                      USE_ITT_BUILD_ARG(itt_sync_obj));
-        }
-        // Deactivate the old task team, so that the worker threads will stop referencing it while spinning.
-        KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
-                      "setting active to false, setting local and team's pointer to NULL\n",
-                      __kmp_gtid_from_thread(this_thr), task_team));
-#if OMP_45_ENABLED
-        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE );
-        TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE );
+  // Toggle the th_task_state field, to switch which task_team this thread
+  // refers to
+  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
+  // It is now safe to propagate the task team pointer from the team struct to
+  // the current thread.
+  TCW_PTR(this_thr->th.th_task_team,
+          team->t.t_task_team[this_thr->th.th_task_state]);
+  KA_TRACE(20,
+           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
+            "%p from Team #%d (parity=%d)\n",
+            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
+            ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state));
+}
+
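
A standalone sketch of the two-slot scheme used by __kmp_task_team_setup and
__kmp_task_team_sync above, with invented Toy* names rather than the runtime's
types: the team keeps two task-team slots, and each thread flips its own parity
bit at the barrier to adopt the slot that was prepared for the next region.

    #include <cassert>
    #include <cstdio>

    // Hypothetical, much-reduced stand-ins for kmp_team_t / kmp_info_t state.
    struct ToyTaskTeam { int nproc; bool active; };
    struct ToyTeam     { ToyTaskTeam *task_team[2]; int nproc; };
    struct ToyThread   { int task_state; ToyTaskTeam *task_team; };

    // Mirrors the idea of __kmp_task_team_sync: flip parity, then read the
    // team's pointer for the new parity slot.
    static void toy_task_team_sync(ToyThread *thr, ToyTeam *team) {
      thr->task_state = 1 - thr->task_state;              // toggle 0 <-> 1
      thr->task_team  = team->task_team[thr->task_state]; // adopt the new slot
    }

    int main() {
      ToyTaskTeam a{4, true}, b{4, true};
      ToyTeam team{{&a, &b}, 4};
      ToyThread thr{0, &a};

      toy_task_team_sync(&thr, &team); // after one barrier: parity 1, slot b
      assert(thr.task_state == 1 && thr.task_team == &b);
      toy_task_team_sync(&thr, &team); // next barrier: back to parity 0, slot a
      assert(thr.task_state == 0 && thr.task_team == &a);
      std::printf("parity toggle behaves as described\n");
      return 0;
    }
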
+// __kmp_task_team_wait: Master thread waits for outstanding tasks after the
+// barrier gather phase. Only called by master thread if #threads in team > 1 or
+// if proxy tasks were created.
+//
+// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off
+// by passing in 0 optionally as the last argument. When wait is zero, master
+// thread does not wait for unfinished_threads to reach 0.
+void __kmp_task_team_wait(
+    kmp_info_t *this_thr,
+    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
+  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
+
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
+  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
+
+  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
+    if (wait) {
+      KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks "
+                    "(for unfinished_threads to reach 0) on task_team = %p\n",
+                    __kmp_gtid_from_thread(this_thr), task_team));
+      // Worker threads may have dropped through to release phase, but could
+      // still be executing tasks. Wait here for tasks to complete. To avoid
+      // memory contention, only master thread checks termination condition.
+      kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U);
+      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
+    }
+    // Deactivate the old task team, so that the worker threads will stop
+    // referencing it while spinning.
+    KA_TRACE(
+        20,
+        ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: "
+         "setting active to false, setting local and team's pointer to NULL\n",
+         __kmp_gtid_from_thread(this_thr), task_team));
+#if OMP_45_ENABLED
+    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
+                     task_team->tt.tt_found_proxy_tasks == TRUE);
+    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
 #else
-        KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
+    KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1);
 #endif
-        TCW_SYNC_4( task_team->tt.tt_active, FALSE );
-        KMP_MB();
+    TCW_SYNC_4(task_team->tt.tt_active, FALSE);
+    KMP_MB();
 
-        TCW_PTR(this_thr->th.th_task_team, NULL);
-    }
+    TCW_PTR(this_thr->th.th_task_team, NULL);
+  }
 }
 
-
-//------------------------------------------------------------------------------
 // __kmp_tasking_barrier:
 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
-// Internal function to execute all tasks prior to a regular barrier or a
-// join barrier.  It is a full barrier itself, which unfortunately turns
-// regular barriers into double barriers and join barriers into 1 1/2
-// barriers.
-void
-__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
-{
-    volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
-    int flag = FALSE;
-    KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
+// Internal function to execute all tasks prior to a regular barrier or a join
+// barrier. It is a full barrier itself, which unfortunately turns regular
+// barriers into double barriers and join barriers into 1 1/2 barriers.
+void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
+  volatile kmp_uint32 *spin =
+      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads;
+  int flag = FALSE;
+  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);
 
 #if USE_ITT_BUILD
-    KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
+  KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL);
 #endif /* USE_ITT_BUILD */
-    kmp_flag_32 spin_flag(spin, 0U);
-    while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag
-                                     USE_ITT_BUILD_ARG(NULL), 0 ) ) {
+  kmp_flag_32 spin_flag(spin, 0U);
+  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
+                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
 #if USE_ITT_BUILD
-        // TODO: What about itt_sync_obj??
-        KMP_FSYNC_SPIN_PREPARE( spin );
+    // TODO: What about itt_sync_obj??
+    KMP_FSYNC_SPIN_PREPARE(spin);
 #endif /* USE_ITT_BUILD */
 
-        if( TCR_4(__kmp_global.g.g_done) ) {
-            if( __kmp_global.g.g_abort )
-                __kmp_abort_thread( );
-            break;
-        }
-        KMP_YIELD( TRUE );       // GH: We always yield here
+    if (TCR_4(__kmp_global.g.g_done)) {
+      if (__kmp_global.g.g_abort)
+        __kmp_abort_thread();
+      break;
     }
+    KMP_YIELD(TRUE); // GH: We always yield here
+  }
 #if USE_ITT_BUILD
-    KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
+  KMP_FSYNC_SPIN_ACQUIRED((void *)spin);
 #endif /* USE_ITT_BUILD */
 }
 
-
 #if OMP_45_ENABLED
 
-/* __kmp_give_task puts a task into a given thread queue if:
-    - the queue for that thread was created
-    - there's space in that queue
-
-    Because of this, __kmp_push_task needs to check if there's space after getting the lock
- */
-static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass )
-{
-    kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
-    kmp_task_team_t *	task_team = taskdata->td_task_team;
-
-    KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) );
-
-    // If task_team is NULL something went really bad...
-    KMP_DEBUG_ASSERT( task_team != NULL );
-
-    bool result = false;
-    kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ];
-
-    if (thread_data -> td.td_deque == NULL ) {
-        // There's no queue in this thread, go find another one
-        // We're guaranteed that at least one thread has a queue
-        KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) );
-        return result;
-    }
-
-    if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
-    {
-        KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
-
-        // if this deque is bigger than the pass ratio give a chance to another thread
-        if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result;
-
-        __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
-        __kmp_realloc_task_deque(thread,thread_data);
-
-    } else {
-
-       __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock );
-
-       if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) )
-       {
-           KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) );
-
-           // if this deque is bigger than the pass ratio give a chance to another thread
-           if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass )
-              goto release_and_exit;
-
-           __kmp_realloc_task_deque(thread,thread_data);
-       }
-    }
-
-    // lock is held here, and there is space in the deque
-
-    thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
-    // Wrap index.
-    thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td);
-    TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
-
-    result = true;
-    KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) );
+// __kmp_give_task puts a task into a given thread queue if:
+//  - the queue for that thread was created
+//  - there's space in that queue
+// Because of this, __kmp_push_task needs to check if there's space after
+// getting the lock
+static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
+                            kmp_int32 pass) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_task_team_t *task_team = taskdata->td_task_team;
+
+  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
+                taskdata, tid));
+
+  // If task_team is NULL something went really bad...
+  KMP_DEBUG_ASSERT(task_team != NULL);
+
+  bool result = false;
+  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
+
+  if (thread_data->td.td_deque == NULL) {
+    // There's no queue in this thread, go find another one
+    // We're guaranteed that at least one thread has a queue
+    KA_TRACE(30,
+             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
+              tid, taskdata));
+    return result;
+  }
+
+  if (TCR_4(thread_data->td.td_deque_ntasks) >=
+      TASK_DEQUE_SIZE(thread_data->td)) {
+    KA_TRACE(
+        30,
+        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
+         taskdata, tid));
+
+    // if this deque is bigger than the pass ratio give a chance to another
+    // thread
+    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
+      return result;
+
+    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+    __kmp_realloc_task_deque(thread, thread_data);
+
+  } else {
+
+    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
+
+    if (TCR_4(thread_data->td.td_deque_ntasks) >=
+        TASK_DEQUE_SIZE(thread_data->td)) {
+      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
+                    "thread %d.\n",
+                    taskdata, tid));
+
+      // if this deque is bigger than the pass ratio give a chance to another
+      // thread
+      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
+        goto release_and_exit;
+
+      __kmp_realloc_task_deque(thread, thread_data);
+    }
+  }
+
+  // lock is held here, and there is space in the deque
+
+  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
+  // Wrap index.
+  thread_data->td.td_deque_tail =
+      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
+  TCW_4(thread_data->td.td_deque_ntasks,
+        TCR_4(thread_data->td.td_deque_ntasks) + 1);
+
+  result = true;
+  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
+                taskdata, tid));
 
 release_and_exit:
-    __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock );
+  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 
-     return result;
+  return result;
 }
 
-
-/* The finish of the a proxy tasks is divided in two pieces:
+/* The finish of a proxy task is divided into two pieces:
     - the top half is the one that can be done from a thread outside the team
     - the bottom half must be run from a thread within the team
 
-    In order to run the bottom half the task gets queued back into one of the threads of the team.
-    Once the td_incomplete_child_task counter of the parent is decremented the threads can leave the barriers.
-    So, the bottom half needs to be queued before the counter is decremented. The top half is therefore divided in two parts:
+   In order to run the bottom half the task gets queued back into one of the
+   threads of the team. Once the td_incomplete_child_task counter of the parent
+   is decremented the threads can leave the barriers. So, the bottom half needs
+   to be queued before the counter is decremented. The top half is therefore
+   divided in two parts:
     - things that can be run before queuing the bottom half
     - things that must be run after queuing the bottom half
 
-    This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this
-    we use the td_incomplete_child_task of the proxy task to synchronize the top and bottom half.
-*/
-
-static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata )
-{
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
-
-    taskdata -> td_flags.complete = 1;   // mark the task as completed
-
-    if ( taskdata->td_taskgroup )
-       KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
+   This creates a second race as the bottom half can free the task before the
+   second top half is executed. To avoid this we use the
+   td_incomplete_child_task of the proxy task to synchronize the top and bottom
+   half. */
+static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
+
+  taskdata->td_flags.complete = 1; // mark the task as completed
+
+  if (taskdata->td_taskgroup)
+    KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count));
+
+  // Create an imaginary child for this task so the bottom half cannot
+  // release the task before we have completed the second top half
+  TCI_4(taskdata->td_incomplete_child_tasks);
+}
+
+static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
+  kmp_int32 children = 0;
+
+  // Predecrement simulated by "- 1" calculation
+  children =
+      KMP_TEST_THEN_DEC32(
+          (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) -
+      1;
+  KMP_DEBUG_ASSERT(children >= 0);
+
+  // Remove the imaginary child
+  TCD_4(taskdata->td_incomplete_child_tasks);
+}
+
+static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
+  kmp_info_t *thread = __kmp_threads[gtid];
+
+  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
+  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
+                   1); // top half must run before bottom half
+
+  // We need to wait to make sure the top half is finished
+  // Spinning here should be ok as this should happen quickly
+  while (TCR_4(taskdata->td_incomplete_child_tasks) > 0)
+    ;
 
-    // Create an imaginary children for this task so the bottom half cannot release the task before we have completed the second top half
-    TCI_4(taskdata->td_incomplete_child_tasks);
-}
-
-static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata )
-{
-    kmp_int32 children = 0;
-
-    // Predecrement simulated by "- 1" calculation
-    children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
-    KMP_DEBUG_ASSERT( children >= 0 );
-
-    // Remove the imaginary children
-    TCD_4(taskdata->td_incomplete_child_tasks);
-}
-
-static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask )
-{
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
-    kmp_info_t * thread = __kmp_threads[ gtid ];
-
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY );
-    KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must run before bottom half
-
-    // We need to wait to make sure the top half is finished
-    // Spinning here should be ok as this should happen quickly
-    while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ;
-
-    __kmp_release_deps(gtid,taskdata);
-    __kmp_free_task_and_ancestors(gtid, taskdata, thread);
+  __kmp_release_deps(gtid, taskdata);
+  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
 }
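
A toy model of the handshake described in the comment block above, using
invented names and ordinary C++ threads rather than the runtime's machinery:
the first top half adds an imaginary child so the bottom half spins, the second
top half removes it, and only then may the bottom half free the task.

    #include <atomic>
    #include <cassert>
    #include <thread>

    // Toy proxy-task record: only the fields needed to show the handshake.
    struct ToyProxy {
      std::atomic<int> incomplete_children{0};
      bool freed = false;
    };

    static void first_top_half(ToyProxy &p) {
      p.incomplete_children.fetch_add(1); // imaginary child blocks the bottom half
    }

    static void second_top_half(ToyProxy &p) {
      p.incomplete_children.fetch_sub(1); // remove the imaginary child
    }

    static void bottom_half(ToyProxy &p) {
      while (p.incomplete_children.load() > 0) {
      } // spin until the second top half has run
      p.freed = true; // only now is it safe to release/free the task
    }

    int main() {
      ToyProxy p;
      first_top_half(p);                       // done by the outside thread
      std::thread t([&p] { bottom_half(p); }); // queued back into the team
      second_top_half(p);                      // outside thread finishes up
      t.join();
      assert(p.freed);
      return 0;
    }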
 
 /*!
@@ -3075,132 +3134,153 @@ static void __kmp_bottom_half_finish_pro
 @param gtid Global Thread ID of encountering thread
 @param ptask Task whose execution is completed
 
-Execute the completation of a proxy task from a thread of that is part of the team. Run first and bottom halves directly.
+Execute the completion of a proxy task from a thread that is part of the team.
+Run the top and bottom halves directly.
 */
-void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask )
-{
-    KMP_DEBUG_ASSERT( ptask != NULL );
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
-    KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) );
-
-    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
-
-    __kmp_first_top_half_finish_proxy(taskdata);
-    __kmp_second_top_half_finish_proxy(taskdata);
-    __kmp_bottom_half_finish_proxy(gtid,ptask);
-
-    KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) );
+void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
+  KMP_DEBUG_ASSERT(ptask != NULL);
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
+  KA_TRACE(
+      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
+           gtid, taskdata));
+
+  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
+
+  __kmp_first_top_half_finish_proxy(taskdata);
+  __kmp_second_top_half_finish_proxy(taskdata);
+  __kmp_bottom_half_finish_proxy(gtid, ptask);
+
+  KA_TRACE(10,
+           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
+            gtid, taskdata));
 }
 
 /*!
 @ingroup TASKING
 @param ptask Task whose execution is completed
 
-Execute the completation of a proxy task from a thread that could not belong to the team.
+Execute the completion of a proxy task from a thread that may not belong to
+the team.
 */
-void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask )
-{
-    KMP_DEBUG_ASSERT( ptask != NULL );
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask);
+void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
+  KMP_DEBUG_ASSERT(ptask != NULL);
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
 
-    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) );
+  KA_TRACE(
+      10,
+      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
+       taskdata));
 
-    KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY );
+  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
 
-    __kmp_first_top_half_finish_proxy(taskdata);
+  __kmp_first_top_half_finish_proxy(taskdata);
 
-    // Enqueue task to complete bottom half completion from a thread within the corresponding team
-    kmp_team_t * team = taskdata->td_team;
-    kmp_int32 nthreads = team->t.t_nproc;
-    kmp_info_t *thread;
+  // Enqueue task to complete bottom half completion from a thread within the
+  // corresponding team
+  kmp_team_t *team = taskdata->td_team;
+  kmp_int32 nthreads = team->t.t_nproc;
+  kmp_info_t *thread;
 
-    //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here
-    kmp_int32 start_k = 0;
-    kmp_int32 pass = 1;
-    kmp_int32 k = start_k;
+  // This should be similar to start_k = __kmp_get_random( thread ) % nthreads
+  // but we cannot use __kmp_get_random here
+  kmp_int32 start_k = 0;
+  kmp_int32 pass = 1;
+  kmp_int32 k = start_k;
 
-    do {
-        //For now we're just linearly trying to find a thread
-        thread = team->t.t_threads[k];
-        k = (k+1) % nthreads;
+  do {
+    // For now we're just linearly trying to find a thread
+    thread = team->t.t_threads[k];
+    k = (k + 1) % nthreads;
 
-        // we did a full pass through all the threads
-        if ( k == start_k ) pass = pass << 1;
+    // we did a full pass through all the threads
+    if (k == start_k)
+      pass = pass << 1;
 
-    } while ( !__kmp_give_task( thread, k,  ptask, pass ) );
+  } while (!__kmp_give_task(thread, k, ptask, pass));
 
-    __kmp_second_top_half_finish_proxy(taskdata);
+  __kmp_second_top_half_finish_proxy(taskdata);
 
-    KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) );
+  KA_TRACE(
+      10,
+      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
+       taskdata));
 }
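
A sketch of the retry policy in the do/while loop above: threads are tried
round-robin, and each unsuccessful full pass doubles `pass`, which relaxes the
deque-size test in __kmp_give_task until some queue accepts the bottom half.
The helper below is a made-up stand-in for that test, not the actual routine.

    #include <cstdio>
    #include <vector>

    // Stand-in for the size test in __kmp_give_task: accept only if this deque
    // has grown less than `pass` times the initial size.
    static bool toy_give(int deque_growth, int pass) { return deque_growth < pass; }

    int main() {
      std::vector<int> growth = {4, 4, 4}; // every deque already grew 4x
      int nthreads = static_cast<int>(growth.size());
      int start_k = 0, k = start_k, pass = 1, tries = 0;
      while (!toy_give(growth[k], pass)) {
        k = (k + 1) % nthreads;
        if (k == start_k)
          pass <<= 1; // a full unsuccessful sweep: be more permissive next time
        ++tries;
      }
      std::printf("accepted by thread %d after %d tries (pass=%d)\n", k, tries, pass);
      return 0;
    }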
 
-//---------------------------------------------------------------------------------
-// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop
+// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task
+// for taskloop
 //
 // thread:   allocating thread
 // task_src: pointer to source task to be duplicated
 // returns:  a pointer to the allocated kmp_task_t structure (task).
-kmp_task_t *
-__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src )
-{
-    kmp_task_t     *task;
-    kmp_taskdata_t *taskdata;
-    kmp_taskdata_t *taskdata_src;
-    kmp_taskdata_t *parent_task = thread->th.th_current_task;
-    size_t shareds_offset;
-    size_t task_size;
-
-    KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) );
-    taskdata_src = KMP_TASK_TO_TASKDATA( task_src );
-    KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task
-    KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT );
-    task_size = taskdata_src->td_size_alloc;
-
-    // Allocate a kmp_taskdata_t block and a kmp_task_t block.
-    KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) );
-    #if USE_FAST_MEMORY
-    taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size );
-    #else
-    taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size );
-    #endif /* USE_FAST_MEMORY */
-    KMP_MEMCPY(taskdata, taskdata_src, task_size);
-
-    task = KMP_TASKDATA_TO_TASK(taskdata);
-
-    // Initialize new task (only specific fields not affected by memcpy)
-    taskdata->td_task_id = KMP_GEN_TASK_ID();
-    if( task->shareds != NULL ) { // need setup shareds pointer
-        shareds_offset = (char*)task_src->shareds - (char*)taskdata_src;
-        task->shareds = &((char*)taskdata)[shareds_offset];
-        KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 );
-    }
-    taskdata->td_alloc_thread = thread;
-    taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
-
-    // Only need to keep track of child task counts if team parallel and tasking not serialized
-    if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) {
-        KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
-        if ( parent_task->td_taskgroup )
-            KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
-        // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
-        if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT )
-            KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
-    }
-
-    KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
-                  thread, taskdata, taskdata->td_parent) );
+kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
+  kmp_task_t *task;
+  kmp_taskdata_t *taskdata;
+  kmp_taskdata_t *taskdata_src;
+  kmp_taskdata_t *parent_task = thread->th.th_current_task;
+  size_t shareds_offset;
+  size_t task_size;
+
+  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
+                task_src));
+  taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
+  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
+                   TASK_FULL); // it should not be proxy task
+  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
+  task_size = taskdata_src->td_size_alloc;
+
+  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
+  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
+                task_size));
+#if USE_FAST_MEMORY
+  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
+#else
+  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
+#endif /* USE_FAST_MEMORY */
+  KMP_MEMCPY(taskdata, taskdata_src, task_size);
+
+  task = KMP_TASKDATA_TO_TASK(taskdata);
+
+  // Initialize new task (only specific fields not affected by memcpy)
+  taskdata->td_task_id = KMP_GEN_TASK_ID();
+  if (task->shareds != NULL) { // need setup shareds pointer
+    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
+    task->shareds = &((char *)taskdata)[shareds_offset];
+    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
+                     0);
+  }
+  taskdata->td_alloc_thread = thread;
+  taskdata->td_taskgroup =
+      parent_task
+          ->td_taskgroup; // task inherits the taskgroup from the parent task
+
+  // Only need to keep track of child task counts if team parallel and tasking
+  // not serialized
+  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
+    KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks));
+    if (parent_task->td_taskgroup)
+      KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count));
+    // Only need to keep track of allocated child tasks for explicit tasks,
+    // since implicit tasks are not deallocated
+    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
+      KMP_TEST_THEN_INC32(
+          (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks));
+  }
+
+  KA_TRACE(20,
+           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
+            thread, taskdata, taskdata->td_parent));
 #if OMPT_SUPPORT
-    __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine);
+  __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid,
+                       (void *)task->routine);
 #endif
-    return task;
+  return task;
 }
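
The shareds fix-up in __kmp_task_dup_alloc is ordinary pointer rebasing: after
memcpy'ing the whole allocation, any interior pointer still refers to the
source block and is shifted by the same byte offset into the copy. A
self-contained illustration with a toy struct, not kmp_taskdata_t:

    #include <cassert>
    #include <cstdlib>
    #include <cstring>

    // Toy layout: a header whose `shareds` pointer refers into the same block.
    struct ToyTask {
      void *shareds;
      char payload[32];
    };

    int main() {
      ToyTask *src = static_cast<ToyTask *>(std::malloc(sizeof(ToyTask)));
      src->shareds = src->payload; // interior pointer into the source block
      std::strcpy(src->payload, "shared data");

      ToyTask *dst = static_cast<ToyTask *>(std::malloc(sizeof(ToyTask)));
      std::memcpy(dst, src, sizeof(ToyTask));

      // After memcpy, dst->shareds still points into src; rebase it by offset.
      std::size_t shareds_offset = (char *)src->shareds - (char *)src;
      dst->shareds = (char *)dst + shareds_offset;

      assert(dst->shareds == dst->payload);
      assert(std::strcmp(static_cast<char *>(dst->shareds), "shared data") == 0);
      std::free(src);
      std::free(dst);
      return 0;
    }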
 
 // Routine optionally generated by the compiler for setting the lastprivate flag
 // and calling needed constructors for private/firstprivate objects
 // (used to form taskloop tasks from pattern task)
-typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
+typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);
 
-//---------------------------------------------------------------------------------
 // __kmp_taskloop_linear: Start tasks of the taskloop linearly
 //
 // loc       Source location information
@@ -3212,114 +3292,120 @@ typedef void(*p_task_dup_t)(kmp_task_t *
 // sched     Schedule specified 0/1/2 for none/grainsize/num_tasks
 // grainsize Schedule value if specified
 // task_dup  Tasks duplication routine
-void
-__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
-                kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
-                int sched, kmp_uint64 grainsize, void *task_dup )
-{
-    KMP_COUNT_BLOCK(OMP_TASKLOOP);
-    KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
-    p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
-    kmp_uint64 tc;
-    kmp_uint64 lower = *lb; // compiler provides global bounds here
-    kmp_uint64 upper = *ub;
-    kmp_uint64 i, num_tasks = 0, extras = 0;
-    kmp_info_t *thread = __kmp_threads[gtid];
-    kmp_taskdata_t *current_task = thread->th.th_current_task;
-    kmp_task_t *next_task;
-    kmp_int32 lastpriv = 0;
-    size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure
-    size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure
-
-    // compute trip count
-    if ( st == 1 ) {   // most common case
-        tc = upper - lower + 1;
-    } else if ( st < 0 ) {
-        tc = (lower - upper) / (-st) + 1;
-    } else {       // st > 0
-        tc = (upper - lower) / st + 1;
-    }
-    if(tc == 0) {
-        KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
-        // free the pattern task and exit
-        __kmp_task_start( gtid, task, current_task );
-        // do not execute anything for zero-trip loop
-        __kmp_task_finish( gtid, task, current_task );
-        return;
-    }
-
-    // compute num_tasks/grainsize based on the input provided
-    switch( sched ) {
-    case 0: // no schedule clause specified, we can choose the default
-            // let's try to schedule (team_size*10) tasks
-        grainsize = thread->th.th_team_nproc * 10;
-    case 2: // num_tasks provided
-        if( grainsize > tc ) {
-            num_tasks = tc;   // too big num_tasks requested, adjust values
-            grainsize = 1;
-            extras = 0;
-        } else {
-            num_tasks = grainsize;
-            grainsize = tc / num_tasks;
-            extras = tc % num_tasks;
-        }
-        break;
-    case 1: // grainsize provided
-        if( grainsize > tc ) {
-            num_tasks = 1;    // too big grainsize requested, adjust values
-            grainsize = tc;
-            extras = 0;
-        } else {
-            num_tasks = tc / grainsize;
-            grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations
-            extras = tc % num_tasks;
-        }
-        break;
-    default:
-        KMP_ASSERT2(0, "unknown scheduling of taskloop");
+void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
+                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
+                           int sched, kmp_uint64 grainsize, void *task_dup) {
+  KMP_COUNT_BLOCK(OMP_TASKLOOP);
+  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
+  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
+  kmp_uint64 tc;
+  kmp_uint64 lower = *lb; // compiler provides global bounds here
+  kmp_uint64 upper = *ub;
+  kmp_uint64 i, num_tasks = 0, extras = 0;
+  kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_taskdata_t *current_task = thread->th.th_current_task;
+  kmp_task_t *next_task;
+  kmp_int32 lastpriv = 0;
+  size_t lower_offset =
+      (char *)lb - (char *)task; // remember offset of lb in the task structure
+  size_t upper_offset =
+      (char *)ub - (char *)task; // remember offset of ub in the task structure
+
+  // compute trip count
+  if (st == 1) { // most common case
+    tc = upper - lower + 1;
+  } else if (st < 0) {
+    tc = (lower - upper) / (-st) + 1;
+  } else { // st > 0
+    tc = (upper - lower) / st + 1;
+  }
+  if (tc == 0) {
+    KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid));
+    // free the pattern task and exit
+    __kmp_task_start(gtid, task, current_task);
+    // do not execute anything for zero-trip loop
+    __kmp_task_finish(gtid, task, current_task);
+    return;
+  }
+
+  // compute num_tasks/grainsize based on the input provided
+  switch (sched) {
+  case 0: // no schedule clause specified, we can choose the default
+    // let's try to schedule (team_size*10) tasks
+    grainsize = thread->th.th_team_nproc * 10;
+  case 2: // num_tasks provided
+    if (grainsize > tc) {
+      num_tasks = tc; // too big num_tasks requested, adjust values
+      grainsize = 1;
+      extras = 0;
+    } else {
+      num_tasks = grainsize;
+      grainsize = tc / num_tasks;
+      extras = tc % num_tasks;
+    }
+    break;
+  case 1: // grainsize provided
+    if (grainsize > tc) {
+      num_tasks = 1; // too big grainsize requested, adjust values
+      grainsize = tc;
+      extras = 0;
+    } else {
+      num_tasks = tc / grainsize;
+      grainsize =
+          tc /
+          num_tasks; // adjust grainsize for balanced distribution of iterations
+      extras = tc % num_tasks;
+    }
+    break;
+  default:
+    KMP_ASSERT2(0, "unknown scheduling of taskloop");
+  }
+  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
+  KMP_DEBUG_ASSERT(num_tasks > extras);
+  KMP_DEBUG_ASSERT(num_tasks > 0);
+  KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize "
+                "%lld, extras %lld\n",
+                gtid, num_tasks, grainsize, extras));
+
+  // Main loop, launch num_tasks tasks, assign grainsize iterations each task
+  for (i = 0; i < num_tasks; ++i) {
+    kmp_uint64 chunk_minus_1;
+    if (extras == 0) {
+      chunk_minus_1 = grainsize - 1;
+    } else {
+      chunk_minus_1 = grainsize;
+      --extras; // first extras iterations get bigger chunk (grainsize+1)
     }
-    KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras);
-    KMP_DEBUG_ASSERT(num_tasks > extras);
-    KMP_DEBUG_ASSERT(num_tasks > 0);
-    KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n",
-                  gtid, num_tasks, grainsize, extras));
-
-    // Main loop, launch num_tasks tasks, assign grainsize iterations each task
-    for( i = 0; i < num_tasks; ++i ) {
-        kmp_uint64 chunk_minus_1;
-        if( extras == 0 ) {
-            chunk_minus_1 = grainsize - 1;
-        } else {
-            chunk_minus_1 = grainsize;
-            --extras; // first extras iterations get bigger chunk (grainsize+1)
-        }
-        upper = lower + st * chunk_minus_1;
-        if( i == num_tasks - 1 ) {
-            // schedule the last task, set lastprivate flag
-            lastpriv = 1;
+    upper = lower + st * chunk_minus_1;
+    if (i == num_tasks - 1) {
+      // schedule the last task, set lastprivate flag
+      lastpriv = 1;
 #if KMP_DEBUG
-            if( st == 1 )
-                KMP_DEBUG_ASSERT(upper == *ub);
-            else if( st > 0 )
-                KMP_DEBUG_ASSERT(upper+st > *ub);
-            else
-                KMP_DEBUG_ASSERT(upper+st < *ub);
-#endif
-        }
-        next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
-        *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds
-        *(kmp_uint64*)((char*)next_task + upper_offset) = upper;
-        if( ptask_dup != NULL )
-            ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct fistprivates, etc.
-        KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n",
-                      gtid, next_task, lower, upper, lower_offset, upper_offset));
-        __kmp_omp_task(gtid, next_task, true); // schedule new task
-        lower = upper + st; // adjust lower bound for the next iteration
-    }
-    // free the pattern task and exit
-    __kmp_task_start( gtid, task, current_task );
-    // do not execute the pattern task, just do bookkeeping
-    __kmp_task_finish( gtid, task, current_task );
+      if (st == 1)
+        KMP_DEBUG_ASSERT(upper == *ub);
+      else if (st > 0)
+        KMP_DEBUG_ASSERT(upper + st > *ub);
+      else
+        KMP_DEBUG_ASSERT(upper + st < *ub);
+#endif
+    }
+    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
+    *(kmp_uint64 *)((char *)next_task + lower_offset) =
+        lower; // adjust task-specific bounds
+    *(kmp_uint64 *)((char *)next_task + upper_offset) = upper;
+    if (ptask_dup != NULL)
+      ptask_dup(next_task, task,
+                lastpriv); // set lastprivate flag, construct firstprivates, etc.
+    KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper "
+                  "%lld (offsets %p %p)\n",
+                  gtid, next_task, lower, upper, lower_offset, upper_offset));
+    __kmp_omp_task(gtid, next_task, true); // schedule new task
+    lower = upper + st; // adjust lower bound for the next iteration
+  }
+  // free the pattern task and exit
+  __kmp_task_start(gtid, task, current_task);
+  // do not execute the pattern task, just do bookkeeping
+  __kmp_task_finish(gtid, task, current_task);
 }
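
A worked example of the chunking arithmetic in __kmp_taskloop_linear, under the
grainsize branch: the invariant tc == num_tasks * grainsize + extras holds, and
the first `extras` tasks receive one extra iteration. The concrete values below
are made up purely for illustration.

    #include <cassert>
    #include <cstdio>

    int main() {
      // Suppose the loop has tc = 10 iterations and grainsize = 3 was requested.
      unsigned long long tc = 10, grainsize = 3;
      unsigned long long num_tasks = tc / grainsize; // 3 tasks
      grainsize = tc / num_tasks;                    // rebalanced grainsize: 3
      unsigned long long extras = tc % num_tasks;    // 1 task gets an extra iteration
      assert(tc == num_tasks * grainsize + extras);

      unsigned long long lower = 0, st = 1, covered = 0;
      for (unsigned long long i = 0; i < num_tasks; ++i) {
        unsigned long long chunk_minus_1 = (extras == 0) ? grainsize - 1 : grainsize;
        if (extras)
          --extras; // the bigger chunks are handed out first
        unsigned long long upper = lower + st * chunk_minus_1;
        std::printf("task %llu: iterations [%llu, %llu]\n", i, lower, upper);
        covered += upper - lower + 1;
        lower = upper + st;
      }
      assert(covered == tc); // every iteration was assigned exactly once
      return 0;
    }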
 
 /*!
@@ -3338,34 +3424,34 @@ __kmp_taskloop_linear(ident_t *loc, int
 
 Execute the taskloop construct.
 */
-void
-__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
-                kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
-                int nogroup, int sched, kmp_uint64 grainsize, void *task_dup )
-{
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
-    KMP_DEBUG_ASSERT( task != NULL );
-
-    KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n",
-        gtid, taskdata, *lb, *ub, st, grainsize, sched));
-
-    // check if clause value first
-    if( if_val == 0 ) { // if(0) specified, mark task as serial
-        taskdata->td_flags.task_serial = 1;
-        taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
-    }
-    if( nogroup == 0 ) {
-        __kmpc_taskgroup( loc, gtid );
-    }
-
-    if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) {
-        __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup );
-    }
-
-    if( nogroup == 0 ) {
-        __kmpc_end_taskgroup( loc, gtid );
-    }
-    KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
+void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
+                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
+                     int sched, kmp_uint64 grainsize, void *task_dup) {
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
+  KMP_DEBUG_ASSERT(task != NULL);
+
+  KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub "
+                "%lld st %lld, grain %llu(%d)\n",
+                gtid, taskdata, *lb, *ub, st, grainsize, sched));
+
+  // check if clause value first
+  if (if_val == 0) { // if(0) specified, mark task as serial
+    taskdata->td_flags.task_serial = 1;
+    taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied
+  }
+  if (nogroup == 0) {
+    __kmpc_taskgroup(loc, gtid);
+  }
+
+  if (1 /* AC: use some heuristic here to choose task scheduling method */) {
+    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, sched, grainsize,
+                          task_dup);
+  }
+
+  if (nogroup == 0) {
+    __kmpc_end_taskgroup(loc, gtid);
+  }
+  KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid));
 }
 
 #endif

Modified: openmp/trunk/runtime/src/kmp_taskq.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_taskq.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_taskq.cpp (original)
+++ openmp/trunk/runtime/src/kmp_taskq.cpp Fri May 12 13:01:32 2017
@@ -14,762 +14,748 @@
 
 
 #include "kmp.h"
+#include "kmp_error.h"
 #include "kmp_i18n.h"
 #include "kmp_io.h"
-#include "kmp_error.h"
 
 #define MAX_MESSAGE 512
 
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-/*
- * Taskq routines and global variables
- */
+/* Taskq routines and global variables */
 
-#define KMP_DEBUG_REF_CTS(x)    KF_TRACE(1, x);
+#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x);
 
 #define THREAD_ALLOC_FOR_TASKQ
 
-static int
-in_parallel_context( kmp_team_t *team )
-{
-    return ! team -> t.t_serialized;
-}
-
-static void
-__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
-    int                gtid = *gtid_ref;
-    int                tid  = __kmp_tid_from_gtid( gtid );
-    kmp_uint32         my_token;
-    kmpc_task_queue_t *taskq;
-    kmp_taskq_t       *tq   = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
+static int in_parallel_context(kmp_team_t *team) {
+  return !team->t.t_serialized;
+}
+
+static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
+  int gtid = *gtid_ref;
+  int tid = __kmp_tid_from_gtid(gtid);
+  kmp_uint32 my_token;
+  kmpc_task_queue_t *taskq;
+  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;
 
-    if ( __kmp_env_consistency_check )
+  if (__kmp_env_consistency_check)
 #if KMP_USE_DYNAMIC_LOCK
-        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 );
+    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0);
 #else
-        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
+    __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL);
 #endif
 
-    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
-        KMP_MB();       /* Flush all pending memory write invalidates.  */
+  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
+    KMP_MB(); /* Flush all pending memory write invalidates.  */
 
-        /* GEH - need check here under stats to make sure   */
-        /*       inside task (curr_thunk[*tid_ref] != NULL) */
+    /* GEH - need check here under stats to make sure   */
+    /*       inside task (curr_thunk[*tid_ref] != NULL) */
 
-        my_token =tq->tq_curr_thunk[ tid ]-> th_tasknum;
+    my_token = tq->tq_curr_thunk[tid]->th_tasknum;
 
-        taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;
+    taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
 
-        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
-        KMP_MB();
-    }
+    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
+    KMP_MB();
+  }
 }
 
-static void
-__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
-{
-    int           gtid = *gtid_ref;
-    int           tid  = __kmp_tid_from_gtid( gtid );
-    kmp_uint32    my_token;
-    kmp_taskq_t  *tq   = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
+static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
+  int gtid = *gtid_ref;
+  int tid = __kmp_tid_from_gtid(gtid);
+  kmp_uint32 my_token;
+  kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq;
 
-    if ( __kmp_env_consistency_check )
-        __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );
+  if (__kmp_env_consistency_check)
+    __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref);
 
-    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
-        KMP_MB();       /* Flush all pending memory write invalidates.  */
+  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) {
+    KMP_MB(); /* Flush all pending memory write invalidates.  */
 
-        /* GEH - need check here under stats to make sure */
-        /*       inside task (curr_thunk[tid] != NULL)    */
+    /* GEH - need check here under stats to make sure */
+    /*       inside task (curr_thunk[tid] != NULL)    */
 
-        my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;
+    my_token = tq->tq_curr_thunk[tid]->th_tasknum;
 
-        KMP_MB();       /* Flush all pending memory write invalidates.  */
+    KMP_MB(); /* Flush all pending memory write invalidates.  */
 
-        tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
+    tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving =
+        my_token + 1;
 
-        KMP_MB();       /* Flush all pending memory write invalidates.  */
-    }
+    KMP_MB(); /* Flush all pending memory write invalidates.  */
+  }
 }
 
-static void
-__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
-{
-    kmp_uint32 my_token;
-    kmpc_task_queue_t *taskq;
+static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) {
+  kmp_uint32 my_token;
+  kmpc_task_queue_t *taskq;
 
-    /* assume we are always called from an active parallel context */
+  /* assume we are always called from an active parallel context */
 
-    KMP_MB();       /* Flush all pending memory write invalidates.  */
+  KMP_MB(); /* Flush all pending memory write invalidates.  */
 
-    my_token =  thunk -> th_tasknum;
+  my_token = thunk->th_tasknum;
 
-    taskq =  thunk -> th.th_shareds -> sv_queue;
+  taskq = thunk->th.th_shareds->sv_queue;
 
-    if(taskq->tq_tasknum_serving <= my_token) {
-        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
-        KMP_MB();
-        taskq->tq_tasknum_serving = my_token +1;
-        KMP_MB();
-    }
+  if (taskq->tq_tasknum_serving <= my_token) {
+    KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
+    KMP_MB();
+    taskq->tq_tasknum_serving = my_token + 1;
+    KMP_MB();
+  }
 }
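
The three routines above gate ordered taskq tasks with a ticket scheme: a task remembers the number it was queued with (th_tasknum) and waits until the queue's tq_tasknum_serving counter reaches that value, then advances the counter for the next task in queueing order. A minimal standalone sketch of the same idea, using invented names and C++11 atomics in place of KMP_MB()/KMP_WAIT_YIELD, might look like:

    // Sketch only: ticket-style ordering analogous to th_tasknum and
    // tq_tasknum_serving; names are illustrative, not the runtime's API.
    #include <atomic>
    #include <thread>

    struct ordered_queue {
      std::atomic<unsigned> serving{0}; // next ticket allowed into the region
    };

    // Entry to the ordered region by the task holding `ticket`.
    void ordered_enter(ordered_queue &q, unsigned ticket) {
      while (q.serving.load(std::memory_order_acquire) != ticket)
        std::this_thread::yield(); // spin-yield, like KMP_WAIT_YIELD
    }

    // Exit: hand the region to the next ticket in queueing order.
    void ordered_exit(ordered_queue &q, unsigned ticket) {
      q.serving.store(ticket + 1, std::memory_order_release);
    }

The release/acquire pair here stands in for the KMP_MB() calls that bracket the serving-counter update above.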
 
 #ifdef KMP_DEBUG
 
-static void
-__kmp_dump_TQF(kmp_int32 flags)
-{
-    if (flags & TQF_IS_ORDERED)
-        __kmp_printf("ORDERED ");
-    if (flags & TQF_IS_LASTPRIVATE)
-        __kmp_printf("LAST_PRIV ");
-    if (flags & TQF_IS_NOWAIT)
-        __kmp_printf("NOWAIT ");
-    if (flags & TQF_HEURISTICS)
-        __kmp_printf("HEURIST ");
-    if (flags & TQF_INTERFACE_RESERVED1)
-        __kmp_printf("RESERV1 ");
-    if (flags & TQF_INTERFACE_RESERVED2)
-        __kmp_printf("RESERV2 ");
-    if (flags & TQF_INTERFACE_RESERVED3)
-        __kmp_printf("RESERV3 ");
-    if (flags & TQF_INTERFACE_RESERVED4)
-        __kmp_printf("RESERV4 ");
-    if (flags & TQF_IS_LAST_TASK)
-        __kmp_printf("LAST_TASK ");
-    if (flags & TQF_TASKQ_TASK)
-        __kmp_printf("TASKQ_TASK ");
-    if (flags & TQF_RELEASE_WORKERS)
-        __kmp_printf("RELEASE ");
-    if (flags & TQF_ALL_TASKS_QUEUED)
-        __kmp_printf("ALL_QUEUED ");
-    if (flags & TQF_PARALLEL_CONTEXT)
-        __kmp_printf("PARALLEL ");
-    if (flags & TQF_DEALLOCATED)
-        __kmp_printf("DEALLOC ");
-    if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
-        __kmp_printf("(NONE)");
-}
-
-static void
-__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
-{
-    int i;
-    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
+static void __kmp_dump_TQF(kmp_int32 flags) {
+  if (flags & TQF_IS_ORDERED)
+    __kmp_printf("ORDERED ");
+  if (flags & TQF_IS_LASTPRIVATE)
+    __kmp_printf("LAST_PRIV ");
+  if (flags & TQF_IS_NOWAIT)
+    __kmp_printf("NOWAIT ");
+  if (flags & TQF_HEURISTICS)
+    __kmp_printf("HEURIST ");
+  if (flags & TQF_INTERFACE_RESERVED1)
+    __kmp_printf("RESERV1 ");
+  if (flags & TQF_INTERFACE_RESERVED2)
+    __kmp_printf("RESERV2 ");
+  if (flags & TQF_INTERFACE_RESERVED3)
+    __kmp_printf("RESERV3 ");
+  if (flags & TQF_INTERFACE_RESERVED4)
+    __kmp_printf("RESERV4 ");
+  if (flags & TQF_IS_LAST_TASK)
+    __kmp_printf("LAST_TASK ");
+  if (flags & TQF_TASKQ_TASK)
+    __kmp_printf("TASKQ_TASK ");
+  if (flags & TQF_RELEASE_WORKERS)
+    __kmp_printf("RELEASE ");
+  if (flags & TQF_ALL_TASKS_QUEUED)
+    __kmp_printf("ALL_QUEUED ");
+  if (flags & TQF_PARALLEL_CONTEXT)
+    __kmp_printf("PARALLEL ");
+  if (flags & TQF_DEALLOCATED)
+    __kmp_printf("DEALLOC ");
+  if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS)))
+    __kmp_printf("(NONE)");
+}
+
+static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk,
+                             kmp_int32 global_tid) {
+  int i;
+  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
 
-    __kmp_printf("\tThunk at %p on (%d):  ", thunk, global_tid);
+  __kmp_printf("\tThunk at %p on (%d):  ", thunk, global_tid);
 
-    if (thunk != NULL) {
-        for (i = 0; i < nproc; i++) {
-            if( tq->tq_curr_thunk[i] == thunk ) {
-                __kmp_printf("[%i] ", i);
-            }
-        }
-        __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
-        __kmp_printf("th_task=%p, ", thunk->th_task);
-        __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
-        __kmp_printf("th_status=%d, ", thunk->th_status);
-        __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
-        __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);
+  if (thunk != NULL) {
+    for (i = 0; i < nproc; i++) {
+      if (tq->tq_curr_thunk[i] == thunk) {
+        __kmp_printf("[%i] ", i);
+      }
     }
+    __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
+    __kmp_printf("th_task=%p, ", thunk->th_task);
+    __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
+    __kmp_printf("th_status=%d, ", thunk->th_status);
+    __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
+    __kmp_printf("th_flags=");
+    __kmp_dump_TQF(thunk->th_flags);
+  }
 
-    __kmp_printf("\n");
+  __kmp_printf("\n");
 }
 
-static void
-__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
-{
-    kmpc_thunk_t *th;
+static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) {
+  kmpc_thunk_t *th;
 
-    __kmp_printf("    Thunk stack for T#%d:  ", thread_num);
+  __kmp_printf("    Thunk stack for T#%d:  ", thread_num);
 
-    for (th = thunk; th != NULL; th = th->th_encl_thunk )
-        __kmp_printf("%p ", th);
+  for (th = thunk; th != NULL; th = th->th_encl_thunk)
+    __kmp_printf("%p ", th);
 
-    __kmp_printf("\n");
+  __kmp_printf("\n");
 }
 
-static void
-__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
-{
-    int                  qs, count, i;
-    kmpc_thunk_t        *thunk;
-    kmpc_task_queue_t   *taskq;
+static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue,
+                                  kmp_int32 global_tid) {
+  int qs, count, i;
+  kmpc_thunk_t *thunk;
+  kmpc_task_queue_t *taskq;
 
-    __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
+  __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
 
-    if (queue != NULL) {
-        int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
+  if (queue != NULL) {
+    int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
 
-    if ( __kmp_env_consistency_check ) {
-        __kmp_printf("    tq_loc             : ");
+    if (__kmp_env_consistency_check) {
+      __kmp_printf("    tq_loc             : ");
     }
-        if (in_parallel) {
+    if (in_parallel) {
 
-            //if (queue->tq.tq_parent != 0)
-                //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+      // if (queue->tq.tq_parent != 0)
+      //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
 
-            //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
+      //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
 
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
 
-            __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
-            __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
-            __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
-            __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
-            __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);
+      __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
+      __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
+      __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
+      __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
+      __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);
 
-            //__kmp_release_lock(& queue->tq_link_lck, global_tid);
+      //__kmp_release_lock(& queue->tq_link_lck, global_tid);
 
-            //if (queue->tq.tq_parent != 0)
-                //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+      // if (queue->tq.tq_parent != 0)
+      //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
 
-            //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-            //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+      //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
+      //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
 
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-        }
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
+    }
 
-        __kmp_printf("    tq_shareds         : ");
-        for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
-            __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
-        __kmp_printf("\n");
+    __kmp_printf("    tq_shareds         : ");
+    for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
+      __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
+    __kmp_printf("\n");
 
-        if (in_parallel) {
-            __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
-            __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
-        }
+    if (in_parallel) {
+      __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
+      __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
+    }
 
-        __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
-        __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
-        __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);
-
-        __kmp_printf("    tq_free_thunks     : ");
-        for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
-            __kmp_printf("%p ", thunk);
-        __kmp_printf("\n");
+    __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
+    __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
+    __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);
+
+    __kmp_printf("    tq_free_thunks     : ");
+    for (thunk = queue->tq_free_thunks; thunk != NULL;
+         thunk = thunk->th.th_next_free)
+      __kmp_printf("%p ", thunk);
+    __kmp_printf("\n");
 
-        __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
-        __kmp_printf("    tq_head            : %d\n", queue->tq_head);
-        __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
-        __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
-        __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
-        __kmp_printf("    tq_flags           : "); __kmp_dump_TQF(queue->tq_flags);
-        __kmp_printf("\n");
+    __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
+    __kmp_printf("    tq_head            : %d\n", queue->tq_head);
+    __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
+    __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
+    __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
+    __kmp_printf("    tq_flags           : ");
+    __kmp_dump_TQF(queue->tq_flags);
+    __kmp_printf("\n");
 
-        if (in_parallel) {
-            __kmp_printf("    tq_th_thunks       : ");
-            for (i = 0; i < queue->tq_nproc; i++) {
-                __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
-            }
-            __kmp_printf("\n");
-        }
+    if (in_parallel) {
+      __kmp_printf("    tq_th_thunks       : ");
+      for (i = 0; i < queue->tq_nproc; i++) {
+        __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
+      }
+      __kmp_printf("\n");
+    }
 
-        __kmp_printf("\n");
-        __kmp_printf("    Queue slots:\n");
+    __kmp_printf("\n");
+    __kmp_printf("    Queue slots:\n");
 
+    qs = queue->tq_tail;
+    for (count = 0; count < queue->tq_nfull; ++count) {
+      __kmp_printf("(%d)", qs);
+      __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid);
+      qs = (qs + 1) % queue->tq_nslots;
+    }
 
-        qs = queue->tq_tail;
-        for ( count = 0; count < queue->tq_nfull; ++count ) {
-            __kmp_printf("(%d)", qs);
-            __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
-            qs = (qs+1) % queue->tq_nslots;
-        }
+    __kmp_printf("\n");
 
+    if (in_parallel) {
+      if (queue->tq_taskq_slot != NULL) {
+        __kmp_printf("    TaskQ slot:\n");
+        __kmp_dump_thunk(tq, (kmpc_thunk_t *)queue->tq_taskq_slot, global_tid);
         __kmp_printf("\n");
-
-        if (in_parallel) {
-            if (queue->tq_taskq_slot != NULL) {
-                __kmp_printf("    TaskQ slot:\n");
-                __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
-                __kmp_printf("\n");
-            }
-            //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
-            //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
-        }
+      }
+      //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
+      //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
     }
+  }
 
-    __kmp_printf("    Taskq freelist: ");
+  __kmp_printf("    Taskq freelist: ");
 
-    //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
+  //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
 
-    KMP_MB();  /* make sure data structures are in consistent state before querying them */
-               /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+  // Make sure data structures are in consistent state before querying them
+  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+  KMP_MB();
 
-    for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
-        __kmp_printf("%p ", taskq);
+  for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free)
+    __kmp_printf("%p ", taskq);
 
-    //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+  //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
 
-    __kmp_printf("\n\n");
+  __kmp_printf("\n\n");
 }
 
-static void
-__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
-{
-    int i, count, qs;
-    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
-    kmpc_task_queue_t *queue = curr_queue;
+static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq,
+                                           kmpc_task_queue_t *curr_queue,
+                                           kmp_int32 level,
+                                           kmp_int32 global_tid) {
+  int i, count, qs;
+  int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
+  kmpc_task_queue_t *queue = curr_queue;
 
-    if (curr_queue == NULL)
-        return;
+  if (curr_queue == NULL)
+    return;
 
-    __kmp_printf("    ");
+  __kmp_printf("    ");
 
-    for (i=0; i<level; i++)
-        __kmp_printf("  ");
+  for (i = 0; i < level; i++)
+    __kmp_printf("  ");
 
-    __kmp_printf("%p", curr_queue);
+  __kmp_printf("%p", curr_queue);
 
-    for (i = 0; i < nproc; i++) {
-        if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
-            __kmp_printf(" [%i]", i);
-        }
+  for (i = 0; i < nproc; i++) {
+    if (tq->tq_curr_thunk[i] &&
+        tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) {
+      __kmp_printf(" [%i]", i);
     }
+  }
 
-    __kmp_printf(":");
+  __kmp_printf(":");
 
-    //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
+  //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
 
-    KMP_MB();  /* make sure data structures are in consistent state before querying them */
-               /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+  // Make sure data structures are in consistent state before querying them
+  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+  KMP_MB();
 
-    qs = curr_queue->tq_tail;
+  qs = curr_queue->tq_tail;
 
-    for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
-        __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
-         qs = (qs+1) % curr_queue->tq_nslots;
-    }
+  for (count = 0; count < curr_queue->tq_nfull; ++count) {
+    __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
+    qs = (qs + 1) % curr_queue->tq_nslots;
+  }
 
-    //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
+  //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
 
-    __kmp_printf("\n");
-
-    if (curr_queue->tq_first_child) {
-        //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+  __kmp_printf("\n");
 
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+  if (curr_queue->tq_first_child) {
+    //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
 
-        if (curr_queue->tq_first_child) {
-            for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
-                queue != NULL;
-                queue = queue->tq_next_child) {
-                __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
-            }
-        }
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
 
-        //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+    if (curr_queue->tq_first_child) {
+      for (queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+           queue != NULL; queue = queue->tq_next_child) {
+        __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid);
+      }
     }
+
+    //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+  }
 }
 
-static void
-__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
-{
-    __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
+static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq,
+                                       kmpc_task_queue_t *tqroot,
+                                       kmp_int32 global_tid) {
+  __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
 
-    __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
+  __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid);
 
-    __kmp_printf("\n");
+  __kmp_printf("\n");
 }
 #endif
 
-/* --------------------------------------------------------------------------- */
-
-/*
-    New taskq storage routines that try to minimize overhead of mallocs but
-    still provide cache line alignment.
-*/
-
+/* New taskq storage routines that try to minimize overhead of mallocs but
+   still provide cache line alignment. */
+static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) {
+  void *addr, *orig_addr;
+  size_t bytes;
 
-static void *
-__kmp_taskq_allocate(size_t size, kmp_int32 global_tid)
-{
-    void *addr, *orig_addr;
-    size_t bytes;
+  KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size,
+               global_tid));
 
-    KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );
-
-    bytes = sizeof(void *) + CACHE_LINE + size;
+  bytes = sizeof(void *) + CACHE_LINE + size;
 
 #ifdef THREAD_ALLOC_FOR_TASKQ
-    orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
+  orig_addr =
+      (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes);
 #else
-    KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
-    orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
+  KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes));
+  orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes);
 #endif /* THREAD_ALLOC_FOR_TASKQ */
 
-    if (orig_addr == 0)
-        KMP_FATAL( OutOfHeapMemory );
+  if (orig_addr == 0)
+    KMP_FATAL(OutOfHeapMemory);
 
-    addr = orig_addr;
+  addr = orig_addr;
 
-    if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
-        KB_TRACE( 50, ("__kmp_taskq_allocate:  adjust for cache alignment\n" ) );
-        addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
-    }
+  if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) {
+    KB_TRACE(50, ("__kmp_taskq_allocate:  adjust for cache alignment\n"));
+    addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1));
+  }
 
-    (* (void **) addr) = orig_addr;
+  (*(void **)addr) = orig_addr;
 
-    KB_TRACE( 10, ("__kmp_taskq_allocate:  allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
-             orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
-             (int) size, global_tid ));
+  KB_TRACE(10,
+           ("__kmp_taskq_allocate:  allocate: %p, use: %p - %p, size: %d, "
+            "gtid: %d\n",
+            orig_addr, ((void **)addr) + 1,
+            ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid));
 
-    return ( ((void **) addr) + 1 );
+  return (((void **)addr) + 1);
 }
 
-static void
-__kmpc_taskq_free(void *p, kmp_int32 global_tid)
-{
-    KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );
+static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) {
+  KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid));
 
-    KB_TRACE(10, ("__kmpc_taskq_free:  freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));
+  KB_TRACE(10, ("__kmpc_taskq_free:  freeing: %p, gtid: %d\n",
+                (*(((void **)p) - 1)), global_tid));
 
 #ifdef THREAD_ALLOC_FOR_TASKQ
-    __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
+  __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1));
 #else
-    KMP_INTERNAL_FREE( *( ((void **) p)-1) );
+  KMP_INTERNAL_FREE(*(((void **)p) - 1));
 #endif /* THREAD_ALLOC_FOR_TASKQ */
 }
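
__kmp_taskq_allocate and __kmpc_taskq_free above use a common over-allocate-and-align pattern: request sizeof(void *) + CACHE_LINE extra bytes, round the address up to the next cache-line boundary, stash the pointer malloc actually returned just in front of the block handed to the caller, and read it back on free. A self-contained sketch of that pattern (hypothetical names, a hard-coded 64-byte line instead of CACHE_LINE, plain malloc/free instead of the thread allocators):

    #include <stdint.h>
    #include <stdlib.h>

    #define LINE 64

    static void *alloc_with_backlink(size_t size) {
      // one pointer for the back-link plus slack to reach a line boundary
      void *orig = malloc(sizeof(void *) + LINE + size);
      if (orig == NULL)
        return NULL;
      uintptr_t p = (uintptr_t)orig;
      if (p & (LINE - 1))
        p = (p + LINE) & ~(uintptr_t)(LINE - 1); // round up to a cache line
      ((void **)p)[0] = orig; // remember what malloc() actually returned
      return (void **)p + 1;  // caller's data starts one pointer later
    }

    static void free_with_backlink(void *p) {
      free(((void **)p)[-1]); // back-link sits just before the user pointer
    }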
 
-/* --------------------------------------------------------------------------- */
-
-/*
- *      Keep freed kmpc_task_queue_t on an internal freelist and recycle since
- *      they're of constant size.
- */
+/* Keep freed kmpc_task_queue_t on an internal freelist and recycle since
+   they're of constant size. */
 
 static kmpc_task_queue_t *
-__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
-                    kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
-                    size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
-{
-    kmp_int32                  i;
-    size_t                     bytes;
-    kmpc_task_queue_t          *new_queue;
-    kmpc_aligned_shared_vars_t *shared_var_array;
-    char                       *shared_var_storage;
-    char                       *pt; /* for doing byte-adjusted address computations */
-
-    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
-
-    KMP_MB();  /* make sure data structures are in consistent state before querying them */
-               /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-    if( tq->tq_freelist ) {
-        new_queue =  tq -> tq_freelist;
-        tq -> tq_freelist =  tq -> tq_freelist -> tq.tq_next_free;
-
-        KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
-
-        new_queue->tq_flags = 0;
-
-        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
-    }
-    else {
-        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
-
-        new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
-        new_queue->tq_flags = 0;
-    }
-
-    /*  space in the task queue for queue slots (allocate as one big chunk */
-    /* of storage including new_taskq_task space)                          */
-
-    sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));         /* pad to cache line size */
-    pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
-    new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
-    *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
-
-    /*  chain the allocated thunks into a freelist for this queue  */
-
-    new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
-
-    for (i = 0; i < (nthunks - 2); i++) {
-        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
+__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots,
+                  kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc,
+                  size_t sizeof_thunk, size_t sizeof_shareds,
+                  kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) {
+  kmp_int32 i;
+  size_t bytes;
+  kmpc_task_queue_t *new_queue;
+  kmpc_aligned_shared_vars_t *shared_var_array;
+  char *shared_var_storage;
+  char *pt; /* for doing byte-adjusted address computations */
+
+  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
+
+  // Make sure data structures are in consistent state before querying them
+  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+  KMP_MB();
+
+  if (tq->tq_freelist) {
+    new_queue = tq->tq_freelist;
+    tq->tq_freelist = tq->tq_freelist->tq.tq_next_free;
+
+    KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
+
+    new_queue->tq_flags = 0;
+
+    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
+  } else {
+    __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
+
+    new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate(
+        sizeof(kmpc_task_queue_t), global_tid);
+    new_queue->tq_flags = 0;
+  }
+
+  /*  space in the task queue for queue slots (allocate as one big chunk */
+  /* of storage including new_taskq_task space)                          */
+
+  sizeof_thunk +=
+      (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */
+  pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid);
+  new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
+  *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
+
+  /*  chain the allocated thunks into a freelist for this queue  */
+
+  new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
+
+  for (i = 0; i < (nthunks - 2); i++) {
+    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th.th_next_free =
+        (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk);
 #ifdef KMP_DEBUG
-        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
+    ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED;
 #endif
-    }
+  }
 
-    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
+  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL;
 #ifdef KMP_DEBUG
-    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
+  ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags =
+      TQF_DEALLOCATED;
 #endif
 
-    /* initialize the locks */
+  /* initialize the locks */
 
-    if (in_parallel) {
-        __kmp_init_lock( & new_queue->tq_link_lck );
-        __kmp_init_lock( & new_queue->tq_free_thunks_lck );
-        __kmp_init_lock( & new_queue->tq_queue_lck );
-    }
+  if (in_parallel) {
+    __kmp_init_lock(&new_queue->tq_link_lck);
+    __kmp_init_lock(&new_queue->tq_free_thunks_lck);
+    __kmp_init_lock(&new_queue->tq_queue_lck);
+  }
 
-    /* now allocate the slots */
+  /* now allocate the slots */
 
-    bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
-    new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );
+  bytes = nslots * sizeof(kmpc_aligned_queue_slot_t);
+  new_queue->tq_queue =
+      (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid);
 
-    /*  space for array of pointers to shared variable structures */
-    sizeof_shareds += sizeof(kmpc_task_queue_t *);
-    sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));     /* pad to cache line size */
+  /*  space for array of pointers to shared variable structures */
+  sizeof_shareds += sizeof(kmpc_task_queue_t *);
+  sizeof_shareds +=
+      (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */
 
-    bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
-    shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);
+  bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t);
+  shared_var_array =
+      (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid);
 
-    bytes = nshareds * sizeof_shareds;
-    shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);
-
-    for (i=0; i<nshareds; i++) {
-        shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
-        shared_var_array[i].ai_data->sv_queue = new_queue;
-    }
-    new_queue->tq_shareds = shared_var_array;
+  bytes = nshareds * sizeof_shareds;
+  shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid);
 
+  for (i = 0; i < nshareds; i++) {
+    shared_var_array[i].ai_data =
+        (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds);
+    shared_var_array[i].ai_data->sv_queue = new_queue;
+  }
+  new_queue->tq_shareds = shared_var_array;
 
-    /* array for number of outstanding thunks per thread */
+  /* array for number of outstanding thunks per thread */
 
-    if (in_parallel) {
-        bytes = nproc * sizeof(kmpc_aligned_int32_t);
-        new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
-        new_queue->tq_nproc     = nproc;
+  if (in_parallel) {
+    bytes = nproc * sizeof(kmpc_aligned_int32_t);
+    new_queue->tq_th_thunks =
+        (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid);
+    new_queue->tq_nproc = nproc;
 
-        for (i=0; i<nproc; i++)
-            new_queue->tq_th_thunks[i].ai_data = 0;
-    }
+    for (i = 0; i < nproc; i++)
+      new_queue->tq_th_thunks[i].ai_data = 0;
+  }
 
-    return new_queue;
+  return new_queue;
 }
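
One step of __kmp_alloc_taskq above carves a single nthunks * sizeof_thunk allocation into fixed-size thunks, chains all but the last into the queue's freelist, and hands the last slot back through *new_taskq_thunk. The loop below is an illustrative standalone version of that carving step (names are made up; it assumes count >= 2 and stride >= sizeof(struct chunk)):

    #include <stddef.h>

    struct chunk {
      struct chunk *next_free; // plays the role of th.th_next_free
    };

    static struct chunk *carve_block(char *base, size_t stride, int count,
                                     struct chunk **reserved) {
      // link slots 0 .. count-2 into a freelist, terminate it, and set
      // *reserved to the final slot (the dispatcher thunk in the code above)
      for (int i = 0; i < count - 2; i++)
        ((struct chunk *)(base + i * stride))->next_free =
            (struct chunk *)(base + (i + 1) * stride);
      ((struct chunk *)(base + (count - 2) * stride))->next_free = NULL;
      *reserved = (struct chunk *)(base + (count - 1) * stride);
      return (struct chunk *)base; // head of the freelist
    }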
 
-static void
-__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
-{
-    __kmpc_taskq_free(p->tq_thunk_space, global_tid);
-    __kmpc_taskq_free(p->tq_queue, global_tid);
+static void __kmp_free_taskq(kmp_taskq_t *tq, kmpc_task_queue_t *p,
+                             int in_parallel, kmp_int32 global_tid) {
+  __kmpc_taskq_free(p->tq_thunk_space, global_tid);
+  __kmpc_taskq_free(p->tq_queue, global_tid);
 
-    /* free shared var structure storage */
-    __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);
+  /* free shared var structure storage */
+  __kmpc_taskq_free((void *)p->tq_shareds[0].ai_data, global_tid);
 
-    /* free array of pointers to shared vars storage */
-    __kmpc_taskq_free(p->tq_shareds, global_tid);
+  /* free array of pointers to shared vars storage */
+  __kmpc_taskq_free(p->tq_shareds, global_tid);
 
 #ifdef KMP_DEBUG
-    p->tq_first_child = NULL;
-    p->tq_next_child = NULL;
-    p->tq_prev_child = NULL;
-    p->tq_ref_count = -10;
-    p->tq_shareds = NULL;
-    p->tq_tasknum_queuing = 0;
-    p->tq_tasknum_serving = 0;
-    p->tq_queue = NULL;
-    p->tq_thunk_space = NULL;
-    p->tq_taskq_slot = NULL;
-    p->tq_free_thunks = NULL;
-    p->tq_nslots = 0;
-    p->tq_head = 0;
-    p->tq_tail = 0;
-    p->tq_nfull = 0;
-    p->tq_hiwat = 0;
+  p->tq_first_child = NULL;
+  p->tq_next_child = NULL;
+  p->tq_prev_child = NULL;
+  p->tq_ref_count = -10;
+  p->tq_shareds = NULL;
+  p->tq_tasknum_queuing = 0;
+  p->tq_tasknum_serving = 0;
+  p->tq_queue = NULL;
+  p->tq_thunk_space = NULL;
+  p->tq_taskq_slot = NULL;
+  p->tq_free_thunks = NULL;
+  p->tq_nslots = 0;
+  p->tq_head = 0;
+  p->tq_tail = 0;
+  p->tq_nfull = 0;
+  p->tq_hiwat = 0;
 
-    if (in_parallel) {
-        int i;
+  if (in_parallel) {
+    int i;
 
-        for (i=0; i<p->tq_nproc; i++)
-            p->tq_th_thunks[i].ai_data = 0;
-    }
-    if ( __kmp_env_consistency_check )
-        p->tq_loc = NULL;
-    KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
-    p->tq_flags = TQF_DEALLOCATED;
+    for (i = 0; i < p->tq_nproc; i++)
+      p->tq_th_thunks[i].ai_data = 0;
+  }
+  if (__kmp_env_consistency_check)
+    p->tq_loc = NULL;
+  KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED);
+  p->tq_flags = TQF_DEALLOCATED;
 #endif /* KMP_DEBUG */
 
-    if (in_parallel)  {
-        __kmpc_taskq_free(p->tq_th_thunks, global_tid);
-        __kmp_destroy_lock(& p->tq_link_lck);
-        __kmp_destroy_lock(& p->tq_queue_lck);
-        __kmp_destroy_lock(& p->tq_free_thunks_lck);
-    }
+  if (in_parallel) {
+    __kmpc_taskq_free(p->tq_th_thunks, global_tid);
+    __kmp_destroy_lock(&p->tq_link_lck);
+    __kmp_destroy_lock(&p->tq_queue_lck);
+    __kmp_destroy_lock(&p->tq_free_thunks_lck);
+  }
 #ifdef KMP_DEBUG
-    p->tq_th_thunks = NULL;
+  p->tq_th_thunks = NULL;
 #endif /* KMP_DEBUG */
 
-    KMP_MB();  /* make sure data structures are in consistent state before querying them */
-               /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+  // Make sure data structures are in consistent state before querying them
+  // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+  KMP_MB();
 
-    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
-    p->tq.tq_next_free = tq->tq_freelist;
+  __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid);
+  p->tq.tq_next_free = tq->tq_freelist;
 
-    tq->tq_freelist = p;
-    __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
+  tq->tq_freelist = p;
+  __kmp_release_lock(&tq->tq_freelist_lck, global_tid);
 }
 
-/*
- *    Once a group of thunks has been allocated for use in a particular queue,
- *    these are managed via a per-queue freelist.
- *    We force a check that there's always a thunk free if we need one.
- */
+/* Once a group of thunks has been allocated for use in a particular queue,
+   these are managed via a per-queue freelist.
+   We force a check that there's always a thunk free if we need one. */
 
-static kmpc_thunk_t *
-__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
-{
-    kmpc_thunk_t *fl;
+static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue,
+                                       int in_parallel, kmp_int32 global_tid) {
+  kmpc_thunk_t *fl;
 
-    if (in_parallel) {
-        __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-    }
+  if (in_parallel) {
+    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
+  }
 
-    fl = queue->tq_free_thunks;
+  fl = queue->tq_free_thunks;
 
-    KMP_DEBUG_ASSERT (fl != NULL);
+  KMP_DEBUG_ASSERT(fl != NULL);
 
-    queue->tq_free_thunks = fl->th.th_next_free;
-    fl->th_flags = 0;
+  queue->tq_free_thunks = fl->th.th_next_free;
+  fl->th_flags = 0;
 
-    if (in_parallel)
-        __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+  if (in_parallel)
+    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
 
-    return fl;
+  return fl;
 }
 
-static void
-__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
-{
+static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p,
+                             int in_parallel, kmp_int32 global_tid) {
 #ifdef KMP_DEBUG
-    p->th_task = 0;
-    p->th_encl_thunk = 0;
-    p->th_status = 0;
-    p->th_tasknum = 0;
-    /* Also could zero pointers to private vars */
+  p->th_task = 0;
+  p->th_encl_thunk = 0;
+  p->th_status = 0;
+  p->th_tasknum = 0;
+/* Also could zero pointers to private vars */
 #endif
 
-    if (in_parallel) {
-        __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
-
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-    }
+  if (in_parallel) {
+    __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid);
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
+  }
 
-    p->th.th_next_free = queue->tq_free_thunks;
-    queue->tq_free_thunks = p;
+  p->th.th_next_free = queue->tq_free_thunks;
+  queue->tq_free_thunks = p;
 
 #ifdef KMP_DEBUG
-    p->th_flags = TQF_DEALLOCATED;
+  p->th_flags = TQF_DEALLOCATED;
 #endif
 
-    if (in_parallel)
-        __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
+  if (in_parallel)
+    __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid);
 }
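
__kmp_alloc_thunk and __kmp_free_thunk above are a lock-protected pop and push on a singly linked freelist threaded through th.th_next_free. A minimal sketch of the same structure, with stand-in types and std::mutex in place of tq_free_thunks_lck:

    #include <mutex>

    struct node {
      node *next_free = nullptr; // plays the role of th.th_next_free
    };

    struct freelist {
      node *head = nullptr; // plays the role of tq_free_thunks
      std::mutex lck;       // plays the role of tq_free_thunks_lck
    };

    node *freelist_pop(freelist &fl) {
      std::lock_guard<std::mutex> g(fl.lck);
      node *n = fl.head;      // caller guarantees a free node exists,
      fl.head = n->next_free; // as __kmp_alloc_thunk asserts fl != NULL
      return n;
    }

    void freelist_push(freelist &fl, node *n) {
      std::lock_guard<std::mutex> g(fl.lck);
      n->next_free = fl.head;
      fl.head = n;
    }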
 
-/* --------------------------------------------------------------------------- */
-
 /*  returns nonzero if the queue just became full after the enqueue  */
+static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid,
+                                    kmpc_task_queue_t *queue,
+                                    kmpc_thunk_t *thunk, int in_parallel) {
+  kmp_int32 ret;
+
+  /*  dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the
+   * master is executing then)  */
+  if (in_parallel) {
+    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
+  }
 
-static kmp_int32
-__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
-{
-    kmp_int32    ret;
-
-    /*  dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then)  */
-    if (in_parallel) {
-        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-    }
-
-    KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots);  /*  check queue not full  */
-
-    queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
-
-    if (queue->tq_head >= queue->tq_nslots)
-        queue->tq_head = 0;
+  KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots); // check queue not full
 
-    (queue->tq_nfull)++;
+  queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
 
-    KMP_MB();   /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */
+  if (queue->tq_head >= queue->tq_nslots)
+    queue->tq_head = 0;
 
-    ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
+  (queue->tq_nfull)++;
 
-    if (in_parallel) {
-        /* don't need to wait until workers are released before unlocking */
-        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+  KMP_MB(); /* to assure that nfull is seen to increase before
+               TQF_ALL_TASKS_QUEUED is set */
 
-        if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
-            /* If just creating the root queue, the worker threads are waiting at */
-            /* a join barrier until now, when there's something in the queue for  */
-            /* them to do; release them now to do work.                           */
-            /* This should only be done when this is the first task enqueued,     */
-            /* so reset the flag here also.                                       */
+  ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
 
-            tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;  /* no lock needed, workers are still in spin mode */
+  if (in_parallel) {
+    /* don't need to wait until workers are released before unlocking */
+    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
 
-            KMP_MB();   /* avoid releasing barrier twice if taskq_task switches threads */
+    if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
+      // If just creating the root queue, the worker threads are waiting at a
+      // join barrier until now, when there's something in the queue for them to
+      // do; release them now to do work. This should only be done when this is
+      // the first task enqueued, so reset the flag here also.
+      tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers
+                                                      are still in spin mode */
+      // avoid releasing barrier twice if taskq_task switches threads
+      KMP_MB();
 
-            __kmpc_end_barrier_master( NULL, global_tid);
-        }
+      __kmpc_end_barrier_master(NULL, global_tid);
     }
+  }
 
-    return ret;
+  return ret;
 }
 
-static kmpc_thunk_t *
-__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
-{
-    kmpc_thunk_t *pt;
-    int           tid = __kmp_tid_from_gtid( global_tid );
+static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid,
+                                        kmpc_task_queue_t *queue,
+                                        int in_parallel) {
+  kmpc_thunk_t *pt;
+  int tid = __kmp_tid_from_gtid(global_tid);
 
-    KMP_DEBUG_ASSERT (queue->tq_nfull > 0);  /*  check queue not empty  */
+  KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /*  check queue not empty  */
 
-    if (queue->tq.tq_parent != NULL && in_parallel) {
-        int ct;
-        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-        ct = ++(queue->tq_ref_count);
-        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
-          __LINE__, global_tid, queue, ct));
-    }
+  if (queue->tq.tq_parent != NULL && in_parallel) {
+    int ct;
+    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+    ct = ++(queue->tq_ref_count);
+    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+    KMP_DEBUG_REF_CTS(
+        ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
+  }
 
-    pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
+  pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
 
-    if (queue->tq_tail >= queue->tq_nslots)
-        queue->tq_tail = 0;
+  if (queue->tq_tail >= queue->tq_nslots)
+    queue->tq_tail = 0;
 
-    if (in_parallel) {
-        queue->tq_th_thunks[tid].ai_data++;
+  if (in_parallel) {
+    queue->tq_th_thunks[tid].ai_data++;
 
-        KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */
+    KMP_MB(); /* necessary so ai_data increment is propagated to other threads
+                 immediately (digital) */
 
-        KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
-            global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
-    }
+    KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding "
+                   "thunks from queue %p\n",
+                   global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
+  }
 
-    (queue->tq_nfull)--;
+  (queue->tq_nfull)--;
 
 #ifdef KMP_DEBUG
-    KMP_MB();
+  KMP_MB();
 
-    /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */
+  /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is
+   * decremented */
 
-    KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
+  KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
 
-    if (in_parallel) {
-        KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
-    }
+  if (in_parallel) {
+    KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <=
+                     __KMP_TASKQ_THUNKS_PER_TH);
+  }
 #endif
 
-    return pt;
+  return pt;
 }
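
Stripped of the locking, worker release, and per-thread accounting, __kmp_enqueue_task and __kmp_dequeue_task above operate a plain circular buffer: tq_head is the next slot to write, tq_tail the next slot to read, and tq_nfull counts occupancy. A bare sketch of just that index arithmetic (illustrative names only):

    struct ring {
      void **slot; // nslots entries
      int nslots;
      int head;  // next slot to write
      int tail;  // next slot to read
      int nfull; // occupied slots
    };

    // Returns nonzero if the ring just became full, as __kmp_enqueue_task does.
    static int ring_put(struct ring *r, void *item) {
      r->slot[r->head++] = item;
      if (r->head >= r->nslots)
        r->head = 0; // wrap around
      r->nfull++;
      return r->nfull == r->nslots;
    }

    static void *ring_get(struct ring *r) {
      void *item = r->slot[r->tail++];
      if (r->tail >= r->nslots)
        r->tail = 0; // wrap around
      r->nfull--;
      return item;
    }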
 
-/*
- * Find the next (non-null) task to dequeue and return it.
+/* Find the next (non-null) task to dequeue and return it.
  * This is never called unless in_parallel=TRUE
  *
  * Here are the rules for deciding which queue to take the task from:
@@ -792,1241 +778,1252 @@ __kmp_dequeue_task (kmp_int32 global_tid
  * TQF_IS_LASTPRIVATE).
  */
 
-static kmpc_thunk_t *
-__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
-{
-    kmpc_thunk_t *pt  = NULL;
-    int           tid = __kmp_tid_from_gtid( global_tid );
-
-    /* To prevent deadlock from tq_queue_lck if queue already deallocated */
-    if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-
-        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
-        /* Check again to avoid race in __kmpc_end_taskq() */
-        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-            if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
-                /* if there's enough room in the queue and the dispatcher */
-                /* (taskq task) is available, schedule more tasks         */
-                pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
-                queue->tq_taskq_slot = NULL;
-            }
-            else if (queue->tq_nfull == 0 ||
-                     queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
-                /* do nothing if no thunks available or this thread can't */
-                /* run any because it already is executing too many       */
-
-                pt = NULL;
-            }
-            else if (queue->tq_nfull > 1) {
-                /*  always safe to schedule a task even if TQF_IS_LASTPRIVATE  */
+static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid,
+                                              kmpc_task_queue_t *queue) {
+  kmpc_thunk_t *pt = NULL;
+  int tid = __kmp_tid_from_gtid(global_tid);
+
+  /* To prevent deadlock from tq_queue_lck if queue already deallocated */
+  if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+
+    __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
+
+    /* Check again to avoid race in __kmpc_end_taskq() */
+    if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
+
+      if ((queue->tq_taskq_slot != NULL) &&
+          (queue->tq_nfull <= queue->tq_hiwat)) {
+        /* if there's enough room in the queue and the dispatcher */
+        /* (taskq task) is available, schedule more tasks         */
+        pt = (kmpc_thunk_t *)queue->tq_taskq_slot;
+        queue->tq_taskq_slot = NULL;
+      } else if (queue->tq_nfull == 0 ||
+                 queue->tq_th_thunks[tid].ai_data >=
+                     __KMP_TASKQ_THUNKS_PER_TH) {
+        /* do nothing if no thunks available or this thread can't */
+        /* run any because it already is executing too many       */
+        pt = NULL;
+      } else if (queue->tq_nfull > 1) {
+        /*  always safe to schedule a task even if TQF_IS_LASTPRIVATE  */
+
+        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
+      } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
+        // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE
+        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
+      } else if (queue->tq_flags & TQF_IS_LAST_TASK) {
+        /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task()   */
+        /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
+        /* instrumentation does copy-out.                                  */
+        pt = __kmp_dequeue_task(global_tid, queue, TRUE);
+        pt->th_flags |=
+            TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */
+      }
+    }
+
+    /* GEH - What happens here if it is lastprivate, but not last task? */
+    __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+  }
 
-                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
-            }
-            else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
-                /*  one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE  */
-
-                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
-            }
-            else if (queue->tq_flags & TQF_IS_LAST_TASK) {
-                /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task()   */
-                /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
-                /* instrumentation does copy-out.                                  */
-
-                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
-                pt->th_flags |= TQF_IS_LAST_TASK;  /* don't need test_then_or since already locked */
-            }
-        }
-
-        /* GEH - What happens here if is lastprivate, but not last task? */
-        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-    }
-
-    return pt;
+  return pt;
 }
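
The if/else chain above encodes a fixed priority for what to take from a queue: the dispatcher (taskq task) when there is room below the high-water mark, nothing when the queue is empty or this thread has hit its per-thread thunk limit, otherwise an ordinary task, with the single-remaining-lastprivate case handled last. A compact restatement of that ordering, as a sketch with shortened stand-in parameters:

    enum pick { PICK_NOTHING, PICK_DISPATCHER, PICK_TASK, PICK_LAST_TASK };

    static enum pick choose(int has_dispatcher, int nfull, int hiwat,
                            int my_outstanding, int per_thread_limit,
                            int is_lastprivate, int end_taskq_seen) {
      if (has_dispatcher && nfull <= hiwat)
        return PICK_DISPATCHER; // room below high water: schedule more tasks
      if (nfull == 0 || my_outstanding >= per_thread_limit)
        return PICK_NOTHING;    // nothing queued, or this thread is saturated
      if (nfull > 1 || !is_lastprivate)
        return PICK_TASK;       // always safe to dequeue one of these
      if (end_taskq_seen)
        return PICK_LAST_TASK;  // one task left and it is known to be the last
      return PICK_NOTHING;      // lastprivate, but may not be the last task yet
    }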
 
-/*
- * Walk a tree of queues starting at queue's first child
- * and return a non-NULL thunk if one can be scheduled.
- * Must only be called when in_parallel=TRUE
- */
+/* Walk a tree of queues starting at queue's first child and return a non-NULL
+   thunk if one can be scheduled. Must only be called when in_parallel=TRUE */
 
 static kmpc_thunk_t *
-__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
-{
-    kmpc_thunk_t *pt = NULL;
-    kmpc_task_queue_t *queue = curr_queue;
-
-    if (curr_queue->tq_first_child != NULL) {
-        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
-        if (queue == NULL) {
-            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-            return NULL;
-        }
-
-        while (queue != NULL)  {
-            int ct;
-            kmpc_task_queue_t *next;
-
-            ct= ++(queue->tq_ref_count);
-            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
-              __LINE__, global_tid, queue, ct));
-
-            pt = __kmp_find_task_in_queue (global_tid, queue);
+__kmp_find_task_in_descendant_queue(kmp_int32 global_tid,
+                                    kmpc_task_queue_t *curr_queue) {
+  kmpc_thunk_t *pt = NULL;
+  kmpc_task_queue_t *queue = curr_queue;
+
+  if (curr_queue->tq_first_child != NULL) {
+    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
 
-            if (pt != NULL) {
-                int ct;
+    queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+    if (queue == NULL) {
+      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+      return NULL;
+    }
 
-                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+    while (queue != NULL) {
+      int ct;
+      kmpc_task_queue_t *next;
 
-                KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                           /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+      ct = ++(queue->tq_ref_count);
+      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+      KMP_DEBUG_REF_CTS(
+          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
 
-                ct = --(queue->tq_ref_count);
-                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-                  __LINE__, global_tid, queue, ct));
-                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
+      pt = __kmp_find_task_in_queue(global_tid, queue);
 
-                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+      if (pt != NULL) {
+        int ct;
 
-                return pt;
-            }
+        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+        // Make sure data structures in consistent state before querying them
+        // Seems to work without this for digital/alpha, needed for IBM/RS6000
+        KMP_MB();
 
-            /* although reference count stays active during descendant walk, shouldn't matter  */
-            /* since if children still exist, reference counts aren't being monitored anyway   */
+        ct = --(queue->tq_ref_count);
+        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+                           global_tid, queue, ct));
+        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
 
-            pt = __kmp_find_task_in_descendant_queue (global_tid, queue);
+        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
 
-            if (pt != NULL) {
-                int ct;
+        return pt;
+      }
 
-                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+      /* although the reference count stays active during the descendant walk,
+         it shouldn't matter, since if children still exist, reference counts
+         aren't being monitored anyway */
 
-                KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                           /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+      pt = __kmp_find_task_in_descendant_queue(global_tid, queue);
 
-                ct = --(queue->tq_ref_count);
-                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-                  __LINE__, global_tid, queue, ct));
-                KMP_DEBUG_ASSERT( ct >= 0 );
+      if (pt != NULL) {
+        int ct;
 
-                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+        __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+        // Make sure data structures in consistent state before querying them
+        // Seems to work without this for digital/alpha, needed for IBM/RS6000
+        KMP_MB();
 
-                return pt;
-            }
+        ct = --(queue->tq_ref_count);
+        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+                           global_tid, queue, ct));
+        KMP_DEBUG_ASSERT(ct >= 0);
 
-            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
+        __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
 
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+        return pt;
+      }
 
-            next = queue->tq_next_child;
+      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+      // Make sure data structures are in a consistent state before querying
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
 
-            ct = --(queue->tq_ref_count);
-            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-              __LINE__, global_tid, queue, ct));
-            KMP_DEBUG_ASSERT( ct >= 0 );
+      next = queue->tq_next_child;
 
-            queue = next;
-        }
+      ct = --(queue->tq_ref_count);
+      KMP_DEBUG_REF_CTS(
+          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
+      KMP_DEBUG_ASSERT(ct >= 0);
 
-        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
+      queue = next;
     }
 
-    return pt;
-}
+    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+  }
 
-/*
- * Walk up the taskq tree looking for a task to execute.
- * If we get to the root, search the tree for a descendent queue task.
- * Must only be called when in_parallel=TRUE
- */
+  return pt;
+}
 
+/* Walk up the taskq tree looking for a task to execute. If we get to the root,
+   search the tree for a descendant queue task. Must only be called when
+   in_parallel=TRUE */
 static kmpc_thunk_t *
-__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
-{
-    kmpc_task_queue_t *queue;
-    kmpc_thunk_t      *pt;
-
-    pt = NULL;
-
-    if (curr_queue->tq.tq_parent != NULL) {
-        queue = curr_queue->tq.tq_parent;
-
-        while (queue != NULL) {
-            if (queue->tq.tq_parent != NULL) {
-                int ct;
-                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
-                KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                           /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-                ct = ++(queue->tq_ref_count);
-                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
-                  __LINE__, global_tid, queue, ct));
-            }
+__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid,
+                                  kmpc_task_queue_t *curr_queue) {
+  kmpc_task_queue_t *queue;
+  kmpc_thunk_t *pt;
 
-            pt = __kmp_find_task_in_queue (global_tid, queue);
-            if (pt != NULL) {
-                if (queue->tq.tq_parent != NULL) {
-                    int ct;
-                    __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
-                    KMP_MB();  /* make sure data structures are in consistent state before querying them   */
-                               /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
-
-                    ct = --(queue->tq_ref_count);
-                    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-                      __LINE__, global_tid, queue, ct));
-                    KMP_DEBUG_ASSERT( ct >= 0 );
+  pt = NULL;
 
-                    __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-                }
+  if (curr_queue->tq.tq_parent != NULL) {
+    queue = curr_queue->tq.tq_parent;
 
-                return pt;
-            }
+    while (queue != NULL) {
+      if (queue->tq.tq_parent != NULL) {
+        int ct;
+        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+        // Make sure data structures are in a consistent state before querying
+        // Seems to work without this for digital/alpha, needed for IBM/RS6000
+        KMP_MB();
 
-            if (queue->tq.tq_parent != NULL) {
-                int ct;
-                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
-                KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                           /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-                ct = --(queue->tq_ref_count);
-                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-                  __LINE__, global_tid, queue, ct));
-                KMP_DEBUG_ASSERT( ct >= 0 );
-            }
-            queue = queue->tq.tq_parent;
+        ct = ++(queue->tq_ref_count);
+        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__,
+                           global_tid, queue, ct));
+      }
+
+      pt = __kmp_find_task_in_queue(global_tid, queue);
+      if (pt != NULL) {
+        if (queue->tq.tq_parent != NULL) {
+          int ct;
+          __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+          // Make sure data structures are in a consistent state before querying
+          // Seems to work without this for digital/alpha, needed for IBM/RS6000
+          KMP_MB();
+
+          ct = --(queue->tq_ref_count);
+          KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+                             global_tid, queue, ct));
+          KMP_DEBUG_ASSERT(ct >= 0);
 
-            if (queue != NULL)
-                __kmp_release_lock(& queue->tq_link_lck, global_tid);
+          __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
         }
 
+        return pt;
+      }
+
+      if (queue->tq.tq_parent != NULL) {
+        int ct;
+        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+        // Make sure data structures are in a consistent state before querying
+        // Seems to work without this for digital/alpha, needed for IBM/RS6000
+        KMP_MB();
+
+        ct = --(queue->tq_ref_count);
+        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__,
+                           global_tid, queue, ct));
+        KMP_DEBUG_ASSERT(ct >= 0);
+      }
+      queue = queue->tq.tq_parent;
+
+      if (queue != NULL)
+        __kmp_release_lock(&queue->tq_link_lck, global_tid);
     }
+  }
 
-    pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );
+  pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root);
 
-    return pt;
+  return pt;
 }
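
As an aside on the locking pattern used throughout the walks above: a queue is
kept alive across a lock release by bumping its tq_ref_count while the parent's
link lock is held, and the count is dropped the same way once the queue has
been searched. A minimal sketch of that pin/unpin idiom, written against a
hypothetical node type with pthread mutexes rather than the kmp lock API
(illustrative only, not part of this patch):

    /* Illustrative sketch only, not part of this patch. */
    #include <pthread.h>

    typedef struct node {
      struct node *parent;
      pthread_mutex_t link_lck; /* guards the child list and child ref counts */
      int ref_count;
    } node_t;

    /* Pin 'child' so it stays valid after the parent's link lock is dropped. */
    static void pin(node_t *child) {
      pthread_mutex_lock(&child->parent->link_lck);
      child->ref_count++;
      pthread_mutex_unlock(&child->parent->link_lck);
    }

    /* Undo the pin once the child has been searched. */
    static void unpin(node_t *child) {
      pthread_mutex_lock(&child->parent->link_lck);
      child->ref_count--;
      pthread_mutex_unlock(&child->parent->link_lck);
    }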
 
-static int
-__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
-{
-    int i;
+static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) {
+  int i;
 
-    /* KMP_MB(); *//* is this really necessary? */
+  /* KMP_MB(); */ /* is this really necessary? */
 
-    for (i=0; i<queue->tq_nproc; i++) {
-        if (queue->tq_th_thunks[i].ai_data != 0)
-            return FALSE;
-    }
+  for (i = 0; i < queue->tq_nproc; i++) {
+    if (queue->tq_th_thunks[i].ai_data != 0)
+      return FALSE;
+  }
 
-    return TRUE;
+  return TRUE;
 }
 
-static int
-__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
-{
-    return (queue->tq_first_child != NULL);
+static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) {
+  return (queue->tq_first_child != NULL);
 }
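
These two helpers feed the "is this queue completely done?" test used further
down when deciding whether a nowait sub-queue can be torn down: all tasks
queued, no slots occupied, no thunks still in flight on any thread, and no
child queues left. A standalone sketch of that combined predicate with
hypothetical field names (illustrative only, not part of this patch):

    /* Illustrative sketch only, not part of this patch. */
    #include <stdbool.h>
    #include <stddef.h>

    typedef struct queue {
      bool all_tasks_queued; /* TQF_ALL_TASKS_QUEUED analogue */
      int nfull;             /* tasks still sitting in the queue slots */
      int nproc;             /* number of threads serviced by this queue */
      int *in_flight;        /* per-thread count of thunks being executed */
      struct queue *first_child;
    } queue_t;

    static bool queue_is_finished(const queue_t *q) {
      if (!q->all_tasks_queued || q->nfull != 0 || q->first_child != NULL)
        return false;
      for (int i = 0; i < q->nproc; i++) /* mirrors __kmp_taskq_tasks_finished */
        if (q->in_flight[i] != 0)
          return false;
      return true;
    }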
 
-static void
-__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
-{
+static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid,
+                                         kmpc_task_queue_t *queue,
+                                         int in_parallel) {
 #ifdef KMP_DEBUG
-    kmp_int32     i;
-    kmpc_thunk_t *thunk;
+  kmp_int32 i;
+  kmpc_thunk_t *thunk;
 #endif
 
-    KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
-    KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));
-
-    /*  sub-queue in a recursion, not the root task queue  */
-    KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);
-
-    if (in_parallel) {
-        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-    }
-
-    KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
+  KF_TRACE(50,
+           ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
+  KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid));
+
+  /*  sub-queue in a recursion, not the root task queue  */
+  KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL);
+
+  if (in_parallel) {
+    __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
+  }
 
-    /*  unlink queue from its siblings if any at this level  */
-    if (queue->tq_prev_child != NULL)
-        queue->tq_prev_child->tq_next_child = queue->tq_next_child;
-    if (queue->tq_next_child != NULL)
-        queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
-    if (queue->tq.tq_parent->tq_first_child == queue)
-        queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
+  KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);
 
-    queue->tq_prev_child = NULL;
-    queue->tq_next_child = NULL;
+  /*  unlink queue from its siblings if any at this level  */
+  if (queue->tq_prev_child != NULL)
+    queue->tq_prev_child->tq_next_child = queue->tq_next_child;
+  if (queue->tq_next_child != NULL)
+    queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
+  if (queue->tq.tq_parent->tq_first_child == queue)
+    queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
 
-    if (in_parallel) {
-        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
-          __LINE__, global_tid, queue, queue->tq_ref_count));
+  queue->tq_prev_child = NULL;
+  queue->tq_next_child = NULL;
 
-        /* wait until all other threads have stopped accessing this queue */
-        while (queue->tq_ref_count > 1) {
-            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+  if (in_parallel) {
+    KMP_DEBUG_REF_CTS(
+        ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
+         __LINE__, global_tid, queue, queue->tq_ref_count));
 
-            KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);
+    /* wait until all other threads have stopped accessing this queue */
+    while (queue->tq_ref_count > 1) {
+      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
 
-            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+      KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE,
+                     NULL);
 
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-        }
-
-        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
+      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
     }
 
-    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
-      __LINE__, global_tid, queue));
+    __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+  }
 
-#ifdef KMP_DEBUG
-    KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
-    KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
+  KMP_DEBUG_REF_CTS(
+      ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue));
 
-    for (i=0; i<queue->tq_nproc; i++) {
-        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
-    }
+#ifdef KMP_DEBUG
+  KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
+  KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
 
-    i = 0;
-    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
-        ++i;
+  for (i = 0; i < queue->tq_nproc; i++) {
+    KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
+  }
+
+  i = 0;
+  for (thunk = queue->tq_free_thunks; thunk != NULL;
+       thunk = thunk->th.th_next_free)
+    ++i;
 
-    KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
+  KMP_ASSERT(i ==
+             queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
 #endif
 
-    /*  release storage for queue entry  */
-    __kmp_free_taskq ( tq, queue, TRUE, global_tid );
+  /*  release storage for queue entry  */
+  __kmp_free_taskq(tq, queue, TRUE, global_tid);
 
-    KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
-    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+  KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
+  KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
 }
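
The unlink step in __kmp_remove_queue_from_tree is a plain doubly linked
sibling-list removal done under the parent's link lock; afterwards the remover
waits for tq_ref_count to fall back to 1 (its own reference) before freeing. A
simplified sketch of just the unlink, with hypothetical types (illustrative
only, not part of this patch):

    /* Illustrative sketch only, not part of this patch. */
    #include <stddef.h>

    typedef struct q {
      struct q *parent, *first_child, *prev_sibling, *next_sibling;
    } q_t;

    /* Remove 'node' from its parent's child list (the real runtime holds the
     * parent's link lock while doing this). */
    static void unlink_from_siblings(q_t *node) {
      if (node->prev_sibling != NULL)
        node->prev_sibling->next_sibling = node->next_sibling;
      if (node->next_sibling != NULL)
        node->next_sibling->prev_sibling = node->prev_sibling;
      if (node->parent->first_child == node)
        node->parent->first_child = node->next_sibling;
      node->prev_sibling = node->next_sibling = NULL;
    }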
 
-/*
- * Starting from indicated queue, proceed downward through tree and
- * remove all taskqs which are finished, but only go down to taskqs
- * which have the "nowait" clause present.  Assume this is only called
- * when in_parallel=TRUE.
- */
-
-static void
-__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
-{
-    kmpc_task_queue_t *queue = curr_queue;
-
-    if (curr_queue->tq_first_child != NULL) {
-        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
-        KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                   /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
-        if (queue != NULL) {
-            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-            return;
-        }
-
-        while (queue != NULL)  {
-            kmpc_task_queue_t *next;
-            int ct = ++(queue->tq_ref_count);
-            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
-              __LINE__, global_tid, queue, ct));
-
-
-            /* although reference count stays active during descendant walk, */
-            /* shouldn't matter since if children still exist, reference     */
-            /* counts aren't being monitored anyway                          */
-
-            if (queue->tq_flags & TQF_IS_NOWAIT) {
-                __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );
-
-                if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
-                    __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {
-
-                    /*
-                     Only remove this if we have not already marked it for deallocation.
-                     This should prevent multiple threads from trying to free this.
-                     */
-
-                    if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
-                        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-                            queue->tq_flags |= TQF_DEALLOCATED;
-                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-
-                            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
-
-                            /* Can't do any more here since can't be sure where sibling queue is so just exit this level */
-                            return;
-                        }
-                        else {
-                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-                        }
-                    }
-                    /* otherwise, just fall through and decrement reference count */
-                }
-            }
+/* Starting from the indicated queue, proceed downward through the tree and
+   remove all taskqs which are finished, but only go down to taskqs which have
+   the "nowait" clause present. Assume this is only called when
+   in_parallel=TRUE. */
 
-            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-            next = queue->tq_next_child;
-
-            ct = --(queue->tq_ref_count);
-            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-              __LINE__, global_tid, queue, ct));
-            KMP_DEBUG_ASSERT( ct >= 0 );
-
-            queue = next;
-        }
+static void __kmp_find_and_remove_finished_child_taskq(
+    kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) {
+  kmpc_task_queue_t *queue = curr_queue;
 
-        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-    }
-}
-
-/*
- * Starting from indicated queue, proceed downward through tree and
- * remove all taskq's assuming all are finished and
- * assuming NO other threads are executing at this point.
- */
-
-static void
-__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
-{
-    kmpc_task_queue_t *next_child;
-
-    queue = (kmpc_task_queue_t *) queue->tq_first_child;
-
-    while (queue != NULL)  {
-        __kmp_remove_all_child_taskq ( tq, global_tid, queue );
+  if (curr_queue->tq_first_child != NULL) {
+    __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+    // Make sure data structures are in consistent state before querying them
+    // Seems to work without this call for digital/alpha, needed for IBM/RS6000
+    KMP_MB();
 
-        next_child = queue->tq_next_child;
-        queue->tq_flags |= TQF_DEALLOCATED;
-        __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
-        queue = next_child;
+    queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
+    if (queue != NULL) {
+      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+      return;
     }
-}
 
-static void
-__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
-{
-    kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
-    kmp_int32          tid   = __kmp_tid_from_gtid( global_tid );
-
-    KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
-    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
-    KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
-    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
-
-    /*
-     * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows:
-     *
-     * happens exactly once:
-     * 1) __kmpc_taskq             : push (if returning thunk only)
-     * 4) __kmpc_end_taskq_task    : pop
-     *
-     * optionally happens *each* time taskq task is dequeued/enqueued:
-     * 2) __kmpc_taskq_task        : pop
-     * 3) __kmp_execute_task_from_queue  : push
-     *
-     * execution ordering:  1,(2,3)*,4
-     */
-
-    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
-        kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
-        thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;
-
-        if ( __kmp_env_consistency_check ) {
-            __kmp_push_workshare( global_tid,
-                    (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
-                    queue->tq_loc );
-        }
-    }
-    else {
-        if ( __kmp_env_consistency_check )
-            __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
-    }
+    while (queue != NULL) {
+      kmpc_task_queue_t *next;
+      int ct = ++(queue->tq_ref_count);
+      KMP_DEBUG_REF_CTS(
+          ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct));
+
+      /* although reference count stays active during descendant walk, */
+      /* shouldn't matter since if children still exist, reference     */
+      /* counts aren't being monitored anyway                          */
+
+      if (queue->tq_flags & TQF_IS_NOWAIT) {
+        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
+
+        if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) &&
+            (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) &&
+            !__kmp_taskq_has_any_children(queue)) {
+
+          /* Only remove this if we have not already marked it for deallocation.
+             This should prevent multiple threads from trying to free this. */
+
+          if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) {
+            if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+              queue->tq_flags |= TQF_DEALLOCATED;
+              __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+
+              __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
+
+              /* Can't do any more here, since we can't be sure where the
+               * sibling queue is, so just exit this level */
+              return;
+            } else {
+              __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+            }
+          }
+          /* otherwise, just fall through and decrement reference count */
+        }
+      }
+
+      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
+
+      next = queue->tq_next_child;
+
+      ct = --(queue->tq_ref_count);
+      KMP_DEBUG_REF_CTS(
+          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
+      KMP_DEBUG_ASSERT(ct >= 0);
+
+      queue = next;
+    }
+
+    __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+  }
+}
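
The deallocation race handled above is resolved with a try-lock plus the
TQF_DEALLOCATED flag: the one thread that wins the try-lock and still sees the
flag clear sets it and removes the queue; every other thread backs off. A
minimal pthread sketch of that claim idiom, with hypothetical names
(illustrative only, not part of this patch):

    /* Illustrative sketch only, not part of this patch. */
    #include <pthread.h>
    #include <stdbool.h>

    typedef struct {
      pthread_mutex_t lck;
      bool deallocated;
    } guarded_t;

    /* Returns true for exactly one caller, which must then free the object. */
    static bool try_claim_for_free(guarded_t *g) {
      if (pthread_mutex_trylock(&g->lck) != 0)
        return false; /* someone else is already looking at it; back off */
      bool claimed = !g->deallocated;
      if (claimed)
        g->deallocated = true; /* mark it so nobody else tries to free it */
      pthread_mutex_unlock(&g->lck);
      return claimed;
    }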
+
+/* Starting from the indicated queue, proceed downward through the tree and
+   remove all taskqs, assuming all are finished and assuming NO other threads
+   are executing at this point. */
+static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid,
+                                         kmpc_task_queue_t *queue) {
+  kmpc_task_queue_t *next_child;
+
+  queue = (kmpc_task_queue_t *)queue->tq_first_child;
+
+  while (queue != NULL) {
+    __kmp_remove_all_child_taskq(tq, global_tid, queue);
+
+    next_child = queue->tq_next_child;
+    queue->tq_flags |= TQF_DEALLOCATED;
+    __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE);
+    queue = next_child;
+  }
+}
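
The serial teardown above is a depth-first, children-before-parent traversal,
with the next sibling saved before the current child is removed. A compact
sketch of that traversal order, with hypothetical types and a hypothetical
destroy_node helper (illustrative only, not part of this patch):

    /* Illustrative sketch only, not part of this patch. */
    #include <stddef.h>

    typedef struct tn {
      struct tn *first_child, *next_sibling;
    } tn_t;

    extern void destroy_node(tn_t *n); /* hypothetical: frees a single node */

    /* Depth first, children before parent, sibling grabbed before the free. */
    static void remove_all_children(tn_t *n) {
      tn_t *child = n->first_child;
      while (child != NULL) {
        tn_t *next = child->next_sibling; /* save before child is destroyed */
        remove_all_children(child);
        destroy_node(child);
        child = next;
      }
      n->first_child = NULL;
    }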
+
+static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc,
+                                          kmp_int32 global_tid,
+                                          kmpc_thunk_t *thunk,
+                                          int in_parallel) {
+  kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
+  kmp_int32 tid = __kmp_tid_from_gtid(global_tid);
+
+  KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
+  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
+  KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
+  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
+
+  /* For the taskq task, the curr_thunk push and pop pairs are set up as
+   * follows:
+   *
+   * happens exactly once:
+   * 1) __kmpc_taskq             : push (if returning thunk only)
+   * 4) __kmpc_end_taskq_task    : pop
+   *
+   * optionally happens *each* time taskq task is dequeued/enqueued:
+   * 2) __kmpc_taskq_task        : pop
+   * 3) __kmp_execute_task_from_queue  : push
+   *
+   * execution ordering:  1,(2,3)*,4
+   */
+
+  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
+    kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
+    thunk->th.th_shareds =
+        (kmpc_shared_vars_t *)queue->tq_shareds[index].ai_data;
+
+    if (__kmp_env_consistency_check) {
+      __kmp_push_workshare(global_tid,
+                           (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
+                                                              : ct_task,
+                           queue->tq_loc);
+    }
+  } else {
+    if (__kmp_env_consistency_check)
+      __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc);
+  }
+
+  if (in_parallel) {
+    thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
+    tq->tq_curr_thunk[tid] = thunk;
+
+    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
+  }
+
+  KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk,
+                queue, global_tid));
+  thunk->th_task(global_tid, thunk);
+  KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue,
+                global_tid));
+
+  if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
+    if (__kmp_env_consistency_check)
+      __kmp_pop_workshare(global_tid,
+                          (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered
+                                                             : ct_task,
+                          queue->tq_loc);
 
     if (in_parallel) {
-        thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
-        tq->tq_curr_thunk[tid] = thunk;
-
-        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+      tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+      thunk->th_encl_thunk = NULL;
+      KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
     }
 
-    KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
-    thunk->th_task (global_tid, thunk);
-    KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
-
-    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
-        if ( __kmp_env_consistency_check )
-            __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
-                                 queue->tq_loc );
-
-        if (in_parallel) {
-            tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
-            thunk->th_encl_thunk = NULL;
-            KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
-        }
-
-        if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
-            __kmp_taskq_check_ordered(global_tid, thunk);
-        }
+    if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
+      __kmp_taskq_check_ordered(global_tid, thunk);
+    }
 
-        __kmp_free_thunk (queue, thunk, in_parallel, global_tid);
+    __kmp_free_thunk(queue, thunk, in_parallel, global_tid);
 
-        KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
-        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+    KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n",
+                   global_tid, thunk));
+    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
 
-        if (in_parallel) {
-            KMP_MB();   /* needed so thunk put on free list before outstanding thunk count is decremented */
+    if (in_parallel) {
+      KMP_MB(); /* needed so the thunk is put on the free list before the
+                   outstanding thunk count is decremented */
 
-            KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
+      KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
 
-            KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
-                global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));
+      KF_TRACE(
+          200,
+          ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
+           global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue));
 
-            queue->tq_th_thunks[tid].ai_data--;
+      queue->tq_th_thunks[tid].ai_data--;
 
-            /* KMP_MB(); */     /* is MB really necessary ? */
-        }
+      /* KMP_MB(); */ /* is MB really necessary? */
+    }
 
-        if (queue->tq.tq_parent != NULL && in_parallel) {
-            int ct;
-            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-            ct = --(queue->tq_ref_count);
-            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
-              __LINE__, global_tid, queue, ct));
-            KMP_DEBUG_ASSERT( ct >= 0 );
-        }
+    if (queue->tq.tq_parent != NULL && in_parallel) {
+      int ct;
+      __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+      ct = --(queue->tq_ref_count);
+      __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+      KMP_DEBUG_REF_CTS(
+          ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct));
+      KMP_DEBUG_ASSERT(ct >= 0);
     }
+  }
 }
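
The push/pop pairing described in the comment inside
__kmp_execute_task_from_queue amounts to a per-thread intrusive stack: each
thunk remembers the thunk it encloses, tq_curr_thunk[tid] always points at the
innermost one, and a pop restores the enclosing thunk. A compact sketch with
hypothetical types (illustrative only, not part of this patch):

    /* Illustrative sketch only, not part of this patch. */
    #include <stddef.h>

    typedef struct thunk {
      struct thunk *encl; /* thunk that was current when this one was pushed */
    } thunk_t;

    /* curr[tid] plays the role of tq_curr_thunk[tid]. */
    static void push_thunk(thunk_t **curr, int tid, thunk_t *t) {
      t->encl = curr[tid];
      curr[tid] = t;
    }

    static void pop_thunk(thunk_t **curr, int tid) {
      thunk_t *t = curr[tid];
      curr[tid] = t->encl; /* restore the enclosing thunk */
      t->encl = NULL;
    }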
 
-/* --------------------------------------------------------------------------- */
-
 /* starts a taskq; creates and returns a thunk for the taskq_task        */
 /* also, returns pointer to shared vars for this thread in "shareds" arg */
+kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid,
+                           kmpc_task_t taskq_task, size_t sizeof_thunk,
+                           size_t sizeof_shareds, kmp_int32 flags,
+                           kmpc_shared_vars_t **shareds) {
+  int in_parallel;
+  kmp_int32 nslots, nthunks, nshareds, nproc;
+  kmpc_task_queue_t *new_queue, *curr_queue;
+  kmpc_thunk_t *new_taskq_thunk;
+  kmp_info_t *th;
+  kmp_team_t *team;
+  kmp_taskq_t *tq;
+  kmp_int32 tid;
+
+  KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid));
+
+  th = __kmp_threads[global_tid];
+  team = th->th.th_team;
+  tq = &team->t.t_taskq;
+  nproc = team->t.t_nproc;
+  tid = __kmp_tid_from_gtid(global_tid);
 
-kmpc_thunk_t *
-__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
-              size_t sizeof_thunk, size_t sizeof_shareds,
-              kmp_int32 flags, kmpc_shared_vars_t **shareds )
-{
-    int                  in_parallel;
-    kmp_int32            nslots, nthunks, nshareds, nproc;
-    kmpc_task_queue_t   *new_queue, *curr_queue;
-    kmpc_thunk_t        *new_taskq_thunk;
-    kmp_info_t          *th;
-    kmp_team_t          *team;
-    kmp_taskq_t         *tq;
-    kmp_int32            tid;
-
-    KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));
-
-    th = __kmp_threads[ global_tid ];
-    team = th -> th.th_team;
-    tq = & team -> t.t_taskq;
-    nproc = team -> t.t_nproc;
-    tid = __kmp_tid_from_gtid( global_tid );
-
-    /* find out whether this is a parallel taskq or serialized one. */
-    in_parallel = in_parallel_context( team );
-
-    if( ! tq->tq_root ) {
-        if (in_parallel) {
-            /* Vector ORDERED SECTION to taskq version */
-            th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
-
-            /* Vector ORDERED SECTION to taskq version */
-            th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
-        }
-
-        if (in_parallel) {
-            /* This shouldn't be a barrier region boundary, it will confuse the user. */
-            /* Need the boundary to be at the end taskq instead. */
-            if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
-                /* Creating the active root queue, and we are not the master thread. */
-                /* The master thread below created the queue and tasks have been     */
-                /* enqueued, and the master thread released this barrier.  This      */
-                /* worker thread can now proceed and execute tasks.  See also the    */
-                /* TQF_RELEASE_WORKERS which is used to handle this case.            */
-
-                *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;
-
-                KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
-
-                return NULL;
-            }
-        }
-
-        /* master thread only executes this code */
-
-        if( tq->tq_curr_thunk_capacity < nproc ) {
-            if(tq->tq_curr_thunk)
-                __kmp_free(tq->tq_curr_thunk);
-            else {
-                /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */
-                __kmp_init_lock( & tq->tq_freelist_lck );
-            }
-
-            tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
-            tq -> tq_curr_thunk_capacity = nproc;
-        }
-
-        if (in_parallel)
-            tq->tq_global_flags = TQF_RELEASE_WORKERS;
-    }
-
-    /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
-    /*      on some heuristics (e.g., depth of queue nesting?).            */
-
-    nslots = (in_parallel) ? (2 * nproc) : 1;
-
-    /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
-    /* jobs being executed by other threads, and one extra for taskq slot          */
-
-    nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;
-
-    /* Only the root taskq gets a per-thread array of shareds.       */
-    /* The rest of the taskq's only get one copy of the shared vars. */
-
-    nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;
-
-    /*  create overall queue data structure and its components that require allocation */
-
-    new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
-        sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );
-
-    /*  rest of new_queue initializations  */
-
-    new_queue->tq_flags           = flags & TQF_INTERFACE_FLAGS;
+  /* find out whether this is a parallel taskq or serialized one. */
+  in_parallel = in_parallel_context(team);
 
+  if (!tq->tq_root) {
     if (in_parallel) {
-        new_queue->tq_tasknum_queuing  = 0;
-        new_queue->tq_tasknum_serving  = 0;
-        new_queue->tq_flags           |= TQF_PARALLEL_CONTEXT;
-    }
+      /* Vector ORDERED SECTION to taskq version */
+      th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
 
-    new_queue->tq_taskq_slot   = NULL;
-    new_queue->tq_nslots       = nslots;
-    new_queue->tq_hiwat        = HIGH_WATER_MARK (nslots);
-    new_queue->tq_nfull        = 0;
-    new_queue->tq_head         = 0;
-    new_queue->tq_tail         = 0;
-    new_queue->tq_loc          = loc;
-
-    if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
-        /* prepare to serve the first-queued task's ORDERED directive */
-        new_queue->tq_tasknum_serving = 1;
-
-        /* Vector ORDERED SECTION to taskq version */
-        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
-
-        /* Vector ORDERED SECTION to taskq version */
-        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
+      /* Vector ORDERED SECTION to taskq version */
+      th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
     }
 
-    /*  create a new thunk for the taskq_task in the new_queue  */
-    *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;
-
-    new_taskq_thunk->th.th_shareds = *shareds;
-    new_taskq_thunk->th_task       = taskq_task;
-    new_taskq_thunk->th_flags      = new_queue->tq_flags | TQF_TASKQ_TASK;
-    new_taskq_thunk->th_status     = 0;
-
-    KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
-
-    /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */
-
-    /* insert the new task queue into the tree, but only after all fields initialized */
-
     if (in_parallel) {
-        if( ! tq->tq_root ) {
-            new_queue->tq.tq_parent   = NULL;
-            new_queue->tq_first_child = NULL;
-            new_queue->tq_next_child  = NULL;
-            new_queue->tq_prev_child  = NULL;
-            new_queue->tq_ref_count   = 1;
-            tq->tq_root = new_queue;
-        }
-        else {
-            curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
-            new_queue->tq.tq_parent   = curr_queue;
-            new_queue->tq_first_child = NULL;
-            new_queue->tq_prev_child  = NULL;
-            new_queue->tq_ref_count   = 1;      /* for this the thread that built the queue */
-
-            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
-              __LINE__, global_tid, new_queue, new_queue->tq_ref_count));
-
-            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
-
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
-
-            new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;
-
-            if (curr_queue->tq_first_child != NULL)
-                curr_queue->tq_first_child->tq_prev_child = new_queue;
-
-            curr_queue->tq_first_child = new_queue;
-
-            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
-        }
-
-        /* set up thunk stack only after code that determines curr_queue above */
-        new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
-        tq->tq_curr_thunk[tid] = new_taskq_thunk;
-
-        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
-    }
-    else {
-        new_taskq_thunk->th_encl_thunk = 0;
-        new_queue->tq.tq_parent   = NULL;
-        new_queue->tq_first_child = NULL;
-        new_queue->tq_next_child  = NULL;
-        new_queue->tq_prev_child  = NULL;
-        new_queue->tq_ref_count   = 1;
+      // This shouldn't be a barrier region boundary; it will confuse the user.
+      // The boundary needs to be at the end of the taskq instead.
+      if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
+        /* Creating the active root queue, and we are not the master thread. */
+        /* The master thread below created the queue and tasks have been     */
+        /* enqueued, and the master thread released this barrier.  This      */
+        /* worker thread can now proceed and execute tasks.  See also the    */
+        /* TQF_RELEASE_WORKERS which is used to handle this case.            */
+        *shareds = (kmpc_shared_vars_t *)tq->tq_root->tq_shareds[tid].ai_data;
+
+        KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));
+
+        return NULL;
+      }
+    }
+
+    /* master thread only executes this code */
+    if (tq->tq_curr_thunk_capacity < nproc) {
+      if (tq->tq_curr_thunk)
+        __kmp_free(tq->tq_curr_thunk);
+      else {
+        /* only need to do this once at outer level, i.e. when tq_curr_thunk is
+         * still NULL */
+        __kmp_init_lock(&tq->tq_freelist_lck);
+      }
+
+      tq->tq_curr_thunk =
+          (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *));
+      tq->tq_curr_thunk_capacity = nproc;
     }
 
-#ifdef KMP_DEBUG
-    KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
-    KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));
+    if (in_parallel)
+      tq->tq_global_flags = TQF_RELEASE_WORKERS;
+  }
 
-    if (in_parallel) {
-        KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
+  /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
+  /*      on some heuristics (e.g., depth of queue nesting?).            */
+  nslots = (in_parallel) ? (2 * nproc) : 1;
+
+  /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
+  /* jobs being executed by other threads, and one extra for taskq slot */
+  nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1)
+                          : nslots + 2;
+
+  /* Only the root taskq gets a per-thread array of shareds.       */
+  /* The rest of the taskq's only get one copy of the shared vars. */
+  nshareds = (!tq->tq_root && in_parallel) ? nproc : 1;
+
+  /*  create overall queue data structure and its components that require
+   * allocation */
+  new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds,
+                                nproc, sizeof_thunk, sizeof_shareds,
+                                &new_taskq_thunk, global_tid);
+
+  /*  rest of new_queue initializations  */
+  new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;
+
+  if (in_parallel) {
+    new_queue->tq_tasknum_queuing = 0;
+    new_queue->tq_tasknum_serving = 0;
+    new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
+  }
+
+  new_queue->tq_taskq_slot = NULL;
+  new_queue->tq_nslots = nslots;
+  new_queue->tq_hiwat = HIGH_WATER_MARK(nslots);
+  new_queue->tq_nfull = 0;
+  new_queue->tq_head = 0;
+  new_queue->tq_tail = 0;
+  new_queue->tq_loc = loc;
+
+  if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
+    /* prepare to serve the first-queued task's ORDERED directive */
+    new_queue->tq_tasknum_serving = 1;
+
+    /* Vector ORDERED SECTION to taskq version */
+    th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
+
+    /* Vector ORDERED SECTION to taskq version */
+    th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
+  }
+
+  /*  create a new thunk for the taskq_task in the new_queue  */
+  *shareds = (kmpc_shared_vars_t *)new_queue->tq_shareds[0].ai_data;
+
+  new_taskq_thunk->th.th_shareds = *shareds;
+  new_taskq_thunk->th_task = taskq_task;
+  new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK;
+  new_taskq_thunk->th_status = 0;
+
+  KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
+
+  // Make sure these inits complete before threads start using this queue
+  /* KMP_MB(); */ // (necessary?)
+
+  /* insert the new task queue into the tree, but only after all fields
+   * initialized */
+
+  if (in_parallel) {
+    if (!tq->tq_root) {
+      new_queue->tq.tq_parent = NULL;
+      new_queue->tq_first_child = NULL;
+      new_queue->tq_next_child = NULL;
+      new_queue->tq_prev_child = NULL;
+      new_queue->tq_ref_count = 1;
+      tq->tq_root = new_queue;
     } else {
-        KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
-    }
+      curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
+      new_queue->tq.tq_parent = curr_queue;
+      new_queue->tq_first_child = NULL;
+      new_queue->tq_prev_child = NULL;
+      new_queue->tq_ref_count = 1; /* for the thread that built the queue */
+
+      KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__,
+                         global_tid, new_queue, new_queue->tq_ref_count));
+
+      __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid);
+
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
+
+      new_queue->tq_next_child =
+          (struct kmpc_task_queue_t *)curr_queue->tq_first_child;
+
+      if (curr_queue->tq_first_child != NULL)
+        curr_queue->tq_first_child->tq_prev_child = new_queue;
+
+      curr_queue->tq_first_child = new_queue;
+
+      __kmp_release_lock(&curr_queue->tq_link_lck, global_tid);
+    }
+
+    /* set up thunk stack only after code that determines curr_queue above */
+    new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
+    tq->tq_curr_thunk[tid] = new_taskq_thunk;
+
+    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
+  } else {
+    new_taskq_thunk->th_encl_thunk = 0;
+    new_queue->tq.tq_parent = NULL;
+    new_queue->tq_first_child = NULL;
+    new_queue->tq_next_child = NULL;
+    new_queue->tq_prev_child = NULL;
+    new_queue->tq_ref_count = 1;
+  }
 
-    KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));
+#ifdef KMP_DEBUG
+  KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
+  KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid));
 
-    if (in_parallel) {
-        KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
-    }
+  if (in_parallel) {
+    KF_TRACE(25,
+             ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
+  } else {
+    KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue,
+                  global_tid));
+  }
+
+  KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid));
+
+  if (in_parallel) {
+    KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
+  }
 #endif /* KMP_DEBUG */
 
-    if ( __kmp_env_consistency_check )
-        __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );
+  if (__kmp_env_consistency_check)
+    __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc);
 
-    KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid));
 
-    return new_taskq_thunk;
+  return new_taskq_thunk;
 }
 
-
 /*  ends a taskq; last thread out destroys the queue  */
 
-void
-__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
-{
+void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid,
+                      kmpc_thunk_t *taskq_thunk) {
 #ifdef KMP_DEBUG
-    kmp_int32           i;
+  kmp_int32 i;
 #endif
-    kmp_taskq_t        *tq;
-    int                 in_parallel;
-    kmp_info_t         *th;
-    kmp_int32           is_outermost;
-    kmpc_task_queue_t  *queue;
-    kmpc_thunk_t       *thunk;
-    int                 nproc;
-
-    KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));
-
-    tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
-    nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
-
-    /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */
-    queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;
-
-    KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
-    is_outermost = (queue == tq->tq_root);
-    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+  kmp_taskq_t *tq;
+  int in_parallel;
+  kmp_info_t *th;
+  kmp_int32 is_outermost;
+  kmpc_task_queue_t *queue;
+  kmpc_thunk_t *thunk;
+  int nproc;
+
+  KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid));
+
+  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+  nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc;
+
+  /* For the outermost taskq only, all but one thread will have taskq_thunk ==
+   * NULL */
+  queue = (taskq_thunk == NULL) ? tq->tq_root
+                                : taskq_thunk->th.th_shareds->sv_queue;
+
+  KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
+  is_outermost = (queue == tq->tq_root);
+  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+
+  if (in_parallel) {
+    kmp_uint32 spins;
+
+    /* this is just a safeguard to release the waiting threads if */
+    /* the outermost taskq never queues a task                    */
+
+    if (is_outermost && (KMP_MASTER_GTID(global_tid))) {
+      if (tq->tq_global_flags & TQF_RELEASE_WORKERS) {
+        /* no lock needed, workers are still in spin mode */
+        tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
+
+        __kmp_end_split_barrier(bs_plain_barrier, global_tid);
+      }
+    }
+
+    /* keep dequeueing work until all tasks are queued and dequeued */
+
+    do {
+      /* wait until something is available to dequeue */
+      KMP_INIT_YIELD(spins);
+
+      while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) &&
+             (!__kmp_taskq_has_any_children(queue)) &&
+             (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) {
+        KMP_YIELD_WHEN(TRUE, spins);
+      }
+
+      /* check to see if we can execute tasks in the queue */
+      while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) &&
+             (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) {
+        KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk,
+                      queue, global_tid));
+        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+      }
+
+      /* see if work can be found in a descendant queue */
+      if ((__kmp_taskq_has_any_children(queue)) &&
+          (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
+              NULL) {
+
+        KF_TRACE(50,
+                 ("Stole thunk: %p in descendant queue: %p while waiting in "
+                  "queue: %p (%d)\n",
+                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+
+        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+      }
+
+    } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) ||
+             (queue->tq_nfull != 0));
+
+    KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue,
+                  global_tid));
+
+    /* keep working while not all tasks are finished and more work can be
+       found in descendant queues */
+
+    while ((!__kmp_taskq_tasks_finished(queue)) &&
+           (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) !=
+               NULL) {
 
-    if (in_parallel) {
-        kmp_uint32 spins;
-
-        /* this is just a safeguard to release the waiting threads if */
-        /* the outermost taskq never queues a task                    */
-
-        if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
-            if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
-                /* no lock needed, workers are still in spin mode */
-                tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
-
-                __kmp_end_split_barrier( bs_plain_barrier, global_tid );
-            }
-        }
-
-        /* keep dequeueing work until all tasks are queued and dequeued */
-
-        do {
-            /* wait until something is available to dequeue */
-            KMP_INIT_YIELD(spins);
-
-            while ( (queue->tq_nfull == 0)
-                 && (queue->tq_taskq_slot == NULL)
-                 && (! __kmp_taskq_has_any_children(queue) )
-                 && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
-                  ) {
-                KMP_YIELD_WHEN( TRUE, spins );
-            }
-
-            /* check to see if we can execute tasks in the queue */
-            while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
-                 && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
-                  ) {
-                KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
-                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
-            }
-
-            /* see if work found can be found in a descendant queue */
-            if ( (__kmp_taskq_has_any_children(queue))
-              && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
-               ) {
+      KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in "
+                    "queue: %p (%d)\n",
+                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
 
-                KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
-                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));
+      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+    }
 
-                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
-            }
+    KF_TRACE(50, ("No work found in descendant queues or all work finished in "
+                  "queue: %p (%d)\n",
+                  queue, global_tid));
 
-        } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
-               || (queue->tq_nfull != 0)
-                );
+    if (!is_outermost) {
+      /* need to return if NOWAIT present and not outermost taskq */
 
-        KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));
+      if (queue->tq_flags & TQF_IS_NOWAIT) {
+        __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
+        queue->tq_ref_count--;
+        KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0);
+        __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid);
 
-        /* wait while all tasks are not finished and more work found
-           in descendant queues */
+        KE_TRACE(
+            10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
 
-        while ( (!__kmp_taskq_tasks_finished(queue))
-             && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
-              ) {
+        return;
+      }
 
-            KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
-                thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+      __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
 
-            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
+      /* WAIT until all tasks are finished and no child queues exist before
+       * proceeding */
+      KMP_INIT_YIELD(spins);
+
+      while (!__kmp_taskq_tasks_finished(queue) ||
+             __kmp_taskq_has_any_children(queue)) {
+        thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue);
+
+        if (thunk != NULL) {
+          KF_TRACE(50,
+                   ("Stole thunk: %p in ancestor queue: %p while waiting in "
+                    "queue: %p (%d)\n",
+                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+          __kmp_execute_task_from_queue(tq, loc, global_tid, thunk,
+                                        in_parallel);
         }
 
-        KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));
+        KMP_YIELD_WHEN(thunk == NULL, spins);
 
-        if (!is_outermost) {
-            /* need to return if NOWAIT present and not outermost taskq */
+        __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue);
+      }
 
-            if (queue->tq_flags & TQF_IS_NOWAIT) {
-                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-                queue->tq_ref_count--;
-                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
-                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
-
-                KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
-
-                return;
-            }
-
-            __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
-
-            /* WAIT until all tasks are finished and no child queues exist before proceeding */
-            KMP_INIT_YIELD(spins);
-
-            while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
-                thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );
+      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
+      if (!(queue->tq_flags & TQF_DEALLOCATED)) {
+        queue->tq_flags |= TQF_DEALLOCATED;
+      }
+      __kmp_release_lock(&queue->tq_queue_lck, global_tid);
 
-                if (thunk != NULL) {
-                    KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
-                                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
-                    __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
-                }
+      /* only the allocating thread can deallocate the queue */
+      if (taskq_thunk != NULL) {
+        __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE);
+      }
 
-                KMP_YIELD_WHEN( thunk == NULL, spins );
+      KE_TRACE(
+          10,
+          ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n",
+           global_tid));
 
-                __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
-            }
-
-            __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-            if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
-                queue->tq_flags |= TQF_DEALLOCATED;
-            }
-            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
+      return;
+    }
 
-            /* only the allocating thread can deallocate the queue */
-            if (taskq_thunk != NULL) {
-                __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
-            }
+    // Outermost Queue: steal work from descendants until all tasks are finished
 
-            KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));
+    KMP_INIT_YIELD(spins);
 
-            return;
-        }
+    while (!__kmp_taskq_tasks_finished(queue)) {
+      thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
 
-        /* Outermost Queue: steal work from descendants until all tasks are finished */
+      if (thunk != NULL) {
+        KF_TRACE(50,
+                 ("Stole thunk: %p in descendant queue: %p while waiting in "
+                  "queue: %p (%d)\n",
+                  thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
 
-        KMP_INIT_YIELD(spins);
+        __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+      }
 
-        while (!__kmp_taskq_tasks_finished(queue)) {
-            thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
+      KMP_YIELD_WHEN(thunk == NULL, spins);
+    }
 
-            if (thunk != NULL) {
-                KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
-                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
+    /* Need this barrier to prevent destruction of queue before threads have all
+     * executed above code */
+    /* This may need to be done earlier when NOWAIT is implemented for the
+     * outermost level */
 
-                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
-            }
+    if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) {
+      /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here;   */
+      /* for right now, everybody waits, and the master thread destroys the  */
+      /* remaining queues.                                                   */
 
-            KMP_YIELD_WHEN( thunk == NULL, spins );
-        }
-
-        /* Need this barrier to prevent destruction of queue before threads have all executed above code */
-        /* This may need to be done earlier when NOWAIT is implemented for the outermost level */
+      __kmp_remove_all_child_taskq(tq, global_tid, queue);
 
-        if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
-            /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here;   */
-            /* for right now, everybody waits, and the master thread destroys the  */
-            /* remaining queues.                                                   */
-
-            __kmp_remove_all_child_taskq( tq, global_tid, queue );
-
-            /* Now destroy the root queue */
-            KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
-            KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+      /* Now destroy the root queue */
+      KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n",
+                     global_tid, queue));
+      KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
 
 #ifdef KMP_DEBUG
-            /*  the root queue entry  */
-            KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));
-
-            /*  children must all be gone by now because of barrier above */
-            KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
-
-            for (i=0; i<nproc; i++) {
-                KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
-            }
+      /*  the root queue entry  */
+      KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) &&
+                       (queue->tq_next_child == NULL));
 
-            for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);
+      /*  children must all be gone by now because of barrier above */
+      KMP_DEBUG_ASSERT(queue->tq_first_child == NULL);
 
-            KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
+      for (i = 0; i < nproc; i++) {
+        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
+      }
 
-            for (i = 0; i < nproc; i++) {
-                KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
-            }
+      for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL;
+           i++, thunk = thunk->th.th_next_free)
+        ;
+
+      KMP_DEBUG_ASSERT(i ==
+                       queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
+
+      for (i = 0; i < nproc; i++) {
+        KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]);
+      }
 #endif
-            /*  unlink the root queue entry  */
-            tq -> tq_root =  NULL;
+      /*  unlink the root queue entry  */
+      tq->tq_root = NULL;
 
-            /*  release storage for root queue entry  */
-            KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));
+      /*  release storage for root queue entry  */
+      KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue,
+                    global_tid));
 
-            queue->tq_flags |= TQF_DEALLOCATED;
-            __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
+      queue->tq_flags |= TQF_DEALLOCATED;
+      __kmp_free_taskq(tq, queue, in_parallel, global_tid);
 
-            KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
+      KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid));
 
-            /* release the workers now that the data structures are up to date */
-            __kmp_end_split_barrier( bs_plain_barrier, global_tid );
-        }
+      /* release the workers now that the data structures are up to date */
+      __kmp_end_split_barrier(bs_plain_barrier, global_tid);
+    }
 
-        th = __kmp_threads[ global_tid ];
+    th = __kmp_threads[global_tid];
 
-        /* Reset ORDERED SECTION to parallel version */
-        th->th.th_dispatch->th_deo_fcn = 0;
+    /* Reset ORDERED SECTION to parallel version */
+    th->th.th_dispatch->th_deo_fcn = 0;
 
-        /* Reset ORDERED SECTION to parallel version */
-        th->th.th_dispatch->th_dxo_fcn = 0;
-    }
-    else {
-        /* in serial execution context, dequeue the last task  */
-        /* and execute it, if there were any tasks encountered */
+    /* Reset ORDERED SECTION to parallel version */
+    th->th.th_dispatch->th_dxo_fcn = 0;
+  } else {
+    /* in serial execution context, dequeue the last task  */
+    /* and execute it, if there were any tasks encountered */
 
-        if (queue->tq_nfull > 0) {
-            KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
+    if (queue->tq_nfull > 0) {
+      KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
 
-            thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
+      thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
 
-            if (queue->tq_flags & TQF_IS_LAST_TASK) {
-                /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
-                /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
-                /* instrumentation does copy-out.                                  */
+      if (queue->tq_flags & TQF_IS_LAST_TASK) {
+        /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */
+        /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
+        /* instrumentation does copy-out.                                  */
 
-                /* no need for test_then_or call since already locked */
-                thunk->th_flags |= TQF_IS_LAST_TASK;
-            }
+        /* no need for test_then_or call since already locked */
+        thunk->th_flags |= TQF_IS_LAST_TASK;
+      }
 
-            KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));
+      KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
+                    thunk, queue));
 
-            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
-        }
+      __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel);
+    }
 
-        /* destroy the unattached serial queue now that there is no more work to do */
-        KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
-        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+    // destroy the unattached serial queue now that there is no more work to do
+    KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n",
+                   queue, global_tid));
+    KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
 
 #ifdef KMP_DEBUG
-        i = 0;
-        for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
-            ++i;
-        KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);
+    i = 0;
+    for (thunk = queue->tq_free_thunks; thunk != NULL;
+         thunk = thunk->th.th_next_free)
+      ++i;
+    KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1);
 #endif
-        /*  release storage for unattached serial queue  */
-        KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
-
-        queue->tq_flags |= TQF_DEALLOCATED;
-        __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
-    }
+    /*  release storage for unattached serial queue  */
+    KF_TRACE(50,
+             ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
+
+    queue->tq_flags |= TQF_DEALLOCATED;
+    __kmp_free_taskq(tq, queue, in_parallel, global_tid);
+  }
 
-    KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid));
 }
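
The wait loops in __kmpc_end_taskq above follow a steal-and-yield pattern: keep trying to take work from related queues and back off with a yield when nothing is available (KMP_YIELD_WHEN). A minimal standalone sketch of that pattern, not the runtime's code, is below; work_pool, try_run_one, and drain are hypothetical stand-ins for kmpc_task_queue_t and its helpers.

#include <atomic>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>

// Hypothetical stand-in for a task queue; not the runtime's kmpc_task_queue_t.
struct work_pool {
  std::mutex lock;
  std::deque<std::function<void()>> tasks;
  std::atomic<int> unfinished{0}; // incremented when a task is enqueued

  // Run one queued task if available; return false when the queue is empty.
  bool try_run_one() {
    std::function<void()> task;
    {
      std::lock_guard<std::mutex> g(lock);
      if (tasks.empty())
        return false;
      task = std::move(tasks.front());
      tasks.pop_front();
    }
    task();
    unfinished.fetch_sub(1, std::memory_order_acq_rel);
    return true;
  }
};

// Spin until every task has finished, yielding whenever no work can be taken
// (analogous to the KMP_YIELD_WHEN loops in the patch above).
void drain(work_pool &pool) {
  while (pool.unfinished.load(std::memory_order_acquire) > 0) {
    if (!pool.try_run_one())
      std::this_thread::yield();
  }
}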
 
 /*  Enqueues a task for thunk previously created by __kmpc_task_buffer. */
 /*  Returns nonzero if just filled up queue  */
 
-kmp_int32
-__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
-{
-    kmp_int32          ret;
-    kmpc_task_queue_t *queue;
-    int                in_parallel;
-    kmp_taskq_t       *tq;
+kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) {
+  kmp_int32 ret;
+  kmpc_task_queue_t *queue;
+  int in_parallel;
+  kmp_taskq_t *tq;
 
-    KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid));
 
-    KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK));  /*  thunk->th_task is a regular task  */
+  KMP_DEBUG_ASSERT(!(thunk->th_flags &
+                     TQF_TASKQ_TASK)); /*  thunk->th_task is a regular task  */
 
-    tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
-    queue       = thunk->th.th_shareds->sv_queue;
-    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+  queue = thunk->th.th_shareds->sv_queue;
+  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
 
-    if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
-        thunk->th_tasknum = ++queue->tq_tasknum_queuing;
+  if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
+    thunk->th_tasknum = ++queue->tq_tasknum_queuing;
 
-    /* For serial execution dequeue the preceding task and execute it, if one exists */
-    /* This cannot be the last task.  That one is handled in __kmpc_end_taskq */
+  /* For serial execution dequeue the preceding task and execute it, if one
+   * exists */
+  /* This cannot be the last task.  That one is handled in __kmpc_end_taskq */
 
-    if (!in_parallel && queue->tq_nfull > 0) {
-        kmpc_thunk_t *prev_thunk;
+  if (!in_parallel && queue->tq_nfull > 0) {
+    kmpc_thunk_t *prev_thunk;
 
-        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
+    KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
 
-        prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
+    prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
 
-        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));
+    KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid,
+                  prev_thunk, queue));
 
-        __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
-    }
+    __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel);
+  }
 
-    /* The instrumentation sequence is:  __kmpc_task_buffer(), initialize private    */
-    /* variables, __kmpc_task().  The __kmpc_task_buffer routine checks that the     */
-    /* task queue is not full and allocates a thunk (which is then passed to         */
-    /* __kmpc_task()).  So, the enqueue below should never fail due to a full queue. */
+  /* The instrumentation sequence is:  __kmpc_task_buffer(), initialize private
+     variables, __kmpc_task().  The __kmpc_task_buffer routine checks that the
+     task queue is not full and allocates a thunk (which is then passed to
+     __kmpc_task()).  So, the enqueue below should never fail due to a full
+     queue. */
 
-    KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
-    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+  KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
+  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
 
-    ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );
+  ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel);
 
-    KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
-    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
+  KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
+  KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid));
 
-    KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid));
 
-    return ret;
+  return ret;
 }
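
The instrumentation-sequence comment in __kmpc_task describes the order the compiler emits calls for each task: __kmpc_task_buffer(), initialization of the task's private variables, then __kmpc_task(). A hedged sketch of that sequence is below; it only compiles inside the runtime where kmp.h's types and these entry points are visible, emit_one_task is a hypothetical wrapper, and the way private variables are filled into the thunk is compiler-defined and only indicated by a comment.

// Illustrative only: roughly the call order described by the comment above.
void emit_one_task(ident_t *loc, kmp_int32 gtid, kmpc_thunk_t *taskq_thunk,
                   kmpc_task_t task) {
  // 1. Reserve a thunk; __kmpc_task_buffer() guarantees the queue is not full.
  kmpc_thunk_t *thunk = __kmpc_task_buffer(loc, gtid, taskq_thunk, task);

  // 2. Compiler-emitted code would initialize the task's private variables
  //    in the thunk here (layout not shown; it is compiler-defined).

  // 3. Enqueue the task; the return value is nonzero if the queue just filled.
  (void)__kmpc_task(loc, gtid, thunk);
}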
 
 /*  enqueues a taskq_task for thunk previously created by __kmpc_taskq  */
 /*  this should never be called unless in a parallel context            */
 
-void
-__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
-{
-    kmpc_task_queue_t *queue;
-    kmp_taskq_t       *tq  = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
-    int                tid = __kmp_tid_from_gtid( global_tid );
+void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk,
+                       kmp_int32 status) {
+  kmpc_task_queue_t *queue;
+  kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+  int tid = __kmp_tid_from_gtid(global_tid);
 
-    KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
-    KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
-    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
+  KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid));
+  KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
+  KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid));
 
-    queue = thunk->th.th_shareds->sv_queue;
+  queue = thunk->th.th_shareds->sv_queue;
 
-    if ( __kmp_env_consistency_check )
-        __kmp_pop_workshare( global_tid, ct_taskq, loc );
+  if (__kmp_env_consistency_check)
+    __kmp_pop_workshare(global_tid, ct_taskq, loc);
 
-    /*  thunk->th_task is the taskq_task  */
-    KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);
+  /*  thunk->th_task is the taskq_task  */
+  KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK);
 
-    /*  not supposed to call __kmpc_taskq_task if it's already enqueued  */
-    KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);
+  /*  not supposed to call __kmpc_taskq_task if it's already enqueued  */
+  KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL);
 
-    /* dequeue taskq thunk from curr_thunk stack */
-    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
-    thunk->th_encl_thunk = NULL;
+  /* dequeue taskq thunk from curr_thunk stack */
+  tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+  thunk->th_encl_thunk = NULL;
 
-    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
+  KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
 
-    thunk->th_status = status;
+  thunk->th_status = status;
 
-    KMP_MB();  /*  flush thunk->th_status before taskq_task enqueued to avoid race condition  */
+  // Flush thunk->th_status before taskq_task enqueued to avoid race condition
+  KMP_MB();
 
-    /*  enqueue taskq_task in thunk into special slot in queue     */
-    /* GEH - probably don't need to lock taskq slot since only one */
-    /*       thread enqueues & already a lock set at dequeue point */
+  /* enqueue taskq_task in thunk into special slot in queue     */
+  /* GEH - probably don't need to lock taskq slot since only one */
+  /*       thread enqueues & already a lock set at dequeue point */
 
-    queue->tq_taskq_slot = thunk;
+  queue->tq_taskq_slot = thunk;
 
-    KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid));
 }
 
-/*  ends a taskq_task; done generating tasks  */
+/* ends a taskq_task; done generating tasks  */
 
-void
-__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
-{
-    kmp_taskq_t       *tq;
-    kmpc_task_queue_t *queue;
-    int                in_parallel;
-    int                tid;
+void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid,
+                           kmpc_thunk_t *thunk) {
+  kmp_taskq_t *tq;
+  kmpc_task_queue_t *queue;
+  int in_parallel;
+  int tid;
 
-    KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
 
-    tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
-    queue       = thunk->th.th_shareds->sv_queue;
-    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
-    tid         = __kmp_tid_from_gtid( global_tid );
+  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+  queue = thunk->th.th_shareds->sv_queue;
+  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+  tid = __kmp_tid_from_gtid(global_tid);
 
-    if ( __kmp_env_consistency_check )
-        __kmp_pop_workshare( global_tid, ct_taskq, loc );
+  if (__kmp_env_consistency_check)
+    __kmp_pop_workshare(global_tid, ct_taskq, loc);
 
-    if (in_parallel) {
-#if KMP_ARCH_X86 || \
-    KMP_ARCH_X86_64
+  if (in_parallel) {
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 
-        KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
+    KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_ALL_TASKS_QUEUED);
 #else
-        {
-            __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
+    {
+      __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
 
-            KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                       /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
+      // Make sure data structures are in consistent state before querying them
+      // Seems to work without this for digital/alpha, needed for IBM/RS6000
+      KMP_MB();
 
-            queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
-
-            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-        }
-#endif
+      queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
+      __kmp_release_lock(&queue->tq_queue_lck, global_tid);
     }
+#endif
+  }
 
-    if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
-        /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */
-        /* queue if TQF_IS_LASTPRIVATE so we can positively identify that last task      */
-        /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.  When         */
-        /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so    */
-        /* we know the last one in the queue is the lastprivate task.  Mark the queue    */
-        /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that     */
-        /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */
-        /* bit signals the instrumented code to do copy-outs after execution).           */
-
-        if (! in_parallel) {
-            /* No synchronization needed for serial context */
-            queue->tq_flags |= TQF_IS_LAST_TASK;
-        }
-        else {
-#if KMP_ARCH_X86 || \
-    KMP_ARCH_X86_64
+  if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
+    /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in
+       the queue if TQF_IS_LASTPRIVATE so we can positively identify that last
+       task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.
+       When __kmpc_end_taskq_task() is called we are done generating all the
+       tasks, so we know the last one in the queue is the lastprivate task.
+       Mark the queue as having gotten to this state via tq_flags &
+       TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags &
+       TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do
+       copy-outs after execution). */
+    if (!in_parallel) {
+      /* No synchronization needed for serial context */
+      queue->tq_flags |= TQF_IS_LAST_TASK;
+    } else {
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 
-            KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
+      KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_IS_LAST_TASK);
 #else
-            {
-                __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
-
-                KMP_MB();  /* make sure data structures are in consistent state before querying them */
-                           /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
+      {
+        __kmp_acquire_lock(&queue->tq_queue_lck, global_tid);
 
-                queue->tq_flags |= TQF_IS_LAST_TASK;
+        // Make sure data structures in consistent state before querying them
+        // Seems to work without this for digital/alpha, needed for IBM/RS6000
+        KMP_MB();
 
-                __kmp_release_lock(& queue->tq_queue_lck, global_tid);
-            }
+        queue->tq_flags |= TQF_IS_LAST_TASK;
+        __kmp_release_lock(&queue->tq_queue_lck, global_tid);
+      }
 #endif
-            /* to prevent race condition where last task is dequeued but */
-            /* flag isn't visible yet (not sure about this)              */
-            KMP_MB();
-        }
+      /* to prevent race condition where last task is dequeued but */
+      /* flag isn't visible yet (not sure about this)              */
+      KMP_MB();
     }
+  }
 
-    /* dequeue taskq thunk from curr_thunk stack */
-    if (in_parallel) {
-        tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
-        thunk->th_encl_thunk = NULL;
+  /* dequeue taskq thunk from curr_thunk stack */
+  if (in_parallel) {
+    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
+    thunk->th_encl_thunk = NULL;
 
-        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
-    }
+    KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid));
+  }
 
-    KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
 }
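
The #if blocks in __kmpc_end_taskq_task set a flag bit either with an atomic test-then-or (x86/x86_64) or under the queue lock with explicit memory barriers elsewhere. As a portable illustration of the same "publish a flag bit" idea, a standalone sketch using std::atomic is below; the enum values and set_flag are hypothetical names that merely mirror the TQF_* flags, not the runtime's definitions.

#include <atomic>

enum : unsigned { ALL_TASKS_QUEUED = 1u << 0, IS_LAST_TASK = 1u << 1 };

// Atomically set a flag bit so other threads observe it without a lock;
// release ordering publishes prior writes along with the flag.
void set_flag(std::atomic<unsigned> &flags, unsigned bit) {
  flags.fetch_or(bit, std::memory_order_release);
}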
 
 /* returns thunk for a regular task based on taskq_thunk              */
 /* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK)  */
 
-kmpc_thunk_t *
-__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
-{
-    kmp_taskq_t       *tq;
-    kmpc_task_queue_t *queue;
-    kmpc_thunk_t      *new_thunk;
-    int                in_parallel;
+kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid,
+                                 kmpc_thunk_t *taskq_thunk, kmpc_task_t task) {
+  kmp_taskq_t *tq;
+  kmpc_task_queue_t *queue;
+  kmpc_thunk_t *new_thunk;
+  int in_parallel;
 
-    KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid));
 
-    KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK);  /*  taskq_thunk->th_task is the taskq_task  */
+  KMP_DEBUG_ASSERT(
+      taskq_thunk->th_flags &
+      TQF_TASKQ_TASK); /*  taskq_thunk->th_task is the taskq_task  */
 
-    tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
-    queue       = taskq_thunk->th.th_shareds->sv_queue;
-    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
+  tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq;
+  queue = taskq_thunk->th.th_shareds->sv_queue;
+  in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
 
-    /* The instrumentation sequence is:  __kmpc_task_buffer(), initialize private */
-    /* variables, __kmpc_task().  The __kmpc_task_buffer routine checks that the  */
-    /* task queue is not full and allocates a thunk (which is then passed to      */
-    /* __kmpc_task()).  So, we can pre-allocate a thunk here assuming it will be  */
-    /* the next to be enqueued in __kmpc_task().                                  */
+  /* The instrumentation sequence is:  __kmpc_task_buffer(), initialize private
+     variables, __kmpc_task().  The __kmpc_task_buffer routine checks that the
+     task queue is not full and allocates a thunk (which is then passed to
+     __kmpc_task()).  So, we can pre-allocate a thunk here assuming it will be
+     the next to be enqueued in __kmpc_task(). */
 
-    new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
-    new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
-    new_thunk->th_encl_thunk = NULL;
-    new_thunk->th_task       = task;
+  new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid);
+  new_thunk->th.th_shareds = (kmpc_shared_vars_t *)queue->tq_shareds[0].ai_data;
+  new_thunk->th_encl_thunk = NULL;
+  new_thunk->th_task = task;
 
-    /* GEH - shouldn't need to lock the read of tq_flags here */
-    new_thunk->th_flags      = queue->tq_flags & TQF_INTERFACE_FLAGS;
+  /* GEH - shouldn't need to lock the read of tq_flags here */
+  new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;
 
-    new_thunk->th_status     = 0;
+  new_thunk->th_status = 0;
 
-    KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));
+  KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK));
 
-    KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
-    KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));
+  KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
+  KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid));
 
-    KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));
+  KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid));
 
-    return new_thunk;
+  return new_thunk;
 }
-
-/* --------------------------------------------------------------------------- */

Modified: openmp/trunk/runtime/src/kmp_threadprivate.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_threadprivate.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_threadprivate.cpp (original)
+++ openmp/trunk/runtime/src/kmp_threadprivate.cpp Fri May 12 13:01:32 2017
@@ -14,502 +14,476 @@
 
 
 #include "kmp.h"
-#include "kmp_itt.h"
 #include "kmp_i18n.h"
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+#include "kmp_itt.h"
 
 #define USE_CHECKS_COMMON
 
-#define KMP_INLINE_SUBR         1
-
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
+#define KMP_INLINE_SUBR 1
 
-struct shared_table     __kmp_threadprivate_d_table;
+void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
+                                           void *data_addr, size_t pc_size);
+struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
+                                                void *data_addr,
+                                                size_t pc_size);
 
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
+struct shared_table __kmp_threadprivate_d_table;
 
 static
 #ifdef KMP_INLINE_SUBR
-__forceinline
+    __forceinline
 #endif
-struct private_common *
-__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr )
+    struct private_common *
+    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
+                                         void *pc_addr)
 
 {
-    struct private_common *tn;
+  struct private_common *tn;
 
 #ifdef KMP_TASK_COMMON_DEBUG
-    KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n",
-                    gtid, pc_addr ) );
-    dump_list();
+  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
+                "address %p\n",
+                gtid, pc_addr));
+  dump_list();
 #endif
 
-    for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
-        if (tn->gbl_addr == pc_addr) {
+  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
+    if (tn->gbl_addr == pc_addr) {
 #ifdef KMP_TASK_COMMON_DEBUG
-            KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n",
-                            gtid, pc_addr ) );
+      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
+                    "node %p on list\n",
+                    gtid, pc_addr));
 #endif
-            return tn;
-        }
+      return tn;
     }
-    return 0;
+  }
+  return 0;
 }
 
 static
 #ifdef KMP_INLINE_SUBR
-__forceinline
+    __forceinline
 #endif
-struct shared_common *
-__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr )
-{
-    struct shared_common *tn;
+    struct shared_common *
+    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
+                                  void *pc_addr) {
+  struct shared_common *tn;
 
-    for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) {
-        if (tn->gbl_addr == pc_addr) {
+  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
+    if (tn->gbl_addr == pc_addr) {
 #ifdef KMP_TASK_COMMON_DEBUG
-            KC_TRACE( 10, ( "__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
-                            gtid, pc_addr ) );
+      KC_TRACE(
+          10,
+          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
+           gtid, pc_addr));
 #endif
-            return tn;
-        }
-    }
-    return 0;
-}
-
-
-/*
- *      Create a template for the data initialized storage.
- *      Either the template is NULL indicating zero fill,
- *      or the template is a copy of the original data.
- */
-
-static struct private_data *
-__kmp_init_common_data( void *pc_addr, size_t pc_size )
-{
-    struct private_data *d;
-    size_t       i;
-    char        *p;
-
-    d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) );
-/*
-    d->data = 0;  // AC: commented out because __kmp_allocate zeroes the memory
-    d->next = 0;
-*/
-    d->size = pc_size;
-    d->more = 1;
-
-    p = (char*)pc_addr;
-
-    for (i = pc_size;  i > 0; --i) {
-        if (*p++ != '\0') {
-            d->data = __kmp_allocate( pc_size );
-            KMP_MEMCPY( d->data, pc_addr, pc_size );
-            break;
-        }
+      return tn;
     }
-
-    return d;
+  }
+  return 0;
 }
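
Both lookup routines above walk a fixed-size table of singly linked chains, hashing on the threadprivate variable's address. A standalone sketch of that lookup idea follows; node, hash_addr, find, and TABLE_SIZE are hypothetical names and do not reproduce the runtime's KMP_HASH or common_table definitions.

#include <cstddef>
#include <cstdint>

struct node {
  void *key;  // address of the threadprivate variable
  node *next; // next entry in this hash chain
};

constexpr std::size_t TABLE_SIZE = 1024; // power of two for the mask below

inline std::size_t hash_addr(void *p) {
  return static_cast<std::size_t>(
      (reinterpret_cast<std::uintptr_t>(p) >> 4) & (TABLE_SIZE - 1));
}

node *find(node *table[TABLE_SIZE], void *key) {
  for (node *n = table[hash_addr(key)]; n; n = n->next)
    if (n->key == key)
      return n;   // found the entry for this address
  return nullptr; // not present
}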
 
-/*
- *      Initialize the data area from the template.
- */
-
-static void
-__kmp_copy_common_data( void *pc_addr, struct private_data *d )
-{
-    char *addr = (char *) pc_addr;
-    int   i, offset;
-
-    for (offset = 0; d != 0; d = d->next) {
-        for (i = d->more; i > 0; --i) {
-            if (d->data == 0)
-                memset( & addr[ offset ], '\0', d->size );
-            else
-                KMP_MEMCPY( & addr[ offset ], d->data, d->size );
-            offset += d->size;
-        }
+// Create a template for the data initialized storage. Either the template is
+// NULL indicating zero fill, or the template is a copy of the original data.
+static struct private_data *__kmp_init_common_data(void *pc_addr,
+                                                   size_t pc_size) {
+  struct private_data *d;
+  size_t i;
+  char *p;
+
+  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
+  /*
+      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
+     memory
+      d->next = 0;
+  */
+  d->size = pc_size;
+  d->more = 1;
+
+  p = (char *)pc_addr;
+
+  for (i = pc_size; i > 0; --i) {
+    if (*p++ != '\0') {
+      d->data = __kmp_allocate(pc_size);
+      KMP_MEMCPY(d->data, pc_addr, pc_size);
+      break;
+    }
+  }
+
+  return d;
+}
+
+// Initialize the data area from the template.
+static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
+  char *addr = (char *)pc_addr;
+  int i, offset;
+
+  for (offset = 0; d != 0; d = d->next) {
+    for (i = d->more; i > 0; --i) {
+      if (d->data == 0)
+        memset(&addr[offset], '\0', d->size);
+      else
+        KMP_MEMCPY(&addr[offset], d->data, d->size);
+      offset += d->size;
     }
+  }
 }
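
As the comment above __kmp_init_common_data says, the template is either empty (the original block was all zeros, so copies can simply be zero-filled) or a byte-for-byte copy of the original data that is replayed into each thread's copy. A self-contained sketch of that same idea, independent of the runtime's private_data chain, is below; init_template, make_template, and apply_template are hypothetical names.

#include <cstddef>
#include <cstring>
#include <vector>

// Empty bytes means "zero fill"; otherwise the bytes are the copy template.
struct init_template {
  std::vector<unsigned char> bytes;
  std::size_t size = 0;
};

init_template make_template(const void *src, std::size_t size) {
  init_template t;
  t.size = size;
  const unsigned char *p = static_cast<const unsigned char *>(src);
  for (std::size_t i = 0; i < size; ++i)
    if (p[i] != 0) {            // found a nonzero byte: keep a full copy
      t.bytes.assign(p, p + size);
      break;
    }
  return t;
}

void apply_template(void *dst, const init_template &t) {
  if (t.bytes.empty())
    std::memset(dst, 0, t.size);              // zero-fill case
  else
    std::memcpy(dst, t.bytes.data(), t.size); // replay the original bytes
}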
 
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
 /* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
-void
-__kmp_common_initialize( void )
-{
-    if( ! TCR_4(__kmp_init_common) ) {
-        int q;
+void __kmp_common_initialize(void) {
+  if (!TCR_4(__kmp_init_common)) {
+    int q;
 #ifdef KMP_DEBUG
-        int gtid;
+    int gtid;
 #endif
 
-        __kmp_threadpriv_cache_list = NULL;
+    __kmp_threadpriv_cache_list = NULL;
 
 #ifdef KMP_DEBUG
-        /* verify the uber masters were initialized */
-        for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ )
-            if( __kmp_root[gtid] ) {
-                KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread );
-                for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q)
-                    KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] );
-/*                    __kmp_root[ gitd ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/
-            }
+    /* verify the uber masters were initialized */
+    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
+      if (__kmp_root[gtid]) {
+        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
+        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
+          KMP_DEBUG_ASSERT(
+              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
+        /*                    __kmp_root[ gitd ]-> r.r_uber_thread ->
+         * th.th_pri_common -> data[ q ] = 0;*/
+      }
 #endif /* KMP_DEBUG */
 
-        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
-            __kmp_threadprivate_d_table.data[ q ] = 0;
+    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
+      __kmp_threadprivate_d_table.data[q] = 0;
 
-        TCW_4(__kmp_init_common, TRUE);
-    }
+    TCW_4(__kmp_init_common, TRUE);
+  }
 }
 
 /* Call all destructors for threadprivate data belonging to all threads.
    Currently unused! */
-void
-__kmp_common_destroy( void )
-{
-    if( TCR_4(__kmp_init_common) ) {
-        int q;
-
-        TCW_4(__kmp_init_common, FALSE);
-
-        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
-            int gtid;
-            struct private_common *tn;
-            struct shared_common  *d_tn;
-
-            /*  C++ destructors need to be called once per thread before exiting  */
-            /*  don't call destructors for master thread though unless we used copy constructor */
-
-            for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) {
-                if (d_tn->is_vec) {
-                    if (d_tn->dt.dtorv != 0) {
-                        for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
-                            if( __kmp_threads[gtid] ) {
-                                if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
-                                                         (! KMP_UBER_GTID (gtid)) ) {
-                                    tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
-                                                                               gtid, d_tn->gbl_addr );
-                                    if (tn) {
-                                        (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
-                                    }
-                                }
-                            }
-                        }
-                        if (d_tn->obj_init != 0) {
-                            (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
-                        }
-                    }
-                } else {
-                    if (d_tn->dt.dtor != 0) {
-                        for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
-                            if( __kmp_threads[gtid] ) {
-                                if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
-                                                         (! KMP_UBER_GTID (gtid)) ) {
-                                    tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common,
-                                                                               gtid, d_tn->gbl_addr );
-                                    if (tn) {
-                                        (*d_tn->dt.dtor) (tn->par_addr);
-                                    }
-                                }
-                            }
-                        }
-                        if (d_tn->obj_init != 0) {
-                            (*d_tn->dt.dtor) (d_tn->obj_init);
-                        }
-                    }
+void __kmp_common_destroy(void) {
+  if (TCR_4(__kmp_init_common)) {
+    int q;
+
+    TCW_4(__kmp_init_common, FALSE);
+
+    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
+      int gtid;
+      struct private_common *tn;
+      struct shared_common *d_tn;
+
+      /* C++ destructors need to be called once per thread before exiting.
+         Don't call destructors for master thread though unless we used copy
+         constructor */
+
+      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
+           d_tn = d_tn->next) {
+        if (d_tn->is_vec) {
+          if (d_tn->dt.dtorv != 0) {
+            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
+              if (__kmp_threads[gtid]) {
+                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
+                                       : (!KMP_UBER_GTID(gtid))) {
+                  tn = __kmp_threadprivate_find_task_common(
+                      __kmp_threads[gtid]->th.th_pri_common, gtid,
+                      d_tn->gbl_addr);
+                  if (tn) {
+                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
+                  }
+                }
+              }
+            }
+            if (d_tn->obj_init != 0) {
+              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
+            }
+          }
+        } else {
+          if (d_tn->dt.dtor != 0) {
+            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
+              if (__kmp_threads[gtid]) {
+                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
+                                       : (!KMP_UBER_GTID(gtid))) {
+                  tn = __kmp_threadprivate_find_task_common(
+                      __kmp_threads[gtid]->th.th_pri_common, gtid,
+                      d_tn->gbl_addr);
+                  if (tn) {
+                    (*d_tn->dt.dtor)(tn->par_addr);
+                  }
                 }
+              }
             }
-            __kmp_threadprivate_d_table.data[ q ] = 0;
+            if (d_tn->obj_init != 0) {
+              (*d_tn->dt.dtor)(d_tn->obj_init);
+            }
+          }
         }
+      }
+      __kmp_threadprivate_d_table.data[q] = 0;
     }
+  }
 }
 
 /* Call all destructors for threadprivate data belonging to this thread */
-void
-__kmp_common_destroy_gtid( int gtid )
-{
-    struct private_common *tn;
-    struct shared_common *d_tn;
-
-    KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) );
-    if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) :
-                             (! KMP_UBER_GTID (gtid)) ) {
-
-        if( TCR_4(__kmp_init_common) ) {
-
-            /* Cannot do this here since not all threads have destroyed their data */
-            /* TCW_4(__kmp_init_common, FALSE); */
-
-            for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) {
-
-                d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
-                                                      gtid, tn->gbl_addr );
-
-                KMP_DEBUG_ASSERT( d_tn );
-
-                if (d_tn->is_vec) {
-                    if (d_tn->dt.dtorv != 0) {
-                        (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len);
-                    }
-                    if (d_tn->obj_init != 0) {
-                        (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len);
-                    }
-                } else {
-                    if (d_tn->dt.dtor != 0) {
-                        (void) (*d_tn->dt.dtor) (tn->par_addr);
-                    }
-                    if (d_tn->obj_init != 0) {
-                        (void) (*d_tn->dt.dtor) (d_tn->obj_init);
-                    }
-                }
-            }
-            KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n",
-                           gtid ) );
-        }
+void __kmp_common_destroy_gtid(int gtid) {
+  struct private_common *tn;
+  struct shared_common *d_tn;
+
+  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
+  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {
+
+    if (TCR_4(__kmp_init_common)) {
+
+      /* Cannot do this here since not all threads have destroyed their data */
+      /* TCW_4(__kmp_init_common, FALSE); */
+
+      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {
+
+        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
+                                             tn->gbl_addr);
+
+        KMP_DEBUG_ASSERT(d_tn);
+
+        if (d_tn->is_vec) {
+          if (d_tn->dt.dtorv != 0) {
+            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
+          }
+          if (d_tn->obj_init != 0) {
+            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
+          }
+        } else {
+          if (d_tn->dt.dtor != 0) {
+            (void)(*d_tn->dt.dtor)(tn->par_addr);
+          }
+          if (d_tn->obj_init != 0) {
+            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
+          }
+        }
+      }
+      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
+                    "complete\n",
+                    gtid));
     }
+  }
 }
 
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
-
 #ifdef KMP_TASK_COMMON_DEBUG
-static void
-dump_list( void )
-{
-    int p, q;
+static void dump_list(void) {
+  int p, q;
 
-    for (p = 0; p < __kmp_all_nth; ++p) {
-        if( !__kmp_threads[p] ) continue;
-        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
-            if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) {
-                struct private_common *tn;
-
-                KC_TRACE( 10, ( "\tdump_list: gtid:%d addresses\n", p ) );
-
-                for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next)                 {
-                    KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
-                                    tn->gbl_addr, tn->par_addr ) );
-                }
-            }
+  for (p = 0; p < __kmp_all_nth; ++p) {
+    if (!__kmp_threads[p])
+      continue;
+    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
+      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
+        struct private_common *tn;
+
+        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));
+
+        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
+             tn = tn->next) {
+          KC_TRACE(10,
+                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
+                    tn->gbl_addr, tn->par_addr));
         }
+      }
     }
+  }
 }
 #endif /* KMP_TASK_COMMON_DEBUG */
 
+// NOTE: this routine is to be called only from the serial part of the program.
+void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
+                                           void *data_addr, size_t pc_size) {
+  struct shared_common **lnk_tn, *d_tn;
+  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
+                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);
 
-/*
- * NOTE: this routine is to be called only from the serial part of the program.
- */
+  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
+                                       pc_addr);
 
-void
-kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
-{
-    struct shared_common **lnk_tn, *d_tn;
-    KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] &&
-            __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 );
-
-    d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
-                                          gtid, pc_addr );
-
-    if (d_tn == 0) {
-        d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
-
-        d_tn->gbl_addr = pc_addr;
-        d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
-/*
-        d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
-        d_tn->ct.ctor = 0;
-        d_tn->cct.cctor = 0;;
-        d_tn->dt.dtor = 0;
-        d_tn->is_vec = FALSE;
-        d_tn->vec_len = 0L;
-*/
-        d_tn->cmn_size = pc_size;
-
-        __kmp_acquire_lock( &__kmp_global_lock, gtid );
-
-        lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
-
-        d_tn->next = *lnk_tn;
-        *lnk_tn = d_tn;
-
-        __kmp_release_lock( &__kmp_global_lock, gtid );
-    }
-}
+  if (d_tn == 0) {
+    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
 
-struct private_common *
-kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size )
-{
-    struct private_common *tn, **tt;
-    struct shared_common  *d_tn;
-
-    /* +++++++++ START OF CRITICAL SECTION +++++++++ */
+    d_tn->gbl_addr = pc_addr;
+    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
+    /*
+            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
+       zeroes the memory
+            d_tn->ct.ctor = 0;
+            d_tn->cct.cctor = 0;;
+            d_tn->dt.dtor = 0;
+            d_tn->is_vec = FALSE;
+            d_tn->vec_len = 0L;
+    */
+    d_tn->cmn_size = pc_size;
 
-    __kmp_acquire_lock( & __kmp_global_lock, gtid );
+    __kmp_acquire_lock(&__kmp_global_lock, gtid);
 
-    tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) );
+    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
 
-    tn->gbl_addr = pc_addr;
+    d_tn->next = *lnk_tn;
+    *lnk_tn = d_tn;
 
-    d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
-                                          gtid, pc_addr );     /* Only the MASTER data table exists. */
+    __kmp_release_lock(&__kmp_global_lock, gtid);
+  }
+}
 
-    if (d_tn != 0) {
-        /* This threadprivate variable has already been seen. */
+struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
+                                                void *data_addr,
+                                                size_t pc_size) {
+  struct private_common *tn, **tt;
+  struct shared_common *d_tn;
 
-        if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) {
-            d_tn->cmn_size = pc_size;
+  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
+  __kmp_acquire_lock(&__kmp_global_lock, gtid);
 
-            if (d_tn->is_vec) {
-                if (d_tn->ct.ctorv != 0) {
-                    /* Construct from scratch so no prototype exists */
-                    d_tn->obj_init = 0;
-                }
-                else if (d_tn->cct.cctorv != 0) {
-                    /* Now data initialize the prototype since it was previously registered */
-                    d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
-                    (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len);
-                }
-                else {
-                    d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
-                }
-            } else {
-                if (d_tn->ct.ctor != 0) {
-                    /* Construct from scratch so no prototype exists */
-                    d_tn->obj_init = 0;
-                }
-                else if (d_tn->cct.cctor != 0) {
-                    /* Now data initialize the prototype since it was previously registered */
-                    d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size );
-                    (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr);
-                }
-                else {
-                    d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size );
-                }
-            }
-        }
-    }
-    else {
-        struct shared_common **lnk_tn;
+  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));
 
-        d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
-        d_tn->gbl_addr = pc_addr;
-        d_tn->cmn_size = pc_size;
-        d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size );
-/*
-        d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
-        d_tn->ct.ctor = 0;
-        d_tn->cct.cctor = 0;
-        d_tn->dt.dtor = 0;
-        d_tn->is_vec = FALSE;
-        d_tn->vec_len = 0L;
-*/
-        lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]);
+  tn->gbl_addr = pc_addr;
 
-        d_tn->next = *lnk_tn;
-        *lnk_tn = d_tn;
-    }
+  d_tn = __kmp_find_shared_task_common(
+      &__kmp_threadprivate_d_table, gtid,
+      pc_addr); /* Only the MASTER data table exists. */
 
-    tn->cmn_size = d_tn->cmn_size;
+  if (d_tn != 0) {
+    /* This threadprivate variable has already been seen. */
 
-    if ( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) {
-        tn->par_addr = (void *) pc_addr;
-    }
-    else {
-        tn->par_addr = (void *) __kmp_allocate( tn->cmn_size );
-    }
+    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
+      d_tn->cmn_size = pc_size;
 
-    __kmp_release_lock( & __kmp_global_lock, gtid );
+      if (d_tn->is_vec) {
+        if (d_tn->ct.ctorv != 0) {
+          /* Construct from scratch so no prototype exists */
+          d_tn->obj_init = 0;
+        } else if (d_tn->cct.cctorv != 0) {
+          /* Now data initialize the prototype since it was previously
+           * registered */
+          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
+          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
+        } else {
+          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
+        }
+      } else {
+        if (d_tn->ct.ctor != 0) {
+          /* Construct from scratch so no prototype exists */
+          d_tn->obj_init = 0;
+        } else if (d_tn->cct.cctor != 0) {
+          /* Now data initialize the prototype since it was previously
+             registered */
+          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
+          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
+        } else {
+          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
+        }
+      }
+    }
+  } else {
+    struct shared_common **lnk_tn;
+
+    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
+    d_tn->gbl_addr = pc_addr;
+    d_tn->cmn_size = pc_size;
+    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
+    /*
+            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
+       zeroes the memory
+            d_tn->ct.ctor = 0;
+            d_tn->cct.cctor = 0;
+            d_tn->dt.dtor = 0;
+            d_tn->is_vec = FALSE;
+            d_tn->vec_len = 0L;
+    */
+    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
+
+    d_tn->next = *lnk_tn;
+    *lnk_tn = d_tn;
+  }
+
+  tn->cmn_size = d_tn->cmn_size;
+
+  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
+    tn->par_addr = (void *)pc_addr;
+  } else {
+    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
+  }
 
-    /* +++++++++ END OF CRITICAL SECTION +++++++++ */
+  __kmp_release_lock(&__kmp_global_lock, gtid);
+/* +++++++++ END OF CRITICAL SECTION +++++++++ */
 
 #ifdef USE_CHECKS_COMMON
-        if (pc_size > d_tn->cmn_size) {
-            KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%"
-                            KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
-                            pc_addr, pc_size, d_tn->cmn_size ) );
-            KMP_FATAL( TPCommonBlocksInconsist );
-        }
+  if (pc_size > d_tn->cmn_size) {
+    KC_TRACE(
+        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
+             " ,%" KMP_UINTPTR_SPEC ")\n",
+             pc_addr, pc_size, d_tn->cmn_size));
+    KMP_FATAL(TPCommonBlocksInconsist);
+  }
 #endif /* USE_CHECKS_COMMON */
 
-    tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]);
+  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);
 
 #ifdef KMP_TASK_COMMON_DEBUG
-    if (*tt != 0) {
-        KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
-                        gtid, pc_addr ) );
-    }
+  if (*tt != 0) {
+    KC_TRACE(
+        10,
+        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
+         gtid, pc_addr));
+  }
 #endif
-    tn->next = *tt;
-    *tt = tn;
+  tn->next = *tt;
+  *tt = tn;
 
 #ifdef KMP_TASK_COMMON_DEBUG
-    KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
-                    gtid, pc_addr ) );
-    dump_list( );
+  KC_TRACE(10,
+           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
+            gtid, pc_addr));
+  dump_list();
 #endif
 
-    /* Link the node into a simple list */
+  /* Link the node into a simple list */
 
-    tn->link = __kmp_threads[ gtid ]->th.th_pri_head;
-    __kmp_threads[ gtid ]->th.th_pri_head = tn;
+  tn->link = __kmp_threads[gtid]->th.th_pri_head;
+  __kmp_threads[gtid]->th.th_pri_head = tn;
 
 #ifdef BUILD_TV
-    __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr );
+  __kmp_tv_threadprivate_store(__kmp_threads[gtid], tn->gbl_addr, tn->par_addr);
 #endif
 
-    if( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) )
-        return tn;
-
-    /*
-     * if C++ object with copy constructor, use it;
-     * else if C++ object with constructor, use it for the non-master copies only;
-     * else use pod_init and memcpy
-     *
-     * C++ constructors need to be called once for each non-master thread on allocate
-     * C++ copy constructors need to be called once for each thread on allocate
-     */
+  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
+    return tn;
 
-    /*
-     * C++ object with constructors/destructors;
-     * don't call constructors for master thread though
-     */
-    if (d_tn->is_vec) {
-        if ( d_tn->ct.ctorv != 0) {
-            (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len);
-        } else if (d_tn->cct.cctorv != 0) {
-            (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len);
-        } else if (tn->par_addr != tn->gbl_addr) {
-            __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
-        }
-    } else {
-        if ( d_tn->ct.ctor != 0 ) {
-            (void) (*d_tn->ct.ctor) (tn->par_addr);
-        } else if (d_tn->cct.cctor != 0) {
-            (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init);
-        } else if (tn->par_addr != tn->gbl_addr) {
-            __kmp_copy_common_data( tn->par_addr, d_tn->pod_init );
-        }
-    }
-/* !BUILD_OPENMP_C
-    if (tn->par_addr != tn->gbl_addr)
-        __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
+  /* if C++ object with copy constructor, use it;
+   * else if C++ object with constructor, use it for the non-master copies only;
+   * else use pod_init and memcpy
+   *
+   * C++ constructors need to be called once for each non-master thread on
+   * allocate
+   * C++ copy constructors need to be called once for each thread on allocate */
+
+  /* C++ object with constructors/destructors; don't call constructors for
+     master thread though */
+  if (d_tn->is_vec) {
+    if (d_tn->ct.ctorv != 0) {
+      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
+    } else if (d_tn->cct.cctorv != 0) {
+      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
+    } else if (tn->par_addr != tn->gbl_addr) {
+      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
+    }
+  } else {
+    if (d_tn->ct.ctor != 0) {
+      (void)(*d_tn->ct.ctor)(tn->par_addr);
+    } else if (d_tn->cct.cctor != 0) {
+      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
+    } else if (tn->par_addr != tn->gbl_addr) {
+      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
+    }
+  }
+  /* !BUILD_OPENMP_C
+      if (tn->par_addr != tn->gbl_addr)
+          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */
 
-    return tn;
+  return tn;
 }
 
 /* ------------------------------------------------------------------------ */
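
For context, a short illustrative sketch of how the constructor hooks used above
get wired up for a C++ threadprivate object (thunk names are invented and the
exact kmpc_ctor/kmpc_cctor/kmpc_dtor typedefs live in kmp.h; this is not part of
the patch):

    /* For
     *   static MyObj obj;
     *   #pragma omp threadprivate(obj)
     * a code generator supplies small thunks and registers them:
     *
     *   obj_ctor(p)        construct a MyObj at p          -> ct.ctor
     *   obj_cctor(p, src)  copy-construct *src into p      -> cct.cctor
     *   obj_dtor(p)        destroy the MyObj at p          -> dt.dtor
     *
     *   __kmpc_threadprivate_register(&loc, &obj, obj_ctor, 0, obj_dtor);
     *
     * With only ct.ctor registered (the cctor is currently required to be 0,
     * see the KMP_ASSERT below), the else-branch above default-constructs each
     * non-master thread's copy; a plain POD registers nothing and falls
     * through to the __kmp_copy_common_data()/pod_init memcpy. */
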
@@ -528,91 +502,95 @@ kmp_threadprivate_insert( int gtid, void
  Register constructors and destructors for thread private data.
  This function is called when executing in parallel, when we know the thread id.
 */
-void
-__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor)
-{
-    struct shared_common *d_tn, **lnk_tn;
+void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
+                                   kmpc_cctor cctor, kmpc_dtor dtor) {
+  struct shared_common *d_tn, **lnk_tn;
 
-    KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) );
+  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));
 
 #ifdef USE_CHECKS_COMMON
-    /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
-    KMP_ASSERT( cctor == 0);
+  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
+  KMP_ASSERT(cctor == 0);
 #endif /* USE_CHECKS_COMMON */
 
-    /* Only the global data table exists. */
-    d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data );
-
-    if (d_tn == 0) {
-        d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
-        d_tn->gbl_addr = data;
-
-        d_tn->ct.ctor = ctor;
-        d_tn->cct.cctor = cctor;
-        d_tn->dt.dtor = dtor;
-/*
-        d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate zeroes the memory
-        d_tn->vec_len = 0L;
-        d_tn->obj_init = 0;
-        d_tn->pod_init = 0;
-*/
-        lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
-
-        d_tn->next = *lnk_tn;
-        *lnk_tn = d_tn;
-    }
-}
+  /* Only the global data table exists. */
+  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);
 
-void *
-__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size)
-{
-    void *ret;
-    struct private_common *tn;
+  if (d_tn == 0) {
+    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
+    d_tn->gbl_addr = data;
+
+    d_tn->ct.ctor = ctor;
+    d_tn->cct.cctor = cctor;
+    d_tn->dt.dtor = dtor;
+    /*
+            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
+       zeroes the memory
+            d_tn->vec_len = 0L;
+            d_tn->obj_init = 0;
+            d_tn->pod_init = 0;
+    */
+    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
+
+    d_tn->next = *lnk_tn;
+    *lnk_tn = d_tn;
+  }
+}
+
+void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
+                           size_t size) {
+  void *ret;
+  struct private_common *tn;
 
-    KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) );
+  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));
 
 #ifdef USE_CHECKS_COMMON
-    if (! __kmp_init_serial)
-        KMP_FATAL( RTLNotInitialized );
+  if (!__kmp_init_serial)
+    KMP_FATAL(RTLNotInitialized);
 #endif /* USE_CHECKS_COMMON */
 
-    if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! __kmp_foreign_tp ) {
-        /* The parallel address will NEVER overlap with the data_address */
-        /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */
-
-        KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) );
-        kmp_threadprivate_insert_private_data( global_tid, data, data, size );
-
-        ret = data;
-    }
-    else {
-        KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
-                       global_tid, data ) );
-        tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data );
+  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
+    /* The parallel address will NEVER overlap with the data_address */
+    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
+     * data_address; use data_address = data */
+
+    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
+                  global_tid));
+    kmp_threadprivate_insert_private_data(global_tid, data, data, size);
+
+    ret = data;
+  } else {
+    KC_TRACE(
+        50,
+        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
+         global_tid, data));
+    tn = __kmp_threadprivate_find_task_common(
+        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);
 
-        if ( tn ) {
-            KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) );
+    if (tn) {
+      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
 #ifdef USE_CHECKS_COMMON
-            if ((size_t) size > tn->cmn_size) {
-                KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n",
-                                data, size, tn->cmn_size ) );
-                KMP_FATAL( TPCommonBlocksInconsist );
-            }
+      if ((size_t)size > tn->cmn_size) {
+        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
+                      " ,%" KMP_UINTPTR_SPEC ")\n",
+                      data, size, tn->cmn_size));
+        KMP_FATAL(TPCommonBlocksInconsist);
+      }
 #endif /* USE_CHECKS_COMMON */
-        }
-        else {
-            /* The parallel address will NEVER overlap with the data_address */
-            /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */
-            KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) );
-            tn = kmp_threadprivate_insert( global_tid, data, data, size );
-        }
-
-        ret = tn->par_addr;
+    } else {
+      /* The parallel address will NEVER overlap with the data_address */
+      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
+       * data_address = data */
+      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
+      tn = kmp_threadprivate_insert(global_tid, data, data, size);
     }
-    KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
-                   global_tid, ret ) );
 
-    return ret;
+    ret = tn->par_addr;
+  }
+  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
+                global_tid, ret));
+
+  return ret;
 }
 
 /*!
@@ -627,62 +605,63 @@ __kmpc_threadprivate(ident_t *loc, kmp_i
  Allocate private storage for threadprivate data.
 */
 void *
-__kmpc_threadprivate_cached(
-    ident_t *  loc,
-    kmp_int32  global_tid,   // gtid.
-    void *     data,         // Pointer to original global variable.
-    size_t     size,         // Size of original global variable.
-    void ***   cache
-) {
-    KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %"
-                   KMP_SIZE_T_SPEC "\n",
-                   global_tid, *cache, data, size ) );
-
-    if ( TCR_PTR(*cache) == 0) {
-        __kmp_acquire_lock( & __kmp_global_lock, global_tid );
-
-        if ( TCR_PTR(*cache) == 0) {
-            __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
-            __kmp_tp_cached = 1;
-            __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
-            void ** my_cache;
-            KMP_ITT_IGNORE(
-            my_cache = (void**)
-                __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t ));
-                           );
-            // No need to zero the allocated memory; __kmp_allocate does that.
-            KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n",
-                           global_tid, my_cache ) );
-
-            /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */
-            /* Add address of mycache to linked list for cleanup later  */
-            kmp_cached_addr_t *tp_cache_addr;
-
-            tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity];
-            tp_cache_addr -> addr = my_cache;
-            tp_cache_addr -> next = __kmp_threadpriv_cache_list;
-            __kmp_threadpriv_cache_list = tp_cache_addr;
+__kmpc_threadprivate_cached(ident_t *loc,
+                            kmp_int32 global_tid, // gtid.
+                            void *data, // Pointer to original global variable.
+                            size_t size, // Size of original global variable.
+                            void ***cache) {
+  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
+                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
+                global_tid, *cache, data, size));
+
+  if (TCR_PTR(*cache) == 0) {
+    __kmp_acquire_lock(&__kmp_global_lock, global_tid);
+
+    if (TCR_PTR(*cache) == 0) {
+      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
+      __kmp_tp_cached = 1;
+      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
+      void **my_cache;
+      KMP_ITT_IGNORE(
+          my_cache = (void **)__kmp_allocate(
+              sizeof(void *) * __kmp_tp_capacity + sizeof(kmp_cached_addr_t)););
+      // No need to zero the allocated memory; __kmp_allocate does that.
+      KC_TRACE(
+          50,
+          ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n",
+           global_tid, my_cache));
+
+      /* TODO: free all this memory in __kmp_common_destroy using
+       * __kmp_threadpriv_cache_list */
+      /* Add address of mycache to linked list for cleanup later  */
+      kmp_cached_addr_t *tp_cache_addr;
+
+      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
+      tp_cache_addr->addr = my_cache;
+      tp_cache_addr->next = __kmp_threadpriv_cache_list;
+      __kmp_threadpriv_cache_list = tp_cache_addr;
+
+      KMP_MB();
+
+      TCW_PTR(*cache, my_cache);
+
+      KMP_MB();
+    }
+
+    __kmp_release_lock(&__kmp_global_lock, global_tid);
+  }
+
+  void *ret;
+  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
+    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);
+
+    TCW_PTR((*cache)[global_tid], ret);
+  }
+  KC_TRACE(10,
+           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
+            global_tid, ret));
 
-            KMP_MB();
-
-            TCW_PTR( *cache, my_cache);
-
-            KMP_MB();
-        }
-
-        __kmp_release_lock( & __kmp_global_lock, global_tid );
-    }
-
-    void *ret;
-    if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) {
-        ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size);
-
-        TCW_PTR( (*cache)[ global_tid ], ret);
-    }
-    KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
-                   global_tid, ret ) );
-
-    return ret;
+  return ret;
 }
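
A rough sketch of how a compiler-generated access site might use this entry
point (the cache variable and function names here are invented for
illustration; actual code generation differs in detail):

    static void **x_cache = NULL;  // one hidden cache per threadprivate variable
    extern int x;                  // the original global

    void bump_x(ident_t *loc, kmp_int32 gtid) {
      int *priv = (int *)__kmpc_threadprivate_cached(loc, gtid, (void *)&x,
                                                     sizeof(x), &x_cache);
      (*priv)++;                   // operate on this thread's private copy
    }

The repeated TCR_PTR(*cache) == 0 test above is double-checked allocation: the
unlocked check keeps the common, already-cached case off __kmp_global_lock, and
the re-check under the lock stops two racing threads from both installing a
cache for the same variable.
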
 
 /*!
@@ -695,39 +674,40 @@ __kmpc_threadprivate_cached(
  @param vector_length length of the vector (bytes or elements?)
  Register vector constructors and destructors for thread private data.
 */
-void
-__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor,
-                                   kmpc_cctor_vec cctor, kmpc_dtor_vec dtor,
-                                   size_t vector_length )
-{
-    struct shared_common *d_tn, **lnk_tn;
+void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
+                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
+                                       kmpc_dtor_vec dtor,
+                                       size_t vector_length) {
+  struct shared_common *d_tn, **lnk_tn;
 
-    KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) );
+  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));
 
 #ifdef USE_CHECKS_COMMON
-    /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
-    KMP_ASSERT( cctor == 0);
+  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
+  KMP_ASSERT(cctor == 0);
 #endif /* USE_CHECKS_COMMON */
 
-    d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table,
-                                          -1, data );        /* Only the global data table exists. */
-
-    if (d_tn == 0) {
-        d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) );
-        d_tn->gbl_addr = data;
-
-        d_tn->ct.ctorv = ctor;
-        d_tn->cct.cctorv = cctor;
-        d_tn->dt.dtorv = dtor;
-        d_tn->is_vec = TRUE;
-        d_tn->vec_len = (size_t) vector_length;
-/*
-        d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
-        d_tn->pod_init = 0;
-*/
-        lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]);
-
-        d_tn->next = *lnk_tn;
-        *lnk_tn = d_tn;
-    }
+  d_tn = __kmp_find_shared_task_common(
+      &__kmp_threadprivate_d_table, -1,
+      data); /* Only the global data table exists. */
+
+  if (d_tn == 0) {
+    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
+    d_tn->gbl_addr = data;
+
+    d_tn->ct.ctorv = ctor;
+    d_tn->cct.cctorv = cctor;
+    d_tn->dt.dtorv = dtor;
+    d_tn->is_vec = TRUE;
+    d_tn->vec_len = (size_t)vector_length;
+    /*
+            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
+       zeroes the memory
+            d_tn->pod_init = 0;
+    */
+    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
+
+    d_tn->next = *lnk_tn;
+    *lnk_tn = d_tn;
+  }
 }
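
The vector variant is easiest to read next to the call sites in
__kmp_threadprivate_insert() above, which invoke ctorv(addr, len) and
cctorv(addr, src, len). An illustrative registration for a threadprivate array
(thunk names invented; the exact kmpc_*_vec typedefs are in kmp.h, and the
header comment above leaves open whether vector_length counts bytes or
elements):

    /*   static Elem table[8];
     *   #pragma omp threadprivate(table)
     *
     *   table_ctorv(p, n)        construct n elements at p          -> ct.ctorv
     *   table_cctorv(p, src, n)  copy-construct n elements from src -> cct.cctorv
     *   table_dtorv(p, n)        destroy n elements at p            -> dt.dtorv
     *
     *   __kmpc_threadprivate_register_vec(&loc, table, table_ctorv, 0,
     *                                     table_dtorv, vector_length);  */
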

Modified: openmp/trunk/runtime/src/kmp_utility.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_utility.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_utility.cpp (original)
+++ openmp/trunk/runtime/src/kmp_utility.cpp Fri May 12 13:01:32 2017
@@ -14,416 +14,396 @@
 
 
 #include "kmp.h"
-#include "kmp_wrapper_getpid.h"
+#include "kmp_i18n.h"
 #include "kmp_str.h"
+#include "kmp_wrapper_getpid.h"
 #include <float.h>
-#include "kmp_i18n.h"
-
-/* ------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------ */
 
 static const char *unknown = "unknown";
 
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 
-/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */
-/* the debugging package has not been initialized yet, and only "0" will print   */
-/* debugging output since the environment variables have not been read.          */
+/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then
+   the debugging package has not been initialized yet, and only "0" will print
+   debugging output since the environment variables have not been read. */
 
 #ifdef KMP_DEBUG
 static int trace_level = 5;
 #endif
 
-/*
- * LOG_ID_BITS  = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
+/* LOG_ID_BITS  = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
  * APIC_ID      = (PHY_ID << LOG_ID_BITS) | LOG_ID
  * PHY_ID       = APIC_ID >> LOG_ID_BITS
  */
-int
-__kmp_get_physical_id( int log_per_phy, int apic_id )
-{
-   int index_lsb, index_msb, temp;
-
-   if (log_per_phy > 1) {
-	index_lsb = 0;
-	index_msb = 31;
-
-	temp = log_per_phy;
-        while ( (temp & 1) == 0 ) {
-	    temp  >>= 1;
-	    index_lsb++;
-	}
-
-	temp = log_per_phy;
-	while ( (temp & 0x80000000)==0 ) {
-	    temp <<= 1;
-	    index_msb--;
-	}
+int __kmp_get_physical_id(int log_per_phy, int apic_id) {
+  int index_lsb, index_msb, temp;
 
-	/* If >1 bits were set in log_per_phy, choose next higher power of 2 */
-	if (index_lsb != index_msb) index_msb++;
+  if (log_per_phy > 1) {
+    index_lsb = 0;
+    index_msb = 31;
+
+    temp = log_per_phy;
+    while ((temp & 1) == 0) {
+      temp >>= 1;
+      index_lsb++;
+    }
 
-	return ( (int) (apic_id >> index_msb) );
-   }
+    temp = log_per_phy;
+    while ((temp & 0x80000000) == 0) {
+      temp <<= 1;
+      index_msb--;
+    }
 
-   return apic_id;
-}
+    /* If >1 bits were set in log_per_phy, choose next higher power of 2 */
+    if (index_lsb != index_msb)
+      index_msb++;
+
+    return ((int)(apic_id >> index_msb));
+  }
 
+  return apic_id;
+}
 
 /*
  * LOG_ID_BITS  = ( 1 + floor( log_2( max( log_per_phy - 1, 1 ))))
  * APIC_ID      = (PHY_ID << LOG_ID_BITS) | LOG_ID
  * LOG_ID       = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 )
  */
-int
-__kmp_get_logical_id( int log_per_phy, int apic_id )
-{
-   unsigned current_bit;
-   int bits_seen;
-
-   if (log_per_phy <= 1) return ( 0 );
-
-   bits_seen = 0;
-
-   for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
-	if ( log_per_phy & current_bit ) {
-	    log_per_phy &= ~current_bit;
-	    bits_seen++;
-	}
-   }
-
-   /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
-   if (bits_seen == 1) {
-	current_bit >>= 1;
-   }
+int __kmp_get_logical_id(int log_per_phy, int apic_id) {
+  unsigned current_bit;
+  int bits_seen;
+
+  if (log_per_phy <= 1)
+    return (0);
+
+  bits_seen = 0;
+
+  for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) {
+    if (log_per_phy & current_bit) {
+      log_per_phy &= ~current_bit;
+      bits_seen++;
+    }
+  }
 
-   return ( (int) ((current_bit - 1) & apic_id) );
-}
+  /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */
+  if (bits_seen == 1) {
+    current_bit >>= 1;
+  }
 
+  return ((int)((current_bit - 1) & apic_id));
+}
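
A worked example of the decomposition described in the two comment blocks above
(these helpers are only built for x86):

    // log_per_phy = 4 logical processors per package  =>  LOG_ID_BITS = 2
    // apic_id     = 13 (binary 1101)
    //   PHY_ID = 13 >> 2            = 3
    //   LOG_ID = 13 & ((1 << 2)-1)  = 1
    // so __kmp_get_physical_id(4, 13) returns 3 and
    //    __kmp_get_logical_id(4, 13)  returns 1.
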
 
-static
-kmp_uint64
-__kmp_parse_frequency(        // R: Frequency in Hz.
-    char const * frequency    // I: Float number and unit: MHz, GHz, or TGz.
-) {
-
-    double       value  = 0.0;
-    char const * unit   = NULL;
-    kmp_uint64   result = 0;                    /* Zero is a better unknown value than all ones. */
+static kmp_uint64 __kmp_parse_frequency( // R: Frequency in Hz.
+    char const *frequency // I: Float number and unit: MHz, GHz, or THz.
+    ) {
+
+  double value = 0.0;
+  char const *unit = NULL;
+  kmp_uint64 result = 0; /* Zero is a better unknown value than all ones. */
 
-    if ( frequency == NULL ) {
-        return result;
-    }; // if
-    value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char const *".
-    if ( 0 < value && value <= DBL_MAX ) {          // Good value (not overflow, underflow, etc).
-        if ( strcmp( unit, "MHz" ) == 0 ) {
-            value = value * 1.0E+6;
-        } else if ( strcmp( unit, "GHz" ) == 0 ) {
-            value = value * 1.0E+9;
-        } else if ( strcmp( unit, "THz" ) == 0 ) {
-            value = value * 1.0E+12;
-        } else {                      // Wrong unit.
-            return result;
-        }; // if
-        result = value;
-    }; // if
+  if (frequency == NULL) {
     return result;
+  }; // if
+  value = strtod(frequency,
+                 (char **)&unit); // strtod() does not like "char const *".
+  if (0 < value &&
+      value <= DBL_MAX) { // Good value (not overflow, underflow, etc).
+    if (strcmp(unit, "MHz") == 0) {
+      value = value * 1.0E+6;
+    } else if (strcmp(unit, "GHz") == 0) {
+      value = value * 1.0E+9;
+    } else if (strcmp(unit, "THz") == 0) {
+      value = value * 1.0E+12;
+    } else { // Wrong unit.
+      return result;
+    }; // if
+    result = value;
+  }; // if
+  return result;
 
 }; // func __kmp_parse_cpu_frequency
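
A worked example of the parse, as driven from __kmp_query_cpuid() below on the
tail of the CPU brand string (the brand string shown is illustrative):

    //   brand string:  "... CPU @ 3.40GHz"
    //   strrchr(name, ' ')         -> " 3.40GHz"
    //   strtod(" 3.40GHz", &unit)  -> value = 3.40, unit -> "GHz"
    //   value * 1.0E+9             -> result = 3400000000 (Hz)
    // A missing trailing "<number><unit>" token, or an unrecognized unit,
    // leaves the result at 0, the "unknown" value.
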
 
-void
-__kmp_query_cpuid( kmp_cpuinfo_t *p )
-{
-    struct kmp_cpuid buf;
-    int max_arg;
-    int log_per_phy;
+void __kmp_query_cpuid(kmp_cpuinfo_t *p) {
+  struct kmp_cpuid buf;
+  int max_arg;
+  int log_per_phy;
 #ifdef KMP_DEBUG
-    int cflush_size;
+  int cflush_size;
 #endif
 
-    p->initialized = 1;
+  p->initialized = 1;
 
-    p->sse2 = 1; // Assume SSE2 by default.
+  p->sse2 = 1; // Assume SSE2 by default.
 
-    __kmp_x86_cpuid( 0, 0, &buf );
+  __kmp_x86_cpuid(0, 0, &buf);
 
-    KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
-        0, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+  KA_TRACE(trace_level,
+           ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", 0,
+            buf.eax, buf.ebx, buf.ecx, buf.edx));
 
-    max_arg = buf.eax;
+  max_arg = buf.eax;
 
-    p->apic_id = -1;
+  p->apic_id = -1;
 
-    if (max_arg >= 1) {
-        int i;
-        kmp_uint32 t, data[ 4 ];
+  if (max_arg >= 1) {
+    int i;
+    kmp_uint32 t, data[4];
 
-        __kmp_x86_cpuid( 1, 0, &buf );
-        KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
-                                1, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
+    __kmp_x86_cpuid(1, 0, &buf);
+    KA_TRACE(trace_level,
+             ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+              1, buf.eax, buf.ebx, buf.ecx, buf.edx));
 
-        {
-#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask  ) )
+    {
+#define get_value(reg, lo, mask) (((reg) >> (lo)) & (mask))
 
-            p->signature = buf.eax;
-            p->family    =   get_value( buf.eax, 20, 0xff )        + get_value( buf.eax, 8, 0x0f );
-            p->model     = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f );
-            p->stepping  =   get_value( buf.eax,  0, 0x0f );
+      p->signature = buf.eax;
+      p->family = get_value(buf.eax, 20, 0xff) + get_value(buf.eax, 8, 0x0f);
+      p->model =
+          (get_value(buf.eax, 16, 0x0f) << 4) + get_value(buf.eax, 4, 0x0f);
+      p->stepping = get_value(buf.eax, 0, 0x0f);
 
 #undef get_value
 
-            KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) );
-        }
+      KA_TRACE(trace_level, (" family = %d, model = %d, stepping = %d\n",
+                             p->family, p->model, p->stepping));
+    }
 
-        for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) {
-            data[ i ] = (t & 0xff);
-        }; // for
+    for (t = buf.ebx, i = 0; i < 4; t >>= 8, ++i) {
+      data[i] = (t & 0xff);
+    }; // for
 
-        p->sse2 = ( buf.edx >> 26 ) & 1;
+    p->sse2 = (buf.edx >> 26) & 1;
 
 #ifdef KMP_DEBUG
 
-        if ( (buf.edx >> 4) & 1 ) {
-            /* TSC - Timestamp Counter Available */
-            KA_TRACE( trace_level, (" TSC" ) );
-        }
-        if ( (buf.edx >> 8) & 1 ) {
-            /* CX8 - CMPXCHG8B Instruction Available */
-            KA_TRACE( trace_level, (" CX8" ) );
-        }
-        if ( (buf.edx >> 9) & 1 ) {
-            /* APIC - Local APIC Present (multi-processor operation support */
-            KA_TRACE( trace_level, (" APIC" ) );
-        }
-        if ( (buf.edx >> 15) & 1 ) {
-            /* CMOV - Conditional MOVe Instruction Available */
-            KA_TRACE( trace_level, (" CMOV" ) );
-        }
-        if ( (buf.edx >> 18) & 1 ) {
-            /* PSN - Processor Serial Number Available */
-            KA_TRACE( trace_level, (" PSN" ) );
-        }
-        if ( (buf.edx >> 19) & 1 ) {
-            /* CLFULSH - Cache Flush Instruction Available */
-            cflush_size = data[ 1 ] * 8;    /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
-            KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) );
-
-        }
-        if ( (buf.edx >> 21) & 1 ) {
-            /* DTES - Debug Trace & EMON Store */
-            KA_TRACE( trace_level, (" DTES" ) );
-        }
-        if ( (buf.edx >> 22) & 1 ) {
-            /* ACPI - ACPI Support Available */
-            KA_TRACE( trace_level, (" ACPI" ) );
-        }
-        if ( (buf.edx >> 23) & 1 ) {
-            /* MMX - Multimedia Extensions */
-            KA_TRACE( trace_level, (" MMX" ) );
-        }
-        if ( (buf.edx >> 25) & 1 ) {
-            /* SSE - SSE Instructions */
-            KA_TRACE( trace_level, (" SSE" ) );
-        }
-        if ( (buf.edx >> 26) & 1 ) {
-            /* SSE2 - SSE2 Instructions */
-            KA_TRACE( trace_level, (" SSE2" ) );
-        }
-        if ( (buf.edx >> 27) & 1 ) {
-            /* SLFSNP - Self-Snooping Cache */
-            KA_TRACE( trace_level, (" SLFSNP" ) );
-        }
+    if ((buf.edx >> 4) & 1) {
+      /* TSC - Timestamp Counter Available */
+      KA_TRACE(trace_level, (" TSC"));
+    }
+    if ((buf.edx >> 8) & 1) {
+      /* CX8 - CMPXCHG8B Instruction Available */
+      KA_TRACE(trace_level, (" CX8"));
+    }
+    if ((buf.edx >> 9) & 1) {
+      /* APIC - Local APIC Present (multi-processor operation support) */
+      KA_TRACE(trace_level, (" APIC"));
+    }
+    if ((buf.edx >> 15) & 1) {
+      /* CMOV - Conditional MOVe Instruction Available */
+      KA_TRACE(trace_level, (" CMOV"));
+    }
+    if ((buf.edx >> 18) & 1) {
+      /* PSN - Processor Serial Number Available */
+      KA_TRACE(trace_level, (" PSN"));
+    }
+    if ((buf.edx >> 19) & 1) {
+      /* CLFLUSH - Cache Flush Instruction Available */
+      cflush_size =
+          data[1] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */
+      KA_TRACE(trace_level, (" CLFLUSH(%db)", cflush_size));
+    }
+    if ((buf.edx >> 21) & 1) {
+      /* DTES - Debug Trace & EMON Store */
+      KA_TRACE(trace_level, (" DTES"));
+    }
+    if ((buf.edx >> 22) & 1) {
+      /* ACPI - ACPI Support Available */
+      KA_TRACE(trace_level, (" ACPI"));
+    }
+    if ((buf.edx >> 23) & 1) {
+      /* MMX - Multimedia Extensions */
+      KA_TRACE(trace_level, (" MMX"));
+    }
+    if ((buf.edx >> 25) & 1) {
+      /* SSE - SSE Instructions */
+      KA_TRACE(trace_level, (" SSE"));
+    }
+    if ((buf.edx >> 26) & 1) {
+      /* SSE2 - SSE2 Instructions */
+      KA_TRACE(trace_level, (" SSE2"));
+    }
+    if ((buf.edx >> 27) & 1) {
+      /* SLFSNP - Self-Snooping Cache */
+      KA_TRACE(trace_level, (" SLFSNP"));
+    }
 #endif /* KMP_DEBUG */
 
-        if ( (buf.edx >> 28) & 1 ) {
-            /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
-            log_per_phy = data[ 2 ];
-            p->apic_id     = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */
-            KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) );
+    if ((buf.edx >> 28) & 1) {
+      /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */
+      log_per_phy = data[2];
+      p->apic_id = data[3]; /* Bits 31-24: Processor Initial APIC ID (X) */
+      KA_TRACE(trace_level, (" HT(%d TPUs)", log_per_phy));
 
-            if( log_per_phy > 1 ) {
-                /* default to 1k FOR JT-enabled processors (4k on OS X*) */
+      if (log_per_phy > 1) {
+/* default to 1k FOR JT-enabled processors (4k on OS X*) */
 #if KMP_OS_DARWIN
-                p->cpu_stackoffset = 4 * 1024;
+        p->cpu_stackoffset = 4 * 1024;
 #else
-                p->cpu_stackoffset = 1 * 1024;
+        p->cpu_stackoffset = 1 * 1024;
 #endif
-            }
+      }
 
-            p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id );
-            p->logical_id  = __kmp_get_logical_id( log_per_phy, p->apic_id );
-        }
+      p->physical_id = __kmp_get_physical_id(log_per_phy, p->apic_id);
+      p->logical_id = __kmp_get_logical_id(log_per_phy, p->apic_id);
+    }
 #ifdef KMP_DEBUG
-        if ( (buf.edx >> 29) & 1 ) {
-            /* ATHROTL - Automatic Throttle Control */
-            KA_TRACE( trace_level, (" ATHROTL" ) );
-        }
-        KA_TRACE( trace_level, (" ]\n" ) );
+    if ((buf.edx >> 29) & 1) {
+      /* ATHROTL - Automatic Throttle Control */
+      KA_TRACE(trace_level, (" ATHROTL"));
+    }
+    KA_TRACE(trace_level, (" ]\n"));
 
-        for (i = 2; i <= max_arg; ++i) {
-            __kmp_x86_cpuid( i, 0, &buf );
-            KA_TRACE( trace_level,
-                      ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
-                        i, buf.eax, buf.ebx, buf.ecx, buf.edx ) );
-        }
+    for (i = 2; i <= max_arg; ++i) {
+      __kmp_x86_cpuid(i, 0, &buf);
+      KA_TRACE(trace_level,
+               ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n",
+                i, buf.eax, buf.ebx, buf.ecx, buf.edx));
+    }
 #endif
 #if KMP_USE_ADAPTIVE_LOCKS
-        p->rtm = 0;
-        if (max_arg > 7)
-        {
-            /* RTM bit CPUID.07:EBX, bit 11 */
-            __kmp_x86_cpuid(7, 0, &buf);
-            p->rtm = (buf.ebx >> 11) & 1;
-            KA_TRACE( trace_level, (" RTM" ) );
-        }
+    p->rtm = 0;
+    if (max_arg > 7) {
+      /* RTM bit CPUID.07:EBX, bit 11 */
+      __kmp_x86_cpuid(7, 0, &buf);
+      p->rtm = (buf.ebx >> 11) & 1;
+      KA_TRACE(trace_level, (" RTM"));
+    }
 #endif
-    }; // if
+  }; // if
 
-    { // Parse CPU brand string for frequency, saving the string for later.
-        int i;
-        kmp_cpuid_t * base = (kmp_cpuid_t *)&p->name[0];
-
-        // Get CPU brand string.
-        for ( i = 0; i < 3; ++ i ) {
-            __kmp_x86_cpuid( 0x80000002 + i, 0, base+i );
-        }; // for
-        p->name[ sizeof(p->name) - 1 ] = 0; // Just in case. ;-)
-        KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", &p->name[0] ) );
-
-        // Parse frequency.
-        p->frequency = __kmp_parse_frequency( strrchr( &p->name[0], ' ' ) );
-        KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) );
-    }
+  { // Parse CPU brand string for frequency, saving the string for later.
+    int i;
+    kmp_cpuid_t *base = (kmp_cpuid_t *)&p->name[0];
+
+    // Get CPU brand string.
+    for (i = 0; i < 3; ++i) {
+      __kmp_x86_cpuid(0x80000002 + i, 0, base + i);
+    }; // for
+    p->name[sizeof(p->name) - 1] = 0; // Just in case. ;-)
+    KA_TRACE(trace_level, ("cpu brand string: \"%s\"\n", &p->name[0]));
+
+    // Parse frequency.
+    p->frequency = __kmp_parse_frequency(strrchr(&p->name[0], ' '));
+    KA_TRACE(trace_level,
+             ("cpu frequency from brand string: %" KMP_UINT64_SPEC "\n",
+              p->frequency));
+  }
 }
 
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
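
A worked example of the family/model/stepping decoding in __kmp_query_cpuid()
above, using the CPUID.1 EAX signature 0x000306A9 (an Ivy Bridge part):

    //   family   = ((0x306A9 >> 20) & 0xff) + ((0x306A9 >> 8) & 0x0f)
    //            = 0x00 + 0x06   = 6
    //   model    = (((0x306A9 >> 16) & 0x0f) << 4) + ((0x306A9 >> 4) & 0x0f)
    //            = 0x30 + 0x0a   = 0x3a  (58)
    //   stepping = 0x306A9 & 0x0f = 9
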
 
-/* ------------------------------------------------------------------------------------ */
-/* ------------------------------------------------------------------------------------ */
-
-void
-__kmp_expand_host_name( char *buffer, size_t size )
-{
-    KMP_DEBUG_ASSERT(size >= sizeof(unknown));
+void __kmp_expand_host_name(char *buffer, size_t size) {
+  KMP_DEBUG_ASSERT(size >= sizeof(unknown));
 #if KMP_OS_WINDOWS
-    {
-	DWORD	s = size;
+  {
+    DWORD s = size;
 
-	if (! GetComputerNameA( buffer, & s ))
-	    KMP_STRCPY_S( buffer, size, unknown );
-    }
+    if (!GetComputerNameA(buffer, &s))
+      KMP_STRCPY_S(buffer, size, unknown);
+  }
 #else
-    buffer[size - 2] = 0;
-    if (gethostname( buffer, size ) || buffer[size - 2] != 0)
-	KMP_STRCPY_S( buffer, size, unknown );
+  buffer[size - 2] = 0;
+  if (gethostname(buffer, size) || buffer[size - 2] != 0)
+    KMP_STRCPY_S(buffer, size, unknown);
 #endif
 }
 
 /* Expand the meta characters in the filename:
- *
  * Currently defined characters are:
- *
  * %H the hostname
  * %P the number of threads used.
  * %I the unique identifier for this run.
  */
 
-void
-__kmp_expand_file_name( char *result, size_t rlen, char *pattern )
-{
-    char	*pos = result, *end = result + rlen - 1;
-    char	 buffer[256];
-    int		 default_cpu_width = 1;
-    int          snp_result;
-
-    KMP_DEBUG_ASSERT(rlen > 0);
-    *end = 0;
-    {
-	int i;
-	for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width);
-    }
-
-    if (pattern != NULL) {
-	while (*pattern != '\0' && pos < end) {
-	    if (*pattern != '%') {
-		*pos++ = *pattern++;
-	    } else {
-		char *old_pattern = pattern;
-		int width = 1;
-		int cpu_width = default_cpu_width;
-
-		++pattern;
-
-		if (*pattern >= '0' && *pattern <= '9') {
-		    width = 0;
-		    do {
-			width = (width * 10) + *pattern++ - '0';
-		    } while (*pattern >= '0' && *pattern <= '9');
-		    if (width < 0 || width > 1024)
-			width = 1;
-
-		    cpu_width = width;
-		}
-
-		switch (*pattern) {
-		case 'H':
-		case 'h':
-		    {
-			__kmp_expand_host_name( buffer, sizeof( buffer ) );
-			KMP_STRNCPY( pos,  buffer, end - pos + 1);
-			if(*end == 0) {
-			    while ( *pos )
-				++pos;
-			    ++pattern;
-			} else
-			    pos = end;
-		    }
-		    break;
-		case 'P':
-		case 'p':
-		    {
-			snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth );
-			if(snp_result >= 0 && snp_result <= end - pos) {
-			    while ( *pos )
-				++pos;
-			    ++pattern;
-			} else
-			    pos = end;
-		    }
-		    break;
-		case 'I':
-		case 'i':
-		    {
-			pid_t id = getpid();
-			snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id );
-			if(snp_result >= 0 && snp_result <= end - pos) {
-			    while ( *pos )
-				++pos;
-			    ++pattern;
-			} else
-			    pos = end;
-			break;
-		    }
-		case '%':
-		    {
-			*pos++ = '%';
-			++pattern;
-			break;
-		    }
-		default:
-		    {
-			*pos++ = '%';
-			pattern = old_pattern + 1;
-			break;
-		    }
-		}
-	    }
-	}
-	/* TODO: How do we get rid of this? */
-	if(*pattern != '\0')
-	    KMP_FATAL( FileNameTooLong );
+void __kmp_expand_file_name(char *result, size_t rlen, char *pattern) {
+  char *pos = result, *end = result + rlen - 1;
+  char buffer[256];
+  int default_cpu_width = 1;
+  int snp_result;
+
+  KMP_DEBUG_ASSERT(rlen > 0);
+  *end = 0;
+  {
+    int i;
+    for (i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width)
+      ;
+  }
+
+  if (pattern != NULL) {
+    while (*pattern != '\0' && pos < end) {
+      if (*pattern != '%') {
+        *pos++ = *pattern++;
+      } else {
+        char *old_pattern = pattern;
+        int width = 1;
+        int cpu_width = default_cpu_width;
+
+        ++pattern;
+
+        if (*pattern >= '0' && *pattern <= '9') {
+          width = 0;
+          do {
+            width = (width * 10) + *pattern++ - '0';
+          } while (*pattern >= '0' && *pattern <= '9');
+          if (width < 0 || width > 1024)
+            width = 1;
+
+          cpu_width = width;
+        }
+
+        switch (*pattern) {
+        case 'H':
+        case 'h': {
+          __kmp_expand_host_name(buffer, sizeof(buffer));
+          KMP_STRNCPY(pos, buffer, end - pos + 1);
+          if (*end == 0) {
+            while (*pos)
+              ++pos;
+            ++pattern;
+          } else
+            pos = end;
+        } break;
+        case 'P':
+        case 'p': {
+          snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", cpu_width,
+                                    __kmp_dflt_team_nth);
+          if (snp_result >= 0 && snp_result <= end - pos) {
+            while (*pos)
+              ++pos;
+            ++pattern;
+          } else
+            pos = end;
+        } break;
+        case 'I':
+        case 'i': {
+          pid_t id = getpid();
+          snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id);
+          if (snp_result >= 0 && snp_result <= end - pos) {
+            while (*pos)
+              ++pos;
+            ++pattern;
+          } else
+            pos = end;
+          break;
+        }
+        case '%': {
+          *pos++ = '%';
+          ++pattern;
+          break;
+        }
+        default: {
+          *pos++ = '%';
+          pattern = old_pattern + 1;
+          break;
+        }
+        }
+      }
     }
+    /* TODO: How do we get rid of this? */
+    if (*pattern != '\0')
+      KMP_FATAL(FileNameTooLong);
+  }
 
-    *pos = '\0';
+  *pos = '\0';
 }
-
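
A worked example of the expansion (host name, pid and counts are illustrative):
with __kmp_dflt_team_nth = 8, __kmp_xproc = 16 (so the default field width is
two digits), pid 4242 and host "node17",

    //   pattern: "stats_%H_%P_%5I.txt"
    //   result:  "stats_node17_08_04242.txt"
    // %H expands to the host name, %P to the team size padded to the default
    // width, %5I to the pid padded to five digits; "%%" emits a literal '%'.
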

Modified: openmp/trunk/runtime/src/kmp_version.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_version.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_version.cpp (original)
+++ openmp/trunk/runtime/src/kmp_version.cpp Fri May 12 13:01:32 2017
@@ -18,199 +18,191 @@
 #include "kmp_version.h"
 
 // Replace with snapshot date YYYYMMDD for promotion build.
-#define KMP_VERSION_BUILD    20140926
+#define KMP_VERSION_BUILD 20140926
 
 // Helper macros to convert value of macro to string literal.
-#define _stringer( x ) #x
-#define stringer( x )  _stringer( x )
+#define _stringer(x) #x
+#define stringer(x) _stringer(x)
 
 // Detect compiler.
 #if KMP_COMPILER_ICC
-    #if   __INTEL_COMPILER == 1010
-        #define KMP_COMPILER "Intel C++ Compiler 10.1"
-    #elif __INTEL_COMPILER == 1100
-        #define KMP_COMPILER "Intel C++ Compiler 11.0"
-    #elif __INTEL_COMPILER == 1110
-        #define KMP_COMPILER "Intel C++ Compiler 11.1"
-    #elif __INTEL_COMPILER == 1200
-        #define KMP_COMPILER "Intel C++ Compiler 12.0"
-    #elif __INTEL_COMPILER == 1210
-        #define KMP_COMPILER "Intel C++ Compiler 12.1"
-    #elif __INTEL_COMPILER == 1300
-        #define KMP_COMPILER "Intel C++ Compiler 13.0"
-    #elif __INTEL_COMPILER == 1310
-        #define KMP_COMPILER "Intel C++ Compiler 13.1"
-    #elif __INTEL_COMPILER == 1400
-        #define KMP_COMPILER "Intel C++ Compiler 14.0"
-    #elif __INTEL_COMPILER == 1410
-        #define KMP_COMPILER "Intel C++ Compiler 14.1"
-    #elif __INTEL_COMPILER == 1500
-        #define KMP_COMPILER "Intel C++ Compiler 15.0"
-    #elif __INTEL_COMPILER == 1600
-        #define KMP_COMPILER "Intel C++ Compiler 16.0"
-    #elif __INTEL_COMPILER == 1700
-        #define KMP_COMPILER "Intel C++ Compiler 17.0"
-    #elif __INTEL_COMPILER == 9998
-        #define KMP_COMPILER "Intel C++ Compiler mainline"
-    #elif __INTEL_COMPILER == 9999
-        #define KMP_COMPILER "Intel C++ Compiler mainline"
-    #endif
+#if __INTEL_COMPILER == 1010
+#define KMP_COMPILER "Intel C++ Compiler 10.1"
+#elif __INTEL_COMPILER == 1100
+#define KMP_COMPILER "Intel C++ Compiler 11.0"
+#elif __INTEL_COMPILER == 1110
+#define KMP_COMPILER "Intel C++ Compiler 11.1"
+#elif __INTEL_COMPILER == 1200
+#define KMP_COMPILER "Intel C++ Compiler 12.0"
+#elif __INTEL_COMPILER == 1210
+#define KMP_COMPILER "Intel C++ Compiler 12.1"
+#elif __INTEL_COMPILER == 1300
+#define KMP_COMPILER "Intel C++ Compiler 13.0"
+#elif __INTEL_COMPILER == 1310
+#define KMP_COMPILER "Intel C++ Compiler 13.1"
+#elif __INTEL_COMPILER == 1400
+#define KMP_COMPILER "Intel C++ Compiler 14.0"
+#elif __INTEL_COMPILER == 1410
+#define KMP_COMPILER "Intel C++ Compiler 14.1"
+#elif __INTEL_COMPILER == 1500
+#define KMP_COMPILER "Intel C++ Compiler 15.0"
+#elif __INTEL_COMPILER == 1600
+#define KMP_COMPILER "Intel C++ Compiler 16.0"
+#elif __INTEL_COMPILER == 1700
+#define KMP_COMPILER "Intel C++ Compiler 17.0"
+#elif __INTEL_COMPILER == 9998
+#define KMP_COMPILER "Intel C++ Compiler mainline"
+#elif __INTEL_COMPILER == 9999
+#define KMP_COMPILER "Intel C++ Compiler mainline"
+#endif
 #elif KMP_COMPILER_CLANG
-    #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ )
+#define KMP_COMPILER                                                           \
+  "Clang " stringer(__clang_major__) "." stringer(__clang_minor__)
 #elif KMP_COMPILER_GCC
-    #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ )
+#define KMP_COMPILER "GCC " stringer(__GNUC__) "." stringer(__GNUC_MINOR__)
 #elif KMP_COMPILER_MSVC
-    #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER )
+#define KMP_COMPILER "MSVC " stringer(_MSC_FULL_VER)
 #endif
 #ifndef KMP_COMPILER
-    #warning "Unknown compiler"
-    #define KMP_COMPILER "unknown compiler"
+#warning "Unknown compiler"
+#define KMP_COMPILER "unknown compiler"
 #endif
 
 // Detect library type (perf, stub).
 #ifdef KMP_STUB
-    #define KMP_LIB_TYPE "stub"
+#define KMP_LIB_TYPE "stub"
 #else
-    #define KMP_LIB_TYPE "performance"
+#define KMP_LIB_TYPE "performance"
 #endif // KMP_LIB_TYPE
 
 // Detect link type (static, dynamic).
 #ifdef KMP_DYNAMIC_LIB
-    #define KMP_LINK_TYPE "dynamic"
+#define KMP_LINK_TYPE "dynamic"
 #else
-    #define KMP_LINK_TYPE "static"
+#define KMP_LINK_TYPE "static"
 #endif // KMP_LINK_TYPE
 
 // Finally, define strings.
-#define KMP_LIBRARY   KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")"
+#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")"
 #define KMP_COPYRIGHT ""
 
 int const __kmp_version_major = KMP_VERSION_MAJOR;
 int const __kmp_version_minor = KMP_VERSION_MINOR;
 int const __kmp_version_build = KMP_VERSION_BUILD;
 int const __kmp_openmp_version =
-    #if OMP_50_ENABLED
-        201611;
-    #elif OMP_45_ENABLED
-        201511;
-    #elif OMP_40_ENABLED
-        201307;
-    #else
-        201107;
-    #endif
-
-/* Do NOT change the format of this string!  Intel(R) Thread Profiler checks for a
-   specific format some changes in the recognition routine there need to
-   be made before this is changed.
-*/
-char const __kmp_copyright[] =
-    KMP_VERSION_PREFIX KMP_LIBRARY
-    " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR )
-    "." stringer( KMP_VERSION_BUILD ) " "
-    KMP_COPYRIGHT;
-
-char const __kmp_version_copyright[]      = KMP_VERSION_PREFIX KMP_COPYRIGHT;
-char const __kmp_version_lib_ver[]        = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD );
-char const __kmp_version_lib_type[]       = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE;
-char const __kmp_version_link_type[]      = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE;
-char const __kmp_version_build_time[]     = KMP_VERSION_PREFIX "build time: " "no_timestamp";
+#if OMP_50_ENABLED
+    201611;
+#elif OMP_45_ENABLED
+    201511;
+#elif OMP_40_ENABLED
+    201307;
+#else
+    201107;
+#endif
+
+/* Do NOT change the format of this string!  Intel(R) Thread Profiler checks for
+   a specific format; some changes in the recognition routine there need to be
+   made before this is changed. */
+char const __kmp_copyright[] = KMP_VERSION_PREFIX KMP_LIBRARY
+    " ver. " stringer(KMP_VERSION_MAJOR) "." stringer(
+        KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD) " " KMP_COPYRIGHT;
+
+char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT;
+char const __kmp_version_lib_ver[] =
+    KMP_VERSION_PREFIX "version: " stringer(KMP_VERSION_MAJOR) "." stringer(
+        KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD);
+char const __kmp_version_lib_type[] =
+    KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE;
+char const __kmp_version_link_type[] =
+    KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE;
+char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: "
+                                                           "no_timestamp";
 #if KMP_MIC2
-    char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2";
+char const __kmp_version_target_env[] =
+    KMP_VERSION_PREFIX "target environment: MIC2";
 #endif
-char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER;
+char const __kmp_version_build_compiler[] =
+    KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER;
 
-//
 // Called at serial initialization time.
-//
 static int __kmp_version_1_printed = FALSE;
 
-void
-__kmp_print_version_1( void )
-{
-    if ( __kmp_version_1_printed ) {
-        return;
-    }; // if
-    __kmp_version_1_printed = TRUE;
-
-    #ifndef KMP_STUB
-        kmp_str_buf_t buffer;
-        __kmp_str_buf_init( & buffer );
-        // Print version strings skipping initial magic.
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] );
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] );
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] );
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] );
-      #if KMP_MIC
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] );
-      #endif
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] );
-        #if defined(KMP_GOMP_COMPAT)
-            __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] );
-        #endif /* defined(KMP_GOMP_COMPAT) */
-        __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] );
-        __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" )  );
-        #ifdef KMP_DEBUG
-            for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
-                __kmp_str_buf_print(
-                    & buffer,
-                    "%s%s barrier branch bits: gather=%u, release=%u\n",
-                    KMP_VERSION_PREF_STR,
-                    __kmp_barrier_type_name[ i ],
-                    __kmp_barrier_gather_branch_bits[ i ],
-                    __kmp_barrier_release_branch_bits[ i ]
-                ); // __kmp_str_buf_print
-            }; // for i
-            for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) {
-                __kmp_str_buf_print(
-                    & buffer,
-                    "%s%s barrier pattern: gather=%s, release=%s\n",
-                    KMP_VERSION_PREF_STR,
-                    __kmp_barrier_type_name[ i ],
-                    __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ],
-                    __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ]
-                ); // __kmp_str_buf_print
-            }; // for i
-            __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] );
-        #endif
-        __kmp_str_buf_print(
-            & buffer,
-            "%sthread affinity support: %s\n",
-            KMP_VERSION_PREF_STR,
-            #if KMP_AFFINITY_SUPPORTED
-                (
-                    KMP_AFFINITY_CAPABLE()
-                    ?
-                    (
-                        __kmp_affinity_type == affinity_none
-                        ?
-                        "not used"
-                        :
-                        "yes"
-                    )
-                    :
-                    "no"
-                )
-            #else
-                "no"
-            #endif
-        );
-        __kmp_printf( "%s", buffer.str );
-        __kmp_str_buf_free( & buffer );
-        K_DIAG( 1, ( "KMP_VERSION is true\n" ) );
-    #endif // KMP_STUB
+void __kmp_print_version_1(void) {
+  if (__kmp_version_1_printed) {
+    return;
+  }; // if
+  __kmp_version_1_printed = TRUE;
+
+#ifndef KMP_STUB
+  kmp_str_buf_t buffer;
+  __kmp_str_buf_init(&buffer);
+  // Print version strings skipping initial magic.
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]);
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_lib_type[KMP_VERSION_MAGIC_LEN]);
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_link_type[KMP_VERSION_MAGIC_LEN]);
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_build_time[KMP_VERSION_MAGIC_LEN]);
+#if KMP_MIC
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_target_env[KMP_VERSION_MAGIC_LEN]);
+#endif
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_build_compiler[KMP_VERSION_MAGIC_LEN]);
+#if defined(KMP_GOMP_COMPAT)
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_alt_comp[KMP_VERSION_MAGIC_LEN]);
+#endif /* defined(KMP_GOMP_COMPAT) */
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_omp_api[KMP_VERSION_MAGIC_LEN]);
+  __kmp_str_buf_print(&buffer, "%sdynamic error checking: %s\n",
+                      KMP_VERSION_PREF_STR,
+                      (__kmp_env_consistency_check ? "yes" : "no"));
+#ifdef KMP_DEBUG
+  for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) {
+    __kmp_str_buf_print(
+        &buffer, "%s%s barrier branch bits: gather=%u, release=%u\n",
+        KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i],
+        __kmp_barrier_gather_branch_bits[i],
+        __kmp_barrier_release_branch_bits[i]); // __kmp_str_buf_print
+  }; // for i
+  for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) {
+    __kmp_str_buf_print(
+        &buffer, "%s%s barrier pattern: gather=%s, release=%s\n",
+        KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i],
+        __kmp_barrier_pattern_name[__kmp_barrier_gather_pattern[i]],
+        __kmp_barrier_pattern_name
+            [__kmp_barrier_release_pattern[i]]); // __kmp_str_buf_print
+  }; // for i
+  __kmp_str_buf_print(&buffer, "%s\n",
+                      &__kmp_version_lock[KMP_VERSION_MAGIC_LEN]);
+#endif
+  __kmp_str_buf_print(
+      &buffer, "%sthread affinity support: %s\n", KMP_VERSION_PREF_STR,
+#if KMP_AFFINITY_SUPPORTED
+      (KMP_AFFINITY_CAPABLE()
+           ? (__kmp_affinity_type == affinity_none ? "not used" : "yes")
+           : "no")
+#else
+      "no"
+#endif
+          );
+  __kmp_printf("%s", buffer.str);
+  __kmp_str_buf_free(&buffer);
+  K_DIAG(1, ("KMP_VERSION is true\n"));
+#endif // KMP_STUB
 } // __kmp_print_version_1
 
-//
 // Called at parallel initialization time.
-//
 static int __kmp_version_2_printed = FALSE;
 
-void
-__kmp_print_version_2( void ) {
-    if ( __kmp_version_2_printed ) {
-        return;
-    }; // if
-    __kmp_version_2_printed = TRUE;
+void __kmp_print_version_2(void) {
+  if (__kmp_version_2_printed) {
+    return;
+  }; // if
+  __kmp_version_2_printed = TRUE;
 } // __kmp_print_version_2
 
 // end of file //
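
For reference, with KMP_VERSION_BUILD 20140926 as defined above,
KMP_VERSION_MINOR 0 (from kmp_version.h) and assuming KMP_VERSION_MAJOR is 5
for the build, the stringer() concatenation yields roughly:

    //   __kmp_version_lib_ver = "\0@(#) Intel(R) OMP version: 5.0.20140926"
    // __kmp_print_version_1() prints it starting at offset
    // KMP_VERSION_MAGIC_LEN (6), i.e. without the leading "\0@(#) " magic.
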

Modified: openmp/trunk/runtime/src/kmp_version.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_version.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_version.h (original)
+++ openmp/trunk/runtime/src/kmp_version.h Fri May 12 13:01:32 2017
@@ -17,31 +17,32 @@
 #define KMP_VERSION_H
 
 #ifdef __cplusplus
-    extern "C" {
+extern "C" {
 #endif // __cplusplus
 
 #ifndef KMP_VERSION_MAJOR
-    #error KMP_VERSION_MAJOR macro is not defined.
+#error KMP_VERSION_MAJOR macro is not defined.
 #endif
-#define KMP_VERSION_MINOR       0
-/*
-    Using "magic" prefix in all the version strings is rather convenient to get static version info
-    from binaries by using standard utilities "strings" and "grep", e. g.:
+#define KMP_VERSION_MINOR 0
+/* Using "magic" prefix in all the version strings is rather convenient to get
+   static version info from binaries by using standard utilities "strings" and
+   "grep", e. g.:
         $ strings libomp.so | grep "@(#)"
-    gives clean list of all version strings in the library. Leading zero helps to keep version
-    string separate from printable characters which may occurs just before version string.
-*/
-#define KMP_VERSION_MAGIC_STR   "\x00@(#) "
-#define KMP_VERSION_MAGIC_LEN   6                // Length of KMP_VERSION_MAGIC_STR.
-#define KMP_VERSION_PREF_STR    "Intel(R) OMP "
-#define KMP_VERSION_PREFIX      KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
+   gives clean list of all version strings in the library. Leading zero helps
+   to keep version string separate from printable characters which may occur
+   just before version string. */
+#define KMP_VERSION_MAGIC_STR "\x00@(#) "
+#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR.
+#define KMP_VERSION_PREF_STR "Intel(R) OMP "
+#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR
 
 /* declare all the version string constants for KMP_VERSION env. variable */
-extern int  const __kmp_version_major;
-extern int  const __kmp_version_minor;
-extern int  const __kmp_version_build;
-extern int  const __kmp_openmp_version;
-extern char const __kmp_copyright[];    // Old variable, kept for compatibility with ITC and ITP.
+extern int const __kmp_version_major;
+extern int const __kmp_version_minor;
+extern int const __kmp_version_build;
+extern int const __kmp_openmp_version;
+extern char const
+    __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP.
 extern char const __kmp_version_copyright[];
 extern char const __kmp_version_lib_ver[];
 extern char const __kmp_version_lib_type[];
@@ -58,11 +59,11 @@ extern char const __kmp_version_ftnstdca
 extern char const __kmp_version_ftncdecl[];
 extern char const __kmp_version_ftnextra[];
 
-void __kmp_print_version_1( void );
-void __kmp_print_version_2( void );
+void __kmp_print_version_1(void);
+void __kmp_print_version_2(void);
 
 #ifdef __cplusplus
-    } // extern "C"
+} // extern "C"
 #endif // __cplusplus
 
 #endif /* KMP_VERSION_H */
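
With the string constants from kmp_version.cpp above, the grep suggested in the
comment typically shows lines like these (version and compiler fields depend on
the particular build; a dynamic performance build is assumed here):

    $ strings libomp.so | grep "@(#)"
    @(#) Intel(R) OMP version: 5.0.20140926
    @(#) Intel(R) OMP library type: performance
    @(#) Intel(R) OMP link type: dynamic
    @(#) Intel(R) OMP build time: no_timestamp
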

Modified: openmp/trunk/runtime/src/kmp_wait_release.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.cpp (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.cpp Fri May 12 13:01:32 2017
@@ -14,13 +14,10 @@
 
 #include "kmp_wait_release.h"
 
-void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin
-                   USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
-    __kmp_wait_template(this_thr, flag, final_spin
-                        USE_ITT_BUILD_ARG(itt_sync_obj) );
+void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag,
+                   int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+  __kmp_wait_template(this_thr, flag,
+                      final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
 }
 
-void __kmp_release_64(kmp_flag_64 *flag) {
-    __kmp_release_template(flag);
-}
+void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); }

Modified: openmp/trunk/runtime/src/kmp_wait_release.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wait_release.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wait_release.h (original)
+++ openmp/trunk/runtime/src/kmp_wait_release.h Fri May 12 13:01:32 2017
@@ -24,8 +24,8 @@
 @defgroup WAIT_RELEASE Wait/Release operations
 
 The definitions and functions here implement the lowest level thread
-synchronizations of suspending a thread and awaking it. They are used
-to build higher level operations such as barriers and fork/join.
+synchronizations of suspending a thread and awaking it. They are used to build
+higher level operations such as barriers and fork/join.
 */
 
 /*!
@@ -37,581 +37,647 @@ to build higher level operations such as
  * The flag_type describes the storage used for the flag.
  */
 enum flag_type {
-    flag32,        /**< 32 bit flags */
-    flag64,        /**< 64 bit flags */
-    flag_oncore    /**< special 64-bit flag for on-core barrier (hierarchical) */
+  flag32, /**< 32 bit flags */
+  flag64, /**< 64 bit flags */
+  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
 };
 
 /*!
  * Base class for wait/release volatile flag
  */
-template <typename P>
-class kmp_flag {
-    volatile P * loc;  /**< Pointer to the flag storage that is modified by another thread */
-    flag_type t;       /**< "Type" of the flag in loc */
- public:
-    typedef P flag_t;
-    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
-    /*!
-     * @result the pointer to the actual flag
-     */
-    volatile P * get() { return loc; }
-    /*!
-     * @param new_loc in   set loc to point at new_loc
-     */
-    void set(volatile P *new_loc) { loc = new_loc; }
-    /*!
-     * @result the flag_type
-     */
-    flag_type get_type() { return t; }
-    // Derived classes must provide the following:
-    /*
-    kmp_info_t * get_waiter(kmp_uint32 i);
-    kmp_uint32 get_num_waiters();
-    bool done_check();
-    bool done_check_val(P old_loc);
-    bool notdone_check();
-    P internal_release();
-    void suspend(int th_gtid);
-    void resume(int th_gtid);
-    P set_sleeping();
-    P unset_sleeping();
-    bool is_sleeping();
-    bool is_any_sleeping();
-    bool is_sleeping_val(P old_loc);
-    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
-                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained);
-    */
+template <typename P> class kmp_flag {
+  volatile P
+    *loc; /**< Pointer to the flag storage that is modified by another thread
+             */
+  flag_type t; /**< "Type" of the flag in loc */
+public:
+  typedef P flag_t;
+  kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
+  /*!
+   * @result the pointer to the actual flag
+   */
+  volatile P *get() { return loc; }
+  /*!
+   * @param new_loc in   set loc to point at new_loc
+   */
+  void set(volatile P *new_loc) { loc = new_loc; }
+  /*!
+   * @result the flag_type
+   */
+  flag_type get_type() { return t; }
+  // Derived classes must provide the following:
+  /*
+  kmp_info_t * get_waiter(kmp_uint32 i);
+  kmp_uint32 get_num_waiters();
+  bool done_check();
+  bool done_check_val(P old_loc);
+  bool notdone_check();
+  P internal_release();
+  void suspend(int th_gtid);
+  void resume(int th_gtid);
+  P set_sleeping();
+  P unset_sleeping();
+  bool is_sleeping();
+  bool is_any_sleeping();
+  bool is_sleeping_val(P old_loc);
+  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+                    int *thread_finished
+                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
+                    is_constrained);
+  */
 };
 
-/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
-   must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks!  */
+/* Spin wait loop that first does pause, then yield, then sleep. A thread that
+   calls __kmp_wait_*  must make certain that another thread calls __kmp_release
+   to wake it back up to prevent deadlocks!  */
 template <class C>
 static inline void
-__kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
-                    USE_ITT_BUILD_ARG(void * itt_sync_obj) )
-{
-    // NOTE: We may not belong to a team at this point.
-    volatile typename C::flag_t *spin = flag->get();
-    kmp_uint32 spins;
-    kmp_uint32 hibernate;
-    int th_gtid;
-    int tasks_completed = FALSE;
-    int oversubscribed;
-#if ! KMP_USE_MONITOR
-    kmp_uint64 poll_count;
-    kmp_uint64 hibernate_goal;
-#endif
-
-    KMP_FSYNC_SPIN_INIT(spin, NULL);
-    if (flag->done_check()) {
-        KMP_FSYNC_SPIN_ACQUIRED(spin);
-        return;
-    }
-    th_gtid = this_thr->th.th_info.ds.ds_gtid;
-    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
+__kmp_wait_template(kmp_info_t *this_thr, C *flag,
+                    int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+  // NOTE: We may not belong to a team at this point.
+  volatile typename C::flag_t *spin = flag->get();
+  kmp_uint32 spins;
+  kmp_uint32 hibernate;
+  int th_gtid;
+  int tasks_completed = FALSE;
+  int oversubscribed;
+#if !KMP_USE_MONITOR
+  kmp_uint64 poll_count;
+  kmp_uint64 hibernate_goal;
+#endif
+
+  KMP_FSYNC_SPIN_INIT(spin, NULL);
+  if (flag->done_check()) {
+    KMP_FSYNC_SPIN_ACQUIRED(spin);
+    return;
+  }
+  th_gtid = this_thr->th.th_info.ds.ds_gtid;
+  KA_TRACE(20,
+           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
 #if KMP_STATS_ENABLED
-    stats_state_e thread_state = KMP_GET_THREAD_STATE();
+  stats_state_e thread_state = KMP_GET_THREAD_STATE();
 #endif
 
 #if OMPT_SUPPORT && OMPT_BLAME
-    ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
-    if (ompt_enabled &&
-        ompt_state != ompt_state_undefined) {
-        if (ompt_state == ompt_state_idle) {
-            if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
-                ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
-            }
-        } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
-            KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
-                             ompt_state == ompt_state_wait_barrier_implicit ||
-                             ompt_state == ompt_state_wait_barrier_explicit);
-
-            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
-            ompt_parallel_id_t pId;
-            ompt_task_id_t tId;
-            if (team){
-                pId = team->ompt_team_info.parallel_id;
-                tId = team->ompt_task_info.task_id;
-            } else {
-                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
-                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
-            }
-            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
-        }
+  ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;
+  if (ompt_enabled && ompt_state != ompt_state_undefined) {
+    if (ompt_state == ompt_state_idle) {
+      if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
+        ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1);
+      }
+    } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) {
+      KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
+                       ompt_state == ompt_state_wait_barrier_implicit ||
+                       ompt_state == ompt_state_wait_barrier_explicit);
+
+      ompt_lw_taskteam_t *team =
+          this_thr->th.th_team->t.ompt_serialized_team_info;
+      ompt_parallel_id_t pId;
+      ompt_task_id_t tId;
+      if (team) {
+        pId = team->ompt_team_info.parallel_id;
+        tId = team->ompt_task_info.task_id;
+      } else {
+        pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
+        tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+      }
+      ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId);
     }
+  }
 #endif
 
-    // Setup for waiting
-    KMP_INIT_YIELD(spins);
+  // Setup for waiting
+  KMP_INIT_YIELD(spins);
 
-    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
 #if KMP_USE_MONITOR
-        // The worker threads cannot rely on the team struct existing at this point.
-        // Use the bt values cached in the thread struct instead.
+// The worker threads cannot rely on the team struct existing at this point.
+// Use the bt values cached in the thread struct instead.
 #ifdef KMP_ADJUST_BLOCKTIME
-        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
-            // Force immediate suspend if not set by user and more threads than available procs
-            hibernate = 0;
-        else
-            hibernate = this_thr->th.th_team_bt_intervals;
+    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
+      // Force immediate suspend if not set by user and more threads than
+      // available procs
+      hibernate = 0;
+    else
+      hibernate = this_thr->th.th_team_bt_intervals;
 #else
-        hibernate = this_thr->th.th_team_bt_intervals;
+    hibernate = this_thr->th.th_team_bt_intervals;
 #endif /* KMP_ADJUST_BLOCKTIME */
 
-        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
-           of the specified #intervals, plus up to one interval more.  This increment make
-           certain that this thread doesn't go to sleep too soon.  */
-        if (hibernate != 0)
-            hibernate++;
-
-        // Add in the current time value.
-        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
-        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
-                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
-                      hibernate - __kmp_global.g.g_time.dt.t_value));
+    /* If the blocktime is nonzero, we want to make sure that we spin wait for
+       the entirety of the specified #intervals, plus up to one interval more.
+       This increment makes certain that this thread doesn't go to sleep too
+       soon.  */
+    if (hibernate != 0)
+      hibernate++;
+
+    // Add in the current time value.
+    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
+    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
+                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
+                  hibernate - __kmp_global.g.g_time.dt.t_value));
 #else
-        hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
-        poll_count = 0;
+    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
+    poll_count = 0;
 #endif // KMP_USE_MONITOR
-    }
-
-    oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
-    KMP_MB();
+  }
 
-    // Main wait spin loop
-    while (flag->notdone_check()) {
-        int in_pool;
-        kmp_task_team_t * task_team = NULL;
-        if (__kmp_tasking_mode != tskm_immediate_exec) {
-            task_team = this_thr->th.th_task_team;
-            /* If the thread's task team pointer is NULL, it means one of 3 things:
-	       1) A newly-created thread is first being released by __kmp_fork_barrier(), and
-	          its task team has not been set up yet.
-	       2) All tasks have been executed to completion.
-	       3) Tasking is off for this region.  This could be because we are in a serialized region
-	          (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0).  */
-            if (task_team != NULL) {
-                if (TCR_SYNC_4(task_team->tt.tt_active)) {
-                    if (KMP_TASKING_ENABLED(task_team))
-                        flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
-                                            USE_ITT_BUILD_ARG(itt_sync_obj), 0);
-                    else
-                        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
-                }
-                else {
-                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
-                    this_thr->th.th_task_team = NULL;
-                    this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
-                }
-            } else {
-                this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
-            } // if
-        } // if
-
-        KMP_FSYNC_SPIN_PREPARE(spin);
-        if (TCR_4(__kmp_global.g.g_done)) {
-            if (__kmp_global.g.g_abort)
-                __kmp_abort_thread();
-            break;
-        }
+  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
+  KMP_MB();
 
-        // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
-        KMP_YIELD(oversubscribed);
-        // TODO: Should it be number of cores instead of thread contexts? Like:
-        // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
-        // Need performance improvement data to make the change...
-        KMP_YIELD_SPIN(spins);
-
-        // Check if this thread was transferred from a team
-        // to the thread pool (or vice-versa) while spinning.
-        in_pool = !!TCR_4(this_thr->th.th_in_pool);
-        if (in_pool != !!this_thr->th.th_active_in_pool) {
-            if (in_pool) { // Recently transferred from team to pool
-                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
-                this_thr->th.th_active_in_pool = TRUE;
-                /* Here, we cannot assert that:
-                   KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
-                   __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
-                   lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
-                   by the workers.  The two can get out of sync for brief periods of time.  */
-            }
-            else { // Recently transferred from pool to team
-                KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
-                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
-                this_thr->th.th_active_in_pool = FALSE;
-            }
-        }
+  // Main wait spin loop
+  while (flag->notdone_check()) {
+    int in_pool;
+    kmp_task_team_t *task_team = NULL;
+    if (__kmp_tasking_mode != tskm_immediate_exec) {
+      task_team = this_thr->th.th_task_team;
+      /* If the thread's task team pointer is NULL, it means one of 3 things:
+         1) A newly-created thread is first being released by
+         __kmp_fork_barrier(), and its task team has not been set up yet.
+         2) All tasks have been executed to completion.
+         3) Tasking is off for this region.  This could be because we are in a
+         serialized region (perhaps the outer one), or else tasking was manually
+         disabled (KMP_TASKING=0).  */
+      if (task_team != NULL) {
+        if (TCR_SYNC_4(task_team->tt.tt_active)) {
+          if (KMP_TASKING_ENABLED(task_team))
+            flag->execute_tasks(
+                this_thr, th_gtid, final_spin,
+                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
+          else
+            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+        } else {
+          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
+          this_thr->th.th_task_team = NULL;
+          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+        }
+      } else {
+        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
+      } // if
+    } // if
+
+    KMP_FSYNC_SPIN_PREPARE(spin);
+    if (TCR_4(__kmp_global.g.g_done)) {
+      if (__kmp_global.g.g_abort)
+        __kmp_abort_thread();
+      break;
+    }
+
+    // If we are oversubscribed, or have waited a bit (and
+    // KMP_LIBRARY=throughput), then yield
+    KMP_YIELD(oversubscribed);
+    // TODO: Should it be number of cores instead of thread contexts? Like:
+    // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
+    // Need performance improvement data to make the change...
+    KMP_YIELD_SPIN(spins);
+    // Check if this thread was transferred from a team
+    // to the thread pool (or vice-versa) while spinning.
+    in_pool = !!TCR_4(this_thr->th.th_in_pool);
+    if (in_pool != !!this_thr->th.th_active_in_pool) {
+      if (in_pool) { // Recently transferred from team to pool
+        KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
+        this_thr->th.th_active_in_pool = TRUE;
+        /* Here, we cannot assert that:
+           KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <=
+           __kmp_thread_pool_nth);
+           __kmp_thread_pool_nth is inc/dec'd by the master thread while the
+           fork/join lock is held, whereas __kmp_thread_pool_active_nth is
+           inc/dec'd asynchronously by the workers. The two can get out of sync
+           for brief periods of time.  */
+      } else { // Recently transferred from pool to team
+        KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
+        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
+        this_thr->th.th_active_in_pool = FALSE;
+      }
+    }
 
 #if KMP_STATS_ENABLED
-        // Check if thread has been signalled to idle state
-        // This indicates that the logical "join-barrier" has finished
-        if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
-            KMP_SET_THREAD_STATE(IDLE);
-            KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
-        }
+    // Check if thread has been signalled to idle state
+    // This indicates that the logical "join-barrier" has finished
+    if (this_thr->th.th_stats->isIdle() &&
+        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
+      KMP_SET_THREAD_STATE(IDLE);
+      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
+    }
 #endif
 
-        // Don't suspend if KMP_BLOCKTIME is set to "infinite"
-        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
-            continue;
-
-        // Don't suspend if there is a likelihood of new tasks being spawned.
-        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
-            continue;
+    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
+    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
+      continue;
+
+    // Don't suspend if there is a likelihood of new tasks being spawned.
+    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
+      continue;
 
 #if KMP_USE_MONITOR
-        // If we have waited a bit more, fall asleep
-        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
-            continue;
+    // If we have waited a bit more, fall asleep
+    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
+      continue;
 #else
-        if (KMP_BLOCKING(hibernate_goal, poll_count++))
-            continue;
+    if (KMP_BLOCKING(hibernate_goal, poll_count++))
+      continue;
 #endif
 
-        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
-
-        flag->suspend(th_gtid);
+    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
+    flag->suspend(th_gtid);
 
-        if (TCR_4(__kmp_global.g.g_done)) {
-            if (__kmp_global.g.g_abort)
-                __kmp_abort_thread();
-            break;
-        }
-        else if (__kmp_tasking_mode != tskm_immediate_exec
-                 && this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
-            this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
-        }
-        // TODO: If thread is done with work and times out, disband/free
+    if (TCR_4(__kmp_global.g.g_done)) {
+      if (__kmp_global.g.g_abort)
+        __kmp_abort_thread();
+      break;
+    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
+               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
+      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
     }
+    // TODO: If thread is done with work and times out, disband/free
+  }
 
 #if OMPT_SUPPORT && OMPT_BLAME
-    if (ompt_enabled &&
-        ompt_state != ompt_state_undefined) {
-        if (ompt_state == ompt_state_idle) {
-            if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
-                ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
-            }
-        } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
-            KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
-                             ompt_state == ompt_state_wait_barrier_implicit ||
-                             ompt_state == ompt_state_wait_barrier_explicit);
-
-            ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info;
-            ompt_parallel_id_t pId;
-            ompt_task_id_t tId;
-            if (team){
-                pId = team->ompt_team_info.parallel_id;
-                tId = team->ompt_task_info.task_id;
-            } else {
-                pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
-                tId = this_thr->th.th_current_task->ompt_task_info.task_id;
-            }
-            ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
-        }
+  if (ompt_enabled && ompt_state != ompt_state_undefined) {
+    if (ompt_state == ompt_state_idle) {
+      if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
+        ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1);
+      }
+    } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) {
+      KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier ||
+                       ompt_state == ompt_state_wait_barrier_implicit ||
+                       ompt_state == ompt_state_wait_barrier_explicit);
+
+      ompt_lw_taskteam_t *team =
+          this_thr->th.th_team->t.ompt_serialized_team_info;
+      ompt_parallel_id_t pId;
+      ompt_task_id_t tId;
+      if (team) {
+        pId = team->ompt_team_info.parallel_id;
+        tId = team->ompt_task_info.task_id;
+      } else {
+        pId = this_thr->th.th_team->t.ompt_team_info.parallel_id;
+        tId = this_thr->th.th_current_task->ompt_task_info.task_id;
+      }
+      ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId);
     }
+  }
 #endif
 #if KMP_STATS_ENABLED
-    // If we were put into idle state, pop that off the state stack
-    if (KMP_GET_THREAD_STATE() == IDLE) {
-        KMP_POP_PARTITIONED_TIMER();
-        KMP_SET_THREAD_STATE(thread_state);
-        this_thr->th.th_stats->resetIdleFlag();
-    }
+  // If we were put into idle state, pop that off the state stack
+  if (KMP_GET_THREAD_STATE() == IDLE) {
+    KMP_POP_PARTITIONED_TIMER();
+    KMP_SET_THREAD_STATE(thread_state);
+    this_thr->th.th_stats->resetIdleFlag();
+  }
 #endif
 
-    KMP_FSYNC_SPIN_ACQUIRED(spin);
+  KMP_FSYNC_SPIN_ACQUIRED(spin);
 }
 
-/* Release any threads specified as waiting on the flag by releasing the flag and resume the waiting thread
-   if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake
-   up the potentially sleeping thread and prevent deadlocks!  */
-template <class C>
-static inline void
-__kmp_release_template(C *flag)
-{
+/* Release any threads specified as waiting on the flag by releasing the flag
+   and resuming the waiting thread if indicated by the sleep bit(s). A thread that
+   calls __kmp_wait_template must call this function to wake up the potentially
+   sleeping thread and prevent deadlocks!  */
+template <class C> static inline void __kmp_release_template(C *flag) {
 #ifdef KMP_DEBUG
-    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
+  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
 #endif
-    KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
-    KMP_DEBUG_ASSERT(flag->get());
-    KMP_FSYNC_RELEASING(flag->get());
-
-    flag->internal_release();
-
-    KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get())));
-
-    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
-        // Only need to check sleep stuff if infinite block time not set
-        if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping?
-            for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
-                kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag
-                if (waiter) {
-                    int wait_gtid = waiter->th.th_info.ds.ds_gtid;
-                    // Wake up thread if needed
-                    KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n",
-                                  gtid, wait_gtid, flag->get()));
-                    flag->resume(wait_gtid); // unsets flag's current_waiter when done
-                }
-            }
+  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
+  KMP_DEBUG_ASSERT(flag->get());
+  KMP_FSYNC_RELEASING(flag->get());
+
+  flag->internal_release();
+
+  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
+                 *(flag->get())));
+
+  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
+    // Only need to check sleep stuff if infinite block time not set.
+    // Are *any* threads waiting on flag sleeping?
+    if (flag->is_any_sleeping()) {
+      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
+        // if sleeping waiter exists at i, sets current_waiter to i inside flag
+        kmp_info_t *waiter = flag->get_waiter(i);
+        if (waiter) {
+          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
+          // Wake up thread if needed
+          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
+                        "flag(%p) set\n",
+                        gtid, wait_gtid, flag->get()));
+          flag->resume(wait_gtid); // unsets flag's current_waiter when done
         }
+      }
     }
+  }
 }
 
-template <typename FlagType>
-struct flag_traits {};
+template <typename FlagType> struct flag_traits {};
 
-template <>
-struct flag_traits<kmp_uint32> {
-    typedef kmp_uint32 flag_t;
-    static const flag_type t = flag32;
-    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
-    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
-    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
-    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
+template <> struct flag_traits<kmp_uint32> {
+  typedef kmp_uint32 flag_t;
+  static const flag_type t = flag32;
+  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
+  static inline flag_t test_then_add4(volatile flag_t *f) {
+    return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f);
+  }
+  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+    return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v);
+  }
+  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+    return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v);
+  }
 };
 
-template <>
-struct flag_traits<kmp_uint64> {
-    typedef kmp_uint64 flag_t;
-    static const flag_type t = flag64;
-    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
-    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
-    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
-    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
+template <> struct flag_traits<kmp_uint64> {
+  typedef kmp_uint64 flag_t;
+  static const flag_type t = flag64;
+  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
+  static inline flag_t test_then_add4(volatile flag_t *f) {
+    return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f);
+  }
+  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
+    return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v);
+  }
+  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
+    return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v);
+  }
 };
 
-template <typename FlagType>
-class kmp_basic_flag : public kmp_flag<FlagType> {
-    typedef flag_traits<FlagType> traits_type;
-    FlagType checker;  /**< Value to compare flag to to check if flag has been released. */
-    kmp_info_t * waiting_threads[1];  /**< Array of threads sleeping on this thread. */
-    kmp_uint32 num_waiting_threads;       /**< Number of threads sleeping on this thread. */
- public:
-    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
-    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
-        waiting_threads[0] = thr;
-    }
-    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
-    /*!
-     * param i in   index into waiting_threads
-     * @result the thread that is waiting at index i
-     */
-    kmp_info_t * get_waiter(kmp_uint32 i) {
-        KMP_DEBUG_ASSERT(i<num_waiting_threads);
-        return waiting_threads[i];
-    }
-    /*!
-     * @result num_waiting_threads
-     */
-    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
-    /*!
-     * @param thr in   the thread which is now waiting
-     *
-     * Insert a waiting thread at index 0.
-     */
-    void set_waiter(kmp_info_t *thr) {
-        waiting_threads[0] = thr;
-        num_waiting_threads = 1;
-    }
-    /*!
-     * @result true if the flag object has been released.
-     */
-    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
-    /*!
-     * @param old_loc in   old value of flag
-     * @result true if the flag's old value indicates it was released.
-     */
-    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
-    /*!
-     * @result true if the flag object is not yet released.
-     * Used in __kmp_wait_template like:
-     * @code
-     * while (flag.notdone_check()) { pause(); }
-     * @endcode
-     */
-    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
-    /*!
-     * @result Actual flag value before release was applied.
-     * Trigger all waiting threads to run by modifying flag to release state.
-     */
-    void internal_release() {
-        (void) traits_type::test_then_add4((volatile FlagType *)this->get());
-    }
-    /*!
-     * @result Actual flag value before sleep bit(s) set.
-     * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s).
-     */
-    FlagType set_sleeping() {
-        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
-    }
-    /*!
-     * @result Actual flag value before sleep bit(s) cleared.
-     * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s).
-     */
-    FlagType unset_sleeping() {
-        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
-    }
-    /*!
-     * @param old_loc in   old value of flag
-     * Test whether there are threads sleeping on the flag's old value in old_loc.
-     */
-    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
-    /*!
-     * Test whether there are threads sleeping on the flag.
-     */
-    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
-    bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
-    kmp_uint8 *get_stolen() { return NULL; }
-    enum barrier_type get_bt() { return bs_last_barrier; }
+template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
+  typedef flag_traits<FlagType> traits_type;
+  FlagType checker; /**< Value to compare the flag against to check if the
+                       flag has been released. */
+  kmp_info_t
+      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
+  kmp_uint32
+      num_waiting_threads; /**< Number of threads sleeping on this thread. */
+public:
+  kmp_basic_flag(volatile FlagType *p)
+      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
+  kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr)
+      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
+    waiting_threads[0] = thr;
+  }
+  kmp_basic_flag(volatile FlagType *p, FlagType c)
+      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
+        num_waiting_threads(0) {}
+  /*!
+   * param i in   index into waiting_threads
+   * @result the thread that is waiting at index i
+   */
+  kmp_info_t *get_waiter(kmp_uint32 i) {
+    KMP_DEBUG_ASSERT(i < num_waiting_threads);
+    return waiting_threads[i];
+  }
+  /*!
+   * @result num_waiting_threads
+   */
+  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+  /*!
+   * @param thr in   the thread which is now waiting
+   *
+   * Insert a waiting thread at index 0.
+   */
+  void set_waiter(kmp_info_t *thr) {
+    waiting_threads[0] = thr;
+    num_waiting_threads = 1;
+  }
+  /*!
+   * @result true if the flag object has been released.
+   */
+  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
+  /*!
+   * @param old_loc in   old value of flag
+   * @result true if the flag's old value indicates it was released.
+   */
+  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
+  /*!
+   * @result true if the flag object is not yet released.
+   * Used in __kmp_wait_template like:
+   * @code
+   * while (flag.notdone_check()) { pause(); }
+   * @endcode
+   */
+  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
+  /*!
+   * @result Actual flag value before release was applied.
+   * Trigger all waiting threads to run by modifying flag to release state.
+   */
+  void internal_release() {
+    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
+  }
+  /*!
+   * @result Actual flag value before sleep bit(s) set.
+   * Notes that there is at least one thread sleeping on the flag by setting
+   * sleep bit(s).
+   */
+  FlagType set_sleeping() {
+    return traits_type::test_then_or((volatile FlagType *)this->get(),
+                                     KMP_BARRIER_SLEEP_STATE);
+  }
+  /*!
+   * @result Actual flag value before sleep bit(s) cleared.
+   * Notes that there are no longer threads sleeping on the flag by clearing
+   * sleep bit(s).
+   */
+  FlagType unset_sleeping() {
+    return traits_type::test_then_and((volatile FlagType *)this->get(),
+                                      ~KMP_BARRIER_SLEEP_STATE);
+  }
+  /*!
+   * @param old_loc in   old value of flag
+   * Test whether there are threads sleeping on the flag's old value in old_loc.
+   */
+  bool is_sleeping_val(FlagType old_loc) {
+    return old_loc & KMP_BARRIER_SLEEP_STATE;
+  }
+  /*!
+   * Test whether there are threads sleeping on the flag.
+   */
+  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
+  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
+  kmp_uint8 *get_stolen() { return NULL; }
+  enum barrier_type get_bt() { return bs_last_barrier; }
 };
 
 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
- public:
-    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
-    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
-    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
-    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
-    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
-    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
-                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
-        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
-                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-    }
-    void wait(kmp_info_t *this_thr, int final_spin
-              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
-        __kmp_wait_template(this_thr, this, final_spin
-                            USE_ITT_BUILD_ARG(itt_sync_obj));
-    }
-    void release() { __kmp_release_template(this); }
-    flag_type get_ptr_type() { return flag32; }
+public:
+  kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
+  kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr)
+      : kmp_basic_flag<kmp_uint32>(p, thr) {}
+  kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c)
+      : kmp_basic_flag<kmp_uint32>(p, c) {}
+  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
+  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
+  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+                    kmp_int32 is_constrained) {
+    return __kmp_execute_tasks_32(
+        this_thr, gtid, this, final_spin,
+        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+  }
+  void wait(kmp_info_t *this_thr,
+            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+    __kmp_wait_template(this_thr, this,
+                        final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+  }
+  void release() { __kmp_release_template(this); }
+  flag_type get_ptr_type() { return flag32; }
 };
 
 class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
- public:
-    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
-    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
-    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
-    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
-    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
-    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
-                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
-        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
-                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-    }
-    void wait(kmp_info_t *this_thr, int final_spin
-              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
-        __kmp_wait_template(this_thr, this, final_spin
-                            USE_ITT_BUILD_ARG(itt_sync_obj));
-    }
-    void release() { __kmp_release_template(this); }
-    flag_type get_ptr_type() { return flag64; }
+public:
+  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
+  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
+      : kmp_basic_flag<kmp_uint64>(p, thr) {}
+  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
+      : kmp_basic_flag<kmp_uint64>(p, c) {}
+  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
+  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
+  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+                    kmp_int32 is_constrained) {
+    return __kmp_execute_tasks_64(
+        this_thr, gtid, this, final_spin,
+        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+  }
+  void wait(kmp_info_t *this_thr,
+            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
+    __kmp_wait_template(this_thr, this,
+                        final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+  }
+  void release() { __kmp_release_template(this); }
+  flag_type get_ptr_type() { return flag64; }
 };
 
 // Hierarchical 64-bit on-core barrier instantiation
 class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
-    kmp_uint64 checker;
-    kmp_info_t * waiting_threads[1];
-    kmp_uint32 num_waiting_threads;
-    kmp_uint32 offset;      /**< Portion of flag that is of interest for an operation. */
-    bool flag_switch;       /**< Indicates a switch in flag location. */
-    enum barrier_type bt;   /**< Barrier type. */
-    kmp_info_t * this_thr;  /**< Thread that may be redirected to different flag location. */
+  kmp_uint64 checker;
+  kmp_info_t *waiting_threads[1];
+  kmp_uint32 num_waiting_threads;
+  kmp_uint32
+      offset; /**< Portion of flag that is of interest for an operation. */
+  bool flag_switch; /**< Indicates a switch in flag location. */
+  enum barrier_type bt; /**< Barrier type. */
+  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
+                           location. */
 #if USE_ITT_BUILD
-    void *itt_sync_obj;     /**< ITT object that must be passed to new flag location. */
+  void *
+      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
 #endif
-    unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; }
+  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
+    return ((unsigned char *)loc)[offset];
+  }
+
 public:
-    kmp_flag_oncore(volatile kmp_uint64 *p)
-        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
-    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
-        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
-    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
-                    kmp_info_t * thr
+  kmp_flag_oncore(volatile kmp_uint64 *p)
+      : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
+        flag_switch(false) {}
+  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
+      : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
+        offset(idx), flag_switch(false) {}
+  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
+                  enum barrier_type bar_t, kmp_info_t *thr
 #if USE_ITT_BUILD
-                    , void *itt
+                  ,
+                  void *itt
 #endif
-                    )
-        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
-          flag_switch(false), bt(bar_t), this_thr(thr)
+                  )
+      : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c),
+        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
+        this_thr(thr)
 #if USE_ITT_BUILD
-        , itt_sync_obj(itt)
+        ,
+        itt_sync_obj(itt)
 #endif
-        {}
-    kmp_info_t * get_waiter(kmp_uint32 i) {
-        KMP_DEBUG_ASSERT(i<num_waiting_threads);
-        return waiting_threads[i];
-    }
-    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
-    void set_waiter(kmp_info_t *thr) {
-        waiting_threads[0] = thr;
-        num_waiting_threads = 1;
-    }
-    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
-    bool done_check() { return done_check_val(*get()); }
-    bool notdone_check() {
-        // Calculate flag_switch
-        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
-            flag_switch = true;
-        if (byteref(get(),offset) != 1 && !flag_switch)
-            return true;
-        else if (flag_switch) {
-            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
-            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
-            __kmp_wait_64(this_thr, &flag, TRUE
+  {
+  }
+  kmp_info_t *get_waiter(kmp_uint32 i) {
+    KMP_DEBUG_ASSERT(i < num_waiting_threads);
+    return waiting_threads[i];
+  }
+  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
+  void set_waiter(kmp_info_t *thr) {
+    waiting_threads[0] = thr;
+    num_waiting_threads = 1;
+  }
+  bool done_check_val(kmp_uint64 old_loc) {
+    return byteref(&old_loc, offset) == checker;
+  }
+  bool done_check() { return done_check_val(*get()); }
+  bool notdone_check() {
+    // Calculate flag_switch
+    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
+      flag_switch = true;
+    if (byteref(get(), offset) != 1 && !flag_switch)
+      return true;
+    else if (flag_switch) {
+      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
+      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
+                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
+      __kmp_wait_64(this_thr, &flag, TRUE
 #if USE_ITT_BUILD
-                          , itt_sync_obj
+                    ,
+                    itt_sync_obj
 #endif
-                          );
-        }
-        return false;
-    }
-    void internal_release() {
-        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
-            byteref(get(),offset) = 1;
-        }
-        else {
-            kmp_uint64 mask=0;
-            byteref(&mask,offset) = 1;
-            (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
-        }
+                    );
     }
-    kmp_uint64 set_sleeping() {
-        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
-    }
-    kmp_uint64 unset_sleeping() {
-        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
-    }
-    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
-    bool is_sleeping() { return is_sleeping_val(*get()); }
-    bool is_any_sleeping() { return is_sleeping_val(*get()); }
-    void wait(kmp_info_t *this_thr, int final_spin) {
-        __kmp_wait_template<kmp_flag_oncore>(this_thr, this, final_spin
-                            USE_ITT_BUILD_ARG(itt_sync_obj));
-    }
-    void release() { __kmp_release_template(this); }
-    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
-    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
-    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
-                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
-        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
-                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
-    }
-    kmp_uint8 *get_stolen() { return NULL; }
-    enum barrier_type get_bt() { return bt; }
-    flag_type get_ptr_type() { return flag_oncore; }
+    return false;
+  }
+  void internal_release() {
+    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
+      byteref(get(), offset) = 1;
+    } else {
+      kmp_uint64 mask = 0;
+      byteref(&mask, offset) = 1;
+      (void)KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
+    }
+  }
+  kmp_uint64 set_sleeping() {
+    return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(),
+                              KMP_BARRIER_SLEEP_STATE);
+  }
+  kmp_uint64 unset_sleeping() {
+    return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(),
+                               ~KMP_BARRIER_SLEEP_STATE);
+  }
+  bool is_sleeping_val(kmp_uint64 old_loc) {
+    return old_loc & KMP_BARRIER_SLEEP_STATE;
+  }
+  bool is_sleeping() { return is_sleeping_val(*get()); }
+  bool is_any_sleeping() { return is_sleeping_val(*get()); }
+  void wait(kmp_info_t *this_thr, int final_spin) {
+    __kmp_wait_template<kmp_flag_oncore>(
+        this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
+  }
+  void release() { __kmp_release_template(this); }
+  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
+  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
+  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
+                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
+                    kmp_int32 is_constrained) {
+    return __kmp_execute_tasks_oncore(
+        this_thr, gtid, this, final_spin,
+        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+  }
+  kmp_uint8 *get_stolen() { return NULL; }
+  enum barrier_type get_bt() { return bt; }
+  flag_type get_ptr_type() { return flag_oncore; }
 };
 
-// Used to wake up threads, volatile void* flag is usually the th_sleep_loc associated
-// with int gtid.
+// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
+// associated with int gtid.
 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
-    if (!flag) return;
+  if (!flag)
+    return;
 
-    switch (((kmp_flag_64 *)flag)->get_type()) {
-    case flag32: __kmp_resume_32(gtid, NULL); break;
-    case flag64: __kmp_resume_64(gtid, NULL); break;
-    case flag_oncore: __kmp_resume_oncore(gtid, NULL); break;
-    }
+  switch (((kmp_flag_64 *)flag)->get_type()) {
+  case flag32:
+    __kmp_resume_32(gtid, NULL);
+    break;
+  case flag64:
+    __kmp_resume_64(gtid, NULL);
+    break;
+  case flag_oncore:
+    __kmp_resume_oncore(gtid, NULL);
+    break;
+  }
 }
 
 /*!

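One thing worth noting about the header above: the flag classes are wired into
__kmp_wait_template and __kmp_release_template by compile-time duck typing
rather than virtual dispatch. The templates simply call done_check(),
notdone_check(), suspend() and the other methods listed in the "Derived classes
must provide" comment on whatever class they are instantiated with. A
stripped-down sketch of the pattern (toy names only, none of the runtime's
actual members):

    #include <cstdio>

    // Generic wait loop: the only requirement on F is a notdone_check()
    // method, mirroring how __kmp_wait_template is written against an
    // implicit interface.
    template <class F> static void toy_wait_template(F *flag) {
      while (flag->notdone_check()) {
        // the runtime's loop would pause, yield, run tasks or sleep here
      }
    }

    struct toy_flag32 {
      volatile unsigned *loc;
      unsigned checker;
      bool notdone_check() const { return *loc != checker; }
    };

    int main() {
      volatile unsigned storage = 1;
      toy_flag32 f{&storage, 1u};
      toy_wait_template(&f); // returns at once: *loc already equals checker
      std::puts("done");
      return 0;
    }
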
Modified: openmp/trunk/runtime/src/kmp_wrapper_getpid.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wrapper_getpid.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wrapper_getpid.h (original)
+++ openmp/trunk/runtime/src/kmp_wrapper_getpid.h Fri May 12 13:01:32 2017
@@ -18,50 +18,52 @@
 
 #if KMP_OS_UNIX
 
-    // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers.
-    #include <sys/types.h>
-    #include <unistd.h>
-    #include <sys/syscall.h>
-    #if KMP_OS_DARWIN
-    //OS X
-    #define __kmp_gettid() syscall(SYS_thread_selfid)
-    #elif defined(SYS_gettid)
-    // Hopefully other Unix systems define SYS_gettid syscall for getting os thread id
-    #define __kmp_gettid() syscall(SYS_gettid)
-    #else
-    #warning No gettid found, use getpid instead
-    #define __kmp_gettid() getpid()
-    #endif
+// On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard
+// headers.
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#if KMP_OS_DARWIN
+// OS X
+#define __kmp_gettid() syscall(SYS_thread_selfid)
+#elif defined(SYS_gettid)
+// Hopefully other Unix systems define SYS_gettid syscall for getting os thread
+// id
+#define __kmp_gettid() syscall(SYS_gettid)
+#else
+#warning No gettid found, use getpid instead
+#define __kmp_gettid() getpid()
+#endif
 
 #elif KMP_OS_WINDOWS
 
-    // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h".
-    #include <process.h>
-    // Let us simulate Unix.
-    typedef int pid_t;
-    #define getpid _getpid
-    #define __kmp_gettid() GetCurrentThreadId()
+// On Windows* OS _getpid() returns int (not pid_t) and is declared in
+// "process.h".
+#include <process.h>
+// Let us simulate Unix.
+typedef int pid_t;
+#define getpid _getpid
+#define __kmp_gettid() GetCurrentThreadId()
 
 #else
 
-    #error Unknown or unsupported OS.
+#error Unknown or unsupported OS.
 
 #endif
 
-/*
-    TODO: All the libomp source code uses pid_t type for storing the result of getpid(), it is good.
-    But often it printed as "%d", that is not good, because it ignores pid_t definition (may pid_t
-    be longer that int?). It seems all pid prints should be rewritten as
-
-        printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid );
+/* TODO: All the libomp source code uses the pid_t type for storing the result
+   of getpid(), which is good. But it is often printed as "%d", which is not
+   good, because it ignores the pid_t definition (may pid_t be longer than
+   int?). It seems all pid prints should be rewritten as:
 
-    or (at least) as
+   printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid );
 
-        printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid );
+   or (at least) as
 
-    (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in "kmp_os.h".)
+   printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid );
 
-*/
+   (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UINT32_SPEC are defined in
+   "kmp_os.h".)  */
 
 #endif // KMP_WRAPPER_GETPID_H
 

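The TODO at the end of this header is about format-specifier mismatches: pid_t
is only guaranteed to be some integer type, so printing it with "%d" is not
strictly portable. The cast-to-a-known-type idiom the comment recommends looks
roughly like this in plain C++ (shown with long long and the standard POSIX
headers instead of the library's kmp_uint64/KMP_UINT64_SPEC machinery):

    #include <cstdio>
    #include <sys/types.h>
    #include <unistd.h>

    int main() {
      pid_t pid = getpid();
      // Cast to a fixed, known type so the format specifier always matches,
      // whatever integer type the platform uses for pid_t.
      std::printf("pid = %lld\n", static_cast<long long>(pid));
      return 0;
    }
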
Modified: openmp/trunk/runtime/src/kmp_wrapper_malloc.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_wrapper_malloc.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_wrapper_malloc.h (original)
+++ openmp/trunk/runtime/src/kmp_wrapper_malloc.h Fri May 12 13:01:32 2017
@@ -17,21 +17,18 @@
 #ifndef KMP_WRAPPER_MALLOC_H
 #define KMP_WRAPPER_MALLOC_H
 
-/*
-    This header serves for 3 purposes:
-
-        1. Declaring standard memory allocation rourines in OS-independent way.
-        2. Passing source location info through memory allocation wrappers.
-        3. Enabling native memory debugging capabilities.
-
-
-    1. Declaring standard memory allocation rourines in OS-independent way.
-    -----------------------------------------------------------------------
-
-    On Linux* OS, alloca() function is declared in <alloca.h> header, while on Windows* OS there is no
-    <alloca.h> header, function _alloca() (note underscore!) is declared in <malloc.h>. This header
-    eliminates these differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on
-    following routines:
+/* This header serves 3 purposes:
+   1. Declaring standard memory allocation routines in an OS-independent way.
+   2. Passing source location info through memory allocation wrappers.
+   3. Enabling native memory debugging capabilities.
+
+   1. Declaring standard memory allocation routines in an OS-independent way.
+   --------------------------------------------------------------------------
+   On Linux* OS, the alloca() function is declared in the <alloca.h> header,
+   while on Windows* OS there is no <alloca.h> header; the function _alloca()
+   (note the underscore!) is declared in <malloc.h>. This header eliminates
+   these differences, so client code including "kmp_wrapper_malloc.h" can rely
+   on the following routines:
 
         malloc
         calloc
@@ -39,60 +36,56 @@
         free
         alloca
 
-    in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently
-    it is available only on Windows* OS.)
-
-
-    2. Passing source location info through memory allocation wrappers.
-    -------------------------------------------------------------------
-
-    Some tools may help debugging memory errors, for example, report memory leaks. However, memory
-    allocation wrappers may hinder source location.
-
-    For example:
-
-        void * aligned_malloc( int size ) {
-            void * ptr = malloc( size ); // All the memory leaks will be reported at this line.
-            // some adjustments...
-            return ptr;
-        };
-
-        ptr = aligned_malloc( size );    // Memory leak will *not* be detected here. :-(
-
-    To overcome the problem, information about original source location should be passed through all
-    the memory allocation wrappers, for example:
-
-        void * aligned_malloc( int size, char const * file, int line ) {
-            void * ptr = _malloc_dbg( size, file, line );
-            // some adjustments...
-            return ptr;
-        };
-
-        void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
-
-    This is a good idea for debug, but passing additional arguments impacts performance. Disabling
-    extra arguments in release version of the software introduces too many conditional compilation,
-    which makes code unreadable. This header defines few macros and functions facilitating it:
-
-        void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
-            void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
-            // some adjustments...
-            return ptr;
-        };
-        #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
-            // Use macro instead of direct call to function.
-
-        void * ptr = aligned_malloc( size );  // Bingo! Memory leak will be reported at this line.
-
-
-    3. Enabling native memory debugging capabilities.
-    -------------------------------------------------
-
-    Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft
-    RTL tracks all memory allocations and can report memory leaks. This header enables this, and
-    makes report more useful (see "Passing source location info through memory allocation
-    wrappers").
+   in an OS-independent way. It also enables memory tracking capabilities in
+   debug builds. (Currently this is available only on Windows* OS.)
 
+   2. Passing source location info through memory allocation wrappers.
+   -------------------------------------------------------------------
+   Some tools may help debug memory errors by, for example, reporting memory
+   leaks. However, memory allocation wrappers may obscure the source location.
+   For example:
+
+   void * aligned_malloc( int size ) {
+     void * ptr = malloc( size ); // All the memory leaks will be reported at
+                                  // this line.
+     // some adjustments...
+     return ptr;
+   };
+
+   ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-(
+
+   To overcome the problem, information about the original source location
+   should be passed through all the memory allocation wrappers, for example:
+
+   void * aligned_malloc( int size, char const * file, int line ) {
+     void * ptr = _malloc_dbg( size, file, line );
+     // some adjustments...
+     return ptr;
+   };
+   void * ptr = aligned_malloc( size, __FILE__, __LINE__ );
+
+   This is a good idea for debugging, but passing additional arguments
+   impacts performance. Disabling the extra arguments in the release version
+   of the software would introduce too much conditional compilation, which
+   makes the code unreadable. This header defines a few macros and functions
+   to facilitate this:
+
+   void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
+     void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
+     // some adjustments...
+     return ptr;
+   };
+   #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
+   // Use macro instead of direct call to function.
+
+   void * ptr = aligned_malloc( size );  // Bingo! Memory leak will be
+                                         // reported at this line.
+
+   3. Enabling native memory debugging capabilities.
+   -------------------------------------------------
+   Some platforms may offer memory debugging capabilities. For example, the
+   debug version of the Microsoft RTL tracks all memory allocations and can
+   report memory leaks. This header enables these capabilities and makes the
+   report more useful (see "Passing source location info through memory
+   allocation wrappers").
 */
 
 #include <stdlib.h>
@@ -101,102 +94,101 @@
 
 // Include alloca() declaration.
 #if KMP_OS_WINDOWS
-    #include <malloc.h>        // Windows* OS: _alloca() declared in "malloc.h".
-    #define alloca _alloca     // Allow to use alloca() with no underscore.
+#include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
+#define alloca _alloca // Allows using alloca() with no underscore.
 #elif KMP_OS_FREEBSD || KMP_OS_NETBSD
-    // Declared in "stdlib.h".
+// Declared in "stdlib.h".
 #elif KMP_OS_UNIX
-    #include <alloca.h>        // Linux* OS and OS X*: alloc() declared in "alloca".
+#include <alloca.h> // Linux* OS and OS X*: alloca() declared in "alloca.h".
 #else
-    #error Unknown or unsupported OS.
+#error Unknown or unsupported OS.
 #endif
 
-/*
-    KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in function declaration.
-    KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass parameters to underlying
-        levels.
-    KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at
-        top-level.
-
-    Typical usage:
-
-        void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
-            // Note: Comma is missed before KMP_SRC_LOC_DECL.
-            KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
-            ...
-        }
-        #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
-            // Use macro instead of direct call to function -- macro passes info about current
-            // source location to the func.
+/* KMP_SRC_LOC_DECL -- Declares source location parameters, to be used in a
+   function declaration.
+   KMP_SRC_LOC_PARM -- Source location parameters, to be used to pass the
+   parameters to underlying levels.
+   KMP_SRC_LOC_CURR -- Source location arguments describing the current
+   location, to be used at the top level.
+
+   Typical usage:
+   void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) {
+     // Note: The comma before KMP_SRC_LOC_DECL is intentionally omitted.
+     KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) );
+     ...
+   }
+   #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR )
+   // Use macro instead of direct call to function -- macro passes info
+   // about current source location to the func.
 */
 #if KMP_DEBUG
-    #define KMP_SRC_LOC_DECL    , char const * _file_, int _line_
-    #define KMP_SRC_LOC_PARM    , _file_, _line_
-    #define KMP_SRC_LOC_CURR    , __FILE__, __LINE__
+#define KMP_SRC_LOC_DECL , char const *_file_, int _line_
+#define KMP_SRC_LOC_PARM , _file_, _line_
+#define KMP_SRC_LOC_CURR , __FILE__, __LINE__
 #else
-    #define KMP_SRC_LOC_DECL
-    #define KMP_SRC_LOC_PARM
-    #define KMP_SRC_LOC_CURR
+#define KMP_SRC_LOC_DECL
+#define KMP_SRC_LOC_PARM
+#define KMP_SRC_LOC_CURR
 #endif // KMP_DEBUG
 
-/*
-    malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) with accepts extra
-    arguments (source location info) in debug mode. They should be used in place of malloc() and
-    free(), this allows enabling native memory debugging capabilities (if any).
-
-    Typical usage:
-
-        ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
-            // Inside memory allocation wrapper, or
-        ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
-            // Outside of memory allocation wrapper.
-
-
+/* malloc_src_loc() and free_src_loc() are pseudo-functions (really macros)
+   which accept extra arguments (source location info) in debug mode. They
+   should be used in place of malloc() and free(); this allows enabling
+   native memory debugging capabilities (if any).
+
+   Typical usage:
+   ptr = malloc_src_loc( size KMP_SRC_LOC_PARM );
+   // Inside memory allocation wrapper, or
+   ptr = malloc_src_loc( size KMP_SRC_LOC_CURR );
+   // Outside of memory allocation wrapper.
 */
-#define malloc_src_loc( args )    _malloc_src_loc( args )
-#define free_src_loc(   args )    _free_src_loc(   args )
-    /*
-        Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3
-        parameters, but calls to malloc_src_loc() are always the same:
-
-            ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
-
-        Compiler issues warning/error "too few arguments in macro invocation". Declaring two
-        macroses, malloc_src_loc() and _malloc_src_loc() overcomes the problem.
-    */
+#define malloc_src_loc(args) _malloc_src_loc(args)
+#define free_src_loc(args) _free_src_loc(args)
+/* Depending on build mode (debug or release), malloc_src_loc is declared with
+   1 or 3 parameters, but calls to malloc_src_loc() are always the same:
+
+   ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR
+
+   With a single macro, the compiler would issue the warning/error "too few
+   arguments in macro invocation". Declaring two macros, malloc_src_loc() and
+   _malloc_src_loc(), overcomes the problem. */
 
 #if KMP_DEBUG
 
-    #if KMP_OS_WINDOWS && _DEBUG
-        // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined.
+#if KMP_OS_WINDOWS && _DEBUG
+// KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined.
 
-        // Windows* OS has native memory debugging capabilities. Enable them.
+// Windows* OS has native memory debugging capabilities. Enable them.
 
-        #include <crtdbg.h>
+#include <crtdbg.h>
 
-        #define KMP_MEM_BLOCK           _CLIENT_BLOCK
-        #define malloc( size )          _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
-        #define calloc( num, size )     _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
-        #define realloc( ptr, size )    _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ )
-        #define free( ptr )             _free_dbg( (ptr), KMP_MEM_BLOCK )
+#define KMP_MEM_BLOCK _CLIENT_BLOCK
+#define malloc(size) _malloc_dbg((size), KMP_MEM_BLOCK, __FILE__, __LINE__)
+#define calloc(num, size)                                                      \
+  _calloc_dbg((num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__)
+#define realloc(ptr, size)                                                     \
+  _realloc_dbg((ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__)
+#define free(ptr) _free_dbg((ptr), KMP_MEM_BLOCK)
+
+#define _malloc_src_loc(size, file, line)                                      \
+  _malloc_dbg((size), KMP_MEM_BLOCK, (file), (line))
+#define _free_src_loc(ptr, file, line) _free_dbg((ptr), KMP_MEM_BLOCK)
 
-        #define _malloc_src_loc( size, file, line )  _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) )
-        #define _free_src_loc(    ptr, file, line )  _free_dbg(   (ptr),  KMP_MEM_BLOCK                 )
-
-    #else
+#else
 
-        // Linux* OS, OS X*, or non-debug Windows* OS.
+// Linux* OS, OS X*, or non-debug Windows* OS.
 
-        #define _malloc_src_loc( size, file, line )    malloc( (size) )
-        #define _free_src_loc( ptr, file, line )       free( (ptr) )
+#define _malloc_src_loc(size, file, line) malloc((size))
+#define _free_src_loc(ptr, file, line) free((ptr))
 
-    #endif
+#endif
 
 #else
 
-    // In release build malloc_src_loc() and free_src_loc() do not have extra parameters.
-    #define _malloc_src_loc( size )    malloc( (size) )
-    #define _free_src_loc( ptr )       free( (ptr) )
+// In release build malloc_src_loc() and free_src_loc() do not have extra
+// parameters.
+#define _malloc_src_loc(size) malloc((size))
+#define _free_src_loc(ptr) free((ptr))
 
 #endif // KMP_DEBUG
 
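For orientation, here is a minimal sketch of how client code typically builds a
wrapper on top of these macros. It is not taken from the patch: the names
_my_alloc/my_alloc are made up, and the sketch assumes the usual kmp.h context
so that KMP_DEBUG and the KMP_OS_* macros are already defined.

    #include "kmp_wrapper_malloc.h"

    // Wrapper: in debug builds KMP_SRC_LOC_DECL adds the (_file_, _line_)
    // parameters and KMP_SRC_LOC_PARM forwards them to the underlying
    // allocator; in release builds all three macros expand to nothing.
    static void *_my_alloc(int size KMP_SRC_LOC_DECL) {
      void *ptr = malloc_src_loc(size KMP_SRC_LOC_PARM);
      return ptr;
    }
    // KMP_SRC_LOC_CURR expands to ", __FILE__, __LINE__" in debug builds, so
    // the caller's location travels with the request.
    #define my_alloc(size) _my_alloc((size)KMP_SRC_LOC_CURR)

    // In a Windows* OS debug build, a leak of this block is reported here.
    void *p = my_alloc(128);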

Modified: openmp/trunk/runtime/src/ompt-event-specific.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-event-specific.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-event-specific.h (original)
+++ openmp/trunk/runtime/src/ompt-event-specific.h Fri May 12 13:01:32 2017
@@ -1,5 +1,5 @@
-#ifndef  __OMPT_EVENT_SPECIFIC_H__
-#define  __OMPT_EVENT_SPECIFIC_H__
+#ifndef __OMPT_EVENT_SPECIFIC_H__
+#define __OMPT_EVENT_SPECIFIC_H__
 
 /******************************************************************************
  * File: ompt-event-specific.h
@@ -10,10 +10,9 @@
  *   and the level of their implementation by a runtime system.
  *****************************************************************************/
 
-#define _ompt_tokenpaste_helper(x,y)        x ## y
-#define _ompt_tokenpaste(x,y)               _ompt_tokenpaste_helper(x,y)
-#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented)
-
+#define _ompt_tokenpaste_helper(x, y) x##y
+#define _ompt_tokenpaste(x, y) _ompt_tokenpaste_helper(x, y)
+#define ompt_event_implementation_status(e) _ompt_tokenpaste(e, _implemented)
 
 /*----------------------------------------------------------------------------
  | Specify whether an event may occur or not, and whether event callbacks
@@ -23,130 +22,132 @@
  | the OMPT TR. They are exposed to tools through ompt_set_callback.
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_NEVER             ompt_set_result_event_never_occurs
-#define ompt_event_UNIMPLEMENTED     ompt_set_result_event_may_occur_no_callback
-#define ompt_event_MAY_CONVENIENT    ompt_set_result_event_may_occur_callback_some
-#define ompt_event_MAY_ALWAYS        ompt_set_result_event_may_occur_callback_always
+#define ompt_event_NEVER ompt_set_result_event_never_occurs
+#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback
+#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some
+#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always
 
 #if OMPT_TRACE
-#define ompt_event_MAY_ALWAYS_TRACE   ompt_event_MAY_ALWAYS
+#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS
 #else
-#define ompt_event_MAY_ALWAYS_TRACE   ompt_event_UNIMPLEMENTED
+#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED
 #endif
 
 #if OMPT_BLAME
-#define ompt_event_MAY_ALWAYS_BLAME   ompt_event_MAY_ALWAYS
+#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS
 #else
-#define ompt_event_MAY_ALWAYS_BLAME   ompt_event_UNIMPLEMENTED
+#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED
 #endif
 
 /*----------------------------------------------------------------------------
  | Mandatory Events
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_parallel_begin_implemented           ompt_event_MAY_ALWAYS
-#define ompt_event_parallel_end_implemented             ompt_event_MAY_ALWAYS
-
-#define ompt_event_task_begin_implemented               ompt_event_MAY_ALWAYS
-#define ompt_event_task_end_implemented                 ompt_event_MAY_ALWAYS
+#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_thread_begin_implemented             ompt_event_MAY_ALWAYS
-#define ompt_event_thread_end_implemented               ompt_event_MAY_ALWAYS
+#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_control_implemented                  ompt_event_MAY_ALWAYS
+#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS
+#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS
 
-#define ompt_event_runtime_shutdown_implemented         ompt_event_MAY_ALWAYS
+#define ompt_event_control_implemented ompt_event_MAY_ALWAYS
 
+#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS
 
 /*----------------------------------------------------------------------------
  | Optional Events (blame shifting)
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_idle_begin_implemented               ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_idle_end_implemented                 ompt_event_MAY_ALWAYS_BLAME
-
-#define ompt_event_wait_barrier_begin_implemented       ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_wait_barrier_end_implemented         ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME
 
-#define ompt_event_wait_taskwait_begin_implemented      ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskwait_end_implemented        ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME
 
-#define ompt_event_wait_taskgroup_begin_implemented     ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_taskgroup_end_implemented       ompt_event_UNIMPLEMENTED
-
-#define ompt_event_release_lock_implemented             ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_nest_lock_last_implemented   ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_critical_implemented         ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_atomic_implemented           ompt_event_MAY_ALWAYS_BLAME
-#define ompt_event_release_ordered_implemented          ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED
 
+#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
+
+#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_nest_lock_last_implemented                          \
+  ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME
+#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME
 
 /*----------------------------------------------------------------------------
  | Optional Events (synchronous events)
  +--------------------------------------------------------------------------*/
 
-#define ompt_event_implicit_task_begin_implemented      ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_implicit_task_end_implemented        ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_initial_task_begin_implemented       ompt_event_UNIMPLEMENTED
-#define ompt_event_initial_task_end_implemented         ompt_event_UNIMPLEMENTED
+#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED
 
-#define ompt_event_task_switch_implemented              ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_loop_begin_implemented               ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_loop_end_implemented                 ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_sections_begin_implemented           ompt_event_UNIMPLEMENTED
-#define ompt_event_sections_end_implemented             ompt_event_UNIMPLEMENTED
+#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED
 
-#define ompt_event_single_in_block_begin_implemented    ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_in_block_end_implemented      ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_begin_implemented      ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_single_others_end_implemented        ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_workshare_begin_implemented          ompt_event_UNIMPLEMENTED
-#define ompt_event_workshare_end_implemented            ompt_event_UNIMPLEMENTED
+#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED
 
-#define ompt_event_master_begin_implemented             ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_master_end_implemented               ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_barrier_begin_implemented            ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_barrier_end_implemented              ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_taskwait_begin_implemented           ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_taskwait_end_implemented             ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_taskgroup_begin_implemented          ompt_event_UNIMPLEMENTED
-#define ompt_event_taskgroup_end_implemented            ompt_event_UNIMPLEMENTED
+#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED
 
-#define ompt_event_release_nest_lock_prev_implemented   ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_lock_implemented                ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_nest_lock_implemented           ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_critical_implemented            ompt_event_UNIMPLEMENTED
-#define ompt_event_wait_atomic_implemented              ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_wait_ordered_implemented             ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_release_nest_lock_prev_implemented                          \
+  ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_acquired_lock_implemented            ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_nest_lock_next_implemented  ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_critical_implemented        ompt_event_UNIMPLEMENTED
-#define ompt_event_acquired_atomic_implemented          ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_acquired_ordered_implemented         ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_nest_lock_first_implemented                        \
+  ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_nest_lock_next_implemented                         \
+  ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_init_lock_implemented                ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_init_nest_lock_implemented           ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_destroy_lock_implemented             ompt_event_MAY_ALWAYS_TRACE
-#define ompt_event_destroy_nest_lock_implemented        ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE
 
-#define ompt_event_flush_implemented                    ompt_event_UNIMPLEMENTED
+#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED
 
 #if OMP_40_ENABLED
-# define ompt_event_task_dependences_implemented         ompt_event_MAY_ALWAYS_TRACE
-# define ompt_event_task_dependence_pair_implemented     ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE
 #else
-# define ompt_event_task_dependences_implemented         ompt_event_UNIMPLEMENTED
-# define ompt_event_task_dependence_pair_implemented     ompt_event_UNIMPLEMENTED
+#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED
+#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED
 #endif /* OMP_40_ENABLED */
 
 #endif

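Worked through for one mandatory event, the token-pasting machinery above
resolves an event name to its implementation status as follows (an illustrative
expansion only, not part of the patch):

    // ompt_event_implementation_status(ompt_event_parallel_begin)
    //   -> _ompt_tokenpaste(ompt_event_parallel_begin, _implemented)
    //   -> _ompt_tokenpaste_helper(ompt_event_parallel_begin, _implemented)
    //   -> ompt_event_parallel_begin_implemented
    //   -> ompt_event_MAY_ALWAYS
    //   -> ompt_set_result_event_may_occur_callback_always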
Modified: openmp/trunk/runtime/src/ompt-general.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-general.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-general.cpp (original)
+++ openmp/trunk/runtime/src/ompt-general.cpp Fri May 12 13:01:32 2017
@@ -9,16 +9,12 @@
 #include <stdlib.h>
 #include <string.h>
 
-
-
 /*****************************************************************************
  * ompt include files
  ****************************************************************************/
 
 #include "ompt-specific.cpp"
 
-
-
 /*****************************************************************************
  * macros
  ****************************************************************************/
@@ -34,32 +30,25 @@
 #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
 #endif
 
-
 /*****************************************************************************
  * types
  ****************************************************************************/
 
 typedef struct {
-    const char *state_name;
-    ompt_state_t  state_id;
+  const char *state_name;
+  ompt_state_t state_id;
 } ompt_state_info_t;
 
-
 enum tool_setting_e {
-    omp_tool_error,
-    omp_tool_unset,
-    omp_tool_disabled,
-    omp_tool_enabled
+  omp_tool_error,
+  omp_tool_unset,
+  omp_tool_disabled,
+  omp_tool_enabled
 };
 
-
-typedef void (*ompt_initialize_t) (
-    ompt_function_lookup_t ompt_fn_lookup,
-    const char *version,
-    unsigned int ompt_version
-);
-
-
+typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup,
+                                  const char *version,
+                                  unsigned int ompt_version);
 
 /*****************************************************************************
  * global variables
@@ -68,16 +57,14 @@ typedef void (*ompt_initialize_t) (
 int ompt_enabled = 0;
 
 ompt_state_info_t ompt_state_info[] = {
-#define ompt_state_macro(state, code) { # state, state },
+#define ompt_state_macro(state, code) {#state, state},
     FOREACH_OMPT_STATE(ompt_state_macro)
 #undef ompt_state_macro
 };
 
 ompt_callbacks_t ompt_callbacks;
 
-static ompt_initialize_t  ompt_initialize_fn = NULL;
-
-
+static ompt_initialize_t ompt_initialize_fn = NULL;
 
 /*****************************************************************************
  * forward declarations
@@ -87,7 +74,6 @@ static ompt_interface_fn_t ompt_fn_looku
 
 OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void);
 
-
 /*****************************************************************************
  * initialization and finalization (private operations)
  ****************************************************************************/
@@ -102,13 +88,11 @@ OMPT_API_ROUTINE ompt_thread_id_t ompt_g
  * NULL is returned and OMPT won't be enabled */
 #if OMPT_HAVE_WEAK_ATTRIBUTE
 _OMP_EXTERN
-__attribute__ (( weak ))
-ompt_initialize_t ompt_tool()
-{
+__attribute__((weak)) ompt_initialize_t ompt_tool() {
 #if OMPT_DEBUG
-    printf("ompt_tool() is called from the RTL\n");
+  printf("ompt_tool() is called from the RTL\n");
 #endif
-    return NULL;
+  return NULL;
 }
 
 #elif OMPT_HAVE_PSAPI
@@ -120,161 +104,154 @@ ompt_initialize_t ompt_tool()
 // The number of loaded modules to start enumeration with EnumProcessModules()
 #define NUM_MODULES 128
 
-static
-ompt_initialize_t ompt_tool_windows()
-{
-    int i;
-    DWORD needed, new_size;
-    HMODULE *modules;
-    HANDLE  process = GetCurrentProcess();
-    modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) );
-    ompt_initialize_t (*ompt_tool_p)() = NULL;
+static ompt_initialize_t ompt_tool_windows() {
+  int i;
+  DWORD needed, new_size;
+  HMODULE *modules;
+  HANDLE process = GetCurrentProcess();
+  modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE));
+  ompt_initialize_t (*ompt_tool_p)() = NULL;
 
 #if OMPT_DEBUG
-    printf("ompt_tool_windows(): looking for ompt_tool\n");
+  printf("ompt_tool_windows(): looking for ompt_tool\n");
 #endif
-    if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE),
-                              &needed)) {
-        // Regardless of the error reason use the stub initialization function
-        free(modules);
-        return NULL;
-    }
-    // Check if NUM_MODULES is enough to list all modules
-    new_size = needed / sizeof(HMODULE);
-    if (new_size > NUM_MODULES) {
+  if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE),
+                          &needed)) {
+    // Regardless of the error reason use the stub initialization function
+    free(modules);
+    return NULL;
+  }
+  // Check if NUM_MODULES is enough to list all modules
+  new_size = needed / sizeof(HMODULE);
+  if (new_size > NUM_MODULES) {
 #if OMPT_DEBUG
     printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed);
 #endif
-        modules = (HMODULE*)realloc( modules, needed );
-        // If resizing failed use the stub function.
-        if (!EnumProcessModules(process, modules, needed, &needed)) {
-            free(modules);
-            return NULL;
-        }
-    }
-    for (i = 0; i < new_size; ++i) {
-        (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
-        if (ompt_tool_p) {
+    modules = (HMODULE *)realloc(modules, needed);
+    // If resizing failed use the stub function.
+    if (!EnumProcessModules(process, modules, needed, &needed)) {
+      free(modules);
+      return NULL;
+    }
+  }
+  for (i = 0; i < new_size; ++i) {
+    (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool");
+    if (ompt_tool_p) {
 #if OMPT_DEBUG
-            TCHAR modName[MAX_PATH];
-            if (GetModuleFileName(modules[i], modName, MAX_PATH))
-                printf("ompt_tool_windows(): ompt_tool found in module %s\n",
-                       modName);
+      TCHAR modName[MAX_PATH];
+      if (GetModuleFileName(modules[i], modName, MAX_PATH))
+        printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName);
 #endif
-            free(modules);
-            return ompt_tool_p();
-        }
+      free(modules);
+      return ompt_tool_p();
+    }
 #if OMPT_DEBUG
-        else {
-            TCHAR modName[MAX_PATH];
-            if (GetModuleFileName(modules[i], modName, MAX_PATH))
-                printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
-                       modName);
-        }
-#endif
+    else {
+      TCHAR modName[MAX_PATH];
+      if (GetModuleFileName(modules[i], modName, MAX_PATH))
+        printf("ompt_tool_windows(): ompt_tool not found in module %s\n",
+               modName);
     }
-    free(modules);
-    return NULL;
+#endif
+  }
+  free(modules);
+  return NULL;
 }
 #else
-# error Either __attribute__((weak)) or psapi.dll are required for OMPT support
+#error Either __attribute__((weak)) or psapi.dll are required for OMPT support
 #endif // OMPT_HAVE_WEAK_ATTRIBUTE
 
-void ompt_pre_init()
-{
-    //--------------------------------------------------
-    // Execute the pre-initialization logic only once.
-    //--------------------------------------------------
-    static int ompt_pre_initialized = 0;
-
-    if (ompt_pre_initialized) return;
-
-    ompt_pre_initialized = 1;
-
-    //--------------------------------------------------
-    // Use a tool iff a tool is enabled and available.
-    //--------------------------------------------------
-    const char *ompt_env_var = getenv("OMP_TOOL");
-    tool_setting_e tool_setting = omp_tool_error;
-
-    if (!ompt_env_var  || !strcmp(ompt_env_var, ""))
-        tool_setting = omp_tool_unset;
-    else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
-        tool_setting = omp_tool_disabled;
-    else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
-        tool_setting = omp_tool_enabled;
+void ompt_pre_init() {
+  //--------------------------------------------------
+  // Execute the pre-initialization logic only once.
+  //--------------------------------------------------
+  static int ompt_pre_initialized = 0;
+
+  if (ompt_pre_initialized)
+    return;
+
+  ompt_pre_initialized = 1;
+
+  //--------------------------------------------------
+  // Use a tool iff a tool is enabled and available.
+  //--------------------------------------------------
+  const char *ompt_env_var = getenv("OMP_TOOL");
+  tool_setting_e tool_setting = omp_tool_error;
+
+  if (!ompt_env_var || !strcmp(ompt_env_var, ""))
+    tool_setting = omp_tool_unset;
+  else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
+    tool_setting = omp_tool_disabled;
+  else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
+    tool_setting = omp_tool_enabled;
 
 #if OMPT_DEBUG
-    printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
+  printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
 #endif
-    switch(tool_setting) {
-    case omp_tool_disabled:
-        break;
-
-    case omp_tool_unset:
-    case omp_tool_enabled:
-        ompt_initialize_fn = ompt_tool();
-        if (ompt_initialize_fn) {
-            ompt_enabled = 1;
-        }
-        break;
-
-    case omp_tool_error:
-        fprintf(stderr,
-            "Warning: OMP_TOOL has invalid value \"%s\".\n"
-            "  legal values are (NULL,\"\",\"disabled\","
-            "\"enabled\").\n", ompt_env_var);
-        break;
-    }
+  switch (tool_setting) {
+  case omp_tool_disabled:
+    break;
+
+  case omp_tool_unset:
+  case omp_tool_enabled:
+    ompt_initialize_fn = ompt_tool();
+    if (ompt_initialize_fn) {
+      ompt_enabled = 1;
+    }
+    break;
+
+  case omp_tool_error:
+    fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n"
+                    "  legal values are (NULL,\"\",\"disabled\","
+                    "\"enabled\").\n",
+            ompt_env_var);
+    break;
+  }
 #if OMPT_DEBUG
-    printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
+  printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
 #endif
 }
 
+void ompt_post_init() {
+  //--------------------------------------------------
+  // Execute the post-initialization logic only once.
+  //--------------------------------------------------
+  static int ompt_post_initialized = 0;
 
-void ompt_post_init()
-{
-    //--------------------------------------------------
-    // Execute the post-initialization logic only once.
-    //--------------------------------------------------
-    static int ompt_post_initialized = 0;
-
-    if (ompt_post_initialized) return;
-
-    ompt_post_initialized = 1;
-
-    //--------------------------------------------------
-    // Initialize the tool if so indicated.
-    //--------------------------------------------------
-    if (ompt_enabled) {
-        ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
-                           OMPT_VERSION);
-
-        ompt_thread_t *root_thread = ompt_get_thread();
-
-        ompt_set_thread_state(root_thread, ompt_state_overhead);
-
-        if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
-            ompt_callbacks.ompt_callback(ompt_event_thread_begin)
-                (ompt_thread_initial, ompt_get_thread_id());
-        }
+  if (ompt_post_initialized)
+    return;
 
-        ompt_set_thread_state(root_thread, ompt_state_work_serial);
-    }
-}
+  ompt_post_initialized = 1;
+
+  //--------------------------------------------------
+  // Initialize the tool if so indicated.
+  //--------------------------------------------------
+  if (ompt_enabled) {
+    ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(),
+                       OMPT_VERSION);
 
+    ompt_thread_t *root_thread = ompt_get_thread();
 
-void ompt_fini()
-{
-    if (ompt_enabled) {
-        if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
-            ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
-        }
+    ompt_set_thread_state(root_thread, ompt_state_overhead);
+
+    if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
+      ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
+          ompt_thread_initial, ompt_get_thread_id());
     }
 
-    ompt_enabled = 0;
+    ompt_set_thread_state(root_thread, ompt_state_work_serial);
+  }
 }
 
+void ompt_fini() {
+  if (ompt_enabled) {
+    if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) {
+      ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)();
+    }
+  }
+
+  ompt_enabled = 0;
+}
 
 /*****************************************************************************
  * interface operations
@@ -285,148 +262,122 @@ void ompt_fini()
  ****************************************************************************/
 
 OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state,
-                                          const char **next_state_name)
-{
-    const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
-    int i = 0;
-
-    for (i = 0; i < len - 1; i++) {
-        if (ompt_state_info[i].state_id == current_state) {
-            *next_state = ompt_state_info[i+1].state_id;
-            *next_state_name = ompt_state_info[i+1].state_name;
-            return 1;
-        }
+                                          const char **next_state_name) {
+  const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
+  int i = 0;
+
+  for (i = 0; i < len - 1; i++) {
+    if (ompt_state_info[i].state_id == current_state) {
+      *next_state = ompt_state_info[i + 1].state_id;
+      *next_state_name = ompt_state_info[i + 1].state_name;
+      return 1;
     }
+  }
 
-    return 0;
+  return 0;
 }
 
-
-
 /*****************************************************************************
  * callbacks
  ****************************************************************************/
 
-OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb)
-{
-    switch (evid) {
+OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) {
+  switch (evid) {
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
-    case event_name:                                                           \
-        if (ompt_event_implementation_status(event_name)) {                    \
-            ompt_callbacks.ompt_callback(event_name) = (callback_type) cb;     \
-        }                                                                      \
-        return ompt_event_implementation_status(event_name);
+  case event_name:                                                             \
+    if (ompt_event_implementation_status(event_name)) {                        \
+      ompt_callbacks.ompt_callback(event_name) = (callback_type)cb;            \
+    }                                                                          \
+    return ompt_event_implementation_status(event_name);
 
     FOREACH_OMPT_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 
-    default: return ompt_set_result_registration_error;
-    }
+  default:
+    return ompt_set_result_registration_error;
+  }
 }
 
-
-OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb)
-{
-    switch (evid) {
+OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) {
+  switch (evid) {
 
 #define ompt_event_macro(event_name, callback_type, event_id)                  \
-    case event_name:                                                           \
-        if (ompt_event_implementation_status(event_name)) {                    \
-            ompt_callback_t mycb =                                             \
-                (ompt_callback_t) ompt_callbacks.ompt_callback(event_name);    \
-            if (mycb) {                                                        \
-                *cb = mycb;                                                    \
-                return ompt_get_callback_success;                              \
-            }                                                                  \
-        }                                                                      \
-        return ompt_get_callback_failure;
+  case event_name:                                                             \
+    if (ompt_event_implementation_status(event_name)) {                        \
+      ompt_callback_t mycb =                                                   \
+          (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);           \
+      if (mycb) {                                                              \
+        *cb = mycb;                                                            \
+        return ompt_get_callback_success;                                      \
+      }                                                                        \
+    }                                                                          \
+    return ompt_get_callback_failure;
 
     FOREACH_OMPT_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 
-    default: return ompt_get_callback_failure;
-    }
+  default:
+    return ompt_get_callback_failure;
+  }
 }
 
-
 /*****************************************************************************
  * parallel regions
  ****************************************************************************/
 
-OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level)
-{
-    return __ompt_get_parallel_id_internal(ancestor_level);
+OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) {
+  return __ompt_get_parallel_id_internal(ancestor_level);
 }
 
-
-OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level)
-{
-    return __ompt_get_parallel_team_size_internal(ancestor_level);
+OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) {
+  return __ompt_get_parallel_team_size_internal(ancestor_level);
 }
 
-
-OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level)
-{
-    return __ompt_get_parallel_function_internal(ancestor_level);
+OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) {
+  return __ompt_get_parallel_function_internal(ancestor_level);
 }
 
+OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) {
+  ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
 
-OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id)
-{
-    ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id);
-
-    if (thread_state == ompt_state_undefined) {
-        thread_state = ompt_state_work_serial;
-    }
+  if (thread_state == ompt_state_undefined) {
+    thread_state = ompt_state_work_serial;
+  }
 
-    return thread_state;
+  return thread_state;
 }
 
-
-
 /*****************************************************************************
  * threads
  ****************************************************************************/
 
-
-OMPT_API_ROUTINE void *ompt_get_idle_frame()
-{
-    return __ompt_get_idle_frame_internal();
+OMPT_API_ROUTINE void *ompt_get_idle_frame() {
+  return __ompt_get_idle_frame_internal();
 }
 
-
-
 /*****************************************************************************
  * tasks
  ****************************************************************************/
 
-
-OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void)
-{
-    return __ompt_get_thread_id_internal();
+OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) {
+  return __ompt_get_thread_id_internal();
 }
 
-OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth)
-{
-    return __ompt_get_task_id_internal(depth);
+OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) {
+  return __ompt_get_task_id_internal(depth);
 }
 
-
-OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth)
-{
-    return __ompt_get_task_frame_internal(depth);
+OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) {
+  return __ompt_get_task_frame_internal(depth);
 }
 
-
-OMPT_API_ROUTINE void *ompt_get_task_function(int depth)
-{
-    return __ompt_get_task_function_internal(depth);
+OMPT_API_ROUTINE void *ompt_get_task_function(int depth) {
+  return __ompt_get_task_function_internal(depth);
 }
 
-
 /*****************************************************************************
  * placeholders
  ****************************************************************************/
@@ -440,96 +391,76 @@ OMPT_API_ROUTINE void *ompt_get_task_fun
 extern "C" {
 #endif
 
-
-OMPT_API_PLACEHOLDER void ompt_idle(void)
-{
-    // This function is a placeholder used to represent the calling context of
-    // idle OpenMP worker threads. It is not meant to be invoked.
-    assert(0);
+OMPT_API_PLACEHOLDER void ompt_idle(void) {
+  // This function is a placeholder used to represent the calling context of
+  // idle OpenMP worker threads. It is not meant to be invoked.
+  assert(0);
 }
 
-
-OMPT_API_PLACEHOLDER void ompt_overhead(void)
-{
-    // This function is a placeholder used to represent the OpenMP context of
-    // threads working in the OpenMP runtime.  It is not meant to be invoked.
-    assert(0);
+OMPT_API_PLACEHOLDER void ompt_overhead(void) {
+  // This function is a placeholder used to represent the OpenMP context of
+  // threads working in the OpenMP runtime.  It is not meant to be invoked.
+  assert(0);
 }
 
-
-OMPT_API_PLACEHOLDER void ompt_barrier_wait(void)
-{
-    // This function is a placeholder used to represent the OpenMP context of
-    // threads waiting for a barrier in the OpenMP runtime. It is not meant
-    // to be invoked.
-    assert(0);
+OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) {
+  // This function is a placeholder used to represent the OpenMP context of
+  // threads waiting for a barrier in the OpenMP runtime. It is not meant
+  // to be invoked.
+  assert(0);
 }
 
-
-OMPT_API_PLACEHOLDER void ompt_task_wait(void)
-{
-    // This function is a placeholder used to represent the OpenMP context of
-    // threads waiting for a task in the OpenMP runtime. It is not meant
-    // to be invoked.
-    assert(0);
+OMPT_API_PLACEHOLDER void ompt_task_wait(void) {
+  // This function is a placeholder used to represent the OpenMP context of
+  // threads waiting for a task in the OpenMP runtime. It is not meant
+  // to be invoked.
+  assert(0);
 }
 
-
-OMPT_API_PLACEHOLDER void ompt_mutex_wait(void)
-{
-    // This function is a placeholder used to represent the OpenMP context of
-    // threads waiting for a mutex in the OpenMP runtime. It is not meant
-    // to be invoked.
-    assert(0);
+OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) {
+  // This function is a placeholder used to represent the OpenMP context of
+  // threads waiting for a mutex in the OpenMP runtime. It is not meant
+  // to be invoked.
+  assert(0);
 }
 
 #ifdef __cplusplus
 };
 #endif
 
-
 /*****************************************************************************
  * compatibility
  ****************************************************************************/
 
-OMPT_API_ROUTINE int ompt_get_ompt_version()
-{
-    return OMPT_VERSION;
-}
-
-
+OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; }
 
 /*****************************************************************************
  * application-facing API
  ****************************************************************************/
 
-
 /*----------------------------------------------------------------------------
  | control
  ---------------------------------------------------------------------------*/
 
-_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier)
-{
-    if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
-        ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
-    }
+_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) {
+  if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) {
+    ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier);
+  }
 }
 
-
-
 /*****************************************************************************
  * API inquiry for tool
  ****************************************************************************/
 
-static ompt_interface_fn_t ompt_fn_lookup(const char *s)
-{
+static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
 
-#define ompt_interface_fn(fn) \
-    if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn;
+#define ompt_interface_fn(fn)                                                  \
+  if (strcmp(s, #fn) == 0)                                                     \
+    return (ompt_interface_fn_t)fn;
 
-    FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
+  FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
 
-    FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn)
+  FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn)
 
-    return (ompt_interface_fn_t) 0;
+  return (ompt_interface_fn_t)0;
 }

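For context, a rough sketch (not from the patch) of the tool side that
ompt_tool() and ompt_fn_lookup() serve. The file name my_tool.cpp, the function
my_initialize, and the local set_callback_fn_t typedef are hypothetical; the
shapes mirror the ompt_initialize_t typedef and ompt_set_callback routine
above, and it is assumed that ompt.h exposes ompt_event_t, ompt_callback_t,
ompt_function_lookup_t and ompt_initialize_t to tools.

    // my_tool.cpp -- hypothetical OMPT tool
    #include "ompt.h"

    // Same shape as the ompt_set_callback() inquiry routine above.
    typedef int (*set_callback_fn_t)(ompt_event_t, ompt_callback_t);

    // The runtime calls this once from ompt_post_init(), handing over its
    // lookup function, version string, and OMPT_VERSION.
    static void my_initialize(ompt_function_lookup_t lookup,
                              const char *runtime_version,
                              unsigned int version) {
      set_callback_fn_t set_callback =
          (set_callback_fn_t)lookup("ompt_set_callback");
      // Register the callbacks of interest here via set_callback(...).
      (void)set_callback;
      (void)runtime_version;
      (void)version;
    }

    // A strong definition overrides the weak ompt_tool() stub above (or is
    // found via GetProcAddress on Windows* OS); returning a non-NULL
    // initializer makes ompt_pre_init() set ompt_enabled = 1.
    extern "C" ompt_initialize_t ompt_tool() { return my_initialize; }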
Modified: openmp/trunk/runtime/src/ompt-internal.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-internal.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-internal.h (original)
+++ openmp/trunk/runtime/src/ompt-internal.h Fri May 12 13:01:32 2017
@@ -1,79 +1,71 @@
 #ifndef __OMPT_INTERNAL_H__
 #define __OMPT_INTERNAL_H__
 
-#include "ompt.h"
 #include "ompt-event-specific.h"
+#include "ompt.h"
 
 #define OMPT_VERSION 1
 
 #define _OMP_EXTERN extern "C"
 
-#define OMPT_INVOKER(x) \
+#define OMPT_INVOKER(x)                                                        \
   ((x == fork_context_gnu) ? ompt_invoker_program : ompt_invoker_runtime)
 
-
-#define ompt_callback(e) e ## _callback
-
+#define ompt_callback(e) e##_callback
 
 typedef struct ompt_callbacks_s {
-#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event);
+#define ompt_event_macro(event, callback, eventid)                             \
+  callback ompt_callback(event);
 
-    FOREACH_OMPT_EVENT(ompt_event_macro)
+  FOREACH_OMPT_EVENT(ompt_event_macro)
 
 #undef ompt_event_macro
 } ompt_callbacks_t;
 
-
-
 typedef struct {
-    ompt_frame_t            frame;
-    void*                   function;
-    ompt_task_id_t          task_id;
+  ompt_frame_t frame;
+  void *function;
+  ompt_task_id_t task_id;
 #if OMP_40_ENABLED
-    int                     ndeps;
-    ompt_task_dependence_t  *deps;
+  int ndeps;
+  ompt_task_dependence_t *deps;
 #endif /* OMP_40_ENABLED */
 } ompt_task_info_t;
 
-
 typedef struct {
-    ompt_parallel_id_t  parallel_id;
-    void                *microtask;
+  ompt_parallel_id_t parallel_id;
+  void *microtask;
 } ompt_team_info_t;
 
-
 typedef struct ompt_lw_taskteam_s {
-    ompt_team_info_t    ompt_team_info;
-    ompt_task_info_t    ompt_task_info;
-    struct ompt_lw_taskteam_s *parent;
+  ompt_team_info_t ompt_team_info;
+  ompt_task_info_t ompt_task_info;
+  struct ompt_lw_taskteam_s *parent;
 } ompt_lw_taskteam_t;
 
-
 typedef struct ompt_parallel_info_s {
-    ompt_task_id_t parent_task_id;    /* id of parent task            */
-    ompt_parallel_id_t parallel_id;   /* id of parallel region        */
-    ompt_frame_t *parent_task_frame;  /* frame data of parent task    */
-    void *parallel_function;          /* pointer to outlined function */
+  ompt_task_id_t parent_task_id; /* id of parent task            */
+  ompt_parallel_id_t parallel_id; /* id of parallel region        */
+  ompt_frame_t *parent_task_frame; /* frame data of parent task    */
+  void *parallel_function; /* pointer to outlined function */
 } ompt_parallel_info_t;
 
-
 typedef struct {
-    ompt_state_t        state;
-    ompt_wait_id_t      wait_id;
-    void                *idle_frame;
+  ompt_state_t state;
+  ompt_wait_id_t wait_id;
+  void *idle_frame;
 } ompt_thread_info_t;
 
-
 extern ompt_callbacks_t ompt_callbacks;
 
 #if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE
 #if USE_FAST_MEMORY
-#  define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate
-#  define KMP_OMPT_DEPS_FREE __kmp_fast_free
-# else
-#  define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc
-#  define KMP_OMPT_DEPS_FREE __kmp_thread_free
-# endif
+#define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate
+#define KMP_OMPT_DEPS_FREE __kmp_fast_free
+#else
+#define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc
+#define KMP_OMPT_DEPS_FREE __kmp_thread_free
+#endif
 #endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */
 
 #ifdef __cplusplus

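Two of the small macros above, expanded for reference (illustrative only, not
part of the patch):

    // OMPT_INVOKER(fork_context_gnu)         -> ompt_invoker_program
    // OMPT_INVOKER(<any other fork context>) -> ompt_invoker_runtime

    // ompt_callback(ompt_event_thread_begin) -> ompt_event_thread_begin_callback,
    // i.e. the field name generated inside ompt_callbacks_t and accessed as
    // ompt_callbacks.ompt_event_thread_begin_callback elsewhere in the runtime.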
Modified: openmp/trunk/runtime/src/ompt-specific.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-specific.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-specific.cpp (original)
+++ openmp/trunk/runtime/src/ompt-specific.cpp Fri May 12 13:01:32 2017
@@ -10,7 +10,7 @@
 // macros
 //******************************************************************************
 
-#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0)
+#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0)
 
 #define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info;
 
@@ -26,10 +26,10 @@
 // when using fetch_and_add to generate the IDs, there isn't any reason to waste
 // bits for thread id.
 #if 0
-#define NEXT_ID(id_ptr,tid) \
+#define NEXT_ID(id_ptr, tid)                                                   \
   ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid))
 #else
-#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
+#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr))
 #endif
 
 //******************************************************************************
@@ -43,89 +43,87 @@
 //       kept consistent
 //----------------------------------------------------------
 
-ompt_team_info_t *
-__ompt_get_teaminfo(int depth, int *size)
-{
-    kmp_info_t *thr = ompt_get_thread();
-
-    if (thr) {
-        kmp_team *team = thr->th.th_team;
-        if (team == NULL) return NULL;
-
-        ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
-
-        while(depth > 0) {
-            // next lightweight team (if any)
-            if (lwt) lwt = lwt->parent;
-
-            // next heavyweight team (if any) after
-            // lightweight teams are exhausted
-            if (!lwt && team) {
-                team=team->t.t_parent;
-                if (team) {
-                    lwt = LWT_FROM_TEAM(team);
-                }
-            }
+ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) {
+  kmp_info_t *thr = ompt_get_thread();
 
-            depth--;
+  if (thr) {
+    kmp_team *team = thr->th.th_team;
+    if (team == NULL)
+      return NULL;
+
+    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team);
+
+    while (depth > 0) {
+      // next lightweight team (if any)
+      if (lwt)
+        lwt = lwt->parent;
+
+      // next heavyweight team (if any) after
+      // lightweight teams are exhausted
+      if (!lwt && team) {
+        team = team->t.t_parent;
+        if (team) {
+          lwt = LWT_FROM_TEAM(team);
         }
+      }
 
-        if (lwt) {
-            // lightweight teams have one task
-            if (size) *size = 1;
-
-            // return team info for lightweight team
-            return &lwt->ompt_team_info;
-        } else if (team) {
-            // extract size from heavyweight team
-            if (size) *size = team->t.t_nproc;
+      depth--;
+    }
 
-            // return team info for heavyweight team
-            return &team->t.ompt_team_info;
-        }
+    if (lwt) {
+      // lightweight teams have one task
+      if (size)
+        *size = 1;
+
+      // return team info for lightweight team
+      return &lwt->ompt_team_info;
+    } else if (team) {
+      // extract size from heavyweight team
+      if (size)
+        *size = team->t.t_nproc;
+
+      // return team info for heavyweight team
+      return &team->t.ompt_team_info;
     }
+  }
 
-    return NULL;
+  return NULL;
 }
 
-
-ompt_task_info_t *
-__ompt_get_taskinfo(int depth)
-{
-    ompt_task_info_t *info = NULL;
-    kmp_info_t *thr = ompt_get_thread();
-
-    if (thr) {
-        kmp_taskdata_t  *taskdata = thr->th.th_current_task;
-        ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
-
-        while (depth > 0) {
-            // next lightweight team (if any)
-            if (lwt) lwt = lwt->parent;
-
-            // next heavyweight team (if any) after
-            // lightweight teams are exhausted
-            if (!lwt && taskdata) {
-                taskdata = taskdata->td_parent;
-                if (taskdata) {
-                    lwt = LWT_FROM_TEAM(taskdata->td_team);
-                }
-            }
-            depth--;
+ompt_task_info_t *__ompt_get_taskinfo(int depth) {
+  ompt_task_info_t *info = NULL;
+  kmp_info_t *thr = ompt_get_thread();
+
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+    ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team);
+
+    while (depth > 0) {
+      // next lightweight team (if any)
+      if (lwt)
+        lwt = lwt->parent;
+
+      // next heavyweight team (if any) after
+      // lightweight teams are exhausted
+      if (!lwt && taskdata) {
+        taskdata = taskdata->td_parent;
+        if (taskdata) {
+          lwt = LWT_FROM_TEAM(taskdata->td_team);
         }
+      }
+      depth--;
+    }
 
-        if (lwt) {
-            info = &lwt->ompt_task_info;
-        } else if (taskdata) {
-            info = &taskdata->ompt_task_info;
-        }
+    if (lwt) {
+      info = &lwt->ompt_task_info;
+    } else if (taskdata) {
+      info = &taskdata->ompt_task_info;
     }
+  }
 
-    return info;
+  return info;
 }
 
-
-
 //******************************************************************************
 // interface operations
 //******************************************************************************
@@ -134,204 +132,151 @@ __ompt_get_taskinfo(int depth)
 // thread support
 //----------------------------------------------------------
 
-ompt_parallel_id_t
-__ompt_thread_id_new()
-{
-    static uint64_t ompt_thread_id = 1;
-    return NEXT_ID(&ompt_thread_id, 0);
+ompt_parallel_id_t __ompt_thread_id_new() {
+  static uint64_t ompt_thread_id = 1;
+  return NEXT_ID(&ompt_thread_id, 0);
 }
 
-void
-__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid)
-{
-    ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
-        thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
+void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) {
+  ompt_callbacks.ompt_callback(ompt_event_thread_begin)(
+      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
 }
 
-
-void
-__ompt_thread_end(ompt_thread_type_t thread_type, int gtid)
-{
-    ompt_callbacks.ompt_callback(ompt_event_thread_end)(
-        thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
+void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) {
+  ompt_callbacks.ompt_callback(ompt_event_thread_end)(
+      thread_type, GTID_TO_OMPT_THREAD_ID(gtid));
 }
 
+ompt_thread_id_t __ompt_get_thread_id_internal() {
+  // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid
+  // since the return value might be negative, we need to test that before
+  // assigning it to an ompt_thread_id_t, which is unsigned.
+  int id = __kmp_get_gtid();
+  assert(id >= 0);
 
-ompt_thread_id_t
-__ompt_get_thread_id_internal()
-{
-    // FIXME
-    // until we have a better way of assigning ids, use __kmp_get_gtid
-    // since the return value might be negative, we need to test that before
-    // assigning it to an ompt_thread_id_t, which is unsigned.
-    int id = __kmp_get_gtid();
-    assert(id >= 0);
-
-    return GTID_TO_OMPT_THREAD_ID(id);
+  return GTID_TO_OMPT_THREAD_ID(id);
 }
 
 //----------------------------------------------------------
 // state support
 //----------------------------------------------------------
 
-void
-__ompt_thread_assign_wait_id(void *variable)
-{
-    int gtid = __kmp_gtid_get_specific();
-    kmp_info_t *ti = ompt_get_thread_gtid(gtid);
-
-    ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable;
-}
-
-ompt_state_t
-__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id)
-{
-    kmp_info_t *ti = ompt_get_thread();
-
-    if (ti) {
-        if (ompt_wait_id)
-            *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
-        return ti->th.ompt_thread_info.state;
-    }
-    return ompt_state_undefined;
+void __ompt_thread_assign_wait_id(void *variable) {
+  int gtid = __kmp_gtid_get_specific();
+  kmp_info_t *ti = ompt_get_thread_gtid(gtid);
+
+  ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
+}
+
+ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) {
+  kmp_info_t *ti = ompt_get_thread();
+
+  if (ti) {
+    if (ompt_wait_id)
+      *ompt_wait_id = ti->th.ompt_thread_info.wait_id;
+    return ti->th.ompt_thread_info.state;
+  }
+  return ompt_state_undefined;
 }
 
 //----------------------------------------------------------
 // idle frame support
 //----------------------------------------------------------
 
-void *
-__ompt_get_idle_frame_internal(void)
-{
-    kmp_info_t *ti = ompt_get_thread();
-    return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
+void *__ompt_get_idle_frame_internal(void) {
+  kmp_info_t *ti = ompt_get_thread();
+  return ti ? ti->th.ompt_thread_info.idle_frame : NULL;
 }
 
-
 //----------------------------------------------------------
 // parallel region support
 //----------------------------------------------------------
 
-ompt_parallel_id_t
-__ompt_parallel_id_new(int gtid)
-{
-    static uint64_t ompt_parallel_id = 1;
-    return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
-}
-
-
-void *
-__ompt_get_parallel_function_internal(int depth)
-{
-    ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
-    void *function = info ? info->microtask : NULL;
-    return function;
+ompt_parallel_id_t __ompt_parallel_id_new(int gtid) {
+  static uint64_t ompt_parallel_id = 1;
+  return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0;
+}
+
+void *__ompt_get_parallel_function_internal(int depth) {
+  ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
+  void *function = info ? info->microtask : NULL;
+  return function;
+}
+
+ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) {
+  ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
+  ompt_parallel_id_t id = info ? info->parallel_id : 0;
+  return id;
+}
+
+int __ompt_get_parallel_team_size_internal(int depth) {
+  // initialize the return value with the error value.
+  // if there is a team at the specified depth, the default
+  // value will be overwritten with the size of that team.
+  int size = -1;
+  (void)__ompt_get_teaminfo(depth, &size);
+  return size;
 }
 
-
-ompt_parallel_id_t
-__ompt_get_parallel_id_internal(int depth)
-{
-    ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL);
-    ompt_parallel_id_t id = info ? info->parallel_id : 0;
-    return id;
-}
-
-
-int
-__ompt_get_parallel_team_size_internal(int depth)
-{
-    // initialize the return value with the error value.
-    // if there is a team at the specified depth, the default
-    // value will be overwritten the size of that team.
-    int size = -1;
-    (void) __ompt_get_teaminfo(depth, &size);
-    return size;
-}
-
-
 //----------------------------------------------------------
 // lightweight task team support
 //----------------------------------------------------------
 
-void
-__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
-                        int gtid, void *microtask,
-                        ompt_parallel_id_t ompt_pid)
-{
-    lwt->ompt_team_info.parallel_id = ompt_pid;
-    lwt->ompt_team_info.microtask = microtask;
-    lwt->ompt_task_info.task_id = 0;
-    lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
-    lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
-    lwt->ompt_task_info.function = NULL;
-    lwt->parent = 0;
-}
-
-
-void
-__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt,  kmp_info_t *thr)
-{
-    ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
-    lwt->parent = my_parent;
-    thr->th.th_team->t.ompt_serialized_team_info = lwt;
+void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
+                             void *microtask, ompt_parallel_id_t ompt_pid) {
+  lwt->ompt_team_info.parallel_id = ompt_pid;
+  lwt->ompt_team_info.microtask = microtask;
+  lwt->ompt_task_info.task_id = 0;
+  lwt->ompt_task_info.frame.reenter_runtime_frame = NULL;
+  lwt->ompt_task_info.frame.exit_runtime_frame = NULL;
+  lwt->ompt_task_info.function = NULL;
+  lwt->parent = 0;
+}
+
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) {
+  ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info;
+  lwt->parent = my_parent;
+  thr->th.th_team->t.ompt_serialized_team_info = lwt;
+}
+
+ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) {
+  ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
+  if (lwtask)
+    thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
+  return lwtask;
 }
 
-
-ompt_lw_taskteam_t *
-__ompt_lw_taskteam_unlink(kmp_info_t *thr)
-{
-    ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
-    if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
-    return lwtask;
-}
-
-
 //----------------------------------------------------------
 // task support
 //----------------------------------------------------------
 
-ompt_task_id_t
-__ompt_task_id_new(int gtid)
-{
-    static uint64_t ompt_task_id = 1;
-    return NEXT_ID(&ompt_task_id, gtid);
+ompt_task_id_t __ompt_task_id_new(int gtid) {
+  static uint64_t ompt_task_id = 1;
+  return NEXT_ID(&ompt_task_id, gtid);
 }
 
-
-ompt_task_id_t
-__ompt_get_task_id_internal(int depth)
-{
-    ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-    ompt_task_id_t task_id = info ?  info->task_id : 0;
-    return task_id;
+ompt_task_id_t __ompt_get_task_id_internal(int depth) {
+  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+  ompt_task_id_t task_id = info ? info->task_id : 0;
+  return task_id;
 }
 
-
-void *
-__ompt_get_task_function_internal(int depth)
-{
-    ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-    void *function = info ? info->function : NULL;
-    return function;
+void *__ompt_get_task_function_internal(int depth) {
+  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+  void *function = info ? info->function : NULL;
+  return function;
 }
 
-
-ompt_frame_t *
-__ompt_get_task_frame_internal(int depth)
-{
-    ompt_task_info_t *info = __ompt_get_taskinfo(depth);
-    ompt_frame_t *frame = info ? frame = &info->frame : NULL;
-    return frame;
+ompt_frame_t *__ompt_get_task_frame_internal(int depth) {
+  ompt_task_info_t *info = __ompt_get_taskinfo(depth);
+  ompt_frame_t *frame = info ? frame = &info->frame : NULL;
+  return frame;
 }
 
-
 //----------------------------------------------------------
 // team support
 //----------------------------------------------------------
 
-void
-__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid)
-{
-    team->t.ompt_team_info.parallel_id = ompt_pid;
+void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) {
+  team->t.ompt_team_info.parallel_id = ompt_pid;
 }
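
The two lookup routines at the top of this file's diff, __ompt_get_teaminfo and
__ompt_get_taskinfo, share one ancestor walk: at each level they first follow the
chain of lightweight (serialized) teams, and only when that chain is exhausted do
they step up to the parent heavyweight team and continue on its lightweight chain.
A minimal sketch of that walk, with hypothetical stand-in structs rather than the
real kmp_team_t/kmp_taskdata_t types, reads:

  // Sketch only; Light/Heavy stand in for ompt_lw_taskteam_t and kmp_team_t.
  struct Light { Light *parent; };
  struct Heavy { Heavy *parent; Light *serialized; };

  // Walk 'depth' ancestor levels, preferring lightweight teams at each step.
  static void walk_ancestors(Heavy *&team, Light *&lwt, int depth) {
    while (depth > 0) {
      if (lwt)
        lwt = lwt->parent;        // next lightweight team, if any
      if (!lwt && team) {
        team = team->parent;      // lightweight chain exhausted: go up one
        if (team)                 // heavyweight team and restart on its
          lwt = team->serialized; // serialized (lightweight) chain
      }
      depth--;
    }
    // Afterwards lwt (if non-NULL), else team, identifies the requested level.
  }

The same preference order explains the tails of both functions: a lightweight team
answers first (its size is always 1), then the heavyweight team, and NULL/0 once
the requested depth runs past the root.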

Modified: openmp/trunk/runtime/src/ompt-specific.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/ompt-specific.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/ompt-specific.h (original)
+++ openmp/trunk/runtime/src/ompt-specific.h Fri May 12 13:01:32 2017
@@ -9,8 +9,6 @@
 
 typedef kmp_info_t ompt_thread_t;
 
-
-
 /*****************************************************************************
  * forward declarations
  ****************************************************************************/
@@ -22,9 +20,9 @@ void __ompt_lw_taskteam_init(ompt_lw_tas
                              int gtid, void *microtask,
                              ompt_parallel_id_t ompt_pid);
 
-void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt,  ompt_thread_t *thr);
+void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr);
 
-ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr);
+ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr);
 
 ompt_parallel_id_t __ompt_parallel_id_new(int gtid);
 ompt_task_id_t __ompt_task_id_new(int gtid);
@@ -43,8 +41,6 @@ ompt_task_id_t __ompt_get_task_id_intern
 
 ompt_frame_t *__ompt_get_task_frame_internal(int depth);
 
-
-
 /*****************************************************************************
  * macros
  ****************************************************************************/
@@ -53,38 +49,25 @@ ompt_frame_t *__ompt_get_task_frame_inte
 #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI
 #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle)
 
-
-
 //******************************************************************************
 // inline functions
 //******************************************************************************
 
-inline ompt_thread_t *
-ompt_get_thread_gtid(int gtid)
-{
-    return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL;
+inline ompt_thread_t *ompt_get_thread_gtid(int gtid) {
+  return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL;
 }
 
-
-inline ompt_thread_t *
-ompt_get_thread()
-{
-    int gtid = __kmp_get_gtid();
-    return ompt_get_thread_gtid(gtid);
+inline ompt_thread_t *ompt_get_thread() {
+  int gtid = __kmp_get_gtid();
+  return ompt_get_thread_gtid(gtid);
 }
 
-
-inline void
-ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state)
-{
-    thread->th.ompt_thread_info.state = state;
+inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) {
+  thread->th.ompt_thread_info.state = state;
 }
 
-
-inline const char *
-ompt_get_runtime_version()
-{
-    return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
+inline const char *ompt_get_runtime_version() {
+  return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN];
 }
 
 #endif
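
The inline helpers above are how the OMPT support code reaches per-thread runtime
state: ompt_get_thread_gtid() maps a global thread id to a kmp_info_t and returns
NULL for a negative (unregistered) gtid, and ompt_get_thread() composes it with
__kmp_get_gtid(). A hypothetical call site, not part of this commit and with the
ompt_state_* enumerators assumed from the OMPT interface header, would read:

  // Mark the current thread idle while it waits, then flag runtime overhead.
  static void example_idle_section() {
    ompt_thread_t *thr = ompt_get_thread(); // NULL if this thread has no gtid yet
    if (!thr)
      return;                               // nothing to record for unknown threads
    ompt_set_thread_state(thr, ompt_state_idle);     // assumed OMPT enumerator
    /* ... wait for work ... */
    ompt_set_thread_state(thr, ompt_state_overhead); // assumed OMPT enumerator
  }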

Modified: openmp/trunk/runtime/src/tsan_annotations.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/tsan_annotations.cpp?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/tsan_annotations.cpp (original)
+++ openmp/trunk/runtime/src/tsan_annotations.cpp Fri May 12 13:01:32 2017
@@ -3,7 +3,6 @@
  * race detection in OpenMP programs.
  */
 
-
 //===----------------------------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -18,46 +17,92 @@
 #include <stdio.h>
 
 typedef unsigned long uptr;
-typedef signed   long sptr;
+typedef signed long sptr;
 
-extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f, int l, uptr cv) {}
-extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f, int l, uptr cv) {}
-extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f, int l, uptr mu) {}
-extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l, uptr cv, uptr lock) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l, uptr m) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f, int l, uptr m) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {}
-extern "C" __attribute__((weak)) void AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {}
-extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l, uptr mem) {}
-extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size) {}
-extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l, uptr mem) {}
-extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateEnableRaceDetection( const char *f, int l, int enable) {}
-extern "C" __attribute__((weak)) void AnnotateMutexIsUsedAsCondVar( const char *f, int l, uptr mu) {}
-extern "C" __attribute__((weak)) void AnnotatePCQGet( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotatePCQPut( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotatePCQDestroy( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotatePCQCreate( const char *f, int l, uptr pcq) {}
-extern "C" __attribute__((weak)) void AnnotateExpectRace( const char *f, int l, uptr mem, char *desc) {}
-extern "C" __attribute__((weak)) void AnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr size, char *desc) {}
-extern "C" __attribute__((weak)) void AnnotateBenignRace( const char *f, int l, uptr mem, char *desc) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f, int l) {}
-extern "C" __attribute__((weak)) void AnnotatePublishMemoryRange( const char *f, int l, uptr addr, uptr size) {}
-extern "C" __attribute__((weak)) void AnnotateUnpublishMemoryRange( const char *f, int l, uptr addr, uptr size) {}
-extern "C" __attribute__((weak)) void AnnotateThreadName( const char *f, int l, char *name) {}
-extern "C" __attribute__((weak)) void WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {}
-extern "C" __attribute__((weak)) void WTFAnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr sz, char *desc) {}
-extern "C" __attribute__((weak)) int RunningOnValgrind() {return 0;}
-extern "C" __attribute__((weak)) double ValgrindSlowdown(void) {return 0;}
-extern "C" __attribute__((weak)) const char __attribute__((weak))* ThreadSanitizerQuery(const char *query) {return 0;}
-extern "C" __attribute__((weak)) void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {}
+extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f,
+                                                            int l, uptr addr) {}
+extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l,
+                                                           uptr addr) {}
+extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f,
+                                                            int l, uptr cv) {}
+extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f,
+                                                               int l, uptr cv) {
+}
+extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f,
+                                                            int l, uptr mu) {}
+extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l,
+                                                          uptr cv, uptr lock) {}
+extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l,
+                                                           uptr m) {}
+extern "C" __attribute__((weak)) void
+AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {}
+extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f,
+                                                            int l, uptr m) {}
+extern "C" __attribute__((weak)) void
+AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {}
+extern "C" __attribute__((weak)) void
+AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {}
+extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l,
+                                                          uptr mem) {}
+extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) {
+}
+extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l,
+                                                        uptr mem, uptr size) {}
+extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l,
+                                                   uptr mem) {}
+extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f,
+                                                                 int l) {}
+extern "C" __attribute__((weak)) void
+AnnotateEnableRaceDetection(const char *f, int l, int enable) {}
+extern "C" __attribute__((weak)) void
+AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu) {}
+extern "C" __attribute__((weak)) void AnnotatePCQGet(const char *f, int l,
+                                                     uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotatePCQPut(const char *f, int l,
+                                                     uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotatePCQDestroy(const char *f, int l,
+                                                         uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotatePCQCreate(const char *f, int l,
+                                                        uptr pcq) {}
+extern "C" __attribute__((weak)) void AnnotateExpectRace(const char *f, int l,
+                                                         uptr mem, char *desc) {
+}
+extern "C" __attribute__((weak)) void
+AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size, char *desc) {
+}
+extern "C" __attribute__((weak)) void AnnotateBenignRace(const char *f, int l,
+                                                         uptr mem, char *desc) {
+}
+extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f,
+                                                               int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f,
+                                                             int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f,
+                                                                int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f,
+                                                              int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f,
+                                                              int l) {}
+extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f,
+                                                            int l) {}
+extern "C" __attribute__((weak)) void
+AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size) {}
+extern "C" __attribute__((weak)) void
+AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size) {}
+extern "C" __attribute__((weak)) void AnnotateThreadName(const char *f, int l,
+                                                         char *name) {}
+extern "C" __attribute__((weak)) void
+WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {}
+extern "C" __attribute__((weak)) void
+WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {}
+extern "C" __attribute__((weak)) void
+WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz,
+                           char *desc) {}
+extern "C" __attribute__((weak)) int RunningOnValgrind() { return 0; }
+extern "C" __attribute__((weak)) double ValgrindSlowdown(void) { return 0; }
+extern "C" __attribute__((weak)) const char __attribute__((weak)) *
+    ThreadSanitizerQuery(const char *query) {
+  return 0;
+}
+extern "C" __attribute__((weak)) void
+AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {}
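
Every Annotate* body reformatted above is deliberately empty and marked
__attribute__((weak)): the runtime can call these entry points unconditionally,
and they stay no-ops unless a ThreadSanitizer-instrumented build links in strong
definitions, which then take precedence. A stripped-down sketch of the mechanism
(the names here are illustrative, not from the commit):

  // fallback.cpp -- weak no-op used when no race detector is present.
  extern "C" __attribute__((weak)) void ExampleAnnotate(const char *file, int line,
                                                        unsigned long addr) {}

  // detector.cpp -- a strong definition from the sanitizer runtime is preferred
  // by the linker, so the very same call sites now feed the race detector.
  extern "C" void ExampleAnnotate(const char *file, int line, unsigned long addr) {
    /* record the event for (file, line, addr) */
  }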

Modified: openmp/trunk/runtime/src/tsan_annotations.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/tsan_annotations.h?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/tsan_annotations.h (original)
+++ openmp/trunk/runtime/src/tsan_annotations.h Fri May 12 13:01:32 2017
@@ -4,7 +4,6 @@
  * race detection in OpenMP programs.
  */
 
-
 //===----------------------------------------------------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -21,7 +20,7 @@
 
 /* types as used in tsan/rtl/tsan_interface_ann.cc */
 typedef unsigned long uptr;
-typedef signed   long sptr;
+typedef signed long sptr;
 
 #ifdef __cplusplus
 extern "C" {
@@ -44,30 +43,32 @@ void AnnotateFlushState(const char *f, i
 void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size);
 void AnnotateNoOp(const char *f, int l, uptr mem);
 void AnnotateFlushExpectedRaces(const char *f, int l);
-void AnnotateEnableRaceDetection( const char *f, int l, int enable);
-void AnnotateMutexIsUsedAsCondVar( const char *f, int l, uptr mu);
-void AnnotatePCQGet( const char *f, int l, uptr pcq);
-void AnnotatePCQPut( const char *f, int l, uptr pcq);
-void AnnotatePCQDestroy( const char *f, int l, uptr pcq);
-void AnnotatePCQCreate( const char *f, int l, uptr pcq);
-void AnnotateExpectRace( const char *f, int l, uptr mem, char *desc);
-void AnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr size, char *desc);
-void AnnotateBenignRace( const char *f, int l, uptr mem, char *desc);
+void AnnotateEnableRaceDetection(const char *f, int l, int enable);
+void AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu);
+void AnnotatePCQGet(const char *f, int l, uptr pcq);
+void AnnotatePCQPut(const char *f, int l, uptr pcq);
+void AnnotatePCQDestroy(const char *f, int l, uptr pcq);
+void AnnotatePCQCreate(const char *f, int l, uptr pcq);
+void AnnotateExpectRace(const char *f, int l, uptr mem, char *desc);
+void AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size,
+                             char *desc);
+void AnnotateBenignRace(const char *f, int l, uptr mem, char *desc);
 void AnnotateIgnoreReadsBegin(const char *f, int l);
 void AnnotateIgnoreReadsEnd(const char *f, int l);
 void AnnotateIgnoreWritesBegin(const char *f, int l);
 void AnnotateIgnoreWritesEnd(const char *f, int l);
 void AnnotateIgnoreSyncBegin(const char *f, int l);
 void AnnotateIgnoreSyncEnd(const char *f, int l);
-void AnnotatePublishMemoryRange( const char *f, int l, uptr addr, uptr size);
-void AnnotateUnpublishMemoryRange( const char *f, int l, uptr addr, uptr size);
-void AnnotateThreadName( const char *f, int l, char *name);
+void AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size);
+void AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size);
+void AnnotateThreadName(const char *f, int l, char *name);
 void WTFAnnotateHappensBefore(const char *f, int l, uptr addr);
 void WTFAnnotateHappensAfter(const char *f, int l, uptr addr);
-void WTFAnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr sz, char *desc);
+void WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz,
+                                char *desc);
 int RunningOnValgrind();
 double ValgrindSlowdown(void);
-const char * ThreadSanitizerQuery(const char *query);
+const char *ThreadSanitizerQuery(const char *query);
 void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz);
 
 #ifdef __cplusplus
@@ -75,17 +76,27 @@ void AnnotateMemoryIsInitialized(const c
 #endif
 
 #ifdef TSAN_SUPPORT
-#define ANNOTATE_HAPPENS_AFTER(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_HAPPENS_BEFORE(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_IGNORE_WRITES_BEGIN() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+#define ANNOTATE_HAPPENS_AFTER(addr)                                           \
+  AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_HAPPENS_BEFORE(addr)                                          \
+  AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_IGNORE_WRITES_BEGIN()                                         \
+  AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
 #define ANNOTATE_IGNORE_WRITES_END() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
-#define ANNOTATE_RWLOCK_CREATE(lck) AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck)
-#define ANNOTATE_RWLOCK_RELEASED(lck) AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1)
-#define ANNOTATE_RWLOCK_ACQUIRED(lck) AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1)
-#define ANNOTATE_BARRIER_BEGIN(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_BARRIER_END(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_REDUCE_AFTER(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
-#define ANNOTATE_REDUCE_BEFORE(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_RWLOCK_CREATE(lck)                                            \
+  AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck)
+#define ANNOTATE_RWLOCK_RELEASED(lck)                                          \
+  AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1)
+#define ANNOTATE_RWLOCK_ACQUIRED(lck)                                          \
+  AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1)
+#define ANNOTATE_BARRIER_BEGIN(addr)                                           \
+  AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_BARRIER_END(addr)                                             \
+  AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_REDUCE_AFTER(addr)                                            \
+  AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr)
+#define ANNOTATE_REDUCE_BEFORE(addr)                                           \
+  AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr)
 #else
 #define ANNOTATE_HAPPENS_AFTER(addr)
 #define ANNOTATE_HAPPENS_BEFORE(addr)
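
Under TSAN_SUPPORT each ANNOTATE_* macro above injects __FILE__ and __LINE__ into
the corresponding Annotate* call; in the #else branch the same macros expand to
nothing, so annotated call sites cost nothing in ordinary builds. A hypothetical
call site (not taken from this commit) looks like:

  // Record a happens-before edge on a synchronization object before publishing.
  void publish_result_example(void *sync_object) {
    /* ... write data that other threads will read ... */
    // Expands to AnnotateHappensBefore(__FILE__, __LINE__, (uptr)sync_object)
    // when TSAN_SUPPORT is defined, and to nothing otherwise.
    ANNOTATE_HAPPENS_BEFORE(sync_object);
    /* ... set the flag that readers poll ... */
  }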

Modified: openmp/trunk/runtime/src/z_Linux_asm.s
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/z_Linux_asm.s?rev=302929&r1=302928&r2=302929&view=diff
==============================================================================
--- openmp/trunk/runtime/src/z_Linux_asm.s (original)
+++ openmp/trunk/runtime/src/z_Linux_asm.s Fri May 12 13:01:32 2017
@@ -21,7 +21,6 @@
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 
 # if KMP_MIC
-//
 // the 'delay r16/r32/r64' should be used instead of the 'pause'.
 // The delay operation has the effect of removing the current thread from
 // the round-robin HT mechanism, and therefore speeds up the issue rate of
@@ -70,9 +69,10 @@
 KMP_PREFIX_UNDERSCORE($0):
 .endmacro
 # else // KMP_OS_DARWIN
-#  define KMP_PREFIX_UNDERSCORE(x) x  // no extra underscore for Linux* OS symbols
+#  define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
 // Format labels so that they don't override function names in gdb's backtraces
-// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*)
+// MIC assembler doesn't accept .L syntax, the L works fine there (as well as
+// on OS X*)
 # if KMP_MIC
 #  define KMP_LABEL(x) L_##x          // local label
 # else
@@ -163,12 +163,10 @@ KMP_PREFIX_UNDERSCORE(\proc):
 
 #ifdef KMP_GOMP_COMPAT
 
-//
 // Support for unnamed common blocks.
 //
 // Because the symbol ".gomp_critical_user_" contains a ".", we have to
 // put this stuff in assembly.
-//
 
 # if KMP_ARCH_X86
 #  if KMP_OS_DARWIN
@@ -221,14 +219,12 @@ __kmp_unnamed_critical_addr:
 // microtasking routines specifically written for IA-32 architecture
 // running Linux* OS
 // -----------------------------------------------------------------------
-//
 
 	.ident "Intel Corporation"
 	.data
 	ALIGN 4
 // void
 // __kmp_x86_pause( void );
-//
 
         .text
 	PROC  __kmp_x86_pause
@@ -238,10 +234,9 @@ __kmp_unnamed_critical_addr:
 
 	DEBUG_INFO __kmp_x86_pause
 
-//
 // void
 // __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer );
-//
+
 	PROC  __kmp_x86_cpuid
 
 	pushl %ebp
@@ -253,7 +248,7 @@ __kmp_unnamed_critical_addr:
 
 	movl  8(%ebp), %eax
 	movl  12(%ebp), %ecx
-	cpuid				// Query the CPUID for the current processor
+	cpuid		// Query the CPUID for the current processor
 
 	movl  16(%ebp), %edi
 	movl  %eax, 0(%edi)
@@ -275,10 +270,8 @@ __kmp_unnamed_critical_addr:
 # if !KMP_ASM_INTRINS
 
 //------------------------------------------------------------------------
-//
 // kmp_int32
 // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
-//
 
         PROC      __kmp_test_then_add32
 
@@ -291,7 +284,6 @@ __kmp_unnamed_critical_addr:
 	DEBUG_INFO __kmp_test_then_add32
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed8
 //
 // kmp_int32
@@ -302,7 +294,6 @@ __kmp_unnamed_critical_addr:
 // 	d:	8(%esp)
 //
 // return:	%al
-
         PROC  __kmp_xchg_fixed8
 
         movl      4(%esp), %ecx    // "p"
@@ -316,7 +307,6 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed16
 //
 // kmp_int16
@@ -326,7 +316,6 @@ __kmp_unnamed_critical_addr:
 // 	p:	4(%esp)
 // 	d:	8(%esp)
 // return:     %ax
-
         PROC  __kmp_xchg_fixed16
 
         movl      4(%esp), %ecx    // "p"
@@ -340,7 +329,6 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed32
 //
 // kmp_int32
@@ -351,7 +339,6 @@ __kmp_unnamed_critical_addr:
 // 	d:	8(%esp)
 //
 // return:	%eax
-
         PROC  __kmp_xchg_fixed32
 
         movl      4(%esp), %ecx    // "p"
@@ -364,11 +351,8 @@ __kmp_unnamed_critical_addr:
         DEBUG_INFO __kmp_xchg_fixed32
 
 
-//
 // kmp_int8
 // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-
         PROC  __kmp_compare_and_store8
 
         movl      4(%esp), %ecx
@@ -382,11 +366,8 @@ __kmp_unnamed_critical_addr:
 
         DEBUG_INFO __kmp_compare_and_store8
 
-//
 // kmp_int16
-// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-
+// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv);
         PROC  __kmp_compare_and_store16
 
         movl      4(%esp), %ecx
@@ -400,11 +381,8 @@ __kmp_unnamed_critical_addr:
 
         DEBUG_INFO __kmp_compare_and_store16
 
-//
 // kmp_int32
-// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-
+// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv);
         PROC  __kmp_compare_and_store32
 
         movl      4(%esp), %ecx
@@ -412,16 +390,14 @@ __kmp_unnamed_critical_addr:
         movl      12(%esp), %edx
         lock
         cmpxchgl  %edx,(%ecx)
-        sete      %al           // if %eax == (%ecx) set %al = 1 else set %al = 0
-        and       $1, %eax      // sign extend previous instruction
+        sete      %al          // if %eax == (%ecx) set %al = 1 else set %al = 0
+        and       $1, %eax     // sign extend previous instruction
         ret
 
         DEBUG_INFO __kmp_compare_and_store32
 
-//
 // kmp_int32
-// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
+// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv);
         PROC  __kmp_compare_and_store64
 
         pushl     %ebp
@@ -435,8 +411,8 @@ __kmp_unnamed_critical_addr:
         movl      24(%ebp), %ecx        // "sv" high order word
         lock
         cmpxchg8b (%edi)
-        sete      %al           // if %edx:eax == (%edi) set %al = 1 else set %al = 0
-        and       $1, %eax      // sign extend previous instruction
+        sete      %al      // if %edx:eax == (%edi) set %al = 1 else set %al = 0
+        and       $1, %eax // sign extend previous instruction
         popl      %edi
         popl      %ebx
         movl      %ebp, %esp
@@ -445,11 +421,8 @@ __kmp_unnamed_critical_addr:
 
         DEBUG_INFO __kmp_compare_and_store64
 
-//
 // kmp_int8
-// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
-//
-
+// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv);
         PROC  __kmp_compare_and_store_ret8
 
         movl      4(%esp), %ecx
@@ -461,11 +434,9 @@ __kmp_unnamed_critical_addr:
 
         DEBUG_INFO __kmp_compare_and_store_ret8
 
-//
 // kmp_int16
-// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
-//
-
+// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv,
+//                               kmp_int16 sv);
         PROC  __kmp_compare_and_store_ret16
 
         movl      4(%esp), %ecx
@@ -477,11 +448,9 @@ __kmp_unnamed_critical_addr:
 
         DEBUG_INFO __kmp_compare_and_store_ret16
 
-//
 // kmp_int32
-// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
-//
-
+// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv,
+//                               kmp_int32 sv);
         PROC  __kmp_compare_and_store_ret32
 
         movl      4(%esp), %ecx
@@ -493,10 +462,9 @@ __kmp_unnamed_critical_addr:
 
         DEBUG_INFO __kmp_compare_and_store_ret32
 
-//
 // kmp_int64
-// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
-//
+// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv,
+//                               kmp_int64 sv);
         PROC  __kmp_compare_and_store_ret64
 
         pushl     %ebp
@@ -520,7 +488,6 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_real32
 //
 // kmp_real32
@@ -531,8 +498,6 @@ __kmp_unnamed_critical_addr:
 // 	data:	8(%esp)
 //
 // return:	%eax
-
-
         PROC  __kmp_xchg_real32
 
         pushl   %ebp
@@ -565,7 +530,6 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_load_x87_fpu_control_word
 //
 // void
@@ -573,8 +537,6 @@ __kmp_unnamed_critical_addr:
 //
 // parameters:
 // 	p:	4(%esp)
-//
-
         PROC  __kmp_load_x87_fpu_control_word
 
         movl  4(%esp), %eax
@@ -585,7 +547,6 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_store_x87_fpu_control_word
 //
 // void
@@ -593,8 +554,6 @@ __kmp_unnamed_critical_addr:
 //
 // parameters:
 // 	p:	4(%esp)
-//
-
         PROC  __kmp_store_x87_fpu_control_word
 
         movl  4(%esp), %eax
@@ -605,14 +564,10 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_clear_x87_fpu_status_word
 //
 // void
 // __kmp_clear_x87_fpu_status_word();
-//
-//
-
         PROC  __kmp_clear_x87_fpu_status_word
 
         fnclex
@@ -622,7 +577,6 @@ __kmp_unnamed_critical_addr:
 
 
 //------------------------------------------------------------------------
-//
 // typedef void	(*microtask_t)( int *gtid, int *tid, ... );
 //
 // int
@@ -714,7 +668,6 @@ KMP_LABEL(invoke_3):
 	DEBUG_INFO __kmp_hardware_timestamp
 // -- End  __kmp_hardware_timestamp
 
-// -----------------------------------------------------------------------
 #endif /* KMP_ARCH_X86 */
 
 
@@ -732,9 +685,9 @@ KMP_LABEL(invoke_3):
 	.data
 	ALIGN 4
 
-// To prevent getting our code into .data section .text added to every routine definition for x86_64.
+// To prevent getting our code into .data section .text added to every routine
+// definition for x86_64.
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_x86_cpuid
 //
 // void
@@ -744,7 +697,6 @@ KMP_LABEL(invoke_3):
 // 	mode:		%edi
 // 	mode2:		%esi
 // 	cpuid_buffer:	%rdx
-
         .text
 	PROC  __kmp_x86_cpuid
 
@@ -774,7 +726,6 @@ KMP_LABEL(invoke_3):
 # if !KMP_ASM_INTRINS
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_test_then_add32
 //
 // kmp_int32
@@ -785,7 +736,6 @@ KMP_LABEL(invoke_3):
 // 	d:	%esi
 //
 // return:	%eax
-
         .text
         PROC  __kmp_test_then_add32
 
@@ -798,7 +748,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_test_then_add64
 //
 // kmp_int64
@@ -808,7 +757,6 @@ KMP_LABEL(invoke_3):
 // 	p:	%rdi
 // 	d:	%rsi
 //	return:	%rax
-
         .text
         PROC  __kmp_test_then_add64
 
@@ -821,7 +769,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed8
 //
 // kmp_int32
@@ -832,7 +779,6 @@ KMP_LABEL(invoke_3):
 // 	d:	%sil
 //
 // return:	%al
-
         .text
         PROC  __kmp_xchg_fixed8
 
@@ -846,7 +792,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed16
 //
 // kmp_int16
@@ -856,7 +801,6 @@ KMP_LABEL(invoke_3):
 // 	p:	%rdi
 // 	d:	%si
 // return:     %ax
-
         .text
         PROC  __kmp_xchg_fixed16
 
@@ -870,7 +814,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed32
 //
 // kmp_int32
@@ -881,7 +824,6 @@ KMP_LABEL(invoke_3):
 // 	d:	%esi
 //
 // return:	%eax
-
         .text
         PROC  __kmp_xchg_fixed32
 
@@ -895,7 +837,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_fixed64
 //
 // kmp_int64
@@ -905,7 +846,6 @@ KMP_LABEL(invoke_3):
 // 	p:	%rdi
 // 	d:	%rsi
 // return:	%rax
-
         .text
         PROC  __kmp_xchg_fixed64
 
@@ -919,7 +859,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store8
 //
 // kmp_int8
@@ -931,7 +870,6 @@ KMP_LABEL(invoke_3):
 //	sv:	%edx
 //
 // return:	%eax
-
         .text
         PROC  __kmp_compare_and_store8
 
@@ -946,7 +884,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store16
 //
 // kmp_int16
@@ -958,7 +895,6 @@ KMP_LABEL(invoke_3):
 //	sv:	%dx
 //
 // return:	%eax
-
         .text
         PROC  __kmp_compare_and_store16
 
@@ -973,7 +909,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store32
 //
 // kmp_int32
@@ -985,7 +920,6 @@ KMP_LABEL(invoke_3):
 //	sv:	%edx
 //
 // return:	%eax
-
         .text
         PROC  __kmp_compare_and_store32
 
@@ -1000,7 +934,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store64
 //
 // kmp_int32
@@ -1011,7 +944,6 @@ KMP_LABEL(invoke_3):
 // 	cv:	%rsi
 //	sv:	%rdx
 //	return:	%eax
-
         .text
         PROC  __kmp_compare_and_store64
 
@@ -1025,7 +957,6 @@ KMP_LABEL(invoke_3):
         DEBUG_INFO __kmp_compare_and_store64
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store_ret8
 //
 // kmp_int8
@@ -1037,7 +968,6 @@ KMP_LABEL(invoke_3):
 //	sv:	%edx
 //
 // return:	%eax
-
         .text
         PROC  __kmp_compare_and_store_ret8
 
@@ -1050,7 +980,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store_ret16
 //
 // kmp_int16
@@ -1062,7 +991,6 @@ KMP_LABEL(invoke_3):
 //	sv:	%dx
 //
 // return:	%eax
-
         .text
         PROC  __kmp_compare_and_store_ret16
 
@@ -1075,7 +1003,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store_ret32
 //
 // kmp_int32
@@ -1087,7 +1014,6 @@ KMP_LABEL(invoke_3):
 //	sv:	%edx
 //
 // return:	%eax
-
         .text
         PROC  __kmp_compare_and_store_ret32
 
@@ -1100,7 +1026,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_compare_and_store_ret64
 //
 // kmp_int64
@@ -1111,7 +1036,6 @@ KMP_LABEL(invoke_3):
 // 	cv:	%rsi
 //	sv:	%rdx
 //	return:	%eax
-
         .text
         PROC  __kmp_compare_and_store_ret64
 
@@ -1130,7 +1054,6 @@ KMP_LABEL(invoke_3):
 # if !KMP_ASM_INTRINS
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_real32
 //
 // kmp_real32
@@ -1141,7 +1064,6 @@ KMP_LABEL(invoke_3):
 // 	data:	%xmm0 (lower 4 bytes)
 //
 // return:	%xmm0 (lower 4 bytes)
-
         .text
         PROC  __kmp_xchg_real32
 
@@ -1158,7 +1080,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_xchg_real64
 //
 // kmp_real64
@@ -1168,8 +1089,6 @@ KMP_LABEL(invoke_3):
 //      addr:   %rdi
 //      data:   %xmm0 (lower 8 bytes)
 //      return: %xmm0 (lower 8 bytes)
-//
-
         .text
         PROC  __kmp_xchg_real64
 
@@ -1190,7 +1109,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_load_x87_fpu_control_word
 //
 // void
@@ -1198,8 +1116,6 @@ KMP_LABEL(invoke_3):
 //
 // parameters:
 // 	p:	%rdi
-//
-
         .text
         PROC  __kmp_load_x87_fpu_control_word
 
@@ -1210,7 +1126,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_store_x87_fpu_control_word
 //
 // void
@@ -1218,8 +1133,6 @@ KMP_LABEL(invoke_3):
 //
 // parameters:
 // 	p:	%rdi
-//
-
         .text
         PROC  __kmp_store_x87_fpu_control_word
 
@@ -1230,14 +1143,10 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_clear_x87_fpu_status_word
 //
 // void
 // __kmp_clear_x87_fpu_status_word();
-//
-//
-
         .text
         PROC  __kmp_clear_x87_fpu_status_word
 
@@ -1256,7 +1165,6 @@ KMP_LABEL(invoke_3):
 
 
 //------------------------------------------------------------------------
-//
 // typedef void	(*microtask_t)( int *gtid, int *tid, ... );
 //
 // int
@@ -1267,8 +1175,7 @@ KMP_LABEL(invoke_3):
 //    return 1;
 // }
 //
-// note:
-//	at call to pkfn must have %rsp 128-byte aligned for compiler
+// note: at call to pkfn must have %rsp 128-byte aligned for compiler
 //
 // parameters:
 //      %rdi:  	pkfn
@@ -1291,8 +1198,6 @@ KMP_LABEL(invoke_3):
 //	%rbx:	used to hold pkfn address, and zero constant, callee-save
 //
 // return:	%eax 	(always 1/TRUE)
-//
-
 __gtid = -16
 __tid = -24
 
@@ -1442,13 +1347,10 @@ KMP_LABEL(kmp_1_exit):
 // -- End  __kmp_hardware_timestamp
 
 //------------------------------------------------------------------------
-//
 // FUNCTION __kmp_bsr32
 //
 // int
 // __kmp_bsr32( int );
-//
-
         .text
         PROC  __kmp_bsr32
 



