[llvm] 77e6bb3 - Re-land [Support] Extend TimeProfiler to support multiple threads

Russell Gallop via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 27 05:03:54 PST 2020


Author: Russell Gallop
Date: 2020-01-27T13:01:49Z
New Revision: 77e6bb3cbad26f0a95be5c427fa7f87833d5843e

URL: https://github.com/llvm/llvm-project/commit/77e6bb3cbad26f0a95be5c427fa7f87833d5843e
DIFF: https://github.com/llvm/llvm-project/commit/77e6bb3cbad26f0a95be5c427fa7f87833d5843e.diff

LOG: Re-land [Support] Extend TimeProfiler to support multiple threads

This makes TimeTraceProfilerInstance thread local. Added
timeTraceProfilerFinishThread() which moves the thread local instance to
a global vector of instances. timeTraceProfilerWrite() then writes
recorded data from all instances.

Threads are identified based on their thread ids. Totals are reported
with artificial thread ids higher than the real ones.

This fixes the previous version to work with __thread as well as
thread_local.

Differential Revision: https://reviews.llvm.org/D71059

Added: 
    

Modified: 
    llvm/include/llvm/Support/TimeProfiler.h
    llvm/lib/Support/TimeProfiler.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h
index 678f8c136811..c29677920cb3 100644
--- a/llvm/include/llvm/Support/TimeProfiler.h
+++ b/llvm/include/llvm/Support/TimeProfiler.h
@@ -14,7 +14,7 @@
 namespace llvm {
 
 struct TimeTraceProfiler;
-extern TimeTraceProfiler *TimeTraceProfilerInstance;
+extern LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance;
 
 /// Initialize the time trace profiler.
 /// This sets up the global \p TimeTraceProfilerInstance
@@ -25,6 +25,9 @@ void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
 /// Cleanup the time trace profiler, if it was initialized.
 void timeTraceProfilerCleanup();
 
+/// Finish a time trace profiler running on a worker thread.
+void timeTraceProfilerFinishThread();
+
 /// Is the time trace profiler enabled, i.e. initialized?
 inline bool timeTraceProfilerEnabled() {
   return TimeTraceProfilerInstance != nullptr;

diff  --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index a7c85509064e..24da35ceafb8 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -15,16 +15,25 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Threading.h"
+#include <algorithm>
 #include <cassert>
 #include <chrono>
+#include <mutex>
 #include <string>
 #include <vector>
 
 using namespace std::chrono;
 
+namespace {
+std::mutex Mu;
+std::vector<llvm::TimeTraceProfiler *>
+    ThreadTimeTraceProfilerInstances; // guarded by Mu
+} // namespace
+
 namespace llvm {
 
-TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
+LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
 
 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
 typedef time_point<steady_clock> TimePointType;
@@ -61,7 +70,7 @@ struct Entry {
 struct TimeTraceProfiler {
   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
       : StartTime(steady_clock::now()), ProcName(ProcName),
-        TimeTraceGranularity(TimeTraceGranularity) {}
+        Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}
 
   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
     Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
@@ -103,22 +112,31 @@ struct TimeTraceProfiler {
     Stack.pop_back();
   }
 
+  // Write events from this TimeTraceProfilerInstance and
+  // ThreadTimeTraceProfilerInstances.
   void Write(raw_pwrite_stream &OS) {
+    // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
+    std::lock_guard<std::mutex> Lock(Mu);
     assert(Stack.empty() &&
            "All profiler sections should be ended when calling Write");
+    assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(),
+                       ThreadTimeTraceProfilerInstances.end(),
+                       [](const auto &TTP) { return TTP->Stack.empty(); }) &&
+           "All profiler sections should be ended when calling Write");
+
     json::OStream J(OS);
     J.objectBegin();
     J.attributeBegin("traceEvents");
     J.arrayBegin();
 
     // Emit all events for the main flame graph.
-    for (const auto &E : Entries) {
+    auto writeEvent = [&](const auto &E, uint64_t Tid) {
       auto StartUs = E.getFlameGraphStartUs(StartTime);
       auto DurUs = E.getFlameGraphDurUs();
 
       J.object([&]{
         J.attribute("pid", 1);
-        J.attribute("tid", 0);
+        J.attribute("tid", int64_t(Tid));
         J.attribute("ph", "X");
         J.attribute("ts", StartUs);
         J.attribute("dur", DurUs);
@@ -127,39 +145,73 @@ struct TimeTraceProfiler {
           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
         }
       });
+    };
+    for (const auto &E : Entries) {
+      writeEvent(E, this->Tid);
+    }
+    for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
+      for (const auto &E : TTP->Entries) {
+        writeEvent(E, TTP->Tid);
+      }
     }
 
     // Emit totals by section name as additional "thread" events, sorted from
     // longest one.
-    int Tid = 1;
+    // Find highest used thread id.
+    uint64_t MaxTid = this->Tid;
+    for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
+      MaxTid = std::max(MaxTid, TTP->Tid);
+    }
+
+    // Combine all CountAndTotalPerName from threads into one.
+    StringMap<CountAndDurationType> AllCountAndTotalPerName;
+    auto combineStat = [&](const auto &Stat) {
+      std::string Key = Stat.getKey();
+      auto Value = Stat.getValue();
+      auto &CountAndTotal = AllCountAndTotalPerName[Key];
+      CountAndTotal.first += Value.first;
+      CountAndTotal.second += Value.second;
+    };
+    for (const auto &Stat : CountAndTotalPerName) {
+      combineStat(Stat);
+    }
+    for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
+      for (const auto &Stat : TTP->CountAndTotalPerName) {
+        combineStat(Stat);
+      }
+    }
+
     std::vector<NameAndCountAndDurationType> SortedTotals;
-    SortedTotals.reserve(CountAndTotalPerName.size());
-    for (const auto &E : CountAndTotalPerName)
-      SortedTotals.emplace_back(E.getKey(), E.getValue());
+    SortedTotals.reserve(AllCountAndTotalPerName.size());
+    for (const auto &Total : AllCountAndTotalPerName)
+      SortedTotals.emplace_back(Total.getKey(), Total.getValue());
 
     llvm::sort(SortedTotals.begin(), SortedTotals.end(),
                [](const NameAndCountAndDurationType &A,
                   const NameAndCountAndDurationType &B) {
                  return A.second.second > B.second.second;
                });
-    for (const auto &E : SortedTotals) {
-      auto DurUs = duration_cast<microseconds>(E.second.second).count();
-      auto Count = CountAndTotalPerName[E.first].first;
+
+    // Report totals on separate threads of tracing file.
+    uint64_t TotalTid = MaxTid + 1;
+    for (const auto &Total : SortedTotals) {
+      auto DurUs = duration_cast<microseconds>(Total.second.second).count();
+      auto Count = AllCountAndTotalPerName[Total.first].first;
 
       J.object([&]{
         J.attribute("pid", 1);
-        J.attribute("tid", Tid);
+        J.attribute("tid", int64_t(TotalTid));
         J.attribute("ph", "X");
         J.attribute("ts", 0);
         J.attribute("dur", DurUs);
-        J.attribute("name", "Total " + E.first);
+        J.attribute("name", "Total " + Total.first);
         J.attributeObject("args", [&] {
           J.attribute("count", int64_t(Count));
           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
         });
       });
 
-      ++Tid;
+      ++TotalTid;
     }
 
     // Emit metadata event with process name.
@@ -183,6 +235,7 @@ struct TimeTraceProfiler {
   StringMap<CountAndDurationType> CountAndTotalPerName;
   const TimePointType StartTime;
   const std::string ProcName;
+  const uint64_t Tid;
 
   // Minimum time granularity (in microseconds)
   const unsigned TimeTraceGranularity;
@@ -196,8 +249,21 @@ void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
 }
 
+// Removes all TimeTraceProfilerInstances.
+// Called from main thread.
 void timeTraceProfilerCleanup() {
   delete TimeTraceProfilerInstance;
+  std::lock_guard<std::mutex> Lock(Mu);
+  for (auto TTP : ThreadTimeTraceProfilerInstances)
+    delete TTP;
+  ThreadTimeTraceProfilerInstances.clear();
+}
+
+// Finish TimeTraceProfilerInstance on a worker thread.
+// This doesn't remove the instance, just moves the pointer to global vector.
+void timeTraceProfilerFinishThread() {
+  std::lock_guard<std::mutex> Lock(Mu);
+  ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance);
   TimeTraceProfilerInstance = nullptr;
 }
 


        


More information about the llvm-commits mailing list