[llvm] r340825 - [llvm-mca][TimelineView] Force the same number of executions for every entry in the 'wait-times' table.

Andrea Di Biagio via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 28 07:27:01 PDT 2018


Author: adibiagio
Date: Tue Aug 28 07:27:01 2018
New Revision: 340825

URL: http://llvm.org/viewvc/llvm-project?rev=340825&view=rev
Log:
[llvm-mca][TimelineView] Force the same number of executions for every entry in the 'wait-times' table.

This patch also uses colors to highlight problematic wait-time entries.
A problematic entry is an entry with a high wait time that tends to match (or
exceed) the size of the scheduler's buffer.

Color RED is used if an instruction had to wait an average number of cycles
which is bigger than (or equal to) the size of the underlying scheduler's
buffer.
Color YELLOW is used if the time (in cycles) spent waiting for the
operands or pipeline resources is bigger than (or equal to) half the size of
the underlying scheduler's buffer.
Color MAGENTA is used if an instruction does not consume buffer resources
according to the scheduling model.

Modified:
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
    llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pr37790.s
    llvm/trunk/tools/llvm-mca/Views/TimelineView.cpp
    llvm/trunk/tools/llvm-mca/Views/TimelineView.h

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s?rev=340825&r1=340824&r2=340825&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/clear-super-register-2.s Tue Aug 28 07:27:01 2018
@@ -104,17 +104,17 @@ vandps %xmm4, %xmm1, %xmm0
 # CHECK-NEXT: 1.     2     9.5    0.5    35.5      vaddps	%xmm0, %xmm1, %xmm3
 # CHECK-NEXT: 2.     2     11.5   0.0    33.5      vaddps	%ymm3, %ymm1, %ymm4
 # CHECK-NEXT: 3.     2     12.5   2.0    31.5      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 4.     1     5.0    4.0    29.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 5.     1     6.0    6.0    27.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 6.     1     7.0    7.0    26.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 7.     1     8.0    8.0    24.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 8.     1     9.0    9.0    23.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 9.     1     10.0   10.0   21.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 10.    1     11.0   11.0   20.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 11.    1     12.0   12.0   18.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 12.    1     13.0   13.0   17.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 13.    1     14.0   14.0   15.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 14.    1     15.0   15.0   14.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 15.    1     16.0   16.0   12.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 16.    1     17.0   17.0   11.0      vaddps	%ymm3, %ymm1, %ymm4
-# CHECK-NEXT: 17.    1     19.0   0.0    10.0      vandps	%xmm4, %xmm1, %xmm0
+# CHECK-NEXT: 4.     2     13.5   4.0    30.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 5.     2     14.5   6.0    28.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 6.     2     15.5   7.5    27.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 7.     2     16.5   9.0    25.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 8.     2     17.5   10.5   24.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 9.     2     18.5   12.0   22.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 10.    2     19.5   13.5   21.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 11.    2     20.5   15.0   19.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 12.    2     21.5   16.5   18.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 13.    2     22.5   18.0   16.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 14.    2     23.5   19.5   15.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 15.    2     21.0   21.0   13.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 16.    2     22.0   22.0   12.5      vaddps	%ymm3, %ymm1, %ymm4
+# CHECK-NEXT: 17.    2     24.0   0.0    11.5      vandps	%xmm4, %xmm1, %xmm0

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s?rev=340825&r1=340824&r2=340825&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pipes-fpu.s Tue Aug 28 07:27:01 2018
@@ -103,6 +103,6 @@ vsqrtps     %ymm0, %ymm2
 # CHECK-NEXT: 2.     2     1.0    1.0    28.0      vcvttps2dq	%xmm0, %xmm2
 # CHECK-NEXT: 3.     2     1.0    1.0    29.5      vpclmulqdq	$0, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 4.     2     1.0    1.0    28.0      vaddps	%xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 5.     1     1.0    1.0    0.0       vsqrtps	%xmm0, %xmm2
-# CHECK-NEXT: 6.     1     1.0    1.0    17.0      vaddps	%ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 7.     1     20.0   20.0   0.0       vsqrtps	%ymm0, %ymm2
+# CHECK-NEXT: 5.     2     29.5   29.5   0.0       vsqrtps	%xmm0, %xmm2
+# CHECK-NEXT: 6.     2     1.0    1.0    45.5      vaddps	%ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 7.     2     48.5   48.5   0.0       vsqrtps	%ymm0, %ymm2

Modified: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pr37790.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pr37790.s?rev=340825&r1=340824&r2=340825&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pr37790.s (original)
+++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/pr37790.s Tue Aug 28 07:27:01 2018
@@ -37,5 +37,5 @@ stmxcsr (%rsp)
 # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
 
 # CHECK:            [0]    [1]    [2]    [3]
-# CHECK-NEXT: 0.     1     1.0    1.0    0.0       int3
-# CHECK-NEXT: 1.     1     101.0  0.0    0.0       stmxcsr	(%rsp)
+# CHECK-NEXT: 0.     2     1.0    0.5    0.0       int3
+# CHECK-NEXT: 1.     2     100.5  0.0    0.0       stmxcsr	(%rsp)

Modified: llvm/trunk/tools/llvm-mca/Views/TimelineView.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Views/TimelineView.cpp?rev=340825&r1=340824&r2=340825&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Views/TimelineView.cpp (original)
+++ llvm/trunk/tools/llvm-mca/Views/TimelineView.cpp Tue Aug 28 07:27:01 2018
@@ -18,41 +18,63 @@ using namespace llvm;
 
 namespace mca {
 
-void TimelineView::initialize(unsigned MaxIterations) {
-  unsigned NumInstructions =
-      AsmSequence.getNumIterations() * AsmSequence.size();
+TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
+                           const SourceMgr &S, unsigned MaxIterations,
+                           unsigned Cycles)
+    : STI(sti), MCIP(Printer), AsmSequence(S), CurrentCycle(0),
+      MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0), WaitTime(S.size()),
+      UsedBuffer(S.size()) {
+  unsigned NumInstructions = AsmSequence.size();
   if (!MaxIterations)
     MaxIterations = DEFAULT_ITERATIONS;
-  unsigned NumEntries =
-      std::min(NumInstructions, MaxIterations * AsmSequence.size());
-  Timeline.resize(NumEntries);
-  TimelineViewEntry NullTVEntry = {0, 0, 0, 0, 0};
-  std::fill(Timeline.begin(), Timeline.end(), NullTVEntry);
+  NumInstructions *= std::min(MaxIterations, AsmSequence.getNumIterations());
+  Timeline.resize(NumInstructions);
 
-  WaitTime.resize(AsmSequence.size());
-  WaitTimeEntry NullWTEntry = {0, 0, 0, 0};
+  WaitTimeEntry NullWTEntry = {0, 0, 0};
   std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
 }
 
+void TimelineView::onReservedBuffers(const InstRef &IR,
+                                     ArrayRef<unsigned> Buffers) {
+  if (IR.getSourceIndex() >= AsmSequence.size())
+    return;
+
+  const MCSchedModel &SM = STI.getSchedModel();
+  std::pair<unsigned, unsigned> BufferInfo = {0, 0};
+  for (const unsigned Buffer : Buffers) {
+    const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
+    if (MCDesc.BufferSize <= 0)
+      continue;
+    unsigned OtherSize = static_cast<unsigned>(MCDesc.BufferSize);
+    if (!BufferInfo.first || BufferInfo.second > OtherSize) {
+      BufferInfo.first = Buffer;
+      BufferInfo.second = OtherSize;
+    }
+  }
+
+  UsedBuffer[IR.getSourceIndex()] = BufferInfo;
+}
+
 void TimelineView::onEvent(const HWInstructionEvent &Event) {
   const unsigned Index = Event.IR.getSourceIndex();
-  if (CurrentCycle >= MaxCycle || Index >= Timeline.size())
+  if (Index >= Timeline.size())
     return;
+
   switch (Event.Type) {
   case HWInstructionEvent::Retired: {
     TimelineViewEntry &TVEntry = Timeline[Index];
-    TVEntry.CycleRetired = CurrentCycle;
+    if (CurrentCycle < MaxCycle)
+      TVEntry.CycleRetired = CurrentCycle;
 
     // Update the WaitTime entry which corresponds to this Index.
     WaitTimeEntry &WTEntry = WaitTime[Index % AsmSequence.size()];
-    WTEntry.Executions++;
     WTEntry.CyclesSpentInSchedulerQueue +=
         TVEntry.CycleIssued - TVEntry.CycleDispatched;
     assert(TVEntry.CycleDispatched <= TVEntry.CycleReady);
     WTEntry.CyclesSpentInSQWhileReady +=
         TVEntry.CycleIssued - TVEntry.CycleReady;
     WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
-        (TVEntry.CycleRetired - 1) - TVEntry.CycleExecuted;
+        (CurrentCycle - 1) - TVEntry.CycleExecuted;
     break;
   }
   case HWInstructionEvent::Ready:
@@ -70,57 +92,83 @@ void TimelineView::onEvent(const HWInstr
   default:
     return;
   }
-  LastCycle = std::max(LastCycle, CurrentCycle);
+  if (CurrentCycle < MaxCycle)
+    LastCycle = std::max(LastCycle, CurrentCycle);
+}
+
+static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
+                                       unsigned Executions,
+                                       unsigned BufferSize) {
+  if (CumulativeCycles && BufferSize == 0)
+    return raw_ostream::MAGENTA;
+  if (CumulativeCycles >= (BufferSize * Executions))
+    return raw_ostream::RED;
+  if ((CumulativeCycles * 2) >= (BufferSize * Executions))
+    return raw_ostream::YELLOW;
+  return raw_ostream::SAVEDCOLOR;
+}
+
+static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
+                           unsigned Executions, unsigned BufferSize) {
+  if (!OS.has_colors())
+    return;
+
+  raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
+  if (Color == raw_ostream::SAVEDCOLOR) {
+    OS.resetColor();
+    return;
+  }
+  OS.changeColor(Color, /* bold */ true, /* BG */ false);
 }
 
 void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
                                       const WaitTimeEntry &Entry,
-                                      unsigned SourceIndex) const {
+                                      unsigned SourceIndex,
+                                      unsigned Executions) const {
   OS << SourceIndex << '.';
   OS.PadToColumn(7);
 
-  if (Entry.Executions == 0) {
-    OS << "-      -      -      -     ";
-  } else {
-    double AverageTime1, AverageTime2, AverageTime3;
-    unsigned Executions = Entry.Executions;
-    AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
-    AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
-    AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
-
-    OS << Executions;
-    OS.PadToColumn(13);
-
-    OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
-    OS.PadToColumn(20);
-    OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
-    OS.PadToColumn(27);
-    OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
-    OS.PadToColumn(34);
-  }
+  double AverageTime1, AverageTime2, AverageTime3;
+  AverageTime1 = (double)Entry.CyclesSpentInSchedulerQueue / Executions;
+  AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / Executions;
+  AverageTime3 = (double)Entry.CyclesSpentAfterWBAndBeforeRetire / Executions;
+
+  OS << Executions;
+  OS.PadToColumn(13);
+  unsigned BufferSize = UsedBuffer[SourceIndex].second;
+  tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, Executions, BufferSize);
+  OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+  OS.PadToColumn(20);
+  tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, Executions, BufferSize);
+  OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+  OS.PadToColumn(27);
+  tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, Executions,
+                 STI.getSchedModel().MicroOpBufferSize);
+  OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+
+  if (OS.has_colors())
+    OS.resetColor();
+  OS.PadToColumn(34);
 }
 
 void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
-  if (WaitTime.empty())
-    return;
-
-  std::string Buffer;
-  raw_string_ostream TempStream(Buffer);
-  formatted_raw_ostream FOS(TempStream);
-
-  FOS << "\n\nAverage Wait times (based on the timeline view):\n"
-      << "[0]: Executions\n"
-      << "[1]: Average time spent waiting in a scheduler's queue\n"
-      << "[2]: Average time spent waiting in a scheduler's queue while ready\n"
-      << "[3]: Average time elapsed from WB until retire stage\n\n";
-  FOS << "      [0]    [1]    [2]    [3]\n";
+  std::string Header =
+      "\n\nAverage Wait times (based on the timeline view):\n"
+      "[0]: Executions\n"
+      "[1]: Average time spent waiting in a scheduler's queue\n"
+      "[2]: Average time spent waiting in a scheduler's queue while ready\n"
+      "[3]: Average time elapsed from WB until retire stage\n\n"
+      "      [0]    [1]    [2]    [3]\n";
+  OS << Header;
 
-  // Use a different string stream for the instruction.
+  // Use a different string stream for printing instructions.
   std::string Instruction;
   raw_string_ostream InstrStream(Instruction);
 
+  formatted_raw_ostream FOS(OS);
+  unsigned Executions = Timeline.size() / AsmSequence.size();
   for (unsigned I = 0, E = WaitTime.size(); I < E; ++I) {
-    printWaitTimeEntry(FOS, WaitTime[I], I);
+    printWaitTimeEntry(FOS, WaitTime[I], I, Executions);
     // Append the instruction info at the end of the line.
     const MCInst &Inst = AsmSequence.getMCInstFromIndex(I);
 
@@ -133,9 +181,6 @@ void TimelineView::printAverageWaitTimes
     FOS << "   " << Str << '\n';
     FOS.flush();
     Instruction = "";
-
-    OS << Buffer;
-    Buffer = "";
   }
 }
 
@@ -202,20 +247,15 @@ static void printTimelineHeader(formatte
 }
 
 void TimelineView::printTimeline(raw_ostream &OS) const {
-  std::string Buffer;
-  raw_string_ostream StringStream(Buffer);
-  formatted_raw_ostream FOS(StringStream);
-
+  formatted_raw_ostream FOS(OS);
   printTimelineHeader(FOS, LastCycle);
   FOS.flush();
-  OS << Buffer;
 
   // Use a different string stream for the instruction.
   std::string Instruction;
   raw_string_ostream InstrStream(Instruction);
 
   for (unsigned I = 0, E = Timeline.size(); I < E; ++I) {
-    Buffer = "";
     const TimelineViewEntry &Entry = Timeline[I];
     if (Entry.CycleRetired == 0)
       return;
@@ -234,7 +274,6 @@ void TimelineView::printTimeline(raw_ost
     FOS << "   " << Str << '\n';
     FOS.flush();
     Instruction = "";
-    OS << Buffer;
   }
 }
 } // namespace mca

Modified: llvm/trunk/tools/llvm-mca/Views/TimelineView.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mca/Views/TimelineView.h?rev=340825&r1=340824&r2=340825&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-mca/Views/TimelineView.h (original)
+++ llvm/trunk/tools/llvm-mca/Views/TimelineView.h Tue Aug 28 07:27:01 2018
@@ -135,23 +135,22 @@ class TimelineView : public View {
   std::vector<TimelineViewEntry> Timeline;
 
   struct WaitTimeEntry {
-    unsigned Executions;
     unsigned CyclesSpentInSchedulerQueue;
     unsigned CyclesSpentInSQWhileReady;
     unsigned CyclesSpentAfterWBAndBeforeRetire;
   };
   std::vector<WaitTimeEntry> WaitTime;
+  std::vector<std::pair<unsigned, unsigned>> UsedBuffer;
 
   void printTimelineViewEntry(llvm::formatted_raw_ostream &OS,
                               const TimelineViewEntry &E, unsigned Iteration,
                               unsigned SourceIndex) const;
   void printWaitTimeEntry(llvm::formatted_raw_ostream &OS,
-                          const WaitTimeEntry &E, unsigned Index) const;
+                          const WaitTimeEntry &E, unsigned Index,
+                          unsigned Executions) const;
 
   const unsigned DEFAULT_ITERATIONS = 10;
 
-  void initialize(unsigned MaxIterations);
-
   // Display characters for the TimelineView report output.
   struct DisplayChar {
     static const char Dispatched = 'D';
@@ -165,15 +164,13 @@ class TimelineView : public View {
 public:
   TimelineView(const llvm::MCSubtargetInfo &sti, llvm::MCInstPrinter &Printer,
                const SourceMgr &Sequence, unsigned MaxIterations,
-               unsigned Cycles)
-      : STI(sti), MCIP(Printer), AsmSequence(Sequence), CurrentCycle(0),
-        MaxCycle(Cycles == 0 ? 80 : Cycles), LastCycle(0) {
-    initialize(MaxIterations);
-  }
+               unsigned Cycles);
 
   // Event handlers.
   void onCycleEnd() override { ++CurrentCycle; }
   void onEvent(const HWInstructionEvent &Event) override;
+  void onReservedBuffers(const InstRef &IR,
+                         llvm::ArrayRef<unsigned> Buffers) override;
 
   // print functionalities.
   void printTimeline(llvm::raw_ostream &OS) const;




More information about the llvm-commits mailing list