[compiler-rt] r271683 - [esan|wset] Add 8-level working set snapshot accumulation

Derek Bruening via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 3 09:27:56 PDT 2016


Author: bruening
Date: Fri Jun  3 11:27:50 2016
New Revision: 271683

URL: http://llvm.org/viewvc/llvm-project?rev=271683&view=rev
Log:
[esan|wset] Add 8-level working set snapshot accumulation

Summary:
Adds a new option -snapshot_step controlling the frequency step between
successive levels of an 8-level series of samples, one level per bit of
each shadow byte.
Implements accumulation from each level to the next higher level at the
specified frequency.

Adds storage of the 8 series of samples using CircularBuffer instances.
Fixes an error in the circular buffer data structure where a static
object's destructor would be called too early, before the tool's finalize
routine had run.

Prints the results out at the end in a simple manner to give us something
to start with.

Updates the workingset-samples test to test the new feature.

Reviewers: aizatsky

Subscribers: vitalybuka, zhaoqin, kcc, eugenis, llvm-commits, kubabrecka

Differential Revision: http://reviews.llvm.org/D20833

Modified:
    compiler-rt/trunk/lib/esan/esan_circular_buffer.h
    compiler-rt/trunk/lib/esan/esan_flags.inc
    compiler-rt/trunk/lib/esan/working_set.cpp
    compiler-rt/trunk/test/esan/TestCases/workingset-samples.cpp

Modified: compiler-rt/trunk/lib/esan/esan_circular_buffer.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/esan/esan_circular_buffer.h?rev=271683&r1=271682&r2=271683&view=diff
==============================================================================
--- compiler-rt/trunk/lib/esan/esan_circular_buffer.h (original)
+++ compiler-rt/trunk/lib/esan/esan_circular_buffer.h Fri Jun  3 11:27:50 2016
@@ -28,9 +28,11 @@ class CircularBuffer {
   explicit CircularBuffer() {}
   CircularBuffer(uptr BufferCapacity) {
     initialize(BufferCapacity);
+    WasConstructed = true;
   }
   ~CircularBuffer() {
-    free();
+    if (WasConstructed) // Else caller will call free() explicitly.
+      free();
   }
   void initialize(uptr BufferCapacity) {
     Capacity = BufferCapacity;
@@ -38,6 +40,7 @@ class CircularBuffer {
     Data = (T *)MmapOrDie(Capacity * sizeof(T), "CircularBuffer");
     StartIdx = 0;
     Count = 0;
+    WasConstructed = false;
   }
   void free() {
     UnmapOrDie(Data, Capacity * sizeof(T));
@@ -83,6 +86,7 @@ class CircularBuffer {
   CircularBuffer(const CircularBuffer&);
   void operator=(const CircularBuffer&);
 
+  bool WasConstructed;
   T *Data;
   uptr Capacity;
   uptr StartIdx;

Modified: compiler-rt/trunk/lib/esan/esan_flags.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/esan/esan_flags.inc?rev=271683&r1=271682&r2=271683&view=diff
==============================================================================
--- compiler-rt/trunk/lib/esan/esan_flags.inc (original)
+++ compiler-rt/trunk/lib/esan/esan_flags.inc Fri Jun  3 11:27:50 2016
@@ -39,3 +39,9 @@ ESAN_FLAG(bool, record_snapshots, true,
 // To disable samples, turn off record_snapshots.
 ESAN_FLAG(int, sample_freq, 20,
           "Working set tool: sampling frequency in milliseconds.")
+
+// This controls the difference in frequency between each successive series
+// of snapshots.  There are 8 in total, with number 0 using sample_freq.
+// Number N samples number N-1 every (1 << snapshot_step) instance of N-1.
+ESAN_FLAG(int, snapshot_step, 2, "Working set tool: the log of the sampling "
+          "performed for the next-higher-frequency snapshot series.")

Modified: compiler-rt/trunk/lib/esan/working_set.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/esan/working_set.cpp?rev=271683&r1=271682&r2=271683&view=diff
==============================================================================
--- compiler-rt/trunk/lib/esan/working_set.cpp (original)
+++ compiler-rt/trunk/lib/esan/working_set.cpp Fri Jun  3 11:27:50 2016
@@ -14,6 +14,7 @@
 
 #include "working_set.h"
 #include "esan.h"
+#include "esan_circular_buffer.h"
 #include "esan_flags.h"
 #include "esan_shadow.h"
 #include "esan_sideline.h"
@@ -24,9 +25,15 @@
 //   cache line has ever been accessed.
 // - The lowest bit of each shadow byte indicates whether the corresponding
 //   cache line was accessed since the last sample.
-// - The other bits can be used either for a single working set snapshot
-//   between two consecutive samples, or an aggregate working set snapshot
-//   over multiple sample periods (future work).
+// - The other bits are used for working set snapshots at successively
+//   lower frequencies, each bit to the left from the lowest bit stepping
+//   down the frequency by 2 to the power of getFlags()->snapshot_step.
+// Thus we have something like this:
+//   Bit 0: Since last sample
+//   Bit 1: Since last 2^2 samples
+//   Bit 2: Since last 2^4 samples
+//   Bit 3: ...
+//   Bit 7: Ever accessed.
 // We live with races in accessing each shadow byte.
 typedef unsigned char byte;
 
@@ -37,6 +44,10 @@ static const u32 CacheLineSize = 64;
 
 // See the shadow byte layout description above.
 static const u32 TotalWorkingSetBitIdx = 7;
+// We accumulate to the left until we hit this bit.
+// We don't need to accumulate to the final bit as it's set on each ref
+// by the compiler instrumentation.
+static const u32 MaxAccumBitIdx = 6;
 static const u32 CurWorkingSetBitIdx = 0;
 static const byte ShadowAccessedVal =
   (1 << TotalWorkingSetBitIdx) | (1 << CurWorkingSetBitIdx);
@@ -47,6 +58,26 @@ static SidelineThread Thread;
 // may want to consider a 64-bit int.
 static u32 SnapshotNum;
 
+// We store the wset size for each of 8 different sampling frequencies.
+static const u32 NumFreq = 8; // One for each bit of our shadow bytes.
+// We cannot use static objects as the global destructor is called
+// prior to our finalize routine.
+// These are each circular buffers, sized up front.
+CircularBuffer<u32> SizePerFreq[NumFreq];
+// We cannot rely on static initializers (they may run too late) but
+// we record the size here for clarity:
+u32 CircularBufferSizes[NumFreq] = {
+  // These are each mmap-ed so our minimum is one page.
+  32*1024,
+  16*1024,
+  8*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+  4*1024,
+};
+
 void processRangeAccessWorkingSet(uptr PC, uptr Addr, SIZE_T Size,
                                   bool IsWrite) {
   if (Size == 0)
@@ -95,13 +126,17 @@ static u32 countAndClearShadowValues(u32
     ByteValue << 24;
   // Get word aligned start.
   ShadowStart = RoundDownTo(ShadowStart, sizeof(u32));
+  bool Accum = getFlags()->record_snapshots && BitIdx < MaxAccumBitIdx;
   for (u32 *Ptr = (u32 *)ShadowStart; Ptr < (u32 *)ShadowEnd; ++Ptr) {
     if ((*Ptr & WordValue) != 0) {
       byte *BytePtr = (byte *)Ptr;
       for (u32 j = 0; j < sizeof(u32); ++j) {
         if (BytePtr[j] & ByteValue) {
           ++WorkingSetSize;
-          // TODO: Accumulate to the lower-frequency bit to the left.
+          if (Accum) {
+            // Accumulate to the lower-frequency bit to the left.
+            BytePtr[j] |= (ByteValue << 1);
+          }
         }
       }
       // Clear this bit from every shadow byte.
@@ -134,19 +169,41 @@ static u32 computeWorkingSizeAndReset(u3
 // This is invoked from a signal handler but in a sideline thread doing nothing
 // else so it is a little less fragile than a typical signal handler.
 static void takeSample(void *Arg) {
-  // FIXME: record the size and report at process end.  For now this simply
-  // serves as a test of the sideline thread functionality.
-  VReport(1, "%s: snapshot #%d: %u\n", SanitizerToolName, SnapshotNum,
-          computeWorkingSizeAndReset(CurWorkingSetBitIdx));
-  ++SnapshotNum;
+  u32 BitIdx = CurWorkingSetBitIdx;
+  u32 Freq = 1;
+  ++SnapshotNum; // Simpler to skip 0 whose mod matches everything.
+  while (BitIdx <= MaxAccumBitIdx && (SnapshotNum % Freq) == 0) {
+    u32 NumLines = computeWorkingSizeAndReset(BitIdx);
+    VReport(1, "%s: snapshot #%5d bit %d freq %4d: %8u\n", SanitizerToolName,
+            SnapshotNum, BitIdx, Freq, NumLines);
+    SizePerFreq[BitIdx].push_back(NumLines);
+    Freq = Freq << getFlags()->snapshot_step;
+    BitIdx++;
+  }
 }
 
 void initializeWorkingSet() {
   CHECK(getFlags()->cache_line_size == CacheLineSize);
   registerMemoryFaultHandler();
 
-  if (getFlags()->record_snapshots)
+  if (getFlags()->record_snapshots) {
+    for (u32 i = 0; i < NumFreq; ++i)
+      SizePerFreq[i].initialize(CircularBufferSizes[i]);
     Thread.launchThread(takeSample, nullptr, getFlags()->sample_freq);
+  }
+}
+
+static u32 getPeriodForPrinting(u32 MilliSec, const char *&Unit) {
+  if (MilliSec > 600000) {
+    Unit = "min";
+    return MilliSec / 60000;
+  } else if (MilliSec > 10000) {
+    Unit = "sec";
+    return MilliSec / 1000;
+  } else {
+    Unit = "ms";
+    return MilliSec;
+  }
 }
 
 static u32 getSizeForPrinting(u32 NumOfCachelines, const char *&Unit) {
@@ -167,12 +224,28 @@ static u32 getSizeForPrinting(u32 NumOfC
 }
 
 int finalizeWorkingSet() {
-  if (getFlags()->record_snapshots)
+  const char *Unit;
+  if (getFlags()->record_snapshots) {
     Thread.joinThread();
+    u32 Freq = 1;
+    Report(" Total number of samples: %u\n", SnapshotNum);
+    for (u32 i = 0; i < NumFreq; ++i) {
+      u32 Time = getPeriodForPrinting(getFlags()->sample_freq*Freq, Unit);
+      Report(" Samples array #%d at period %u %s\n", i, Time, Unit);
+      // FIXME: report whether we wrapped around and thus whether we
+      // have data on the whole run or just the last N samples.
+      for (u32 j = 0; j < SizePerFreq[i].size(); ++j) {
+        u32 Size = getSizeForPrinting(SizePerFreq[i][j], Unit);
+        Report("#%4d: %8u %s (%9u cache lines)\n", j, Size, Unit,
+               SizePerFreq[i][j]);
+      }
+      Freq = Freq << getFlags()->snapshot_step;
+      SizePerFreq[i].free();
+    }
+  }
 
   // Get the working set size for the entire execution.
   u32 NumOfCachelines = computeWorkingSizeAndReset(TotalWorkingSetBitIdx);
-  const char *Unit;
   u32 Size = getSizeForPrinting(NumOfCachelines, Unit);
   Report(" %s: the total working set size: %u %s (%u cache lines)\n",
          SanitizerToolName, Size, Unit, NumOfCachelines);

Modified: compiler-rt/trunk/test/esan/TestCases/workingset-samples.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/esan/TestCases/workingset-samples.cpp?rev=271683&r1=271682&r2=271683&view=diff
==============================================================================
--- compiler-rt/trunk/test/esan/TestCases/workingset-samples.cpp (original)
+++ compiler-rt/trunk/test/esan/TestCases/workingset-samples.cpp Fri Jun  3 11:27:50 2016
@@ -1,5 +1,5 @@
 // RUN: %clang_esan_wset -O0 %s -o %t 2>&1
-// RUN: %env_esan_opts=verbosity=1 %run %t 2>&1 | FileCheck %s
+// RUN: %run %t 2>&1 | FileCheck %s
 
 #include <sched.h>
 #include <stdlib.h>
@@ -19,8 +19,21 @@ int main(int argc, char **argv) {
   for (int i = 0; i < size; ++i)
     buf[i] = i;
   munmap(buf, size);
-  // CHECK:      {{.*}}EfficiencySanitizer: snapshot {{.*}}
-  // CHECK-NEXT: {{.*}}EfficiencySanitizer: snapshot {{.*}}
+  // We only check for a few samples here to reduce the chance of flakiness.
+  // CHECK:      =={{[0-9]+}}== Total number of samples: {{[0-9]+}}
+  // CHECK-NEXT: =={{[0-9]+}}== Samples array #0 at period 20 ms
+  // CHECK-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines)
+  // CHECK-NEXT: =={{[0-9]+}}==#   1: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines)
+  // CHECK-NEXT: =={{[0-9]+}}==#   2: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines)
+  // CHECK-NEXT: =={{[0-9]+}}==#   3: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines)
+  // CHECK:      =={{[0-9]+}}== Samples array #1 at period 80 ms
+  // CHECK-NEXT: =={{[0-9]+}}==#   0: {{[ 0-9]+}} KB ({{[ 0-9]+}} cache lines)
+  // CHECK:      =={{[0-9]+}}== Samples array #2 at period 320 ms
+  // CHECK:      =={{[0-9]+}}== Samples array #3 at period 1280 ms
+  // CHECK:      =={{[0-9]+}}== Samples array #4 at period 5120 ms
+  // CHECK:      =={{[0-9]+}}== Samples array #5 at period 20 sec
+  // CHECK:      =={{[0-9]+}}== Samples array #6 at period 81 sec
+  // CHECK:      =={{[0-9]+}}== Samples array #7 at period 327 sec
   // CHECK: {{.*}} EfficiencySanitizer: the total working set size: 32 MB (5242{{[0-9][0-9]}} cache lines)
   return 0;
 }




More information about the llvm-commits mailing list