[compiler-rt] r299629 - [XRay] [compiler-rt] Unwriting FDR mode buffers when functions are short.

Dean Michael Berris via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 6 00:14:44 PDT 2017


Author: dberris
Date: Thu Apr  6 02:14:43 2017
New Revision: 299629

URL: http://llvm.org/viewvc/llvm-project?rev=299629&view=rev
Log:
[XRay] [compiler-rt] Unwriting FDR mode buffers when functions are short.

Summary:
"short" is defined as an xray flag, and buffer rewinding happens for both exits
 and tail exits.

 I've made the choice to seek backwards finding pairs of FunctionEntry, TailExit
 record pairs and erasing them if the FunctionEntry occurred before exit from the
 currently exiting function. This is a compromise so that we don't skip logging
 tail calls if the function that they call into takes longer our duration.

 This works by counting the consecutive function and function entry, tail exit
 pairs that proceed the current point in the buffer. The buffer is rewound to
 check whether these entry points happened recently enough to be erased.

 It is still possible we will omit them if they call into a child function that
 is not instrumented which calls a fast grandchild that is instrumented before
 doing other processing.

Reviewers: pelikan, dberris

Reviewed By: dberris

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D31345

Modified:
    compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h
    compiler-rt/trunk/lib/xray/xray_flags.inc
    compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc

Modified: compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h?rev=299629&r1=299628&r2=299629&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h (original)
+++ compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h Thu Apr  6 02:14:43 2017
@@ -32,6 +32,8 @@
 #include "xray_buffer_queue.h"
 #include "xray_defs.h"
 #include "xray_fdr_log_records.h"
+#include "xray_flags.h"
+#include "xray_tsc.h"
 
 namespace __xray {
 
@@ -108,9 +110,17 @@ static void processFunctionHook(int32_t
 //-----------------------------------------------------------------------------|
 
 namespace {
+
 thread_local BufferQueue::Buffer Buffer;
 thread_local char *RecordPtr = nullptr;
 
+// The number of FunctionEntry records immediately preceding RecordPtr.
+thread_local uint8_t NumConsecutiveFnEnters = 0;
+
+// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
+// records preceding RecordPtr.
+thread_local uint8_t NumTailCalls = 0;
+
 constexpr auto MetadataRecSize = sizeof(MetadataRecord);
 constexpr auto FunctionRecSize = sizeof(FunctionRecord);
 
@@ -209,6 +219,8 @@ static inline void writeNewBufferPreambl
   }
   std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
   MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
+  NumConsecutiveFnEnters = 0;
+  NumTailCalls = 0;
 }
 
 static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer,
@@ -221,6 +233,8 @@ static inline void setupNewBuffer(const
   // This is typically clock_gettime, but callers have injection ability.
   wall_clock_reader(CLOCK_MONOTONIC, &TS);
   writeNewBufferPreamble(Tid, TS, RecordPtr);
+  NumConsecutiveFnEnters = 0;
+  NumTailCalls = 0;
 }
 
 static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
@@ -237,6 +251,8 @@ static inline void writeNewCPUIdMetadata
   std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
   std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
   MemPtr += sizeof(MetadataRecord);
+  NumConsecutiveFnEnters = 0;
+  NumTailCalls = 0;
 }
 
 static inline void writeNewCPUIdMetadata(uint16_t CPU,
@@ -251,6 +267,8 @@ static inline void writeEOBMetadata(char
   // For now we don't write any bytes into the Data field.
   std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
   MemPtr += sizeof(MetadataRecord);
+  NumConsecutiveFnEnters = 0;
+  NumTailCalls = 0;
 }
 
 static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
@@ -269,6 +287,8 @@ static inline void writeTSCWrapMetadata(
   std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
   std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
   MemPtr += sizeof(MetadataRecord);
+  NumConsecutiveFnEnters = 0;
+  NumTailCalls = 0;
 }
 
 static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
@@ -289,13 +309,35 @@ static inline void writeFunctionRecord(i
 
   switch (EntryType) {
   case XRayEntryType::ENTRY:
+    ++NumConsecutiveFnEnters;
+    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+    break;
   case XRayEntryType::LOG_ARGS_ENTRY:
+    // We should not rewind functions with logged args.
+    NumConsecutiveFnEnters = 0;
+    NumTailCalls = 0;
     FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
     break;
   case XRayEntryType::EXIT:
+    // If we've decided to log the function exit, we will never erase the log
+    // before it.
+    NumConsecutiveFnEnters = 0;
+    NumTailCalls = 0;
     FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
     break;
   case XRayEntryType::TAIL:
+    // If we just entered the function we're tail exiting from or erased every
+    // invocation since then, this function entry tail pair is a candidate to
+    // be erased when the child function exits.
+    if (NumConsecutiveFnEnters > 0) {
+      ++NumTailCalls;
+      NumConsecutiveFnEnters = 0;
+    } else {
+      // We will never be able to erase this tail call since we have logged
+      // something in between the function entry and tail exit.
+      NumTailCalls = 0;
+      NumConsecutiveFnEnters = 0;
+    }
     FuncRecord.RecordKind =
         uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
     break;
@@ -337,6 +379,8 @@ static inline void processFunctionHook(
   // We assume that we'll support only 65536 CPUs for x86_64.
   thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
   thread_local uint64_t LastTSC = 0;
+  thread_local uint64_t LastFunctionEntryTSC = 0;
+  thread_local uint64_t NumberOfTicksThreshold = 0;
 
   // Make sure a thread that's ever called handleArg0 has a thread-local
   // live reference to the buffer queue for this particular instance of
@@ -379,6 +423,10 @@ static inline void processFunctionHook(
       return;
     }
 
+    uint64_t CycleFrequency = getTSCFrequency();
+    NumberOfTicksThreshold = CycleFrequency *
+                             flags()->xray_fdr_log_func_duration_threshold_us /
+                             1000000;
     setupNewBuffer(Buffer, wall_clock_reader);
   }
 
@@ -441,6 +489,10 @@ static inline void processFunctionHook(
              BufferQueue::getErrorString(EC));
       return;
     }
+    uint64_t CycleFrequency = getTSCFrequency();
+    NumberOfTicksThreshold = CycleFrequency *
+                             flags()->xray_fdr_log_func_duration_threshold_us /
+                             1000000;
     setupNewBuffer(Buffer, wall_clock_reader);
   }
 
@@ -486,10 +538,92 @@ static inline void processFunctionHook(
       RecordTSCDelta = Delta;
   }
 
-  // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
-  // us in future computations of this TSC delta value.
   LastTSC = TSC;
   CurrentCPU = CPU;
+  using AlignedFuncStorage =
+      std::aligned_storage<sizeof(FunctionRecord),
+                           alignof(FunctionRecord)>::type;
+  switch (Entry) {
+  case XRayEntryType::ENTRY:
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    // Update the thread local state for the next invocation.
+    LastFunctionEntryTSC = TSC;
+    break;
+  case XRayEntryType::TAIL:
+    break;
+  case XRayEntryType::EXIT:
+    // Break out and write the exit record if we can't erase any functions.
+    if (NumConsecutiveFnEnters == 0 ||
+        (TSC - LastFunctionEntryTSC) >= NumberOfTicksThreshold)
+      break;
+    // Otherwise we unwind functions that are too short to log by decrementing
+    // our view ptr into the buffer. We can erase a Function Entry record and
+    // neglect to log our EXIT. We have to make sure to update some state like
+    // the LastTSC and count of consecutive FunctionEntry records.
+    RecordPtr -= FunctionRecSize;
+    AlignedFuncStorage AlignedFuncRecordBuffer;
+    const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
+        std::memcpy(&AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));
+    assert(FuncRecord.RecordKind ==
+               uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+           "Expected to find function entry recording when rewinding.");
+    assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
+           "Expected matching function id when rewinding Exit");
+    --NumConsecutiveFnEnters;
+    LastTSC -= FuncRecord.TSCDelta;
+
+    // We unwound one call. Update the state and return without writing a log.
+    if (NumConsecutiveFnEnters != 0) {
+      LastFunctionEntryTSC = LastFunctionEntryTSC - FuncRecord.TSCDelta;
+      return;
+    }
+
+    // Otherwise we've rewound the stack of all function entries, we might be
+    // able to rewind further by erasing tail call functions that are being
+    // exited from via this exit.
+    LastFunctionEntryTSC = 0;
+    auto RewindingTSC = LastTSC;
+    auto RewindingRecordPtr = RecordPtr - FunctionRecSize;
+    while (NumTailCalls > 0) {
+      AlignedFuncStorage TailExitRecordBuffer;
+      // Rewind the TSC back over the TAIL EXIT record.
+      const auto &ExpectedTailExit =
+          *reinterpret_cast<FunctionRecord *>(std::memcpy(
+              &TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));
+
+      assert(ExpectedTailExit.RecordKind ==
+                 uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
+             "Expected to find tail exit when rewinding.");
+      auto TailExitFuncId = ExpectedTailExit.FuncId;
+      RewindingRecordPtr -= FunctionRecSize;
+      RewindingTSC -= ExpectedTailExit.TSCDelta;
+      AlignedFuncStorage FunctionEntryBuffer;
+      const auto &ExpectedFunctionEntry =
+          *reinterpret_cast<FunctionRecord *>(std::memcpy(
+              &FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
+      assert(ExpectedFunctionEntry.RecordKind ==
+                 uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+             "Expected to find function entry when rewinding tail call.");
+      assert(ExpectedFunctionEntry.FuncId == TailExitFuncId &&
+             "Expected funcids to match when rewinding tail call.");
+
+      if ((TSC - RewindingTSC) < NumberOfTicksThreshold) {
+        // We can erase a tail exit pair that we're exiting through since
+        // its duration is under threshold.
+        --NumTailCalls;
+        RewindingRecordPtr -= FunctionRecSize;
+        RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
+        RecordPtr -= 2 * FunctionRecSize;
+        LastTSC = RewindingTSC;
+      } else {
+        // This tail call exceeded the threshold duration. It will not
+        // be erased.
+        NumTailCalls = 0;
+        return;
+      }
+    }
+    return; // without writing log.
+  }
 
   writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
 
@@ -510,5 +644,4 @@ static inline void processFunctionHook(
 } // namespace __xray_fdr_internal
 
 } // namespace __xray
-
 #endif // XRAY_XRAY_FDR_LOGGING_IMPL_H

Modified: compiler-rt/trunk/lib/xray/xray_flags.inc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_flags.inc?rev=299629&r1=299628&r2=299629&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_flags.inc (original)
+++ compiler-rt/trunk/lib/xray/xray_flags.inc Thu Apr  6 02:14:43 2017
@@ -22,3 +22,6 @@ XRAY_FLAG(const char *, xray_logfile_bas
           "Filename base for the xray logfile.")
 XRAY_FLAG(bool, xray_fdr_log, false,
           "Whether to install the flight data recorder logging implementation.")
+XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
+          "FDR logging will try to skip functions that execute for fewer "
+          "microseconds than this threshold.")

Modified: compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc?rev=299629&r1=299628&r2=299629&view=diff
==============================================================================
--- compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc (original)
+++ compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc Thu Apr  6 02:14:43 2017
@@ -1,7 +1,10 @@
 // RUN: %clangxx_xray -g -std=c++11 %s -o %t
-// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-logging-test- xray_fdr_log=true verbosity=1" %run %t 2>&1 | FileCheck %s
-// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix TRACE
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-logging-test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | FileCheck %s
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-unwrite-test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=5000" %run %t 2>&1 | FileCheck %s
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix=TRACE
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-unwrite-test-* | head -1`" | FileCheck %s --check-prefix=UNWRITE
 // RUN: rm fdr-logging-test-*
+// RUN: rm fdr-unwrite-test-*
 // FIXME: Make llvm-xray work on non-x86_64 as well.
 // REQUIRES: x86_64-linux
 
@@ -61,8 +64,6 @@ int main(int argc, char *argv[]) {
   return 0;
 }
 
-
-
 // Check that we're able to see two threads, each entering and exiting fA().
 // TRACE-DAG: - { type: 0, func-id: [[FIDA:[0-9]+]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
 // TRACE:     - { type: 0, func-id: [[FIDA]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-exit, tsc: {{[0-9]+}} }
@@ -80,3 +81,9 @@ int main(int argc, char *argv[]) {
 // TRACE:     - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-exit, tsc: {{[0-9]+}} }
 // TRACE-DAG: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
 // TRACE:     - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} }
+
+// Assert that when unwriting is enabled with a high threshold time, all the function records are erased. A CPU switch could erroneously fail this test, but
+// is unlikely given the test program.
+// UNWRITE: header
+// UNWRITE-NOT: function-enter
+// UNWRITE-NOT: function-exit




More information about the llvm-commits mailing list