<div dir="ltr">FYI, this patch introduces -Wunused-variable warning:<div><div>llvm/projects/compiler-rt/lib/xray/xray_fdr_logging_impl.h:597:12: error: unused variable 'TailExitFuncId' [-Werror,-Wunused-variable]</div><div>      auto TailExitFuncId = ExpectedTailExit.FuncId;</div><div>           ^</div><div>1 error generated.</div></div></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Apr 6, 2017 at 9:14 AM, Dean Michael Berris via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: dberris<br>
Date: Thu Apr  6 02:14:43 2017<br>
New Revision: 299629<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=299629&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=299629&view=rev</a><br>
Log:<br>
[XRay] [compiler-rt] Unwriting FDR mode buffers when functions are short.<br>
<br>
Summary:<br>
"short" is defined as an xray flag, and buffer rewinding happens for both exits<br>
 and tail exits.<br>
<br>
 I've made the choice to seek backwards finding pairs of FunctionEntry, TailExit<br>
 record pairs and erasing them if the FunctionEntry occurred before exit from the<br>
 currently exiting function. This is a compromise so that we don't skip logging<br>
 tail calls if the function that they call into takes longer our duration.<br>
<br>
 This works by counting the consecutive function and function entry, tail exit<br>
 pairs that proceed the current point in the buffer. The buffer is rewound to<br>
 check whether these entry points happened recently enough to be erased.<br>
<br>
 It is still possible we will omit them if they call into a child function that<br>
 is not instrumented which calls a fast grandchild that is instrumented before<br>
 doing other processing.<br>
<br>
Reviewers: pelikan, dberris<br>
<br>
Reviewed By: dberris<br>
<br>
Subscribers: llvm-commits<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D31345" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D31345</a><br>
<br>
Modified:<br>
    compiler-rt/trunk/lib/xray/<wbr>xray_fdr_logging_impl.h<br>
    compiler-rt/trunk/lib/xray/<wbr>xray_flags.inc<br>
    compiler-rt/trunk/test/xray/<wbr>TestCases/Linux/fdr-mode.cc<br>
<br>
Modified: compiler-rt/trunk/lib/xray/<wbr>xray_fdr_logging_impl.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h?rev=299629&r1=299628&r2=299629&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/compiler-rt/trunk/lib/<wbr>xray/xray_fdr_logging_impl.h?<wbr>rev=299629&r1=299628&r2=<wbr>299629&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- compiler-rt/trunk/lib/xray/<wbr>xray_fdr_logging_impl.h (original)<br>
+++ compiler-rt/trunk/lib/xray/<wbr>xray_fdr_logging_impl.h Thu Apr  6 02:14:43 2017<br>
@@ -32,6 +32,8 @@<br>
 #include "xray_buffer_queue.h"<br>
 #include "xray_defs.h"<br>
 #include "xray_fdr_log_records.h"<br>
+#include "xray_flags.h"<br>
+#include "xray_tsc.h"<br>
<br>
 namespace __xray {<br>
<br>
@@ -108,9 +110,17 @@ static void processFunctionHook(int32_t<br>
 //----------------------------<wbr>------------------------------<wbr>-------------------|<br>
<br>
 namespace {<br>
+<br>
 thread_local BufferQueue::Buffer Buffer;<br>
 thread_local char *RecordPtr = nullptr;<br>
<br>
+// The number of FunctionEntry records immediately preceding RecordPtr.<br>
+thread_local uint8_t NumConsecutiveFnEnters = 0;<br>
+<br>
+// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit<br>
+// records preceding RecordPtr.<br>
+thread_local uint8_t NumTailCalls = 0;<br>
+<br>
 constexpr auto MetadataRecSize = sizeof(MetadataRecord);<br>
 constexpr auto FunctionRecSize = sizeof(FunctionRecord);<br>
<br>
@@ -209,6 +219,8 @@ static inline void writeNewBufferPreambl<br>
   }<br>
   std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);<br>
   MemPtr += sizeof(MetadataRecord) * InitRecordsCount;<br>
+  NumConsecutiveFnEnters = 0;<br>
+  NumTailCalls = 0;<br>
 }<br>
<br>
 static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer,<br>
@@ -221,6 +233,8 @@ static inline void setupNewBuffer(const<br>
   // This is typically clock_gettime, but callers have injection ability.<br>
   wall_clock_reader(CLOCK_<wbr>MONOTONIC, &TS);<br>
   writeNewBufferPreamble(Tid, TS, RecordPtr);<br>
+  NumConsecutiveFnEnters = 0;<br>
+  NumTailCalls = 0;<br>
 }<br>
<br>
 static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,<br>
@@ -237,6 +251,8 @@ static inline void writeNewCPUIdMetadata<br>
   std::memcpy(&NewCPUId.Data[<wbr>sizeof(CPU)], &TSC, sizeof(TSC));<br>
   std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));<br>
   MemPtr += sizeof(MetadataRecord);<br>
+  NumConsecutiveFnEnters = 0;<br>
+  NumTailCalls = 0;<br>
 }<br>
<br>
 static inline void writeNewCPUIdMetadata(uint16_t CPU,<br>
@@ -251,6 +267,8 @@ static inline void writeEOBMetadata(char<br>
   // For now we don't write any bytes into the Data field.<br>
   std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));<br>
   MemPtr += sizeof(MetadataRecord);<br>
+  NumConsecutiveFnEnters = 0;<br>
+  NumTailCalls = 0;<br>
 }<br>
<br>
 static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {<br>
@@ -269,6 +287,8 @@ static inline void writeTSCWrapMetadata(<br>
   std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));<br>
   std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));<br>
   MemPtr += sizeof(MetadataRecord);<br>
+  NumConsecutiveFnEnters = 0;<br>
+  NumTailCalls = 0;<br>
 }<br>
<br>
 static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {<br>
@@ -289,13 +309,35 @@ static inline void writeFunctionRecord(i<br>
<br>
   switch (EntryType) {<br>
   case XRayEntryType::ENTRY:<br>
+    ++NumConsecutiveFnEnters;<br>
+    FuncRecord.RecordKind = uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionEnter);<br>
+    break;<br>
   case XRayEntryType::LOG_ARGS_ENTRY:<br>
+    // We should not rewind functions with logged args.<br>
+    NumConsecutiveFnEnters = 0;<br>
+    NumTailCalls = 0;<br>
     FuncRecord.RecordKind = uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionEnter);<br>
     break;<br>
   case XRayEntryType::EXIT:<br>
+    // If we've decided to log the function exit, we will never erase the log<br>
+    // before it.<br>
+    NumConsecutiveFnEnters = 0;<br>
+    NumTailCalls = 0;<br>
     FuncRecord.RecordKind = uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionExit);<br>
     break;<br>
   case XRayEntryType::TAIL:<br>
+    // If we just entered the function we're tail exiting from or erased every<br>
+    // invocation since then, this function entry tail pair is a candidate to<br>
+    // be erased when the child function exits.<br>
+    if (NumConsecutiveFnEnters > 0) {<br>
+      ++NumTailCalls;<br>
+      NumConsecutiveFnEnters = 0;<br>
+    } else {<br>
+      // We will never be able to erase this tail call since we have logged<br>
+      // something in between the function entry and tail exit.<br>
+      NumTailCalls = 0;<br>
+      NumConsecutiveFnEnters = 0;<br>
+    }<br>
     FuncRecord.RecordKind =<br>
         uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionTailExit)<wbr>;<br>
     break;<br>
@@ -337,6 +379,8 @@ static inline void processFunctionHook(<br>
   // We assume that we'll support only 65536 CPUs for x86_64.<br>
   thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>:<wbr>:max();<br>
   thread_local uint64_t LastTSC = 0;<br>
+  thread_local uint64_t LastFunctionEntryTSC = 0;<br>
+  thread_local uint64_t NumberOfTicksThreshold = 0;<br>
<br>
   // Make sure a thread that's ever called handleArg0 has a thread-local<br>
   // live reference to the buffer queue for this particular instance of<br>
@@ -379,6 +423,10 @@ static inline void processFunctionHook(<br>
       return;<br>
     }<br>
<br>
+    uint64_t CycleFrequency = getTSCFrequency();<br>
+    NumberOfTicksThreshold = CycleFrequency *<br>
+                             flags()->xray_fdr_log_func_<wbr>duration_threshold_us /<br>
+                             1000000;<br>
     setupNewBuffer(Buffer, wall_clock_reader);<br>
   }<br>
<br>
@@ -441,6 +489,10 @@ static inline void processFunctionHook(<br>
              BufferQueue::getErrorString(<wbr>EC));<br>
       return;<br>
     }<br>
+    uint64_t CycleFrequency = getTSCFrequency();<br>
+    NumberOfTicksThreshold = CycleFrequency *<br>
+                             flags()->xray_fdr_log_func_<wbr>duration_threshold_us /<br>
+                             1000000;<br>
     setupNewBuffer(Buffer, wall_clock_reader);<br>
   }<br>
<br>
@@ -486,10 +538,92 @@ static inline void processFunctionHook(<br>
       RecordTSCDelta = Delta;<br>
   }<br>
<br>
-  // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid<br>
-  // us in future computations of this TSC delta value.<br>
   LastTSC = TSC;<br>
   CurrentCPU = CPU;<br>
+  using AlignedFuncStorage =<br>
+      std::aligned_storage<sizeof(<wbr>FunctionRecord),<br>
+                           alignof(FunctionRecord)>::<wbr>type;<br>
+  switch (Entry) {<br>
+  case XRayEntryType::ENTRY:<br>
+  case XRayEntryType::LOG_ARGS_ENTRY:<br>
+    // Update the thread local state for the next invocation.<br>
+    LastFunctionEntryTSC = TSC;<br>
+    break;<br>
+  case XRayEntryType::TAIL:<br>
+    break;<br>
+  case XRayEntryType::EXIT:<br>
+    // Break out and write the exit record if we can't erase any functions.<br>
+    if (NumConsecutiveFnEnters == 0 ||<br>
+        (TSC - LastFunctionEntryTSC) >= NumberOfTicksThreshold)<br>
+      break;<br>
+    // Otherwise we unwind functions that are too short to log by decrementing<br>
+    // our view ptr into the buffer. We can erase a Function Entry record and<br>
+    // neglect to log our EXIT. We have to make sure to update some state like<br>
+    // the LastTSC and count of consecutive FunctionEntry records.<br>
+    RecordPtr -= FunctionRecSize;<br>
+    AlignedFuncStorage AlignedFuncRecordBuffer;<br>
+    const auto &FuncRecord = *reinterpret_cast<<wbr>FunctionRecord *>(<br>
+        std::memcpy(&<wbr>AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));<br>
+    assert(FuncRecord.RecordKind ==<br>
+               uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionEnter) &&<br>
+           "Expected to find function entry recording when rewinding.");<br>
+    assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&<br>
+           "Expected matching function id when rewinding Exit");<br>
+    --NumConsecutiveFnEnters;<br>
+    LastTSC -= FuncRecord.TSCDelta;<br>
+<br>
+    // We unwound one call. Update the state and return without writing a log.<br>
+    if (NumConsecutiveFnEnters != 0) {<br>
+      LastFunctionEntryTSC = LastFunctionEntryTSC - FuncRecord.TSCDelta;<br>
+      return;<br>
+    }<br>
+<br>
+    // Otherwise we've rewound the stack of all function entries, we might be<br>
+    // able to rewind further by erasing tail call functions that are being<br>
+    // exited from via this exit.<br>
+    LastFunctionEntryTSC = 0;<br>
+    auto RewindingTSC = LastTSC;<br>
+    auto RewindingRecordPtr = RecordPtr - FunctionRecSize;<br>
+    while (NumTailCalls > 0) {<br>
+      AlignedFuncStorage TailExitRecordBuffer;<br>
+      // Rewind the TSC back over the TAIL EXIT record.<br>
+      const auto &ExpectedTailExit =<br>
+          *reinterpret_cast<<wbr>FunctionRecord *>(std::memcpy(<br>
+              &TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));<br>
+<br>
+      assert(ExpectedTailExit.<wbr>RecordKind ==<br>
+                 uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionTailExit) &&<br>
+             "Expected to find tail exit when rewinding.");<br>
+      auto TailExitFuncId = ExpectedTailExit.FuncId;<br>
+      RewindingRecordPtr -= FunctionRecSize;<br>
+      RewindingTSC -= ExpectedTailExit.TSCDelta;<br>
+      AlignedFuncStorage FunctionEntryBuffer;<br>
+      const auto &ExpectedFunctionEntry =<br>
+          *reinterpret_cast<<wbr>FunctionRecord *>(std::memcpy(<br>
+              &FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));<br>
+      assert(ExpectedFunctionEntry.<wbr>RecordKind ==<br>
+                 uint8_t(FunctionRecord::<wbr>RecordKinds::FunctionEnter) &&<br>
+             "Expected to find function entry when rewinding tail call.");<br>
+      assert(ExpectedFunctionEntry.<wbr>FuncId == TailExitFuncId &&<br>
+             "Expected funcids to match when rewinding tail call.");<br>
+<br>
+      if ((TSC - RewindingTSC) < NumberOfTicksThreshold) {<br>
+        // We can erase a tail exit pair that we're exiting through since<br>
+        // its duration is under threshold.<br>
+        --NumTailCalls;<br>
+        RewindingRecordPtr -= FunctionRecSize;<br>
+        RewindingTSC -= ExpectedFunctionEntry.<wbr>TSCDelta;<br>
+        RecordPtr -= 2 * FunctionRecSize;<br>
+        LastTSC = RewindingTSC;<br>
+      } else {<br>
+        // This tail call exceeded the threshold duration. It will not<br>
+        // be erased.<br>
+        NumTailCalls = 0;<br>
+        return;<br>
+      }<br>
+    }<br>
+    return; // without writing log.<br>
+  }<br>
<br>
   writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);<br>
<br>
@@ -510,5 +644,4 @@ static inline void processFunctionHook(<br>
 } // namespace __xray_fdr_internal<br>
<br>
 } // namespace __xray<br>
-<br>
 #endif // XRAY_XRAY_FDR_LOGGING_IMPL_H<br>
<br>
Modified: compiler-rt/trunk/lib/xray/<wbr>xray_flags.inc<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_flags.inc?rev=299629&r1=299628&r2=299629&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/compiler-rt/trunk/lib/<wbr>xray/xray_flags.inc?rev=<wbr>299629&r1=299628&r2=299629&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- compiler-rt/trunk/lib/xray/<wbr>xray_flags.inc (original)<br>
+++ compiler-rt/trunk/lib/xray/<wbr>xray_flags.inc Thu Apr  6 02:14:43 2017<br>
@@ -22,3 +22,6 @@ XRAY_FLAG(const char *, xray_logfile_bas<br>
           "Filename base for the xray logfile.")<br>
 XRAY_FLAG(bool, xray_fdr_log, false,<br>
           "Whether to install the flight data recorder logging implementation.")<br>
+XRAY_FLAG(int, xray_fdr_log_func_duration_<wbr>threshold_us, 5,<br>
+          "FDR logging will try to skip functions that execute for fewer "<br>
+          "microseconds than this threshold.")<br>
<br>
Modified: compiler-rt/trunk/test/xray/<wbr>TestCases/Linux/fdr-mode.cc<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc?rev=299629&r1=299628&r2=299629&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/compiler-rt/trunk/<wbr>test/xray/TestCases/Linux/fdr-<wbr>mode.cc?rev=299629&r1=299628&<wbr>r2=299629&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- compiler-rt/trunk/test/xray/<wbr>TestCases/Linux/fdr-mode.cc (original)<br>
+++ compiler-rt/trunk/test/xray/<wbr>TestCases/Linux/fdr-mode.cc Thu Apr  6 02:14:43 2017<br>
@@ -1,7 +1,10 @@<br>
 // RUN: %clangxx_xray -g -std=c++11 %s -o %t<br>
-// RUN: XRAY_OPTIONS="patch_premain=<wbr>false xray_naive_log=false xray_logfile_base=fdr-logging-<wbr>test- xray_fdr_log=true verbosity=1" %run %t 2>&1 | FileCheck %s<br>
-// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix TRACE<br>
+// RUN: XRAY_OPTIONS="patch_premain=<wbr>false xray_naive_log=false xray_logfile_base=fdr-logging-<wbr>test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_<wbr>threshold_us=0" %run %t 2>&1 | FileCheck %s<br>
+// RUN: XRAY_OPTIONS="patch_premain=<wbr>false xray_naive_log=false xray_logfile_base=fdr-unwrite-<wbr>test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_<wbr>threshold_us=5000" %run %t 2>&1 | FileCheck %s<br>
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix=TRACE<br>
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-unwrite-test-* | head -1`" | FileCheck %s --check-prefix=UNWRITE<br>
 // RUN: rm fdr-logging-test-*<br>
+// RUN: rm fdr-unwrite-test-*<br>
 // FIXME: Make llvm-xray work on non-x86_64 as well.<br>
 // REQUIRES: x86_64-linux<br>
<br>
@@ -61,8 +64,6 @@ int main(int argc, char *argv[]) {<br>
   return 0;<br>
 }<br>
<br>
-<br>
-<br>
 // Check that we're able to see two threads, each entering and exiting fA().<br>
 // TRACE-DAG: - { type: 0, func-id: [[FIDA:[0-9]+]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }<br>
 // TRACE:     - { type: 0, func-id: [[FIDA]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-exit, tsc: {{[0-9]+}} }<br>
@@ -80,3 +81,9 @@ int main(int argc, char *argv[]) {<br>
 // TRACE:     - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-exit, tsc: {{[0-9]+}} }<br>
 // TRACE-DAG: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }<br>
 // TRACE:     - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} }<br>
+<br>
+// Assert that when unwriting is enabled with a high threshold time, all the function records are erased. A CPU switch could erroneously fail this test, but<br>
+// is unlikely given the test program.<br>
+// UNWRITE: header<br>
+// UNWRITE-NOT: function-enter<br>
+// UNWRITE-NOT: function-exit<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>