[compiler-rt] r337341 - [XRay][compiler-rt] FDR Mode: Allow multiple runs

Dean Michael Berris via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 17 18:31:30 PDT 2018


Author: dberris
Date: Tue Jul 17 18:31:30 2018
New Revision: 337341

URL: http://llvm.org/viewvc/llvm-project?rev=337341&view=rev
Log:
[XRay][compiler-rt] FDR Mode: Allow multiple runs

Summary:
Fix a bug in FDR mode which didn't allow for re-initialising the logging
in the same process. This change ensures that:

- When we flush the FDR mode logging, that the state of the logging
  implementation is `XRAY_LOG_UNINITIALIZED`.

- Fix up the thread-local initialisation to use aligned storage and
  `pthread_getspecific` as well as `pthread_setspecific` for the
  thread-specific data.

- Actually use the pointer provided to the thread-exit cleanup handling,
  instead of assuming that the thread has thread-local data associated
  with it, and reaching at thread-exit time.

In this change we also have an explicit test for two consecutive
sessions for FDR mode tracing, and ensuring both sessions succeed.

Reviewers: kpw, eizan

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D49359

Added:
    compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc
Modified:
    compiler-rt/trunk/lib/xray/xray_fdr_logging.cc

Modified: compiler-rt/trunk/lib/xray/xray_fdr_logging.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_fdr_logging.cc?rev=337341&r1=337340&r2=337341&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_fdr_logging.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_fdr_logging.cc Tue Jul 17 18:31:30 2018
@@ -15,11 +15,11 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_fdr_logging.h"
-#include <pthread.h>
 #include <cassert>
 #include <errno.h>
 #include <limits>
 #include <memory>
+#include <pthread.h>
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <time.h>
@@ -80,6 +80,16 @@ static constexpr auto FunctionRecSize =
 // Use a global pthread key to identify thread-local data for logging.
 static pthread_key_t Key;
 
+// Global BufferQueue.
+static BufferQueue *BQ = nullptr;
+
+static atomic_sint32_t LogFlushStatus = {
+    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
+
+static FDRLoggingOptions FDROptions;
+
+static SpinMutex FDROptionsMutex;
+
 // This function will initialize the thread-local data structure used by the FDR
 // logging implementation and return a reference to it. The implementation
 // details require a bit of care to maintain.
@@ -114,15 +124,18 @@ static pthread_key_t Key;
 // critical section, calling a function that might be XRay instrumented (and
 // thus in turn calling into malloc by virtue of registration of the
 // thread_local's destructor).
+static_assert(alignof(ThreadLocalData) >= 64,
+              "ThreadLocalData must be cache line aligned.");
 static ThreadLocalData &getThreadLocalData() {
-  static_assert(alignof(ThreadLocalData) >= 64,
-                "ThreadLocalData must be cache line aligned.");
-  thread_local ThreadLocalData TLD;
-  thread_local bool UNUSED ThreadOnce = [] {
-    pthread_setspecific(Key, &TLD);
-    return false;
-  }();
-  return TLD;
+  thread_local typename std::aligned_storage<
+      sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
+
+  if (pthread_getspecific(Key) == NULL) {
+    new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
+    pthread_setspecific(Key, &TLDStorage);
+  }
+
+  return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
 }
 
 static void writeNewBufferPreamble(tid_t Tid, timespec TS,
@@ -416,7 +429,7 @@ static bool prepareBuffer(uint64_t TSC,
       return false;
     auto EC = TLD.BQ->getBuffer(TLD.Buffer);
     if (EC != BufferQueue::ErrorCode::Ok) {
-      Report("Failed to acquire a buffer; error=%s\n",
+      Report("Failed to prepare a buffer; error = '%s'\n",
              BufferQueue::getErrorString(EC));
       return false;
     }
@@ -462,7 +475,7 @@ isLogInitializedAndReady(BufferQueue *LB
       auto LS = atomic_load(&LoggingStatus, memory_order_acquire);
       if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
           LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
-        Report("Failed to acquire a buffer; error=%s\n",
+        Report("Failed to acquire a buffer; error = '%s'\n",
                BufferQueue::getErrorString(EC));
       return false;
     }
@@ -541,8 +554,8 @@ thread_local atomic_uint8_t Running{0};
 static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
                                 uint64_t TSC, unsigned char CPU, uint64_t Arg1,
                                 int (*wall_clock_reader)(clockid_t,
-                                                         struct timespec *),
-                                BufferQueue *BQ) XRAY_NEVER_INSTRUMENT {
+                                                         struct timespec *))
+    XRAY_NEVER_INSTRUMENT {
   __asm volatile("# LLVM-MCA-BEGIN processFunctionHook");
   // Prevent signal handler recursion, so in case we're already in a log writing
   // mode and the signal handler comes in (and is also instrumented) then we
@@ -557,8 +570,6 @@ static void processFunctionHook(int32_t
 
   auto &TLD = getThreadLocalData();
 
-  // In case the reference has been cleaned up before, we make sure we
-  // initialize it to the provided BufferQueue.
   if (TLD.BQ == nullptr)
     TLD.BQ = BQ;
 
@@ -656,15 +667,6 @@ static void processFunctionHook(int32_t
   __asm volatile("# LLVM-MCA-END");
 }
 
-// Global BufferQueue.
-BufferQueue *BQ = nullptr;
-
-atomic_sint32_t LogFlushStatus = {XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
-
-FDRLoggingOptions FDROptions;
-
-SpinMutex FDROptionsMutex;
-
 static XRayFileHeader &fdrCommonHeaderInfo() {
   static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
@@ -789,15 +791,28 @@ XRayLogFlushStatus fdrLoggingFlush() XRA
   // we decide to do anything further with the global buffer queue.
   __xray_log_remove_buffer_iterator();
 
+  // Once flushed, we should set the global status of the logging implementation
+  // to "uninitialized" to allow for FDR-logging multiple runs.
+  auto ResetToUnitialized = at_scope_exit([] {
+    atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                 memory_order_release);
+  });
+
+  auto CleanupBuffers = at_scope_exit([] {
+    if (BQ != nullptr) {
+      auto &TLD = getThreadLocalData();
+      if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
+        releaseThreadLocalBuffer(*TLD.BQ);
+      BQ->~BufferQueue();
+      InternalFree(BQ);
+      BQ = nullptr;
+    }
+  });
+
   if (fdrFlags()->no_file_flush) {
     if (Verbosity())
       Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
 
-    // Clean up the buffer queue, and do not bother writing out the files!
-    BQ->~BufferQueue();
-    InternalFree(BQ);
-    BQ = nullptr;
-
     atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
                  memory_order_release);
     return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
@@ -913,13 +928,13 @@ static TSCAndCPU getTimestamp() XRAY_NEV
 void fdrLoggingHandleArg0(int32_t FuncId,
                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
   auto TC = getTimestamp();
-  processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime, BQ);
+  processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime);
 }
 
 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                           uint64_t Arg) XRAY_NEVER_INSTRUMENT {
   auto TC = getTimestamp();
-  processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, BQ);
+  processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime);
 }
 
 void fdrLoggingHandleCustomEvent(void *Event,
@@ -1121,8 +1136,10 @@ XRayLogInitStatus fdrLoggingInit(size_t
                  probeRequiredCPUFeatures() ? getTSCFrequency()
                                             : __xray::NanosecondsPerSecond,
                  memory_order_release);
-    pthread_key_create(&Key, +[](void *) {
-      auto &TLD = getThreadLocalData();
+    pthread_key_create(&Key, +[](void *TLDPtr) {
+      if (TLDPtr == nullptr)
+        return;
+      auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
       if (TLD.BQ == nullptr)
         return;
       auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);

Added: compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc?rev=337341&view=auto
==============================================================================
--- compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc (added)
+++ compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc Tue Jul 17 18:31:30 2018
@@ -0,0 +1,76 @@
+// RUN: %clangxx_xray -g -std=c++11 %s -o %t -fxray-modes=xray-fdr
+// RUN: rm -f fdr-inmemory-test-*
+// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \
+// RUN:     verbosity=1" \
+// RUN: XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \
+// RUN:     %run %t 2>&1 | FileCheck %s
+// RUN: FILES=`find %T -name 'fdr-inmemory-test-*' | wc -l`
+// RUN: [ $FILES -eq 0 ]
+// RUN: rm -f fdr-inmemory-test-*
+//
+// REQUIRES: x86_64-target-arch
+// REQUIRES: built-in-llvm-tree
+
+#include "xray/xray_log_interface.h"
+#include <cassert>
+#include <iostream>
+
+uint64_t var = 0;
+uint64_t buffers = 0;
+[[clang::xray_always_instrument]] void __attribute__((noinline)) f() { ++var; }
+
+int main(int argc, char *argv[]) {
+  assert(__xray_log_select_mode("xray-fdr") ==
+         XRayLogRegisterStatus::XRAY_REGISTRATION_OK);
+  auto status = __xray_log_init_mode(
+      "xray-fdr",
+      "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0");
+  assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+  __xray_patch();
+
+  // Create enough entries.
+  for (int i = 0; i != 1 << 20; ++i) {
+    f();
+  }
+
+  // Then we want to verify that we're getting 10 buffers outside of the initial
+  // header.
+  auto finalize_status = __xray_log_finalize();
+  assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED);
+  auto process_status =
+      __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; });
+  std::cout << "buffers = " << buffers << std::endl;
+  assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+  auto flush_status = __xray_log_flushLog();
+  assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+  // We expect 11 buffers because 1 header buffer + 10 actual FDR buffers.
+  // CHECK: Buffers = 11
+  std::cout << "Buffers = " << buffers << std::endl;
+
+  // In this test we ensure that we can restart the cycle after the flush.
+  status = __xray_log_init_mode(
+      "xray-fdr",
+      "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0");
+  assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED);
+  __xray_patch();
+
+  // Create enough entries.
+  for (int i = 0; i != 1 << 20; ++i) {
+    f();
+  }
+
+  // Then we want to verify that we're getting 10 buffers outside of the initial
+  // header.
+  finalize_status = __xray_log_finalize();
+  assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED);
+  process_status =
+      __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; });
+  std::cout << "buffers = " << buffers << std::endl;
+  assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+  flush_status = __xray_log_flushLog();
+  assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED);
+  // We expect 22 buffers because 1 header buffer + 10 actual FDR buffers, plus
+  // the number of buffers we got from the previous run (also 11).
+  // CHECK: Buffers = 22
+  std::cout << "Buffers = " << buffers << std::endl;
+}




More information about the llvm-commits mailing list