[compiler-rt] r342356 - [XRay] Simplify FDR buffer management

Dean Michael Berris via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 16 20:09:01 PDT 2018


Author: dberris
Date: Sun Sep 16 20:09:01 2018
New Revision: 342356

URL: http://llvm.org/viewvc/llvm-project?rev=342356&view=rev
Log:
[XRay] Simplify FDR buffer management

Summary:
This change makes XRay FDR mode use a single backing store for the
buffer queue, with each buffer now referring to an offset into that
backing store instead of owning its own allocation. We also remove the
FDR mode logging implementation's reliance on the internal allocator.
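
As a rough sketch (simplified from the diff below, with error handling
and locking elided), the constructor now carves one mmap'd region into
fixed-size slices:

  // One contiguous mmap-backed allocation serves all the buffers.
  BackingStore = allocateBuffer(BufferSize * BufferCount);
  // Each buffer's data pointer is just an offset into that region.
  for (size_t i = 0; i < BufferCount; ++i)
    Buffers[i].Buff.Data =
        reinterpret_cast<char *>(BackingStore) + (BufferSize * i);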

In the process of making this change we found an inconsistency between
the way we return buffers to the queue and the way we set the extents.
We take the chance to simplify how we manage each buffer's extents: it
turns out we do not need the indirection, so we co-host the atomic
64-bit integer with the buffer object. We also found that we had not
been returning the buffer owned by the thread running the flush
functionality when writing out the files, which meant we could end up
missing that thread's data.
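
Concretely, the separately allocated BufferExtents struct goes away and
the counter moves into the Buffer object itself (see the
xray_buffer_queue.h hunk below):

  // Before: one extra allocation per buffer, reached via a pointer.
  struct Buffer {
    void *Data = nullptr;
    size_t Size = 0;
    BufferExtents *Extents;
  };

  // After: the atomic 64-bit counter is co-hosted in the Buffer.
  struct Buffer {
    atomic_uint64_t Extents{0};
    void *Data = nullptr;
    size_t Size = 0;
  };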

We also consolidate all the allocation routines into xray_allocator.h,
replacing the ones previously defined in xray_buffer_queue.cc.
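
For reference, the consolidated interface is used roughly as follows
(an illustrative sketch, not code added verbatim by this patch):

  // Raw mmap-backed bytes for the backing store.
  auto *Store = allocateBuffer(BufferSize * BufferCount);
  // A placement-new'd array of BufferRep objects; initArray forwards
  // any constructor arguments to each element.
  auto *Reps = initArray<BufferQueue::BufferRep>(BufferCount);
  // ... hand out slices of Store through the Reps ...
  // Teardown returns the memory via internal_munmap().
  deallocateBuffer(Store, BufferSize * BufferCount);
  // The global BufferQueue itself likewise avoids InternalAlloc: it is
  // placement-new'd into static aligned storage (see fdrLoggingInit).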

Reviewers: mboerger, eizan

Subscribers: jfb, llvm-commits

Differential Revision: https://reviews.llvm.org/D52077

Modified:
    compiler-rt/trunk/lib/xray/xray_allocator.h
    compiler-rt/trunk/lib/xray/xray_buffer_queue.cc
    compiler-rt/trunk/lib/xray/xray_buffer_queue.h
    compiler-rt/trunk/lib/xray/xray_fdr_logging.cc

Modified: compiler-rt/trunk/lib/xray/xray_allocator.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_allocator.h?rev=342356&r1=342355&r2=342356&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_allocator.h (original)
+++ compiler-rt/trunk/lib/xray/xray_allocator.h Sun Sep 16 20:09:01 2018
@@ -50,23 +50,33 @@ template <class T> void deallocate(T *B)
   internal_munmap(B, sizeof(T));
 }
 
-inline void *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
-  auto B = reinterpret_cast<void *>(internal_mmap(
-      NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+template <class T = uint8_t> T *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+  auto B = reinterpret_cast<void *>(
+      internal_mmap(NULL, S * sizeof(T), PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
   if (B == MAP_FAILED) {
     if (Verbosity())
       Report("XRay Profiling: Failed to allocate memory of size %d.\n", S);
     return nullptr;
   }
-  return B;
+  return reinterpret_cast<T *>(B);
 }
 
-inline void deallocateBuffer(void *B, size_t S) XRAY_NEVER_INSTRUMENT {
+template <class T> void deallocateBuffer(T *B, size_t S) XRAY_NEVER_INSTRUMENT {
   if (B == nullptr)
     return;
   internal_munmap(B, S);
 }
 
+template <class T, class... U>
+T *initArray(size_t N, U &&... Us) XRAY_NEVER_INSTRUMENT {
+  auto A = allocateBuffer<T>(N);
+  if (A != nullptr)
+    while (N > 0)
+      new (A + (--N)) T(std::forward<U>(Us)...);
+  return A;
+}
+
 /// The Allocator type hands out fixed-sized chunks of memory that are
 /// cache-line aligned and sized. This is useful for placement of
 /// performance-sensitive data in memory that's frequently accessed. The

Modified: compiler-rt/trunk/lib/xray/xray_buffer_queue.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_buffer_queue.cc?rev=342356&r1=342355&r2=342356&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_buffer_queue.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_buffer_queue.cc Sun Sep 16 20:09:01 2018
@@ -16,68 +16,42 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_posix.h"
+#include "xray_allocator.h"
+#include "xray_defs.h"
 #include <memory>
 #include <sys/mman.h>
 
 using namespace __xray;
 using namespace __sanitizer;
 
-template <class T> static T *allocRaw(size_t N) {
-  // TODO: Report errors?
-  void *A = reinterpret_cast<void *>(
-      internal_mmap(NULL, N * sizeof(T), PROT_WRITE | PROT_READ,
-                    MAP_ANONYMOUS | MAP_PRIVATE, -1, 0));
-  return (A == MAP_FAILED) ? nullptr : reinterpret_cast<T *>(A);
-}
-
-template <class T> static void deallocRaw(T *ptr, size_t N) {
-  // TODO: Report errors?
-  if (ptr != nullptr)
-    internal_munmap(ptr, N);
-}
-
-template <class T> static T *initArray(size_t N) {
-  auto A = allocRaw<T>(N);
-  if (A != nullptr)
-    while (N > 0)
-      new (A + (--N)) T();
-  return A;
-}
-
-BufferQueue::BufferQueue(size_t B, size_t N, bool &Success)
-    : BufferSize(B), Buffers(initArray<BufferQueue::BufferRep>(N)),
-      BufferCount(N), Finalizing{0}, OwnedBuffers(initArray<void *>(N)),
-      Next(Buffers), First(Buffers), LiveBuffers(0) {
-  if (Buffers == nullptr) {
+BufferQueue::BufferQueue(size_t B, size_t N,
+                         bool &Success) XRAY_NEVER_INSTRUMENT
+    : BufferSize(B),
+      BufferCount(N),
+      Mutex(),
+      Finalizing{0},
+      BackingStore(allocateBuffer(B * N)),
+      Buffers(initArray<BufferQueue::BufferRep>(N)),
+      Next(Buffers),
+      First(Buffers),
+      LiveBuffers(0) {
+  if (BackingStore == nullptr) {
     Success = false;
     return;
   }
-  if (OwnedBuffers == nullptr) {
-    // Clean up the buffers we've already allocated.
-    for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
-      B->~BufferRep();
-    deallocRaw(Buffers, N);
+  if (Buffers == nullptr) {
+    deallocateBuffer(BackingStore, BufferSize * BufferCount);
     Success = false;
     return;
-  };
+  }
 
   for (size_t i = 0; i < N; ++i) {
     auto &T = Buffers[i];
-    void *Tmp = allocRaw<char>(BufferSize);
-    if (Tmp == nullptr) {
-      Success = false;
-      return;
-    }
-    auto *Extents = allocRaw<BufferExtents>(1);
-    if (Extents == nullptr) {
-      Success = false;
-      return;
-    }
     auto &Buf = T.Buff;
-    Buf.Data = Tmp;
+    Buf.Data = reinterpret_cast<char *>(BackingStore) + (BufferSize * i);
     Buf.Size = B;
-    Buf.Extents = Extents;
-    OwnedBuffers[i] = Tmp;
+    atomic_store(&Buf.Extents, 0, memory_order_release);
+    T.Used = false;
   }
   Success = true;
 }
@@ -85,13 +59,17 @@ BufferQueue::BufferQueue(size_t B, size_
 BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
   if (atomic_load(&Finalizing, memory_order_acquire))
     return ErrorCode::QueueFinalizing;
+
   SpinMutexLock Guard(&Mutex);
   if (LiveBuffers == BufferCount)
     return ErrorCode::NotEnoughMemory;
 
   auto &T = *Next;
   auto &B = T.Buff;
-  Buf = B;
+  auto Extents = atomic_load(&B.Extents, memory_order_acquire);
+  atomic_store(&Buf.Extents, Extents, memory_order_release);
+  Buf.Data = B.Data;
+  Buf.Size = B.Size;
   T.Used = true;
   ++LiveBuffers;
 
@@ -102,15 +80,11 @@ BufferQueue::ErrorCode BufferQueue::getB
 }
 
 BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
-  // Blitz through the buffers array to find the buffer.
-  bool Found = false;
-  for (auto I = OwnedBuffers, E = OwnedBuffers + BufferCount; I != E; ++I) {
-    if (*I == Buf.Data) {
-      Found = true;
-      break;
-    }
-  }
-  if (!Found)
+  // Check whether the buffer being referred to is within the bounds of the
+  // backing store's range.
+  if (Buf.Data < BackingStore ||
+      Buf.Data >
+          reinterpret_cast<char *>(BackingStore) + (BufferCount * BufferSize))
     return ErrorCode::UnrecognizedBuffer;
 
   SpinMutexLock Guard(&Mutex);
@@ -121,10 +95,14 @@ BufferQueue::ErrorCode BufferQueue::rele
     return ErrorCode::NotEnoughMemory;
 
   // Now that the buffer has been released, we mark it as "used".
-  First->Buff = Buf;
+  auto Extents = atomic_load(&Buf.Extents, memory_order_acquire);
+  atomic_store(&First->Buff.Extents, Extents, memory_order_release);
+  First->Buff.Data = Buf.Data;
+  First->Buff.Size = Buf.Size;
   First->Used = true;
   Buf.Data = nullptr;
   Buf.Size = 0;
+  atomic_store(&Buf.Extents, 0, memory_order_release);
   --LiveBuffers;
   if (++First == (Buffers + BufferCount))
     First = Buffers;
@@ -139,14 +117,8 @@ BufferQueue::ErrorCode BufferQueue::fina
 }
 
 BufferQueue::~BufferQueue() {
-  for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) {
-    auto &T = *I;
-    auto &Buf = T.Buff;
-    deallocRaw(Buf.Data, Buf.Size);
-    deallocRaw(Buf.Extents, 1);
-  }
   for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
     B->~BufferRep();
-  deallocRaw(Buffers, BufferCount);
-  deallocRaw(OwnedBuffers, BufferCount);
+  deallocateBuffer(Buffers, BufferCount);
+  deallocateBuffer(BackingStore, BufferSize * BufferCount);
 }

Modified: compiler-rt/trunk/lib/xray/xray_buffer_queue.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_buffer_queue.h?rev=342356&r1=342355&r2=342356&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_buffer_queue.h (original)
+++ compiler-rt/trunk/lib/xray/xray_buffer_queue.h Sun Sep 16 20:09:01 2018
@@ -18,6 +18,7 @@
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_mutex.h"
+#include "xray_defs.h"
 #include <cstddef>
 
 namespace __xray {
@@ -29,14 +30,10 @@ namespace __xray {
 /// trace collection.
 class BufferQueue {
 public:
-  struct alignas(64) BufferExtents {
-    atomic_uint64_t Size;
-  };
-
   struct Buffer {
+    atomic_uint64_t Extents{0};
     void *Data = nullptr;
     size_t Size = 0;
-    BufferExtents *Extents;
   };
 
   struct BufferRep {
@@ -76,8 +73,10 @@ private:
 
     T *operator->() const { return &(Buffers[Offset].Buff); }
 
-    Iterator(BufferRep *Root, size_t O, size_t M)
-        : Buffers(Root), Offset(O), Max(M) {
+    Iterator(BufferRep *Root, size_t O, size_t M) XRAY_NEVER_INSTRUMENT
+        : Buffers(Root),
+          Offset(O),
+          Max(M) {
       // We want to advance to the first Offset where the 'Used' property is
       // true, or to the end of the list/queue.
       while (!Buffers[Offset].Used && Offset != Max) {
@@ -107,16 +106,18 @@ private:
   // Size of each individual Buffer.
   size_t BufferSize;
 
-  BufferRep *Buffers;
-
   // Amount of pre-allocated buffers.
   size_t BufferCount;
 
   SpinMutex Mutex;
   atomic_uint8_t Finalizing;
 
-  // Pointers to buffers managed/owned by the BufferQueue.
-  void **OwnedBuffers;
+  // A pointer to a contiguous block of memory to serve as the backing store for
+  // all the individual buffers handed out.
+  void *BackingStore;
+
+  // A dynamically allocated array of BufferRep instances.
+  BufferRep *Buffers;
 
   // Pointer to the next buffer to be handed out.
   BufferRep *Next;
@@ -198,7 +199,7 @@ public:
   /// Applies the provided function F to each Buffer in the queue, only if the
   /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
   /// releaseBuffer(...) operation).
-  template <class F> void apply(F Fn) {
+  template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT {
     SpinMutexLock G(&Mutex);
     for (auto I = begin(), E = end(); I != E; ++I)
       Fn(*I);

Modified: compiler-rt/trunk/lib/xray/xray_fdr_logging.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_fdr_logging.cc?rev=342356&r1=342355&r2=342356&view=diff
==============================================================================
--- compiler-rt/trunk/lib/xray/xray_fdr_logging.cc (original)
+++ compiler-rt/trunk/lib/xray/xray_fdr_logging.cc Sun Sep 16 20:09:01 2018
@@ -30,6 +30,7 @@
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_records.h"
+#include "xray_allocator.h"
 #include "xray_buffer_queue.h"
 #include "xray_defs.h"
 #include "xray_fdr_flags.h"
@@ -47,7 +48,7 @@ atomic_sint32_t LoggingStatus = {XRayLog
 // force the alignment to 64-bytes for x86 cache line alignment, as this
 // structure is used in the hot path of implementation.
 struct alignas(64) ThreadLocalData {
-  BufferQueue::Buffer Buffer;
+  BufferQueue::Buffer Buffer{};
   char *RecordPtr = nullptr;
   // The number of FunctionEntry records immediately preceding RecordPtr.
   uint8_t NumConsecutiveFnEnters = 0;
@@ -81,6 +82,7 @@ static constexpr auto FunctionRecSize =
 static pthread_key_t Key;
 
 // Global BufferQueue.
+static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
 static BufferQueue *BQ = nullptr;
 
 static atomic_sint32_t LogFlushStatus = {
@@ -184,11 +186,10 @@ static void writeNewBufferPreamble(tid_t
   TLD.NumTailCalls = 0;
   if (TLD.BQ == nullptr || TLD.BQ->finalizing())
     return;
-  internal_memcpy(TLD.RecordPtr, Metadata, sizeof(Metadata));
-  TLD.RecordPtr += sizeof(Metadata);
-  // Since we write out the extents as the first metadata record of the
-  // buffer, we need to write out the extents including the extents record.
-  atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
+  internal_memcpy(TLD.RecordPtr, Metadata,
+                  sizeof(MetadataRecord) * InitRecordsCount);
+  TLD.RecordPtr += sizeof(MetadataRecord) * InitRecordsCount;
+  atomic_store(&TLD.Buffer.Extents, sizeof(MetadataRecord) * InitRecordsCount,
                memory_order_release);
 }
 
@@ -209,12 +210,12 @@ static void setupNewBuffer(int (*wall_cl
 
 static void incrementExtents(size_t Add) {
   auto &TLD = getThreadLocalData();
-  atomic_fetch_add(&TLD.Buffer.Extents->Size, Add, memory_order_acq_rel);
+  atomic_fetch_add(&TLD.Buffer.Extents, Add, memory_order_acq_rel);
 }
 
 static void decrementExtents(size_t Subtract) {
   auto &TLD = getThreadLocalData();
-  atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract, memory_order_acq_rel);
+  atomic_fetch_sub(&TLD.Buffer.Extents, Subtract, memory_order_acq_rel);
 }
 
 static void writeNewCPUIdMetadata(uint16_t CPU,
@@ -583,7 +584,7 @@ static void processFunctionHook(int32_t
 
   auto &TLD = getThreadLocalData();
 
-  if (TLD.BQ == nullptr)
+  if (TLD.BQ == nullptr && BQ != nullptr)
     TLD.BQ = BQ;
 
   if (!isLogInitializedAndReady(TLD.BQ, TSC, CPU, wall_clock_reader))
@@ -758,6 +759,7 @@ XRayBuffer fdrIterator(const XRayBuffer
   static BufferQueue::const_iterator It{};
   static BufferQueue::const_iterator End{};
   static void *CurrentBuffer{nullptr};
+  static size_t SerializedBufferSize = 0;
   if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
     // From this point on, we provide raw access to the raw buffer we're getting
     // from the BufferQueue. We're relying on the iterators from the current
@@ -767,7 +769,7 @@ XRayBuffer fdrIterator(const XRayBuffer
   }
 
   if (CurrentBuffer != nullptr) {
-    InternalFree(CurrentBuffer);
+    deallocateBuffer(CurrentBuffer, SerializedBufferSize);
     CurrentBuffer = nullptr;
   }
 
@@ -778,9 +780,9 @@ XRayBuffer fdrIterator(const XRayBuffer
   // out to disk. The difference here would be that we still write "empty"
   // buffers, or at least go through the iterators faithfully to let the
   // handlers see the empty buffers in the queue.
-  auto BufferSize = atomic_load(&It->Extents->Size, memory_order_acquire);
-  auto SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
-  CurrentBuffer = InternalAlloc(SerializedBufferSize);
+  auto BufferSize = atomic_load(&It->Extents, memory_order_acquire);
+  SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
+  CurrentBuffer = allocateBuffer(SerializedBufferSize);
   if (CurrentBuffer == nullptr)
     return {nullptr, 0};
 
@@ -848,7 +850,6 @@ XRayLogFlushStatus fdrLoggingFlush() XRA
       if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
         releaseThreadLocalBuffer(*TLD.BQ);
       BQ->~BufferQueue();
-      InternalFree(BQ);
       BQ = nullptr;
     }
   });
@@ -883,6 +884,13 @@ XRayLogFlushStatus fdrLoggingFlush() XRA
   retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
                    reinterpret_cast<char *>(&Header) + sizeof(Header));
 
+  // Release the current thread's buffer before we attempt to write out all the
+  // buffers. This ensures that in case we had only a single thread going, that
+  // we are able to capture the data nonetheless.
+  auto &TLD = getThreadLocalData();
+  if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr)
+    releaseThreadLocalBuffer(*TLD.BQ);
+
   BQ->apply([&](const BufferQueue::Buffer &B) {
     // Starting at version 2 of the FDR logging implementation, we only write
     // the records identified by the extents of the buffer. We use the Extents
@@ -890,7 +898,7 @@ XRayLogFlushStatus fdrLoggingFlush() XRA
     // still use a Metadata record, but fill in the extents instead for the
     // data.
     MetadataRecord ExtentsRecord;
-    auto BufferExtents = atomic_load(&B.Extents->Size, memory_order_acquire);
+    auto BufferExtents = atomic_load(&B.Extents, memory_order_acquire);
     DCHECK(BufferExtents <= B.Size);
     ExtentsRecord.Type = uint8_t(RecordType::Metadata);
     ExtentsRecord.RecordKind =
@@ -922,7 +930,12 @@ XRayLogInitStatus fdrLoggingFinalize() X
 
   // Do special things to make the log finalize itself, and not allow any more
   // operations to be performed until re-initialized.
-  BQ->finalize();
+  if (BQ == nullptr) {
+    if (Verbosity())
+      Report("Attempting to finalize an uninitialized global buffer!\n");
+  } else {
+    BQ->finalize();
+  }
 
   atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
                memory_order_release);
@@ -1132,13 +1145,11 @@ XRayLogInitStatus fdrLoggingInit(UNUSED
 
   if (BQ != nullptr) {
     BQ->~BufferQueue();
-    InternalFree(BQ);
     BQ = nullptr;
   }
 
   if (BQ == nullptr) {
-    BQ = reinterpret_cast<BufferQueue *>(
-        InternalAlloc(sizeof(BufferQueue), nullptr, 64));
+    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
     new (BQ) BufferQueue(BufferSize, BufferMax, Success);
   }
 
@@ -1146,7 +1157,6 @@ XRayLogInitStatus fdrLoggingInit(UNUSED
     Report("BufferQueue init failed.\n");
     if (BQ != nullptr) {
       BQ->~BufferQueue();
-      InternalFree(BQ);
       BQ = nullptr;
     }
     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;



