[compiler-rt] r314766 - [XRay][compiler-rt] Use a hand-written circular buffer in BufferQueue

Dean Michael Berris via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 3 04:43:38 PDT 2017


Reverted in rL314786.

> On 3 Oct 2017, at 22:39, Dean Michael Berris <dberris at google.com> wrote:
> 
> Hi Diana -- yes, I'll revert in the meantime.
> 
>> On 3 Oct 2017, at 20:10, Diana Picus <diana.picus at linaro.org> wrote:
>> 
>> Hi,
>> 
>> Looks like this broke a number of bots:
>> http://lab.llvm.org:8011/builders/clang-cmake-armv7-a15-full/builds/11102
>> http://lab.llvm.org:8011/builders/clang-ppc64le-linux-lnt/builds/7016/
>> http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-debian-fast/builds/6251
>> 
>> Are you looking into it?
>> 
>> Thanks,
>> Diana
>> 
>> On 3 October 2017 at 08:15, Dean Michael Berris via llvm-commits
>> <llvm-commits at lists.llvm.org> wrote:
>>> Author: dberris
>>> Date: Mon Oct  2 23:15:34 2017
>>> New Revision: 314766
>>> 
>>> URL: http://llvm.org/viewvc/llvm-project?rev=314766&view=rev
>>> Log:
>>> [XRay][compiler-rt] Use a hand-written circular buffer in BufferQueue
>>> 
>>> Summary:
>>> This change removes the dependency on std::deque<...> for the storage
>>> of the buffers in the buffer queue. We instead implement a fixed-size
>>> circular buffer that is resilient to exhaustion and preserves the
>>> semantics of the BufferQueue.
>>> 
>>> We're moving away from using std::deque<...> for two reasons:
>>> 
>>> - We want to remove dependencies on the STL for data structures.
>>> 
>>> - We want the data structure we use to not require re-allocation in
>>>   the normal course of operation.
>>> 
>>> The internal implementation of the buffer queue uses heap-allocated
>>> arrays that are initialized once when the BufferQueue is created, and
>>> re-uses slots in the buffer array as buffers are returned in order.
>>> 
>>> We also change the lock used in the implementation from a blocking
>>> mutex to a spinlock. Since the release operations now spend very little
>>> time in the critical section, a spinlock is appropriate.
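>>> 
>>> As a rough illustration of the scheme, here is a self-contained sketch
>>> (not the actual BufferQueue code): it tracks outstanding buffers with an
>>> explicit counter rather than the First sentinel used in the patch, and
>>> spins on a std::atomic<bool> as a stand-in for __sanitizer::SpinMutex.
>>> 
>>> // Ring-queue sketch: a fixed array of slots, a spin lock, and two
>>> // wrapping pointers -- Next hands buffers out, First takes them back
>>> // in order.
>>> #include <atomic>
>>> #include <cstddef>
>>> #include <cstdlib>
>>> #include <memory>
>>> 
>>> struct Slot {
>>>   void *Data = nullptr;
>>>   bool Used = false; // has this buffer carried data at some point?
>>> };
>>> 
>>> class RingQueueSketch {
>>>   std::unique_ptr<Slot[]> Slots; // fixed-size storage, allocated once
>>>   size_t Count;
>>>   size_t Outstanding = 0; // buffers currently handed out
>>>   Slot *Next;             // next slot to hand out
>>>   Slot *First;            // slot that receives the next released buffer
>>>   std::atomic<bool> Lock{false};
>>> 
>>>   void lock() {
>>>     while (Lock.exchange(true, std::memory_order_acquire)) {
>>>     }
>>>   }
>>>   void unlock() { Lock.store(false, std::memory_order_release); }
>>> 
>>> public:
>>>   RingQueueSketch(size_t BufferSize, size_t N)
>>>       : Slots(new Slot[N]), Count(N), Next(Slots.get()), First(Slots.get()) {
>>>     for (size_t I = 0; I < N; ++I)
>>>       Slots[I].Data = malloc(BufferSize); // allocated up front, reused
>>>   }
>>> 
>>>   bool get(void *&Out) {
>>>     lock();
>>>     if (Outstanding == Count) { // every slot is handed out
>>>       unlock();
>>>       return false;
>>>     }
>>>     Out = Next->Data;
>>>     Next->Data = nullptr; // the caller owns the buffer until release()
>>>     ++Outstanding;
>>>     if (++Next == Slots.get() + Count)
>>>       Next = Slots.get(); // wrap around
>>>     unlock();
>>>     return true;
>>>   }
>>> 
>>>   bool release(void *Ptr) {
>>>     lock();
>>>     if (Outstanding == 0) { // more releases than gets
>>>       unlock();
>>>       return false;
>>>     }
>>>     First->Data = Ptr;
>>>     First->Used = true;
>>>     --Outstanding;
>>>     if (++First == Slots.get() + Count)
>>>       First = Slots.get(); // wrap around
>>>     unlock();
>>>     return true;
>>>   }
>>> 
>>>   ~RingQueueSketch() {
>>>     for (size_t I = 0; I < Count; ++I)
>>>       free(Slots[I].Data); // free(nullptr) is a no-op for slots still out
>>>   }
>>> };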
>>> 
>>> This change is related to D38073.
>>> 
>>> Reviewers: dblaikie, kpw, pelikan
>>> 
>>> Subscribers: llvm-commits
>>> 
>>> Differential Revision: https://reviews.llvm.org/D38119
>>> 
>>> Modified:
>>>   compiler-rt/trunk/lib/xray/xray_buffer_queue.cc
>>>   compiler-rt/trunk/lib/xray/xray_buffer_queue.h
>>>   compiler-rt/trunk/test/xray/TestCases/Linux/fdr-thread-order.cc
>>> 
>>> Modified: compiler-rt/trunk/lib/xray/xray_buffer_queue.cc
>>> URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_buffer_queue.cc?rev=314766&r1=314765&r2=314766&view=diff
>>> ==============================================================================
>>> --- compiler-rt/trunk/lib/xray/xray_buffer_queue.cc (original)
>>> +++ compiler-rt/trunk/lib/xray/xray_buffer_queue.cc Mon Oct  2 23:15:34 2017
>>> @@ -16,6 +16,7 @@
>>> #include "sanitizer_common/sanitizer_common.h"
>>> #include "sanitizer_common/sanitizer_libc.h"
>>> 
>>> +#include <algorithm>
>>> #include <cstdlib>
>>> #include <tuple>
>>> 
>>> @@ -23,18 +24,21 @@ using namespace __xray;
>>> using namespace __sanitizer;
>>> 
>>> BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
>>> -    : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing{0} {
>>> -  for (auto &T : Buffers) {
>>> +    : BufferSize(B), Buffers(new std::tuple<Buffer, bool>[N]()),
>>> +      BufferCount(N), Finalizing{0}, OwnedBuffers(new void *[N]()),
>>> +      Next(Buffers.get()), First(nullptr) {
>>> +  for (size_t i = 0; i < N; ++i) {
>>> +    auto &T = Buffers[i];
>>>    void *Tmp = malloc(BufferSize);
>>>    if (Tmp == nullptr) {
>>>      Success = false;
>>>      return;
>>>    }
>>> -
>>>    auto &Buf = std::get<0>(T);
>>> +    std::get<1>(T) = false;
>>>    Buf.Buffer = Tmp;
>>>    Buf.Size = B;
>>> -    OwnedBuffers.emplace(Tmp);
>>> +    OwnedBuffers[i] = Tmp;
>>>  }
>>>  Success = true;
>>> }
>>> @@ -42,27 +46,43 @@ BufferQueue::BufferQueue(std::size_t B,
>>> BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
>>>  if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
>>>    return ErrorCode::QueueFinalizing;
>>> -  __sanitizer::BlockingMutexLock Guard(&Mutex);
>>> -  if (Buffers.empty())
>>> +  __sanitizer::SpinMutexLock Guard(&Mutex);
>>> +
>>> +  if (Next == First)
>>>    return ErrorCode::NotEnoughMemory;
>>> -  auto &T = Buffers.front();
>>> +
>>> +  auto &T = *Next;
>>>  auto &B = std::get<0>(T);
>>>  Buf = B;
>>> -  B.Buffer = nullptr;
>>> -  B.Size = 0;
>>> -  Buffers.pop_front();
>>> +
>>> +  if (First == nullptr)
>>> +    First = Next;
>>> +  ++Next;
>>> +  if (Next == (Buffers.get() + BufferCount))
>>> +    Next = Buffers.get();
>>> +
>>>  return ErrorCode::Ok;
>>> }
>>> 
>>> BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
>>> -  if (OwnedBuffers.count(Buf.Buffer) == 0)
>>> +  // Blitz through the buffers array to find the buffer.
>>> +  if (std::none_of(OwnedBuffers.get(), OwnedBuffers.get() + BufferCount,
>>> +                   [&Buf](void *P) { return P == Buf.Buffer; }))
>>>    return ErrorCode::UnrecognizedBuffer;
>>> -  __sanitizer::BlockingMutexLock Guard(&Mutex);
>>> +  __sanitizer::SpinMutexLock Guard(&Mutex);
>>> +
>>> +  // This points to a semantic bug, we really ought to not be releasing more
>>> +  // buffers than we actually get.
>>> +  if (First == nullptr || First == Next)
>>> +    return ErrorCode::NotEnoughMemory;
>>> 
>>>  // Now that the buffer has been released, we mark it as "used".
>>> -  Buffers.emplace(Buffers.end(), Buf, true /* used */);
>>> +  *First = std::make_tuple(Buf, true);
>>>  Buf.Buffer = nullptr;
>>>  Buf.Size = 0;
>>> +  ++First;
>>> +  if (First == (Buffers.get() + BufferCount))
>>> +    First = Buffers.get();
>>>  return ErrorCode::Ok;
>>> }
>>> 
>>> @@ -74,7 +94,8 @@ BufferQueue::ErrorCode BufferQueue::fina
>>> }
>>> 
>>> BufferQueue::~BufferQueue() {
>>> -  for (auto &T : Buffers) {
>>> +  for (auto I = Buffers.get(), E = Buffers.get() + BufferCount; I != E; ++I) {
>>> +    auto &T = *I;
>>>    auto &Buf = std::get<0>(T);
>>>    free(Buf.Buffer);
>>>  }
>>> 
>>> Modified: compiler-rt/trunk/lib/xray/xray_buffer_queue.h
>>> URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/xray/xray_buffer_queue.h?rev=314766&r1=314765&r2=314766&view=diff
>>> ==============================================================================
>>> --- compiler-rt/trunk/lib/xray/xray_buffer_queue.h (original)
>>> +++ compiler-rt/trunk/lib/xray/xray_buffer_queue.h Mon Oct  2 23:15:34 2017
>>> @@ -17,8 +17,8 @@
>>> 
>>> #include "sanitizer_common/sanitizer_atomic.h"
>>> #include "sanitizer_common/sanitizer_mutex.h"
>>> -#include <deque>
>>> -#include <unordered_set>
>>> +#include <cstdint>
>>> +#include <memory>
>>> #include <utility>
>>> 
>>> namespace __xray {
>>> @@ -36,15 +36,23 @@ public:
>>>  };
>>> 
>>> private:
>>> +  // Size of each individual Buffer.
>>>  size_t BufferSize;
>>> 
>>>  // We use a bool to indicate whether the Buffer has been used in this
>>>  // freelist implementation.
>>> -  std::deque<std::tuple<Buffer, bool>> Buffers;
>>> -  __sanitizer::BlockingMutex Mutex;
>>> -  std::unordered_set<void *> OwnedBuffers;
>>> +  std::unique_ptr<std::tuple<Buffer, bool>[]> Buffers;
>>> +  size_t BufferCount;
>>> +
>>> +  __sanitizer::SpinMutex Mutex;
>>>  __sanitizer::atomic_uint8_t Finalizing;
>>> 
>>> +  // Sorted buffer pointers, making it quick to find buffers that we own.
>>> +  std::unique_ptr<void *[]> OwnedBuffers;
>>> +
>>> +  std::tuple<Buffer, bool> *Next;
>>> +  std::tuple<Buffer, bool> *First;
>>> +
>>> public:
>>>  enum class ErrorCode : unsigned {
>>>    Ok,
>>> @@ -117,8 +125,9 @@ public:
>>>  /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
>>>  /// releaseBuffer(...) operation).
>>>  template <class F> void apply(F Fn) {
>>> -    __sanitizer::BlockingMutexLock G(&Mutex);
>>> -    for (const auto &T : Buffers) {
>>> +    __sanitizer::SpinMutexLock G(&Mutex);
>>> +    for (auto I = Buffers.get(), E = Buffers.get() + BufferCount; I != E; ++I) {
>>> +      const auto &T = *I;
>>>      if (std::get<1>(T))
>>>        Fn(std::get<0>(T));
>>>    }
>>> 
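>>> For reference, a hedged usage sketch of the interface above (the
>>> standalone main(), the memset, and the printf are illustrative only;
>>> error handling is abbreviated, and it assumes Buffer is the nested
>>> struct taken by getBuffer/releaseBuffer as in the .cc file above):
>>> 
>>> #include "xray_buffer_queue.h"
>>> 
>>> #include <cstdio>
>>> #include <cstring>
>>> 
>>> int main() {
>>>   bool Success = false;
>>>   // Ten 16 KiB buffers, all allocated once up front.
>>>   __xray::BufferQueue BQ(/*B=*/16384, /*N=*/10, Success);
>>>   if (!Success)
>>>     return 1;
>>> 
>>>   __xray::BufferQueue::Buffer Buf;
>>>   if (BQ.getBuffer(Buf) != __xray::BufferQueue::ErrorCode::Ok)
>>>     return 1;
>>> 
>>>   // A writer would fill up to Buf.Size bytes at Buf.Buffer here.
>>>   std::memset(Buf.Buffer, 0, Buf.Size);
>>> 
>>>   if (BQ.releaseBuffer(Buf) != __xray::BufferQueue::ErrorCode::Ok)
>>>     return 1;
>>> 
>>>   // Visit every buffer that has been marked "used", under the lock.
>>>   BQ.apply([](const __xray::BufferQueue::Buffer &B) {
>>>     std::printf("used buffer of %zu bytes\n", B.Size);
>>>   });
>>>   return 0;
>>> }
>>> 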
>>> Modified: compiler-rt/trunk/test/xray/TestCases/Linux/fdr-thread-order.cc
>>> URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/xray/TestCases/Linux/fdr-thread-order.cc?rev=314766&r1=314765&r2=314766&view=diff
>>> ==============================================================================
>>> --- compiler-rt/trunk/test/xray/TestCases/Linux/fdr-thread-order.cc (original)
>>> +++ compiler-rt/trunk/test/xray/TestCases/Linux/fdr-thread-order.cc Mon Oct  2 23:15:34 2017
>>> @@ -1,37 +1,63 @@
>>> // RUN: %clangxx_xray -g -std=c++11 %s -o %t
>>> // RUN: rm fdr-thread-order.* || true
>>> -// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | FileCheck %s
>>> -// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-thread-order.* | head -1`" | FileCheck %s --check-prefix TRACE
>>> +// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false \
>>> +// RUN:    xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 \
>>> +// RUN:    xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | \
>>> +// RUN:    FileCheck %s
>>> +// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
>>> +// RUN:    "`ls fdr-thread-order.* | head -1`"
>>> +// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
>>> +// RUN:    "`ls fdr-thread-order.* | head -1`" | \
>>> +// RUN:    FileCheck %s --check-prefix TRACE
>>> // RUN: rm fdr-thread-order.*
>>> // FIXME: Make llvm-xray work on non-x86_64 as well.
>>> // REQUIRES: x86_64-linux
>>> // REQUIRES: built-in-llvm-tree
>>> 
>>> #include "xray/xray_log_interface.h"
>>> -#include <thread>
>>> +#include <atomic>
>>> #include <cassert>
>>> +#include <thread>
>>> 
>>> constexpr auto kBufferSize = 16384;
>>> constexpr auto kBufferMax = 10;
>>> 
>>> -thread_local uint64_t var = 0;
>>> -[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() { ++var; }
>>> -[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() { ++var; }
>>> +std::atomic<uint64_t> var{0};
>>> +
>>> +[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() {
>>> +  for (auto i = 0; i < 1 << 20; ++i)
>>> +    ++var;
>>> +}
>>> +
>>> +[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() {
>>> +  for (auto i = 0; i < 1 << 20; ++i)
>>> +    ++var;
>>> +}
>>> 
>>> int main(int argc, char *argv[]) {
>>>  using namespace __xray;
>>>  FDRLoggingOptions Options;
>>> +  __xray_patch();
>>>  assert(__xray_log_init(kBufferSize, kBufferMax, &Options,
>>>                         sizeof(FDRLoggingOptions)) ==
>>>         XRayLogInitStatus::XRAY_LOG_INITIALIZED);
>>> -  __xray_patch();
>>> -  std::thread t1([] { f1(); });
>>> -  std::thread t2([] { f2(); });
>>> -  t1.join();
>>> -  t2.join();
>>> +
>>> +  std::atomic_thread_fence(std::memory_order_acq_rel);
>>> +
>>> +  {
>>> +    std::thread t1([] { f1(); });
>>> +    std::thread t2([] { f2(); });
>>> +    t1.join();
>>> +    t2.join();
>>> +  }
>>> +
>>> +  std::atomic_thread_fence(std::memory_order_acq_rel);
>>>  __xray_log_finalize();
>>>  __xray_log_flushLog();
>>> +  __xray_unpatch();
>>> +  return var > 0 ? 0 : 1;
>>>  // CHECK: {{.*}}XRay: Log file in '{{.*}}'
>>> +  // CHECK-NOT: Failed
>>> }
>>> 
>>> // We want to make sure that the order of the function log doesn't matter.
>>> 
>>> 
>>> _______________________________________________
>>> llvm-commits mailing list
>>> llvm-commits at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
> 
