[compiler-rt] fc69967 - [scudo][standalone] Shift some data from dynamic to static

Kostya Kortchinsky via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 18 09:39:04 PST 2020


Author: Kostya Kortchinsky
Date: 2020-02-18T09:38:50-08:00
New Revision: fc69967a4b98df34c03a3c57134940688b863dff

URL: https://github.com/llvm/llvm-project/commit/fc69967a4b98df34c03a3c57134940688b863dff
DIFF: https://github.com/llvm/llvm-project/commit/fc69967a4b98df34c03a3c57134940688b863dff.diff

LOG: [scudo][standalone] Shift some data from dynamic to static

Summary:
Most of our larger data is dynamically allocated (via `map`) but it
became a hindrance with regard to init time, for a cost-to-benefit
ratio that is not great. So change the `TSD`s, `RegionInfo`, `ByteMap`
to be static.

Additionally, for reclaiming, we used to map & unmap a buffer each
time, which is costly. It turns out that we can have a static buffer,
and that there isn't much contention on it.

One of the other things changed here is that we hard-set the number
of TSDs on Android to the maximum number, as there could be a
situation where cores are put to sleep and we could miss some.

Subscribers: mgorny, #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D74696

Added: 
    compiler-rt/lib/scudo/standalone/release.cpp

Modified: 
    compiler-rt/lib/scudo/standalone/CMakeLists.txt
    compiler-rt/lib/scudo/standalone/bytemap.h
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/release.h
    compiler-rt/lib/scudo/standalone/tsd_exclusive.h
    compiler-rt/lib/scudo/standalone/tsd_shared.h
    compiler-rt/lib/scudo/standalone/wrappers_c.inc

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
index 9b85256e8c5a..91b48b7064d3 100644
--- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt
@@ -88,6 +88,7 @@ set(SCUDO_SOURCES
   flags_parser.cpp
   fuchsia.cpp
   linux.cpp
+  release.cpp
   report.cpp
   string_utils.cpp
   )

diff  --git a/compiler-rt/lib/scudo/standalone/bytemap.h b/compiler-rt/lib/scudo/standalone/bytemap.h
index b3582a5a04d2..e0d54f4e5971 100644
--- a/compiler-rt/lib/scudo/standalone/bytemap.h
+++ b/compiler-rt/lib/scudo/standalone/bytemap.h
@@ -17,12 +17,10 @@ namespace scudo {
 
 template <uptr Size> class FlatByteMap {
 public:
-  void initLinkerInitialized() {
-    Map = reinterpret_cast<u8 *>(map(nullptr, Size, "scudo:bytemap"));
-  }
-  void init() { initLinkerInitialized(); }
+  void initLinkerInitialized() {}
+  void init() { memset(Map, 0, sizeof(Map)); }
 
-  void unmapTestOnly() { unmap(reinterpret_cast<void *>(Map), Size); }
+  void unmapTestOnly() {}
 
   void set(uptr Index, u8 Value) {
     DCHECK_LT(Index, Size);
@@ -38,7 +36,7 @@ template <uptr Size> class FlatByteMap {
   void enable() {}
 
 private:
-  u8 *Map;
+  u8 Map[Size];
 };
 
 } // namespace scudo

diff  --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 79345cb348b6..b50f91d492ed 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -40,7 +40,8 @@ namespace scudo {
 
 template <class SizeClassMapT, uptr RegionSizeLog,
           s32 MinReleaseToOsIntervalMs = INT32_MIN,
-          s32 MaxReleaseToOsIntervalMs = INT32_MAX> class SizeClassAllocator32 {
+          s32 MaxReleaseToOsIntervalMs = INT32_MAX>
+class SizeClassAllocator32 {
 public:
   typedef SizeClassMapT SizeClassMap;
   // The bytemap can only track UINT8_MAX - 1 classes.
@@ -49,7 +50,8 @@ template <class SizeClassMapT, uptr RegionSizeLog,
   static_assert((1UL << RegionSizeLog) >= SizeClassMap::MaxSize, "");
   typedef SizeClassAllocator32<SizeClassMapT, RegionSizeLog,
                                MinReleaseToOsIntervalMs,
-                               MaxReleaseToOsIntervalMs> ThisT;
+                               MaxReleaseToOsIntervalMs>
+      ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef typename CacheT::TransferBatch TransferBatch;
   static const bool SupportsMemoryTagging = false;

diff  --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index bc31db88ebb8..188f3082aee2 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -46,10 +46,9 @@ template <class SizeClassMapT, uptr RegionSizeLog,
 class SizeClassAllocator64 {
 public:
   typedef SizeClassMapT SizeClassMap;
-  typedef SizeClassAllocator64<SizeClassMap, RegionSizeLog,
-                               MinReleaseToOsIntervalMs,
-                               MaxReleaseToOsIntervalMs,
-                               MaySupportMemoryTagging>
+  typedef SizeClassAllocator64<
+      SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs,
+      MaxReleaseToOsIntervalMs, MaySupportMemoryTagging>
       ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef typename CacheT::TransferBatch TransferBatch;
@@ -69,11 +68,6 @@ class SizeClassAllocator64 {
     PrimaryBase = reinterpret_cast<uptr>(
         map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data));
 
-    RegionInfoArray = reinterpret_cast<RegionInfo *>(
-        map(nullptr, sizeof(RegionInfo) * NumClasses, "scudo:regioninfo"));
-    DCHECK_EQ(reinterpret_cast<uptr>(RegionInfoArray) % SCUDO_CACHE_LINE_SIZE,
-              0);
-
     u32 Seed;
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
       Seed = static_cast<u32>(getMonotonicTime() ^ (PrimaryBase >> 12));
@@ -106,8 +100,6 @@ class SizeClassAllocator64 {
 
   void unmapTestOnly() {
     unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data);
-    unmap(reinterpret_cast<void *>(RegionInfoArray),
-          sizeof(RegionInfo) * NumClasses);
   }
 
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
@@ -156,7 +148,7 @@ class SizeClassAllocator64 {
     }
   }
 
-  template <typename F> void iterateOverBlocks(F Callback) const {
+  template <typename F> void iterateOverBlocks(F Callback) {
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
@@ -169,7 +161,7 @@ class SizeClassAllocator64 {
     }
   }
 
-  void getStats(ScopedString *Str) const {
+  void getStats(ScopedString *Str) {
     // TODO(kostyak): get the RSS per region.
     uptr TotalMapped = 0;
     uptr PoppedBlocks = 0;
@@ -252,12 +244,12 @@ class SizeClassAllocator64 {
   static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");
 
   uptr PrimaryBase;
-  RegionInfo *RegionInfoArray;
   MapPlatformData Data;
   atomic_s32 ReleaseToOsIntervalMs;
   bool UseMemoryTagging;
+  RegionInfo RegionInfoArray[NumClasses];
 
-  RegionInfo *getRegionInfo(uptr ClassId) const {
+  RegionInfo *getRegionInfo(uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     return &RegionInfoArray[ClassId];
   }
@@ -371,7 +363,7 @@ class SizeClassAllocator64 {
     return B;
   }
 
-  void getStats(ScopedString *Str, uptr ClassId, uptr Rss) const {
+  void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
     RegionInfo *Region = getRegionInfo(ClassId);
     if (Region->MappedUser == 0)
       return;

diff  --git a/compiler-rt/lib/scudo/standalone/release.cpp b/compiler-rt/lib/scudo/standalone/release.cpp
new file mode 100644
index 000000000000..e144b354b258
--- /dev/null
+++ b/compiler-rt/lib/scudo/standalone/release.cpp
@@ -0,0 +1,16 @@
+//===-- release.cpp ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "release.h"
+
+namespace scudo {
+
+HybridMutex PackedCounterArray::Mutex = {};
+uptr PackedCounterArray::StaticBuffer[1024];
+
+} // namespace scudo

diff  --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index 37aa5d4db8a5..c4f679711073 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -11,6 +11,7 @@
 
 #include "common.h"
 #include "list.h"
+#include "mutex.h"
 
 namespace scudo {
 
@@ -39,11 +40,13 @@ class ReleaseRecorder {
 };
 
 // A packed array of Counters. Each counter occupies 2^N bits, enough to store
-// counter's MaxValue. Ctor will try to allocate the required Buffer via map()
-// and the caller is expected to check whether the initialization was successful
-// by checking isAllocated() result. For the performance sake, none of the
-// accessors check the validity of the arguments, It is assumed that Index is
-// always in [0, N) range and the value is not incremented past MaxValue.
+// counter's MaxValue. Ctor will try to use a static buffer first, and if that
+// fails (the buffer is too small or already locked), will allocate the
+// required Buffer via map(). The caller is expected to check whether the
+// initialization was successful by checking isAllocated() result. For
+// performance sake, none of the accessors check the validity of the arguments,
+// It is assumed that Index is always in [0, N) range and the value is not
+// incremented past MaxValue.
 class PackedCounterArray {
 public:
   PackedCounterArray(uptr NumCounters, uptr MaxValue) : N(NumCounters) {
@@ -66,11 +69,20 @@ class PackedCounterArray {
     BufferSize = (roundUpTo(N, static_cast<uptr>(1U) << PackingRatioLog) >>
                   PackingRatioLog) *
                  sizeof(*Buffer);
-    Buffer = reinterpret_cast<uptr *>(
-        map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM));
+    if (BufferSize <= StaticBufferSize && Mutex.tryLock()) {
+      Buffer = &StaticBuffer[0];
+      memset(Buffer, 0, BufferSize);
+    } else {
+      Buffer = reinterpret_cast<uptr *>(
+          map(nullptr, BufferSize, "scudo:counters", MAP_ALLOWNOMEM));
+    }
   }
   ~PackedCounterArray() {
-    if (isAllocated())
+    if (!isAllocated())
+      return;
+    if (Buffer == &StaticBuffer[0])
+      Mutex.unlock();
+    else
       unmap(reinterpret_cast<void *>(Buffer), BufferSize);
   }
 
@@ -110,6 +122,10 @@ class PackedCounterArray {
 
   uptr BufferSize;
   uptr *Buffer;
+
+  static HybridMutex Mutex;
+  static const uptr StaticBufferSize = 1024U;
+  static uptr StaticBuffer[StaticBufferSize];
 };
 
 template <class ReleaseRecorderT> class FreePagesRangeTracker {

diff  --git a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
index 69479ea7bdf4..3492509b5a8e 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_exclusive.h
@@ -25,9 +25,7 @@ template <class Allocator> struct TSDRegistryExT {
   void initLinkerInitialized(Allocator *Instance) {
     Instance->initLinkerInitialized();
     CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
-    FallbackTSD = reinterpret_cast<TSD<Allocator> *>(
-        map(nullptr, sizeof(TSD<Allocator>), "scudo:tsd"));
-    FallbackTSD->initLinkerInitialized(Instance);
+    FallbackTSD.initLinkerInitialized(Instance);
     Initialized = true;
   }
   void init(Allocator *Instance) {
@@ -35,9 +33,7 @@ template <class Allocator> struct TSDRegistryExT {
     initLinkerInitialized(Instance);
   }
 
-  void unmapTestOnly() {
-    unmap(reinterpret_cast<void *>(FallbackTSD), sizeof(TSD<Allocator>));
-  }
+  void unmapTestOnly() {}
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
     if (LIKELY(State != ThreadState::NotInitialized))
@@ -51,23 +47,22 @@ template <class Allocator> struct TSDRegistryExT {
       *UnlockRequired = false;
       return &ThreadTSD;
     }
-    DCHECK(FallbackTSD);
-    FallbackTSD->lock();
+    FallbackTSD.lock();
     *UnlockRequired = true;
-    return FallbackTSD;
+    return &FallbackTSD;
   }
 
   // To disable the exclusive TSD registry, we effectively lock the fallback TSD
   // and force all threads to attempt to use it instead of their local one.
   void disable() {
     Mutex.lock();
-    FallbackTSD->lock();
+    FallbackTSD.lock();
     atomic_store(&Disabled, 1U, memory_order_release);
   }
 
   void enable() {
     atomic_store(&Disabled, 0U, memory_order_release);
-    FallbackTSD->unlock();
+    FallbackTSD.unlock();
     Mutex.unlock();
   }
 
@@ -96,7 +91,7 @@ template <class Allocator> struct TSDRegistryExT {
   pthread_key_t PThreadKey;
   bool Initialized;
   atomic_u8 Disabled;
-  TSD<Allocator> *FallbackTSD;
+  TSD<Allocator> FallbackTSD;
   HybridMutex Mutex;
   static THREADLOCAL ThreadState State;
   static THREADLOCAL TSD<Allocator> ThreadTSD;

diff  --git a/compiler-rt/lib/scudo/standalone/tsd_shared.h b/compiler-rt/lib/scudo/standalone/tsd_shared.h
index cf5453d20208..038a5905ff48 100644
--- a/compiler-rt/lib/scudo/standalone/tsd_shared.h
+++ b/compiler-rt/lib/scudo/standalone/tsd_shared.h
@@ -19,10 +19,9 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
     Instance->initLinkerInitialized();
     CHECK_EQ(pthread_key_create(&PThreadKey, nullptr), 0); // For non-TLS
     const u32 NumberOfCPUs = getNumberOfCPUs();
-    NumberOfTSDs =
-        (NumberOfCPUs == 0) ? MaxTSDCount : Min(NumberOfCPUs, MaxTSDCount);
-    TSDs = reinterpret_cast<TSD<Allocator> *>(
-        map(nullptr, sizeof(TSD<Allocator>) * NumberOfTSDs, "scudo:tsd"));
+    NumberOfTSDs = (SCUDO_ANDROID || NumberOfCPUs == 0)
+                       ? MaxTSDCount
+                       : Min(NumberOfCPUs, MaxTSDCount);
     for (u32 I = 0; I < NumberOfTSDs; I++)
       TSDs[I].initLinkerInitialized(Instance);
     // Compute all the coprimes of NumberOfTSDs. This will be used to walk the
@@ -48,8 +47,6 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
   }
 
   void unmapTestOnly() {
-    unmap(reinterpret_cast<void *>(TSDs),
-          sizeof(TSD<Allocator>) * NumberOfTSDs);
     setCurrentTSD(nullptr);
     pthread_key_delete(PThreadKey);
   }
@@ -162,11 +159,11 @@ template <class Allocator, u32 MaxTSDCount> struct TSDRegistrySharedT {
   pthread_key_t PThreadKey;
   atomic_u32 CurrentIndex;
   u32 NumberOfTSDs;
-  TSD<Allocator> *TSDs;
   u32 NumberOfCoPrimes;
   u32 CoPrimes[MaxTSDCount];
   bool Initialized;
   HybridMutex Mutex;
+  TSD<Allocator> TSDs[MaxTSDCount];
 #if SCUDO_LINUX && !_BIONIC
   static THREADLOCAL TSD<Allocator> *ThreadTSD;
 #endif

diff  --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
index 314a835074e6..5a6c1a8d4087 100644
--- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc
+++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc
@@ -195,7 +195,7 @@ INTERFACE WEAK int SCUDO_PREFIX(malloc_info)(UNUSED int options, FILE *stream) {
       decltype(SCUDO_ALLOCATOR)::PrimaryT::SizeClassMap::MaxSize;
   auto *sizes = static_cast<scudo::uptr *>(
       SCUDO_PREFIX(calloc)(max_size, sizeof(scudo::uptr)));
-  auto callback = [](uintptr_t, size_t size, void* arg) {
+  auto callback = [](uintptr_t, size_t size, void *arg) {
     auto *sizes = reinterpret_cast<scudo::uptr *>(arg);
     if (size < max_size)
       sizes[size]++;


        


More information about the llvm-commits mailing list