[compiler-rt] c514198 - [scudo] Adjust page map buffer size

Chia-hung Duan via llvm-commits <llvm-commits at lists.llvm.org>
Wed Mar 8 13:21:43 PST 2023


Author: Chia-hung Duan
Date: 2023-03-08T21:19:43Z
New Revision: c514198e4d39743369377011a55e74a8640b08f4

URL: https://github.com/llvm/llvm-project/commit/c514198e4d39743369377011a55e74a8640b08f4
DIFF: https://github.com/llvm/llvm-project/commit/c514198e4d39743369377011a55e74a8640b08f4.diff

LOG: [scudo] Adjust page map buffer size

Given memory groups, we are unlikely to need a huge page map to
record an entire region. This CL reduces the default page map buffer
size from 2048 to 512 and increases the number of static buffers to 2.

Reviewed By: cferris

Differential Revision: https://reviews.llvm.org/D144754
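
For context, the core of this change is a small bitmask-based pool: a set bit
in a word marks a free static buffer, and allocation claims the least
significant set bit. The following is a minimal stand-alone sketch of that
idea, not scudo's actual code; the names (FreeMask, kBufferCount,
kBufferWords) and the use of __builtin_ctzll are illustrative assumptions,
and locking is omitted.

#include <cassert>
#include <cstdint>

constexpr uint64_t kBufferCount = 2;   // number of static buffers
constexpr uint64_t kBufferWords = 512; // words per buffer

uint64_t FreeMask = ~uint64_t(0); // a set bit marks a free slot
uint64_t RawStorage[kBufferCount * kBufferWords];

uint64_t *takeBuffer() {
  // Bits >= kBufferCount are never cleared, so the mask is never zero and
  // counting trailing zeros is always well-defined.
  uint64_t Index = static_cast<uint64_t>(__builtin_ctzll(FreeMask));
  if (Index >= kBufferCount)
    return nullptr; // all static slots taken; the caller falls back to map()
  FreeMask ^= uint64_t(1) << Index; // clear the bit: slot now in use
  return &RawStorage[Index * kBufferWords];
}

void returnBuffer(uint64_t *Buffer) {
  uint64_t Index = static_cast<uint64_t>(Buffer - RawStorage) / kBufferWords;
  assert((FreeMask & (uint64_t(1) << Index)) == 0 && "double release");
  FreeMask |= uint64_t(1) << Index; // set the bit: slot free again
}

This is also why the static_assert in the diff below reserves at least one
bit: with StaticBufferCount < SCUDO_WORDSIZE, the high bits of the mask stay
set forever, so an exhausted pool yields an out-of-range index instead of an
undefined bit scan on zero.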

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/release.cpp
    compiler-rt/lib/scudo/standalone/release.h
    compiler-rt/lib/scudo/standalone/tests/release_test.cpp

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/release.cpp b/compiler-rt/lib/scudo/standalone/release.cpp
index 3f40dbec6d7a8..938bb41faf695 100644
--- a/compiler-rt/lib/scudo/standalone/release.cpp
+++ b/compiler-rt/lib/scudo/standalone/release.cpp
@@ -10,7 +10,7 @@
 
 namespace scudo {
 
-HybridMutex RegionPageMap::Mutex = {};
-uptr RegionPageMap::StaticBuffer[RegionPageMap::StaticBufferCount];
+BufferPool<RegionPageMap::StaticBufferCount, RegionPageMap::StaticBufferSize>
+    RegionPageMap::Buffers;
 
 } // namespace scudo
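
The release.cpp hunk above only swaps one out-of-line definition for another:
the static BufferPool member declared in release.h still needs exactly one
definition in a .cpp file. A hypothetical minimal illustration of that C++
pattern (the names are made up, not scudo's):

struct PageMapLike {                  // stands in for RegionPageMap
  struct PoolLike { int Slots[4]; };  // stands in for BufferPool<...>
  static PoolLike Buffers;            // declaration (in the header)
};

PageMapLike::PoolLike PageMapLike::Buffers; // definition (in one .cpp)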

diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index b0151a4d812be..c3b0a77d45ab6 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -49,6 +49,99 @@ class ReleaseRecorder {
   MapPlatformData *Data = nullptr;
 };
 
+// A buffer pool which holds a fixed number of static buffers for fast buffer
+// allocation. If the request size is greater than `StaticBufferSize`, it'll
+// delegate the allocation to map().
+template <uptr StaticBufferCount, uptr StaticBufferSize> class BufferPool {
+public:
+  // Preserve 1 bit in the `Mask` so that we don't need a zero check while
+  // extracting the least significant set bit from the `Mask`.
+  static_assert(StaticBufferCount < SCUDO_WORDSIZE, "");
+  static_assert(isAligned(StaticBufferSize, SCUDO_CACHE_LINE_SIZE), "");
+
+  // Return a buffer which is at least `BufferSize`.
+  uptr *getBuffer(const uptr BufferSize) {
+    if (UNLIKELY(BufferSize > StaticBufferSize))
+      return getDynamicBuffer(BufferSize);
+
+    uptr Index;
+    {
+      // TODO: In general, we expect this operation to be fast, so the waiting
+      // thread won't be put to sleep. The HybridMutex does implement
+      // busy-waiting, but we may want to review the performance and see if we
+      // need an explicit spin lock here.
+      ScopedLock L(Mutex);
+      Index = getLeastSignificantSetBitIndex(Mask);
+      if (Index < StaticBufferCount)
+        Mask ^= static_cast<uptr>(1) << Index;
+    }
+
+    if (Index >= StaticBufferCount)
+      return getDynamicBuffer(BufferSize);
+
+    const uptr Offset = Index * StaticBufferSize;
+    memset(&RawBuffer[Offset], 0, StaticBufferSize);
+    return &RawBuffer[Offset];
+  }
+
+  void releaseBuffer(uptr *Buffer, const uptr BufferSize) {
+    const uptr Index = getStaticBufferIndex(Buffer, BufferSize);
+    if (Index < StaticBufferCount) {
+      ScopedLock L(Mutex);
+      DCHECK_EQ((Mask & (static_cast<uptr>(1) << Index)), 0U);
+      Mask |= static_cast<uptr>(1) << Index;
+    } else {
+      unmap(reinterpret_cast<void *>(Buffer),
+            roundUp(BufferSize, getPageSizeCached()));
+    }
+  }
+
+  bool isStaticBufferTestOnly(uptr *Buffer, uptr BufferSize) {
+    return getStaticBufferIndex(Buffer, BufferSize) < StaticBufferCount;
+  }
+
+private:
+  uptr getStaticBufferIndex(uptr *Buffer, uptr BufferSize) {
+    if (UNLIKELY(BufferSize > StaticBufferSize))
+      return StaticBufferCount;
+
+    const uptr BufferBase = reinterpret_cast<uptr>(Buffer);
+    const uptr RawBufferBase = reinterpret_cast<uptr>(RawBuffer);
+
+    if (BufferBase < RawBufferBase ||
+        BufferBase >= RawBufferBase + sizeof(RawBuffer)) {
+      return StaticBufferCount;
+    }
+
+    DCHECK_LE(BufferSize, StaticBufferSize);
+    DCHECK_LE(BufferBase + BufferSize, RawBufferBase + sizeof(RawBuffer));
+    DCHECK_EQ((BufferBase - RawBufferBase) % StaticBufferSize, 0U);
+
+    const uptr Index =
+        (BufferBase - RawBufferBase) / (StaticBufferSize * sizeof(uptr));
+    DCHECK_LT(Index, StaticBufferCount);
+    return Index;
+  }
+
+  uptr *getDynamicBuffer(const uptr BufferSize) {
+    // When using a heap-based buffer, precommit the pages backing the
+    // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization
+    // where page fault exceptions are skipped as the allocated memory
+    // is accessed. So far, this is only enabled on Fuchsia; it hasn't shown a
+    // performance benefit on other platforms.
+    const uptr MmapFlags = MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
+    return reinterpret_cast<uptr *>(
+        map(nullptr, roundUp(BufferSize, getPageSizeCached()), "scudo:counters",
+            MmapFlags, &MapData));
+  }
+
+  HybridMutex Mutex;
+  // A '1' bit means the buffer at that index is free; '0' means it is in use.
+  uptr Mask GUARDED_BY(Mutex) = ~static_cast<uptr>(0);
+  uptr RawBuffer[StaticBufferCount * StaticBufferSize] GUARDED_BY(Mutex);
+  [[no_unique_address]] MapPlatformData MapData = {};
+};
+
 // A Region page map is used to record the usage of pages in the regions. It
 // implements a packed array of Counters. Each counter occupies 2^N bits, enough
 // to store counter's MaxValue. Ctor will try to use a static buffer first, and
@@ -76,11 +169,7 @@ class RegionPageMap {
   ~RegionPageMap() {
     if (!isAllocated())
       return;
-    if (Buffer == &StaticBuffer[0])
-      Mutex.unlock();
-    else
-      unmap(reinterpret_cast<void *>(Buffer),
-            roundUp(BufferSize, getPageSizeCached()));
+    Buffers.releaseBuffer(Buffer, BufferSize);
     Buffer = nullptr;
   }
 
@@ -88,8 +177,7 @@ class RegionPageMap {
   // specify the thread-safety attribute properly in current code structure.
   // Besides, it's the only place we may want to check thread safety. Therefore,
   // it's fine to bypass the thread-safety analysis now.
-  void reset(uptr NumberOfRegion, uptr CountersPerRegion,
-             uptr MaxValue) NO_THREAD_SAFETY_ANALYSIS {
+  void reset(uptr NumberOfRegion, uptr CountersPerRegion, uptr MaxValue) {
     DCHECK_GT(NumberOfRegion, 0);
     DCHECK_GT(CountersPerRegion, 0);
     DCHECK_GT(MaxValue, 0);
@@ -115,21 +203,8 @@ class RegionPageMap {
         roundUp(NumCounters, static_cast<uptr>(1U) << PackingRatioLog) >>
         PackingRatioLog;
     BufferSize = SizePerRegion * sizeof(*Buffer) * Regions;
-    if (BufferSize <= (StaticBufferCount * sizeof(Buffer[0])) &&
-        Mutex.tryLock()) {
-      Buffer = &StaticBuffer[0];
-      memset(Buffer, 0, BufferSize);
-    } else {
-      // When using a heap-based buffer, precommit the pages backing the
-      // Vmar by passing |MAP_PRECOMMIT| flag. This allows an optimization
-      // where page fault exceptions are skipped as the allocated memory
-      // is accessed.
-      const uptr MmapFlags =
-          MAP_ALLOWNOMEM | (SCUDO_FUCHSIA ? MAP_PRECOMMIT : 0);
-      Buffer = reinterpret_cast<uptr *>(
-          map(nullptr, roundUp(BufferSize, getPageSizeCached()),
-              "scudo:counters", MmapFlags, &MapData));
-    }
+    Buffer = Buffers.getBuffer(BufferSize);
+    DCHECK_NE(Buffer, nullptr);
   }
 
   bool isAllocated() const { return !!Buffer; }
@@ -206,8 +281,6 @@ class RegionPageMap {
 
   uptr getBufferSize() const { return BufferSize; }
 
-  static const uptr StaticBufferCount = 2048U;
-
 private:
   uptr Regions;
   uptr NumCounters;
@@ -219,10 +292,12 @@ class RegionPageMap {
   uptr SizePerRegion;
   uptr BufferSize;
   uptr *Buffer;
-  [[no_unique_address]] MapPlatformData MapData = {};
 
-  static HybridMutex Mutex;
-  static uptr StaticBuffer[StaticBufferCount] GUARDED_BY(Mutex);
+  // We may consider making these configurable if there are cases that would
+  // benefit from it.
+  static const uptr StaticBufferCount = 2U;
+  static const uptr StaticBufferSize = 512U;
+  static BufferPool<StaticBufferCount, StaticBufferSize> Buffers;
 };
 
 template <class ReleaseRecorderT> class FreePagesRangeTracker {
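
To see when the static buffers suffice: RegionPageMap::reset() above computes
BufferSize = SizePerRegion * sizeof(*Buffer) * Regions and hands it to
getBuffer(), which compares it against StaticBufferSize (512). A worked
example with assumed values (MaxValue, NumCounters, and Regions are made up,
and the counter-width derivation is simplified from what reset() actually
does):

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed example inputs; real values depend on the allocator config.
  const uint64_t MaxValue = 3;      // max counter value -> 2 bits per counter
  const uint64_t NumCounters = 100; // counters per region
  const uint64_t Regions = 2;

  uint64_t CounterBits = 1;
  while (((uint64_t(1) << CounterBits) - 1) < MaxValue)
    CounterBits *= 2;                               // power-of-two bit width
  const uint64_t PackingRatio = 64 / CounterBits;   // 32 counters per word
  const uint64_t SizePerRegion =
      (NumCounters + PackingRatio - 1) / PackingRatio; // roundUp -> 4 words
  const uint64_t BufferSize = SizePerRegion * sizeof(uint64_t) * Regions;

  // 4 words * 8 bytes * 2 regions = 64, well under the 512 threshold, so a
  // request like this is served by a static buffer instead of map().
  printf("BufferSize = %llu bytes\n",
         static_cast<unsigned long long>(BufferSize));
  return 0;
}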

diff --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
index e549d0dca2967..173928ead207e 100644
--- a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
@@ -562,3 +562,24 @@ TEST(ScudoReleaseTest, ReleasePartialRegion) {
   testReleasePartialRegion<scudo::FuchsiaSizeClassMap>();
   testReleasePartialRegion<scudo::SvelteSizeClassMap>();
 }
+
+TEST(ScudoReleaseTest, BufferPool) {
+  constexpr scudo::uptr StaticBufferCount = SCUDO_WORDSIZE - 1;
+  constexpr scudo::uptr StaticBufferSize = 512U;
+  scudo::BufferPool<StaticBufferCount, StaticBufferSize> Pool;
+
+  std::vector<std::pair<scudo::uptr *, scudo::uptr>> Buffers;
+  for (scudo::uptr I = 0; I < StaticBufferCount; ++I) {
+    scudo::uptr *P = Pool.getBuffer(StaticBufferSize);
+    EXPECT_TRUE(Pool.isStaticBufferTestOnly(P, StaticBufferSize));
+    Buffers.emplace_back(P, StaticBufferSize);
+  }
+
+  // The static buffers should all be in use now, so expect a dynamic buffer.
+  scudo::uptr *P = Pool.getBuffer(StaticBufferSize);
+  EXPECT_FALSE(Pool.isStaticBufferTestOnly(P, StaticBufferSize));
+
+  Pool.releaseBuffer(P, StaticBufferSize);
+  for (auto &Buffer : Buffers)
+    Pool.releaseBuffer(Buffer.first, Buffer.second);
+}
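
The test picks StaticBufferCount = SCUDO_WORDSIZE - 1, the largest value the
static_assert permits, exhausts every static slot, and then checks that one
more request is served dynamically. For readers unfamiliar with that fallback
path, here is a rough portable analogue (using raw mmap/munmap rather than
scudo's map()/unmap() wrappers, which take extra flags and platform data):

#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>
#include <cstdint>

// Round a byte count up to a whole number of pages, as getDynamicBuffer()
// and releaseBuffer() do with roundUp(BufferSize, getPageSizeCached()).
static size_t roundUpToPage(size_t Bytes) {
  size_t PageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  return (Bytes + PageSize - 1) & ~(PageSize - 1);
}

uint64_t *getDynamicBufferAnalogue(size_t Bytes) {
  void *P = mmap(nullptr, roundUpToPage(Bytes), PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  return P == MAP_FAILED ? nullptr : static_cast<uint64_t *>(P);
}

void releaseDynamicBufferAnalogue(uint64_t *Buffer, size_t Bytes) {
  munmap(Buffer, roundUpToPage(Bytes));
}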
