[compiler-rt] 9c26f51 - [scudo] Support partial page releasing

Chia-hung Duan via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 13 16:35:31 PDT 2022


Author: Chia-hung Duan
Date: 2022-10-13T23:35:07Z
New Revision: 9c26f51f5e178ac0fda98419e3a61d205d3b58b1

URL: https://github.com/llvm/llvm-project/commit/9c26f51f5e178ac0fda98419e3a61d205d3b58b1
DIFF: https://github.com/llvm/llvm-project/commit/9c26f51f5e178ac0fda98419e3a61d205d3b58b1.diff

LOG: [scudo] Support partial page releasing

Block grouping enables partial page releasing, i.e., releasing pages at
a finer granularity, which means we don't need to visit all blocks to
determine which pages are unused. Besides, this means we can do
incremental page releasing depending on the number of free blocks.

Reviewed By: cryptoad, cferris

Differential Revision: https://reviews.llvm.org/D134226

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/local_cache.h
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/tests/combined_test.cpp

Removed: 
    


################################################################################
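At a high level, each BatchGroup now counts the blocks pushed into it, and the
release path only scans a group once at least a page worth of new free bytes
has accumulated since the last checkpoint. A minimal sketch of that gating
logic (hypothetical standalone helper; the real fields are added to BatchGroup
in the diff below):

    #include <cstdint>
    using uptr = std::uintptr_t;

    // Sketch: is this BatchGroup worth scanning for releasable pages?
    // PushedBlocks and PushedBlocksAtLastCheckpoint mirror the new
    // BatchGroup fields; BlockSize and PageSize come from the size class
    // and the platform.
    bool worthScanning(uptr PushedBlocks, uptr PushedBlocksAtLastCheckpoint,
                       uptr BlockSize, uptr PageSize) {
      const uptr NewFreeBlocks = PushedBlocks - PushedBlocksAtLastCheckpoint;
      // Fewer than one page of newly freed bytes: skip the group, since
      // scanning it would likely be wasted work.
      return NewFreeBlocks * BlockSize >= PageSize;
    }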
diff  --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
index 7f270472d70ff..6e5b09fdb709e 100644
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -65,6 +65,13 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
     uptr GroupId;
     // Cache value of TransferBatch::getMaxCached()
     u16 MaxCachedPerBatch;
+    // Number of blocks pushed into this group. This is an increment-only
+    // counter.
+    uptr PushedBlocks;
+    // This is used to track how many blocks have been pushed since the last
+    // time we checked `PushedBlocks`. It's useful for page releasing to
+    // determine the usage of a BatchGroup.
+    uptr PushedBlocksAtLastCheckpoint;
     // Blocks are managed by TransferBatch in a list.
     SinglyLinkedList<TransferBatch> Batches;
   };
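Both primaries derive the number of free blocks currently held by a group from
its batch list: every batch except the last is full. A hedged sketch of that
computation, using standalone parameters in place of the BatchGroup members
referenced in the hunks below (assumes at least one batch):

    #include <cstdint>
    using uptr = std::uintptr_t;
    using u16 = std::uint16_t;

    // Sketch: bytes covered by the free blocks of one BatchGroup.
    // NumBatches, LastBatchCount and MaxCachedPerBatch stand in for
    // BG.Batches.size(), BG.Batches.back()->getCount() and
    // BG.MaxCachedPerBatch below.
    uptr bytesInGroup(uptr NumBatches, u16 LastBatchCount,
                      u16 MaxCachedPerBatch, uptr BlockSize) {
      const uptr NumBlocks =
          (NumBatches - 1) * MaxCachedPerBatch + LastBatchCount;
      return NumBlocks * BlockSize;
    }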

diff  --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index b411a3bd837a0..fb6cb53392358 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -421,6 +421,8 @@ template <typename Config> class SizeClassAllocator32 {
 
       BG->GroupId = GroupId;
       BG->Batches.push_front(TB);
+      BG->PushedBlocks = 0;
+      BG->PushedBlocksAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -446,6 +448,8 @@ template <typename Config> class SizeClassAllocator32 {
         CurBatch->appendFromArray(&Array[I], AppendSize);
         I += AppendSize;
       }
+
+      BG->PushedBlocks += Size;
     };
 
     BatchGroup *Cur = Sci->FreeList.front();
@@ -652,16 +656,13 @@ template <typename Config> class SizeClassAllocator32 {
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    const bool CheckDensity = BlockSize < PageSize / 16U;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (BlockSize < PageSize / 16U) {
+    if (CheckDensity) {
       if (!Force && BytesPushed < Sci->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
-      if ((BytesInFreeList * 100U) / Sci->AllocatedUser <
-          (100U - 1U - BlockSize / 16U))
-        return 0;
     }
 
     if (!Force) {
@@ -682,17 +683,47 @@ template <typename Config> class SizeClassAllocator32 {
     uptr TotalReleasedBytes = 0;
     const uptr Base = First * RegionSize;
     const uptr NumberOfRegions = Last - First + 1U;
+    const uptr GroupSize = (1U << GroupSizeLog);
+    const uptr CurRegionGroupId =
+        compactPtrGroup(compactPtr(ClassId, Sci->CurrentRegion));
+
     ReleaseRecorder Recorder(Base);
-    auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
-      return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
-    };
+    PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
+
     auto DecompactPtr = [](CompactPtrT CompactPtr) {
       return reinterpret_cast<uptr>(CompactPtr);
     };
-    PageReleaseContext Context(BlockSize, RegionSize, NumberOfRegions);
-    for (BatchGroup &BG : Sci->FreeList)
+    for (BatchGroup &BG : Sci->FreeList) {
+      const uptr PushedBytesDelta =
+          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
+      if (PushedBytesDelta * BlockSize < PageSize)
+        continue;
+
+      const uptr AllocatedGroupSize = BG.GroupId == CurRegionGroupId
+                                          ? Sci->CurrentRegionAllocated
+                                          : GroupSize;
+      if (AllocatedGroupSize == 0) continue;
+
+      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
+          BG.Batches.back()->getCount();
+      const uptr BytesInBG = NumBlocks * BlockSize;
+      // Given the randomness property, we try to release the pages only if
+      // the bytes used by free blocks exceed a certain proportion of the
+      // allocated space.
+      if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize <
+          (100U - 1U - BlockSize / 16U)) {
+        continue;
+      }
+
+      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // Note that we don't always visit the blocks in each BatchGroup, so we
+      // may miss the chance of releasing certain pages that cross BatchGroups.
       Context.markFreeBlocks(BG.Batches, DecompactPtr, Base);
+    }
 
+    auto SkipRegion = [this, First, ClassId](uptr RegionIndex) {
+      return (PossibleRegions[First + RegionIndex] - 1U) != ClassId;
+    };
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
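The density gate above replaces the old region-wide check: a small-block group
is only scanned when its free bytes reach a block-size-dependent share of the
allocated group space. A sketch of the threshold with a couple of worked
values (assuming 4 KiB pages; the formula matches (100U - 1U - BlockSize / 16U)
in the hunk above):

    #include <cstdint>
    using uptr = std::uintptr_t;

    // Sketch: percentage of free bytes a group must reach to be scanned.
    uptr releaseThresholdPercent(uptr BlockSize) {
      return 100U - 1U - BlockSize / 16U;
    }
    // 64-byte blocks  -> 95% of the allocated group must be free.
    // 128-byte blocks -> 91%.
    // Blocks >= PageSize / 16 (256 bytes here) skip the density check
    // entirely, since CheckDensity is false for them.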

diff  --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 84b811a8b399d..2b6e71c8584ab 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -370,6 +370,9 @@ template <typename Config> class SizeClassAllocator64 {
   static uptr compactPtrGroup(CompactPtrT CompactPtr) {
     return CompactPtr >> (GroupSizeLog - CompactPtrScale);
   }
+  static uptr batchGroupBase(uptr Base, uptr GroupId) {
+    return (GroupId << GroupSizeLog) + Base;
+  }
 
   // Push the blocks to their batch group. The layout will be like,
   //
@@ -424,6 +427,8 @@ template <typename Config> class SizeClassAllocator64 {
 
       BG->GroupId = GroupId;
       BG->Batches.push_front(TB);
+      BG->PushedBlocks = 0;
+      BG->PushedBlocksAtLastCheckpoint = 0;
       BG->MaxCachedPerBatch =
           TransferBatch::getMaxCached(getSizeByClassId(ClassId));
 
@@ -450,6 +455,8 @@ template <typename Config> class SizeClassAllocator64 {
         CurBatch->appendFromArray(&Array[I], AppendSize);
         I += AppendSize;
       }
+
+      BG->PushedBlocks += Size;
     };
 
     BatchGroup *Cur = Region->FreeList.front();
@@ -661,16 +668,13 @@ template <typename Config> class SizeClassAllocator64 {
     if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
 
+    const bool CheckDensity = BlockSize < PageSize / 16U;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (BlockSize < PageSize / 16U) {
+    if (CheckDensity) {
       if (!Force && BytesPushed < Region->AllocatedUser / 16U)
         return 0;
-      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
-      if ((BytesInFreeList * 100U) / Region->AllocatedUser <
-          (100U - 1U - BlockSize / 16U))
-        return 0;
     }
 
     if (!Force) {
@@ -684,16 +688,46 @@ template <typename Config> class SizeClassAllocator64 {
       }
     }
 
+    const uptr GroupSize = (1U << GroupSizeLog);
+    const uptr AllocatedUserEnd = Region->AllocatedUser + Region->RegionBeg;
     ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
+    PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
+
     const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
     auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
       return decompactPtrInternal(CompactPtrBase, CompactPtr);
     };
-    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
-    PageReleaseContext Context(BlockSize, RegionSize, /*NumberOfRegions=*/1U);
-    for (BatchGroup &BG : Region->FreeList)
+    for (BatchGroup &BG : Region->FreeList) {
+      const uptr PushedBytesDelta =
+          BG.PushedBlocks - BG.PushedBlocksAtLastCheckpoint;
+      if (PushedBytesDelta * BlockSize < PageSize)
+        continue;
+      const uptr BatchGroupBase = batchGroupBase(CompactPtrBase, BG.GroupId);
+      const uptr BatchGroupEnd = BatchGroupBase + GroupSize;
+      const uptr AllocatedGroupSize = AllocatedUserEnd >= BatchGroupEnd
+                                          ? GroupSize
+                                          : AllocatedUserEnd - BatchGroupBase;
+      if (AllocatedGroupSize == 0) continue;
+
+      const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
+          BG.Batches.back()->getCount();
+      const uptr BytesInBG = NumBlocks * BlockSize;
+      // Given the randomness property, we try to release the pages only if
+      // the bytes used by free blocks exceed a certain proportion of the
+      // allocated space. Note that the proportion is computed against the
+      // allocated portion of the group, not the full group size.
+      if (CheckDensity && (BytesInBG * 100U) / AllocatedGroupSize <
+          (100U - 1U - BlockSize / 16U)) {
+        continue;
+      }
+
+      BG.PushedBlocksAtLastCheckpoint = BG.PushedBlocks;
+      // Note that we don't always visit the blocks in each BatchGroup, so we
+      // may miss the chance of releasing certain pages that cross BatchGroups.
       Context.markFreeBlocks(BG.Batches, DecompactPtr, Region->RegionBeg);
+    }
 
+    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
     releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
 
     if (Recorder.getReleasedRangesCount() > 0) {
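In the 64-bit primary the region is bump-allocated, so the group containing
the allocation frontier is only partially backed by usable memory; the density
denominator has to be clamped accordingly. A sketch of that clamping with
standalone names (GroupBase stands for batchGroupBase(CompactPtrBase,
BG.GroupId) and AllocatedUserEnd for Region->RegionBeg + Region->AllocatedUser
in the hunk above):

    #include <cstdint>
    using uptr = std::uintptr_t;

    // Sketch: bytes of a group that are actually allocated.
    uptr allocatedGroupSize(uptr GroupBase, uptr GroupSize,
                            uptr AllocatedUserEnd) {
      const uptr GroupEnd = GroupBase + GroupSize;
      // A fully covered group contributes its whole size; the trailing
      // group only the prefix up to the allocation frontier.
      return AllocatedUserEnd >= GroupEnd ? GroupSize
                                          : AllocatedUserEnd - GroupBase;
    }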

diff  --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 5b9d47151c7f5..02a6e04cb758c 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -506,7 +506,7 @@ struct DeathSizeClassConfig {
   static const scudo::uptr MinSizeLog = 10;
   static const scudo::uptr MidSizeLog = 10;
   static const scudo::uptr MaxSizeLog = 13;
-  static const scudo::u16 MaxNumCachedHint = 4;
+  static const scudo::u16 MaxNumCachedHint = 8;
   static const scudo::uptr MaxBytesCachedLog = 12;
   static const scudo::uptr SizeDelta = 0;
 };


        

