[compiler-rt] Revert "[scudo] Store more blocks in each TransferBatch" (PR #83078)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 26 14:49:56 PST 2024


llvmbot wrote:



@llvm/pr-subscribers-compiler-rt-sanitizer

Author: ChiaHungDuan

Changes:

Reverts llvm/llvm-project#70390

There's a bug caught by `ScudoCombinedTestReallocateInPlaceStress_DefaultConfig.ReallocateInPlaceStress` when running with GWP-ASan. It's an easy fix, but given that this is a major change, I would like to revert it first.

---

Patch is 23.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83078.diff


4 Files Affected:

- (modified) compiler-rt/lib/scudo/standalone/allocator_common.h (-7) 
- (modified) compiler-rt/lib/scudo/standalone/primary32.h (+51-55) 
- (modified) compiler-rt/lib/scudo/standalone/primary64.h (+71-82) 
- (modified) compiler-rt/lib/scudo/standalone/tests/primary_test.cpp (+19-15) 


``````````diff
diff --git a/compiler-rt/lib/scudo/standalone/allocator_common.h b/compiler-rt/lib/scudo/standalone/allocator_common.h
index 46dc7c0f3b914e..95f4776ac596dc 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_common.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -40,7 +40,6 @@ template <class SizeClassAllocator> struct TransferBatch {
     B->Count = static_cast<u16>(B->Count - N);
   }
   void clear() { Count = 0; }
-  bool empty() { return Count == 0; }
   void add(CompactPtrT P) {
     DCHECK_LT(Count, MaxNumCached);
     Batch[Count++] = P;
@@ -49,12 +48,6 @@ template <class SizeClassAllocator> struct TransferBatch {
     memcpy(Array, Batch, sizeof(Batch[0]) * Count);
     clear();
   }
-
-  void moveNToArray(CompactPtrT *Array, u16 N) {
-    DCHECK_LE(N, Count);
-    memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * Count);
-    Count -= N;
-  }
   u16 getCount() const { return Count; }
   bool isEmpty() const { return Count == 0U; }
   CompactPtrT get(u16 I) const {
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index c86e75b8fd66a8..4d03b282d000de 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -191,21 +191,38 @@ template <typename Config> class SizeClassAllocator32 {
     return BlockSize > PageSize;
   }
 
+  // Note that the `MaxBlockCount` will be used when we support arbitrary blocks
+  // count. Now it's the same as the number of blocks stored in the
+  // `TransferBatch`.
   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                const u16 MaxBlockCount) {
+                UNUSED const u16 MaxBlockCount) {
+    TransferBatchT *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
+  TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
     ScopedLock L(Sci->Mutex);
-
-    u16 PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
-    if (UNLIKELY(PopCount == 0)) {
+    TransferBatchT *B = popBatchImpl(C, ClassId, Sci);
+    if (UNLIKELY(!B)) {
       if (UNLIKELY(!populateFreeList(C, ClassId, Sci)))
-        return 0U;
-      PopCount = popBlocksImpl(C, ClassId, Sci, ToArray, MaxBlockCount);
-      DCHECK_NE(PopCount, 0U);
+        return nullptr;
+      B = popBatchImpl(C, ClassId, Sci);
+      // if `populateFreeList` succeeded, we are supposed to get free blocks.
+      DCHECK_NE(B, nullptr);
     }
-
-    return PopCount;
+    return B;
   }
 
   // Push the array of free blocks to the designated batch group.
@@ -493,7 +510,7 @@ template <typename Config> class SizeClassAllocator32 {
     // by TransferBatch is also free for use. We don't need to recycle the
     // TransferBatch. Note that the correctness is maintained by the invariant,
     //
-    //   Each popBlocks() request returns the entire TransferBatch. Returning
+    //   The unit of each popBatch() request is entire TransferBatch. Return
     //   part of the blocks in a TransferBatch is invalid.
     //
     // This ensures that TransferBatch won't leak the address itself while it's
@@ -617,7 +634,7 @@ template <typename Config> class SizeClassAllocator32 {
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -709,11 +726,14 @@ template <typename Config> class SizeClassAllocator32 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
-  u16 popBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
-                    CompactPtrT *ToArray, const u16 MaxBlockCount)
+  // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+  // group id will be considered first.
+  //
+  // The region mutex needs to be held while calling this method.
+  TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
       REQUIRES(Sci->Mutex) {
     if (Sci->FreeListInfo.BlockList.empty())
-      return 0U;
+      return nullptr;
 
     SinglyLinkedList<TransferBatchT> &Batches =
         Sci->FreeListInfo.BlockList.front()->Batches;
@@ -726,57 +746,33 @@ template <typename Config> class SizeClassAllocator32 {
       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
       // `TransferBatch` with single block.
       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
-      ToArray[0] =
-          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
+      TB->clear();
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
       Sci->FreeListInfo.PoppedBlocks += 1;
-      return 1U;
+      return TB;
     }
 
-    // So far, instead of always filling the blocks to `MaxBlockCount`, we only
-    // examine single `TransferBatch` to minimize the time spent on the primary
-    // allocator. Besides, the sizes of `TransferBatch` and
-    // `CacheT::getMaxCached()` may also impact the time spent on accessing the
-    // primary allocator.
-    // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
-    // blocks and/or adjust the size of `TransferBatch` according to
-    // `CacheT::getMaxCached()`.
     TransferBatchT *B = Batches.front();
+    Batches.pop_front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    // BachClassId should always take all blocks in the TransferBatch. Read the
-    // comment in `pushBatchClassBlocks()` for more details.
-    const u16 PopCount = ClassId == SizeClassMap::BatchClassId
-                             ? B->getCount()
-                             : Min(MaxBlockCount, B->getCount());
-    B->moveNToArray(ToArray, PopCount);
-
-    // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
-    // done without holding `Mutex`.
-    if (B->empty()) {
-      Batches.pop_front();
-      // `TransferBatch` of BatchClassId is self-contained, no need to
-      // deallocate. Read the comment in `pushBatchClassBlocks()` for more
-      // details.
+    if (Batches.empty()) {
+      BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
+      Sci->FreeListInfo.BlockList.pop_front();
+
+      // We don't keep BatchGroup with zero blocks to avoid empty-checking while
+      // allocating. Note that block used by constructing BatchGroup is recorded
+      // as free blocks in the last element of BatchGroup::Batches. Which means,
+      // once we pop the last TransferBatch, the block is implicitly
+      // deallocated.
       if (ClassId != SizeClassMap::BatchClassId)
-        C->deallocate(SizeClassMap::BatchClassId, B);
-
-      if (Batches.empty()) {
-        BatchGroupT *BG = Sci->FreeListInfo.BlockList.front();
-        Sci->FreeListInfo.BlockList.pop_front();
-
-        // We don't keep BatchGroup with zero blocks to avoid empty-checking
-        // while allocating. Note that block used for constructing BatchGroup is
-        // recorded as free blocks in the last element of BatchGroup::Batches.
-        // Which means, once we pop the last TransferBatch, the block is
-        // implicitly deallocated.
-        if (ClassId != SizeClassMap::BatchClassId)
-          C->deallocate(SizeClassMap::BatchClassId, BG);
-      }
+        C->deallocate(SizeClassMap::BatchClassId, BG);
     }
 
-    Sci->FreeListInfo.PoppedBlocks += PopCount;
-    return PopCount;
+    Sci->FreeListInfo.PoppedBlocks += B->getCount();
+    return B;
   }
 
   NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, SizeClassInfo *Sci)
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 971a65ee15f0fc..9a642d23620e1e 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -12,7 +12,6 @@
 #include "allocator_common.h"
 #include "bytemap.h"
 #include "common.h"
-#include "condition_variable.h"
 #include "list.h"
 #include "local_cache.h"
 #include "mem_map.h"
@@ -23,6 +22,8 @@
 #include "string_utils.h"
 #include "thread_annotations.h"
 
+#include "condition_variable.h"
+
 namespace scudo {
 
 // SizeClassAllocator64 is an allocator tuned for 64-bit address space.
@@ -220,24 +221,41 @@ template <typename Config> class SizeClassAllocator64 {
     DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
   }
 
+  // Note that the `MaxBlockCount` will be used when we support arbitrary blocks
+  // count. Now it's the same as the number of blocks stored in the
+  // `TransferBatch`.
   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
-                const u16 MaxBlockCount) {
+                UNUSED const u16 MaxBlockCount) {
+    TransferBatchT *B = popBatch(C, ClassId);
+    if (!B)
+      return 0;
+
+    const u16 Count = B->getCount();
+    DCHECK_GT(Count, 0U);
+    B->moveToArray(ToArray);
+
+    if (ClassId != SizeClassMap::BatchClassId)
+      C->deallocate(SizeClassMap::BatchClassId, B);
+
+    return Count;
+  }
+
+  TransferBatchT *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     RegionInfo *Region = getRegionInfo(ClassId);
-    u16 PopCount = 0;
 
     {
       ScopedLock L(Region->FLLock);
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
-        return PopCount;
+      TransferBatchT *B = popBatchImpl(C, ClassId, Region);
+      if (LIKELY(B))
+        return B;
     }
 
     bool ReportRegionExhausted = false;
+    TransferBatchT *B = nullptr;
 
     if (conditionVariableEnabled()) {
-      PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
-                                 ReportRegionExhausted);
+      B = popBatchWithCV(C, ClassId, Region, ReportRegionExhausted);
     } else {
       while (true) {
         // When two threads compete for `Region->MMLock`, we only want one of
@@ -246,15 +264,13 @@ template <typename Config> class SizeClassAllocator64 {
         ScopedLock ML(Region->MMLock);
         {
           ScopedLock FL(Region->FLLock);
-          PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-          if (PopCount != 0U)
-            return PopCount;
+          if ((B = popBatchImpl(C, ClassId, Region)))
+            break;
         }
 
         const bool RegionIsExhausted = Region->Exhausted;
         if (!RegionIsExhausted)
-          PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
-                                                  MaxBlockCount);
+          B = populateFreeListAndPopBatch(C, ClassId, Region);
         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
         break;
       }
@@ -270,7 +286,7 @@ template <typename Config> class SizeClassAllocator64 {
         reportOutOfBatchClass();
     }
 
-    return PopCount;
+    return B;
   }
 
   // Push the array of free blocks to the designated batch group.
@@ -624,7 +640,7 @@ template <typename Config> class SizeClassAllocator64 {
     // by TransferBatch is also free for use. We don't need to recycle the
     // TransferBatch. Note that the correctness is maintained by the invariant,
     //
-    //   Each popBlocks() request returns the entire TransferBatch. Returning
+    //   The unit of each popBatch() request is entire TransferBatch. Return
     //   part of the blocks in a TransferBatch is invalid.
     //
     // This ensures that TransferBatch won't leak the address itself while it's
@@ -747,7 +763,7 @@ template <typename Config> class SizeClassAllocator64 {
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+      BG->MaxCachedPerBatch = CacheT::getMaxCached(getSizeByClassId(ClassId));
 
       return BG;
     };
@@ -839,10 +855,9 @@ template <typename Config> class SizeClassAllocator64 {
     InsertBlocks(Cur, Array + Size - Count, Count);
   }
 
-  u16 popBlocksWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
-                      CompactPtrT *ToArray, const u16 MaxBlockCount,
-                      bool &ReportRegionExhausted) {
-    u16 PopCount = 0;
+  TransferBatchT *popBatchWithCV(CacheT *C, uptr ClassId, RegionInfo *Region,
+                                 bool &ReportRegionExhausted) {
+    TransferBatchT *B = nullptr;
 
     while (true) {
       // We only expect one thread doing the freelist refillment and other
@@ -863,8 +878,7 @@ template <typename Config> class SizeClassAllocator64 {
 
         const bool RegionIsExhausted = Region->Exhausted;
         if (!RegionIsExhausted)
-          PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
-                                                  MaxBlockCount);
+          B = populateFreeListAndPopBatch(C, ClassId, Region);
         ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
 
         {
@@ -891,8 +905,7 @@ template <typename Config> class SizeClassAllocator64 {
       // blocks were used up right after the refillment. Therefore, we have to
       // check if someone is still populating the freelist.
       ScopedLock FL(Region->FLLock);
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
+      if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
         break;
 
       if (!Region->isPopulatingFreeList)
@@ -905,19 +918,21 @@ template <typename Config> class SizeClassAllocator64 {
       // `pushBatchClassBlocks()` and `mergeGroupsToReleaseBack()`.
       Region->FLLockCV.wait(Region->FLLock);
 
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
+      if (LIKELY(B = popBatchImpl(C, ClassId, Region)))
         break;
     }
 
-    return PopCount;
+    return B;
   }
 
-  u16 popBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
-                    CompactPtrT *ToArray, const u16 MaxBlockCount)
+  // Pop one TransferBatch from a BatchGroup. The BatchGroup with the smallest
+  // group id will be considered first.
+  //
+  // The region mutex needs to be held while calling this method.
+  TransferBatchT *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region)
       REQUIRES(Region->FLLock) {
     if (Region->FreeListInfo.BlockList.empty())
-      return 0U;
+      return nullptr;
 
     SinglyLinkedList<TransferBatchT> &Batches =
         Region->FreeListInfo.BlockList.front()->Batches;
@@ -930,64 +945,39 @@ template <typename Config> class SizeClassAllocator64 {
       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
       // `TransferBatch` with single block.
       TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
-      ToArray[0] =
-          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
+      TB->clear();
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
       Region->FreeListInfo.PoppedBlocks += 1;
-      return 1U;
+      return TB;
     }
 
-    // So far, instead of always fill blocks to `MaxBlockCount`, we only exmaine
-    // single `TransferBatch` to minimize the time spent in the primary
-    // allocator. Besides, the sizes of `TransferBatch` and
-    // `CacheT::getMaxCached()` may also impact the time spent on accessing the
-    // primary allocator.
-    // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
-    // blocks and/or adjust the size of `TransferBatch` according to
-    // `CacheT::getMaxCached()`.
     TransferBatchT *B = Batches.front();
+    Batches.pop_front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    // BachClassId should always take all blocks in the TransferBatch. Read the
-    // comment in `pushBatchClassBlocks()` for more details.
-    const u16 PopCount = ClassId == SizeClassMap::BatchClassId
-                             ? B->getCount()
-                             : Min(MaxBlockCount, B->getCount());
-    B->moveNToArray(ToArray, PopCount);
-
-    // TODO(chiahungduan): The deallocation of unused BatchClassId blocks can be
-    // done without holding `FLLock`.
-    if (B->empty()) {
-      Batches.pop_front();
-      // `TransferBatch` of BatchClassId is self-contained, no need to
-      // deallocate. Read the comment in `pushBatchClassBlocks()` for more
-      // details.
-      if (ClassId != SizeClassMap::BatchClassId)
-        C->deallocate(SizeClassMap::BatchClassId, B);
-
-      if (Batches.empty()) {
-        BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
-        Region->FreeListInfo.BlockList.pop_front();
+    if (Batches.empty()) {
+      BatchGroupT *BG = Region->FreeListInfo.BlockList.front();
+      Region->FreeListInfo.BlockList.pop_front();
 
-        // We don't keep BatchGroup with zero blocks to avoid empty-checking
-        // while allocating. Note that block used for constructing BatchGroup is
-        // recorded as free blocks in the last element of BatchGroup::Batches.
-        // Which means, once we pop the last TransferBatch, the block is
-        // implicitly deallocated.
-        if (ClassId != SizeClassMap::BatchClassId)
-          C->deallocate(SizeClassMap::BatchClassId, BG);
-      }
+      // We don't keep BatchGroup with zero blocks to avoid empty-checking while
+      // allocating. Note that block used by constructing BatchGroup is recorded
+      // as free blocks in the last element of BatchGroup::Batches. Which means,
+      // once we pop the last TransferBatch, the block is implicitly
+      // deallocated.
+      if (ClassId != SizeClassMap::BatchClassId)
+        C->deallocate(SizeClassMap::BatchClassId, BG);
     }
 
-    Region->FreeListInfo.PoppedBlocks += PopCount;
+    Region->FreeListInfo.PoppedBlocks += B->getCount();
 
-    return PopCount;
+    return B;
   }
 
-  NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId,
-                                            RegionInfo *Region,
-                                            CompactPtrT *ToArray,
-                                            const u16 MaxBlockCount)
+  // Refill the freelist and return one batch.
+  NOINLINE TransferBatchT *populateFreeListAndPopBatch(CacheT *C, uptr ClassId,
+                                                       RegionInfo *Region)
       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr Size = getSizeByClassId(ClassId);
     const u16 MaxCount = CacheT::getMaxCached(Size);
@@ -1004,7 +994,7 @@ template <typename Config> class SizeClassAllocator64 {
       const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
       if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
         Region->Exhausted = true;
-        return 0U;
+        return nullptr;
       }
 
       if (UNLIKELY(!Region->MemMapInfo.MemMap.remap(
@@ -1012,7 +1002,7 @@ template <typename Config> class SizeClassAllocator64 {
               MAP_ALLOWNOMEM | MAP_RESIZABLE |
                   (useMemoryTagging<Config>(Options.load()) ? MAP_MEMTAG
                                                             : 0)))) {
-        return 0U;
+        return nullptr;
       }
       Region->MemMapInfo.MappedUser += MapSize;
       C->getStats().add(StatMapped, MapSize);
@@ -1059,9 +1049,8 @@ template <typename Config> class SizeClassAllocator64 {
       pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks);
     }
 
-    const u16 PopCount =
-        popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-    DCHECK_NE(PopCount, 0U);
+    TransferBatchT *B = popBatchImpl(C, ClassId, Region);
+    DCHECK_NE(B, nullptr);
 
     // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record
     // the requests from `PushBlocks` and `PopBatch` which are external
@@ -1073,7 +1062,7 @@ template <typename Config> class SizeClassAllocator64 {
     C->getStats().add(StatFree, AllocatedUser);
     Region->MemMapInfo.AllocatedUser += AllocatedUser;
 
-    return PopCount;
+    return B;
   }
 
   void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Re...
[truncated]

``````````
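
For context, the revert restores the invariant that the primary allocator hands out blocks one whole `TransferBatch` at a time: `popBlocks()` becomes a thin wrapper over `popBatch()` that drains the entire batch into the caller's array, ignoring `MaxBlockCount`. (Incidentally, the removed `moveNToArray()` copies `sizeof(Batch[0]) * Count` bytes rather than `* N`, reading past the live entries whenever `N < Count`; that may be the "easy fix" alluded to above, though the message doesn't say.) Below is a minimal, self-contained C++ sketch of the restored shape; the names and types are simplified stand-ins for illustration, not the actual Scudo code.

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

using u16 = std::uint16_t;
using CompactPtrT = std::uint32_t;

// Simplified stand-in for scudo's TransferBatch (allocator_common.h); the
// real struct carries a list hook and uses scudo's own integer types.
template <u16 MaxNumCached> struct TransferBatchSketch {
  u16 Count = 0;
  CompactPtrT Batch[MaxNumCached] = {};

  void add(CompactPtrT P) {
    assert(Count < MaxNumCached);
    Batch[Count++] = P;
  }
  // The restored contract: a batch is always drained as a whole.
  void moveToArray(CompactPtrT *Array) {
    std::memcpy(Array, Batch, sizeof(Batch[0]) * Count);
    Count = 0;
  }
  u16 getCount() const { return Count; }
};

// Mirrors the restored popBlocks(): MaxBlockCount is accepted for interface
// compatibility but unused, because the unit of transfer is a whole batch.
template <u16 N>
u16 popBlocksSketch(TransferBatchSketch<N> *B, CompactPtrT *ToArray,
                    u16 /*MaxBlockCount, unused after the revert*/) {
  if (!B)
    return 0;
  const u16 Count = B->getCount();
  assert(Count > 0);
  B->moveToArray(ToArray);
  // In the real allocator, B itself would now be recycled via
  // C->deallocate(SizeClassMap::BatchClassId, B) unless it is a
  // batch-class block; the sketch omits the cache plumbing.
  return Count;
}

int main() {
  TransferBatchSketch<8> TB;
  for (CompactPtrT P = 1; P <= 5; ++P)
    TB.add(P);

  CompactPtrT Out[8];
  // Even though the caller asks for at most 2 blocks, it receives all 5:
  // returning part of a TransferBatch is invalid under the restored
  // invariant quoted in the diff's comments.
  const u16 Got = popBlocksSketch(&TB, Out, /*MaxBlockCount=*/2);
  assert(Got == 5 && TB.getCount() == 0);
  return 0;
}
```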



https://github.com/llvm/llvm-project/pull/83078

