[compiler-rt] 1ff3a5d - [scudo] Allow pushing single block to the freelist of BatchClass

Chia-hung Duan via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 7 10:28:37 PDT 2023


Author: Chia-hung Duan
Date: 2023-07-07T17:27:48Z
New Revision: 1ff3a5d9bb110b25ca203af5e2dc6e2fe777df72

URL: https://github.com/llvm/llvm-project/commit/1ff3a5d9bb110b25ca203af5e2dc6e2fe777df72
DIFF: https://github.com/llvm/llvm-project/commit/1ff3a5d9bb110b25ca203af5e2dc6e2fe777df72.diff

LOG: [scudo] Allow pushing single block to the freelist of BatchClass

This CL removes the restriction that pushing blocks into BatchClassId
can only be done when the freelist is not empty. Without this constraint,
blocks of BatchClassId can also be gathered into groups.

Reviewed By: cferris

Differential Revision: https://reviews.llvm.org/D153492

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h

Removed: 
    


################################################################################
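
For readers following the diff below: the change builds on scudo's
self-contained BatchClass freelist, in which TransferBatch and BatchGroup
headers live in the very BatchClassId blocks they track. Here is a minimal
C++ sketch of that idea; the structs are simplified hypothetical stand-ins,
not scudo's real types (the real code uses compact pointers, per-class
locks, and SizeClassMap):

    #include <cassert>
    #include <cstdint>

    // Hypothetical, simplified stand-ins for scudo's real structures.
    struct TransferBatch {
      TransferBatch *Next = nullptr;
      uint16_t Count = 0;
      uintptr_t Blocks[4]; // stands in for MaxCachedPerBatch entries
      void add(uintptr_t P) { Blocks[Count++] = P; }
    };

    struct BatchGroup {
      TransferBatch *Batches = nullptr; // singly linked, push_front only
      uint32_t PushedBlocks = 0;
    };

    // Pushing a single free BatchClassId block into an empty freelist: the
    // block itself becomes the BatchGroup header, so no helper allocation
    // (and hence no recursive allocation from BatchClassId) is required.
    BatchGroup *pushSingleBlock(uintptr_t Block) {
      BatchGroup *BG = reinterpret_cast<BatchGroup *>(Block);
      BG->Batches = nullptr; // empty: the header block is the only record
      BG->PushedBlocks = 1;  // the header itself counts as one pushed block
      return BG;
    }

    int main() {
      alignas(TransferBatch) unsigned char Storage[sizeof(TransferBatch)];
      BatchGroup *BG = pushSingleBlock(reinterpret_cast<uintptr_t>(Storage));
      assert(BG->PushedBlocks == 1 && BG->Batches == nullptr);
    }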
diff  --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index c8dd6977b9ab09..c2fb3e7dd1fc1a 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -135,18 +135,29 @@ template <typename Config> class SizeClassAllocator32 {
 
       const uptr BlockSize = getSizeByClassId(I);
       DCHECK_EQ(TotalBlocks, Sci->AllocatedUser / BlockSize);
+      DCHECK_EQ(Sci->FreeListInfo.PushedBlocks, Sci->FreeListInfo.PoppedBlocks);
     }
 
     SizeClassInfo *Sci = getSizeClassInfo(SizeClassMap::BatchClassId);
     ScopedLock L1(Sci->Mutex);
     uptr TotalBlocks = 0;
-    for (BatchGroup &BG : Sci->FreeListInfo.BlockList)
-      for (const auto &It : BG.Batches)
-        TotalBlocks += It.getCount();
+    for (BatchGroup &BG : Sci->FreeListInfo.BlockList) {
+      if (LIKELY(!BG.Batches.empty())) {
+        for (const auto &It : BG.Batches)
+          TotalBlocks += It.getCount();
+      } else {
+        // A `BatchGroup` with an empty freelist doesn't have a `TransferBatch`
+        // recording its own block, so count that block here.
+        ++TotalBlocks;
+      }
+    }
 
     const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId);
     DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists,
               Sci->AllocatedUser / BlockSize);
+    const uptr BlocksInUse =
+        Sci->FreeListInfo.PoppedBlocks - Sci->FreeListInfo.PushedBlocks;
+    DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
   }
 
   CompactPtrT compactPtr(UNUSED uptr ClassId, uptr Ptr) const {
@@ -199,15 +210,7 @@ template <typename Config> class SizeClassAllocator32 {
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
     if (ClassId == SizeClassMap::BatchClassId) {
       ScopedLock L(Sci->Mutex);
-      // Constructing a batch group in the free list will use two blocks in
-      // BatchClassId. If we are pushing BatchClassId blocks, we will use the
-      // blocks in the array directly (can't delegate local cache which will
-      // cause a recursive allocation). However, The number of free blocks may
-      // be less than two. Therefore, populate the free list before inserting
-      // the blocks.
-      if (Size == 1 && !populateFreeList(C, ClassId, Sci))
-        return;
-      pushBlocksImpl(C, ClassId, Sci, Array, Size);
+      pushBatchClassBlocks(Sci, Array, Size);
       return;
     }
 
@@ -450,6 +453,116 @@ template <typename Config> class SizeClassAllocator32 {
     return &SizeClassInfoArray[ClassId];
   }
 
+  void pushBatchClassBlocks(SizeClassInfo *Sci, CompactPtrT *Array, u32 Size)
+      REQUIRES(Sci->Mutex) {
+    DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId));
+
+    // Free blocks are recorded by TransferBatch in the freelist of every
+    // size-class. In addition, TransferBatch itself is allocated from
+    // BatchClassId. In order not to use an additional block to record the free
+    // blocks of BatchClassId, they are self-contained, i.e., a TransferBatch
+    // records the address of its own block. See the figure below:
+    //
+    // TransferBatch at 0xABCD
+    // +----------------------------+
+    // | Free blocks' addr          |
+    // | +------+------+------+     |
+    // | |0xABCD|...   |...   |     |
+    // | +------+------+------+     |
+    // +----------------------------+
+    //
+    // When we allocate all the free blocks in a TransferBatch, the block used
+    // by the TransferBatch is also free for use. We don't need to recycle the
+    // TransferBatch. Note that correctness is maintained by the invariant,
+    //
+    //   The unit of each popBatch() request is an entire TransferBatch;
+    //   returning part of the blocks in a TransferBatch is invalid.
+    //
+    // This ensures that a TransferBatch won't leak its own address while it's
+    // still holding other valid data.
+    //
+    // Besides, the BatchGroup is also allocated from BatchClassId and has its
+    // address recorded in the TransferBatch too. To maintain correctness,
+    //
+    //   The address of the BatchGroup is always recorded in the last
+    //   TransferBatch in the freelist (which also implies that the freelist
+    //   should only be updated with push_front). Once the last TransferBatch
+    //   is popped, the block used by the BatchGroup is also free for use.
+    //
+    // With this approach, the blocks used by the BatchGroup and TransferBatch
+    // are reusable without requiring additional space for them.
+
+    Sci->FreeListInfo.PushedBlocks += Size;
+    BatchGroup *BG = Sci->FreeListInfo.BlockList.front();
+
+    if (BG == nullptr) {
+      // Construct `BatchGroup` on the last element.
+      BG = reinterpret_cast<BatchGroup *>(
+          decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
+      --Size;
+      BG->Batches.clear();
+      // BatchClass doesn't have the memory group enabled. Use `0` to indicate
+      // there's no memory group here.
+      BG->CompactPtrGroupBase = 0;
+      // `BG` is also the block of BatchClassId. Note that this is different
+      // from `CreateGroup` in `pushBlocksImpl`.
+      BG->PushedBlocks = 1;
+      BG->BytesInBGAtLastCheckpoint = 0;
+      BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
+          getSizeByClassId(SizeClassMap::BatchClassId));
+
+      Sci->FreeListInfo.BlockList.push_front(BG);
+    }
+
+    if (UNLIKELY(Size == 0))
+      return;
+
+    // This happens in two cases:
+    //   1. A new `BatchGroup` was just allocated.
+    //   2. Only one block was pushed when the freelist was empty.
+    if (BG->Batches.empty()) {
+      // Construct the `TransferBatch` on the last element.
+      TransferBatch *TB = reinterpret_cast<TransferBatch *>(
+          decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
+      TB->clear();
+      // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
+      // recorded in the TransferBatch.
+      TB->add(Array[Size - 1]);
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
+      --Size;
+      DCHECK_EQ(BG->PushedBlocks, 1U);
+      // `TB` is also the block of BatchClassId.
+      BG->PushedBlocks += 1;
+      BG->Batches.push_front(TB);
+    }
+
+    TransferBatch *CurBatch = BG->Batches.front();
+    DCHECK_NE(CurBatch, nullptr);
+
+    for (u32 I = 0; I < Size;) {
+      u16 UnusedSlots =
+          static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
+      if (UnusedSlots == 0) {
+        CurBatch = reinterpret_cast<TransferBatch *>(
+            decompactPtr(SizeClassMap::BatchClassId, Array[I]));
+        CurBatch->clear();
+        // Self-contained
+        CurBatch->add(Array[I]);
+        ++I;
+        // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of
+        // BatchClassId.
+        BG->Batches.push_front(CurBatch);
+        UnusedSlots = BG->MaxCachedPerBatch - 1;
+      }
+      // `UnusedSlots` is u16 so the result will also fit in u16.
+      const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
+      CurBatch->appendFromArray(&Array[I], AppendSize);
+      I += AppendSize;
+    }
+
+    BG->PushedBlocks += Size;
+  }
+
   // Push the blocks to their batch group. The layout will be like,
   //
   // FreeListInfo.BlockList - > BG -> BG -> BG
@@ -471,69 +584,16 @@ template <typename Config> class SizeClassAllocator32 {
   void pushBlocksImpl(CacheT *C, uptr ClassId, SizeClassInfo *Sci,
                       CompactPtrT *Array, u32 Size, bool SameGroup = false)
       REQUIRES(Sci->Mutex) {
+    DCHECK_NE(ClassId, SizeClassMap::BatchClassId);
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
-      BatchGroup *BG = nullptr;
-      TransferBatch *TB = nullptr;
-      if (ClassId == SizeClassMap::BatchClassId) {
-        DCHECK_GE(Size, 2U);
-
-        // Free blocks are recorded by TransferBatch in freelist, blocks of
-        // BatchClassId are included. In order not to use additional memory to
-        // record blocks of BatchClassId, they are self-contained. I.e., A
-        // TransferBatch may record the block address of itself. See the figure
-        // below:
-        //
-        // TransferBatch at 0xABCD
-        // +----------------------------+
-        // | Free blocks' addr          |
-        // | +------+------+------+     |
-        // | |0xABCD|...   |...   |     |
-        // | +------+------+------+     |
-        // +----------------------------+
-        //
-        // The safeness of manipulating TransferBatch is kept by the invariant,
-        //
-        //   The unit of each pop-block request is a TransferBatch. Return
-        //   part of the blocks in a TransferBatch is not allowed.
-        //
-        // This ensures that TransferBatch won't leak the address itself while
-        // it's still holding other valid data.
-        //
-        // Besides, BatchGroup uses the same size-class as TransferBatch does
-        // and its address is recorded in the TransferBatch too. To maintain the
-        // safeness, the invariant to keep is,
-        //
-        //   The address of itself is always recorded in the last TransferBatch
-        //   of the freelist (also imply that the freelist should only be
-        //   updated with push_front). Once the last TransferBatch is popped,
-        //   the BatchGroup becomes invalid.
-        //
-        // As a result, the blocks used by BatchGroup and TransferBatch are
-        // reusable and don't need additional space for them.
-        BG = reinterpret_cast<BatchGroup *>(
-            decompactPtr(ClassId, Array[Size - 1]));
-        BG->Batches.clear();
-
-        TB = reinterpret_cast<TransferBatch *>(
-            decompactPtr(ClassId, Array[Size - 2]));
-        TB->clear();
-
-        // Append the blocks used by BatchGroup and TransferBatch immediately so
-        // that we ensure that they are in the last TransBatch.
-        TB->appendFromArray(Array + Size - 2, 2);
-        Size -= 2;
-      } else {
-        BG = C->createGroup();
-        BG->Batches.clear();
-
-        TB = C->createBatch(ClassId, nullptr);
-        TB->clear();
-      }
+      BatchGroup *BG = C->createGroup();
+      BG->Batches.clear();
+      TransferBatch *TB = C->createBatch(ClassId, nullptr);
+      TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
-      // TODO(chiahungduan): Avoid the use of push_back() in `Batches`.
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
@@ -572,16 +632,6 @@ template <typename Config> class SizeClassAllocator32 {
     Sci->FreeListInfo.PushedBlocks += Size;
     BatchGroup *Cur = Sci->FreeListInfo.BlockList.front();
 
-    if (ClassId == SizeClassMap::BatchClassId) {
-      if (Cur == nullptr) {
-        // Don't need to classify BatchClassId.
-        Cur = CreateGroup(/*CompactPtrGroupBase=*/0);
-        Sci->FreeListInfo.BlockList.push_front(Cur);
-      }
-      InsertBlocks(Cur, Array, Size);
-      return;
-    }
-
     // In the following, `Cur` always points to the BatchGroup for blocks that
     // will be pushed next. `Prev` is the element right before `Cur`.
     BatchGroup *Prev = nullptr;
@@ -652,7 +702,21 @@ template <typename Config> class SizeClassAllocator32 {
 
     SinglyLinkedList<TransferBatch> &Batches =
         Sci->FreeListInfo.BlockList.front()->Batches;
-    DCHECK(!Batches.empty());
+
+    if (Batches.empty()) {
+      DCHECK_EQ(ClassId, SizeClassMap::BatchClassId);
+      BatchGroup *BG = Sci->FreeListInfo.BlockList.front();
+      Sci->FreeListInfo.BlockList.pop_front();
+
+      // The block used by `BatchGroup` is from BatchClassId. Turn the block
+      // into a `TransferBatch` with a single block.
+      TransferBatch *TB = reinterpret_cast<TransferBatch *>(BG);
+      TB->clear();
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
+      Sci->FreeListInfo.PoppedBlocks += 1;
+      return TB;
+    }
 
     TransferBatch *B = Batches.front();
     Batches.pop_front();
@@ -742,8 +806,7 @@ template <typename Config> class SizeClassAllocator32 {
       pushBlocksImpl(C, ClassId, Sci, &ShuffleArray[NumberOfBlocks - N], N,
                      /*SameGroup=*/true);
     } else {
-      pushBlocksImpl(C, ClassId, Sci, ShuffleArray, NumberOfBlocks,
-                     /*SameGroup=*/true);
+      pushBatchClassBlocks(Sci, ShuffleArray, NumberOfBlocks);
     }
 
     // Note that `PushedBlocks` and `PoppedBlocks` are supposed to only record
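
The pop side mirrors this: the new empty-`Batches` case in `popBatchImpl`
above turns the `BatchGroup` header block itself into a single-block
`TransferBatch` once every batch has been handed out. A hedged sketch,
reusing the hypothetical structs from the sketch before the diff:

    // Sketch only: when the batch list is empty, the BatchGroup header is
    // the last remaining BatchClassId block; hand it out as a one-element,
    // self-contained TransferBatch.
    TransferBatch *popWhenBatchesEmpty(BatchGroup *&FreeListFront) {
      BatchGroup *BG = FreeListFront;
      FreeListFront = nullptr; // pop_front of a one-element block list
      TransferBatch *TB = reinterpret_cast<TransferBatch *>(BG);
      TB->Count = 0;
      TB->add(reinterpret_cast<uintptr_t>(TB)); // records its own address
      return TB;
    }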

diff  --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index dd58ebabba0b30..df4e8931c6cb94 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -173,6 +173,8 @@ template <typename Config> class SizeClassAllocator64 {
       }
 
       DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize);
+      DCHECK_EQ(Region->FreeListInfo.PushedBlocks,
+                Region->FreeListInfo.PoppedBlocks);
     }
 
     RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId);
@@ -180,11 +182,23 @@ template <typename Config> class SizeClassAllocator64 {
     ScopedLock FL(Region->FLLock);
     const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId);
     uptr TotalBlocks = 0;
-    for (BatchGroup &BG : Region->FreeListInfo.BlockList)
-      for (const auto &It : BG.Batches)
-        TotalBlocks += It.getCount();
+    for (BatchGroup &BG : Region->FreeListInfo.BlockList) {
+      if (LIKELY(!BG.Batches.empty())) {
+        for (const auto &It : BG.Batches)
+          TotalBlocks += It.getCount();
+      } else {
+        // A `BatchGroup` with an empty freelist doesn't have a `TransferBatch`
+        // recording its own block, so count that block here.
+        ++TotalBlocks;
+      }
+    }
     DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists,
               Region->MemMapInfo.AllocatedUser / BlockSize);
+    DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
+              Region->FreeListInfo.PushedBlocks);
+    const uptr BlocksInUse =
+        Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
+    DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
   }
 
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
@@ -232,6 +246,11 @@ template <typename Config> class SizeClassAllocator64 {
           "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
           RegionSize >> 20, getSizeByClassId(ClassId));
       Str.output();
+
+      // Theoretically, BatchClass shouldn't be used up. Abort immediately when
+      // it happens.
+      if (ClassId == SizeClassMap::BatchClassId)
+        reportOutOfBatchClass();
     }
 
     return B;
@@ -244,68 +263,8 @@ template <typename Config> class SizeClassAllocator64 {
 
     RegionInfo *Region = getRegionInfo(ClassId);
     if (ClassId == SizeClassMap::BatchClassId) {
-      // Constructing a batch group in the free list will use two blocks in
-      // BatchClassId. If we are pushing BatchClassId blocks, we will use the
-      // blocks in the array directly (can't delegate local cache which will
-      // cause a recursive allocation). However, The number of free blocks may
-      // be less than two. Therefore, populate the freelist before inserting the
-      // blocks.
-      TransferBatch *B = nullptr;
-      while (true) {
-        // TODO(chiahungduan): Move the lock right before the call of
-        // populateFreeListAndPopBatch() by using condition variable. See more
-        // details in the comment of popBatch().
-        ScopedLock L(Region->MMLock);
-        {
-          ScopedLock L(Region->FLLock);
-          const bool NeedToRefill = Size == 1U &&
-                                    Region->FreeListInfo.BlockList.empty() &&
-                                    B == nullptr;
-          if (UNLIKELY(!NeedToRefill)) {
-            if (UNLIKELY(B)) {
-              // Even though we always populate the blocks with the number which
-              // is multiple of TransferBatch::getMaxCached() , the top
-              // `TransferBatch` from the freelist may still have fewer elements
-              // than the size of TransferBatch::getMaxCached() because we
-              // always fill up the top `TransferBatch` first when it's not
-              // full.
-              // When this happens, simply push the block in `TransferBatch` and
-              // `Array` together.
-              if (UNLIKELY(B->getCount() == 1)) {
-                DCHECK_EQ(Size, 1U);
-                B->appendFromArray(Array, 1U);
-                Size = 0;
-              }
-              pushBlocksImpl(C, SizeClassMap::BatchClassId, Region,
-                             B->getRawArray(), B->getCount());
-              CHECK(!Region->FreeListInfo.BlockList.empty());
-              if (Size == 0)
-                return;
-            }
-            pushBlocksImpl(C, SizeClassMap::BatchClassId, Region, Array, Size);
-            return;
-          }
-        }
-
-        // Note that this batch will be pushed again and it's only used to
-        // ensure we have enough blocks to construct batch group.
-        B = populateFreeListAndPopBatch(C, SizeClassMap::BatchClassId, Region);
-        if (!B)
-          break;
-      }
-
-      // At here, it hints that the BatchClass region has been used up. Dump the
-      // region stats before the program shutdown.
-      ScopedString Str;
-      getStats(&Str);
-      Str.append(
-          "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
-          RegionSize >> 20, getSizeByClassId(ClassId));
-      Str.output();
-      // Theoretically, BatchClass shouldn't be used up. Abort immediately
-      // when it happens.
-      reportOutOfBatchClass();
-
+      ScopedLock L(Region->FLLock);
+      pushBatchClassBlocks(Region, Array, Size);
       return;
     }
 
@@ -606,6 +565,117 @@ template <typename Config> class SizeClassAllocator64 {
     return BlockSize > PageSize;
   }
 
+  void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size)
+      REQUIRES(Region->FLLock) {
+    DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId));
+
+    // Free blocks are recorded by TransferBatch in the freelist of every
+    // size-class. In addition, TransferBatch itself is allocated from
+    // BatchClassId. In order not to use an additional block to record the free
+    // blocks of BatchClassId, they are self-contained, i.e., a TransferBatch
+    // records the address of its own block. See the figure below:
+    //
+    // TransferBatch at 0xABCD
+    // +----------------------------+
+    // | Free blocks' addr          |
+    // | +------+------+------+     |
+    // | |0xABCD|...   |...   |     |
+    // | +------+------+------+     |
+    // +----------------------------+
+    //
+    // When we allocate all the free blocks in a TransferBatch, the block used
+    // by the TransferBatch is also free for use. We don't need to recycle the
+    // TransferBatch. Note that correctness is maintained by the invariant,
+    //
+    //   The unit of each popBatch() request is an entire TransferBatch;
+    //   returning part of the blocks in a TransferBatch is invalid.
+    //
+    // This ensures that a TransferBatch won't leak its own address while it's
+    // still holding other valid data.
+    //
+    // Besides, the BatchGroup is also allocated from BatchClassId and has its
+    // address recorded in the TransferBatch too. To maintain correctness,
+    //
+    //   The address of the BatchGroup is always recorded in the last
+    //   TransferBatch in the freelist (which also implies that the freelist
+    //   should only be updated with push_front). Once the last TransferBatch
+    //   is popped, the block used by the BatchGroup is also free for use.
+    //
+    // With this approach, the blocks used by the BatchGroup and TransferBatch
+    // are reusable without requiring additional space for them.
+
+    Region->FreeListInfo.PushedBlocks += Size;
+    BatchGroup *BG = Region->FreeListInfo.BlockList.front();
+
+    if (BG == nullptr) {
+      // Construct `BatchGroup` on the last element.
+      BG = reinterpret_cast<BatchGroup *>(
+          decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
+      --Size;
+      BG->Batches.clear();
+      // BatchClass doesn't have the memory group enabled. Use `0` to indicate
+      // there's no memory group here.
+      BG->CompactPtrGroupBase = 0;
+      // `BG` is also the block of BatchClassId. Note that this is different
+      // from `CreateGroup` in `pushBlocksImpl`.
+      BG->PushedBlocks = 1;
+      BG->BytesInBGAtLastCheckpoint = 0;
+      BG->MaxCachedPerBatch = TransferBatch::getMaxCached(
+          getSizeByClassId(SizeClassMap::BatchClassId));
+
+      Region->FreeListInfo.BlockList.push_front(BG);
+    }
+
+    if (UNLIKELY(Size == 0))
+      return;
+
+    // This happens in two cases:
+    //   1. A new `BatchGroup` was just allocated.
+    //   2. Only one block was pushed when the freelist was empty.
+    if (BG->Batches.empty()) {
+      // Construct the `TransferBatch` on the last element.
+      TransferBatch *TB = reinterpret_cast<TransferBatch *>(
+          decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
+      TB->clear();
+      // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
+      // recorded in the TransferBatch.
+      TB->add(Array[Size - 1]);
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
+      --Size;
+      DCHECK_EQ(BG->PushedBlocks, 1U);
+      // `TB` is also the block of BatchClassId.
+      BG->PushedBlocks += 1;
+      BG->Batches.push_front(TB);
+    }
+
+    TransferBatch *CurBatch = BG->Batches.front();
+    DCHECK_NE(CurBatch, nullptr);
+
+    for (u32 I = 0; I < Size;) {
+      u16 UnusedSlots =
+          static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
+      if (UnusedSlots == 0) {
+        CurBatch = reinterpret_cast<TransferBatch *>(
+            decompactPtr(SizeClassMap::BatchClassId, Array[I]));
+        CurBatch->clear();
+        // Self-contained
+        CurBatch->add(Array[I]);
+        ++I;
+        // TODO(chiahungduan): Avoid the use of push_back() in `Batches` of
+        // BatchClassId.
+        BG->Batches.push_front(CurBatch);
+        UnusedSlots = BG->MaxCachedPerBatch - 1;
+      }
+      // `UnusedSlots` is u16 so the result will also fit in u16.
+      const u16 AppendSize = static_cast<u16>(Min<u32>(UnusedSlots, Size - I));
+      CurBatch->appendFromArray(&Array[I], AppendSize);
+      I += AppendSize;
+    }
+
+    BG->PushedBlocks += Size;
+  }
+
   // Push the blocks to their batch group. The layout will be like,
   //
   // FreeListInfo.BlockList - > BG -> BG -> BG
@@ -622,74 +692,19 @@ template <typename Config> class SizeClassAllocator64 {
   // that we can get better performance of maintaining sorted property.
   // Use `SameGroup=true` to indicate that all blocks in the array are from the
   // same group then we will skip checking the group id of each block.
-  //
-  // The region mutex needs to be held while calling this method.
   void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
                       CompactPtrT *Array, u32 Size, bool SameGroup = false)
       REQUIRES(Region->FLLock) {
+    DCHECK_NE(ClassId, SizeClassMap::BatchClassId);
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
-      BatchGroup *BG = nullptr;
-      TransferBatch *TB = nullptr;
-      if (ClassId == SizeClassMap::BatchClassId) {
-        DCHECK_GE(Size, 2U);
-
-        // Free blocks are recorded by TransferBatch in freelist, blocks of
-        // BatchClassId are included. In order not to use additional memory to
-        // record blocks of BatchClassId, they are self-contained. I.e., A
-        // TransferBatch may record the block address of itself. See the figure
-        // below:
-        //
-        // TransferBatch at 0xABCD
-        // +----------------------------+
-        // | Free blocks' addr          |
-        // | +------+------+------+     |
-        // | |0xABCD|...   |...   |     |
-        // | +------+------+------+     |
-        // +----------------------------+
-        //
-        // The safeness of manipulating TransferBatch is kept by the invariant,
-        //
-        //   The unit of each pop-block request is a TransferBatch. Return
-        //   part of the blocks in a TransferBatch is not allowed.
-        //
-        // This ensures that TransferBatch won't leak the address itself while
-        // it's still holding other valid data.
-        //
-        // Besides, BatchGroup uses the same size-class as TransferBatch does
-        // and its address is recorded in the TransferBatch too. To maintain the
-        // safeness, the invariant to keep is,
-        //
-        //   The address of itself is always recorded in the last TransferBatch
-        //   of the freelist (also imply that the freelist should only be
-        //   updated with push_front). Once the last TransferBatch is popped,
-        //   the BatchGroup becomes invalid.
-        //
-        // As a result, the blocks used by BatchGroup and TransferBatch are
-        // reusable and don't need additional space for them.
-        BG = reinterpret_cast<BatchGroup *>(
-            decompactPtr(ClassId, Array[Size - 1]));
-        BG->Batches.clear();
-
-        TB = reinterpret_cast<TransferBatch *>(
-            decompactPtr(ClassId, Array[Size - 2]));
-        TB->clear();
-
-        // Append the blocks used by BatchGroup and TransferBatch immediately so
-        // that we ensure that they are in the last TransBatch.
-        TB->appendFromArray(Array + Size - 2, 2);
-        Size -= 2;
-      } else {
-        BG = C->createGroup();
-        BG->Batches.clear();
-
-        TB = C->createBatch(ClassId, nullptr);
-        TB->clear();
-      }
+      BatchGroup *BG = C->createGroup();
+      BG->Batches.clear();
+      TransferBatch *TB = C->createBatch(ClassId, nullptr);
+      TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
-      // TODO(chiahungduan): Avoid the use of push_back() in `Batches`.
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
@@ -808,7 +823,21 @@ template <typename Config> class SizeClassAllocator64 {
 
     SinglyLinkedList<TransferBatch> &Batches =
         Region->FreeListInfo.BlockList.front()->Batches;
-    DCHECK(!Batches.empty());
+
+    if (Batches.empty()) {
+      DCHECK_EQ(ClassId, SizeClassMap::BatchClassId);
+      BatchGroup *BG = Region->FreeListInfo.BlockList.front();
+      Region->FreeListInfo.BlockList.pop_front();
+
+      // The block used by `BatchGroup` is from BatchClassId. Turn the block
+      // into a `TransferBatch` with a single block.
+      TransferBatch *TB = reinterpret_cast<TransferBatch *>(BG);
+      TB->clear();
+      TB->add(
+          compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB)));
+      Region->FreeListInfo.PoppedBlocks += 1;
+      return TB;
+    }
 
     TransferBatch *B = Batches.front();
     Batches.pop_front();
@@ -904,8 +933,7 @@ template <typename Config> class SizeClassAllocator64 {
       pushBlocksImpl(C, ClassId, Region, &ShuffleArray[NumberOfBlocks - N], N,
                      /*SameGroup=*/true);
     } else {
-      pushBlocksImpl(C, ClassId, Region, ShuffleArray, NumberOfBlocks,
-                     /*SameGroup=*/true);
+      pushBatchClassBlocks(Region, ShuffleArray, NumberOfBlocks);
     }
 
     TransferBatch *B = popBatchImpl(C, ClassId, Region);
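
As a worked illustration of the accounting DCHECKs added in both files (all
numbers made up): suppose the BatchClass region has carved out 10 blocks,
the freelist walk counts TotalBlocks = 6 free blocks (including the header
block of any group with an empty batch list), and BatchClassUsedInFreeLists
= 4 blocks are currently serving as live BatchGroup/TransferBatch headers
in other size classes' freelists. Then

    TotalBlocks + BatchClassUsedInFreeLists = 6 + 4 = 10
                                            = AllocatedUser / BlockSize
    BlocksInUse = PoppedBlocks - PushedBlocks = 4
                = BatchClassUsedInFreeLists

i.e. every BatchClassId block is either free in its own freelist or in use
as a header elsewhere, which is exactly what the new checks assert.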


        

