[compiler-rt] [scudo] Add EnableMultiRegions mode (PR #98076)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 13 21:06:42 PDT 2025


https://github.com/ChiaHungDuan updated https://github.com/llvm/llvm-project/pull/98076

>From c57763dbdcee3cbabfa0c530639a83892ee59be2 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Thu, 4 Jul 2024 22:57:25 +0000
Subject: [PATCH 1/6] [scudo] Add support for assigning the alignment in
 ReservedMemory
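
As a rough, self-contained sketch (not scudo code; `reserveAligned` and its
parameters are illustrative only), the new ReservedMemoryLinux::createImpl in
this patch follows an over-reserve-and-trim scheme: reserve `Size + Alignment`
bytes with no access permissions, unmap the unaligned head, then unmap the
unused tail. The sketch below restates that logic with plain mmap/munmap:

#include <sys/mman.h>
#include <cassert>
#include <cstddef>
#include <cstdint>

// Reserve Size bytes aligned to Alignment (a multiple of PageSize), with no
// access permissions, mirroring the MAP_NOACCESS reservation in the patch.
void *reserveAligned(size_t Size, size_t Alignment, size_t PageSize) {
  // Over-reserve by Alignment so an aligned sub-range of Size bytes must exist.
  const size_t MapSize = Size + (Alignment != PageSize ? Alignment : 0);
  void *Map = mmap(nullptr, MapSize, PROT_NONE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (Map == MAP_FAILED)
    return nullptr;
  uintptr_t Base = reinterpret_cast<uintptr_t>(Map);
  if (Alignment != PageSize) {
    // Trim the unaligned head ...
    uintptr_t Offset = Base % Alignment;
    if (Offset != 0) {
      Offset = Alignment - Offset;
      munmap(reinterpret_cast<void *>(Base), Offset);
      Base += Offset;
    }
    // ... and the unused tail, leaving exactly Size aligned bytes reserved.
    munmap(reinterpret_cast<void *>(Base + Size), MapSize - Offset - Size);
  }
  assert(Base % Alignment == 0);
  return reinterpret_cast<void *>(Base);
}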

---
 .../lib/scudo/standalone/mem_map_base.h       |  6 ++++--
 .../lib/scudo/standalone/mem_map_fuchsia.cpp  |  4 +++-
 .../lib/scudo/standalone/mem_map_fuchsia.h    |  3 ++-
 .../lib/scudo/standalone/mem_map_linux.cpp    | 19 +++++++++++++++++--
 .../lib/scudo/standalone/mem_map_linux.h      |  3 ++-
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/mem_map_base.h b/compiler-rt/lib/scudo/standalone/mem_map_base.h
index 99ab0cba604fc..f4261f035d778 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_base.h
+++ b/compiler-rt/lib/scudo/standalone/mem_map_base.h
@@ -93,9 +93,11 @@ template <class Derived, typename MemMapTy> class ReservedMemory {
   constexpr ReservedMemory() = default;
 
   // Reserve a chunk of memory at a suggested address.
-  bool create(uptr Addr, uptr Size, const char *Name, uptr Flags = 0) {
+  bool create(uptr Addr, uptr Size, const char *Name, uptr Flags = 0,
+              uptr Alignment = getPageSizeCached()) {
     DCHECK(!isCreated());
-    return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags);
+    DCHECK_EQ(Alignment % getPageSizeCached(), 0U);
+    return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags, Alignment);
   }
 
   // Release the entire reserved memory.
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp
index fc793abf44cda..dca6c717519e3 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp
+++ b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.cpp
@@ -227,7 +227,9 @@ void MemMapFuchsia::setMemoryPermissionImpl(uptr Addr, uptr Size, uptr Flags) {
 }
 
 bool ReservedMemoryFuchsia::createImpl(UNUSED uptr Addr, uptr Size,
-                                       UNUSED const char *Name, uptr Flags) {
+                                       UNUSED const char *Name, uptr Flags,
+                                       UNUSED uptr Alignment) {
+  // TODO: Add support for alignment.
   const bool AllowNoMem = !!(Flags & MAP_ALLOWNOMEM);
 
   // Reserve memory by mapping the placeholder VMO without any permission.
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h
index 2e66f89cfca55..3adab733645c4 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h
+++ b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h
@@ -57,7 +57,8 @@ class ReservedMemoryFuchsia final
 public:
   constexpr ReservedMemoryFuchsia() = default;
 
-  bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags);
+  bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags,
+                  uptr Alignment);
   void releaseImpl();
   MemMapT dispatchImpl(uptr Addr, uptr Size);
   uptr getBaseImpl() { return Base; }
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp b/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp
index 783c4f0d9ab0f..943b528e379d0 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp
+++ b/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp
@@ -127,11 +127,26 @@ void MemMapLinux::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) {
 }
 
 bool ReservedMemoryLinux::createImpl(uptr Addr, uptr Size, const char *Name,
-                                     uptr Flags) {
+                                     uptr Flags, uptr Alignment) {
   ReservedMemoryLinux::MemMapT MemMap;
-  if (!MemMap.map(Addr, Size, Name, Flags | MAP_NOACCESS))
+  uptr MapSize = Size;
+  if (Alignment != getPageSizeCached())
+    MapSize += Alignment;
+  if (!MemMap.map(Addr, MapSize, Name, Flags | MAP_NOACCESS))
     return false;
 
+  if (Alignment != getPageSizeCached()) {
+    uptr Offset = MemMap.getBase() % Alignment;
+    if (Offset != 0) {
+      Offset = Alignment - Offset;
+      MemMap.unmap(MemMap.getBase(), Offset);
+    }
+    MemMap.unmap(MemMap.getBase() + Size, MemMap.getCapacity() - Size);
+  }
+
+  DCHECK_EQ(MemMap.getBase() % Alignment, 0);
+  DCHECK_EQ(MemMap.getCapacity(), Size);
+
   MapBase = MemMap.getBase();
   MapCapacity = MemMap.getCapacity();
 
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_linux.h b/compiler-rt/lib/scudo/standalone/mem_map_linux.h
index 7a89b3bff5ed1..9f61d8d1f47ef 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_linux.h
+++ b/compiler-rt/lib/scudo/standalone/mem_map_linux.h
@@ -51,7 +51,8 @@ class ReservedMemoryLinux final
   uptr getCapacityImpl() { return MapCapacity; }
 
   // These threes are specific to `ReservedMemory`.
-  bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags);
+  bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags,
+                  uptr Alignment);
   void releaseImpl();
   MemMapT dispatchImpl(uptr Addr, uptr Size);
 

>From 70bfda98a6ed9da18bbda33c8431601d34dc7dc0 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Tue, 30 Apr 2024 18:14:42 +0000
Subject: [PATCH 2/6] [scudo] Add EnableMultiRegions mode

Instead of having a single region per size class, this mode creates
additional regions when the current one is exhausted. This reduces
fragmentation when a region is exhausted and gives finer granularity in
choosing the size classes. However, it inevitably introduces some
performance overhead because several regions have to be managed for a
single size class. Currently, this is an experimental option and some
performance turbulence is expected.

See more details and constraints in allocator_config.def.
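
For reference, an allocator config opting into this mode looks roughly like
the sketch below. It is modeled on the MultiRegionsConfig test config added
in this patch; the struct name and concrete values are only an example, not a
recommendation:

// Illustrative primary config for the multi-regions mode; values mirror the
// MultiRegionsConfig used in primary_test.cpp.
struct ExampleMultiRegionsConfig {
  static const bool MaySupportMemoryTagging = false;
  template <typename> using TSDRegistryT = void;
  template <typename> using PrimaryT = void;
  template <typename> using SecondaryT = void;

  struct Primary {
    using SizeClassMap = scudo::DefaultSizeClassMap;
    // Opt in to the experimental mode.
    static const bool EnableMultiRegions = true;
    // Constraint (b): no random offset, since the region base must stay
    // GroupSize-aligned when pointer compaction is disabled.
    static const bool EnableRandomOffset = false;
    static const scudo::uptr RegionSizeLog = 26U;
    static const scudo::uptr GroupSizeLog = 18U;
    static const scudo::uptr MapSizeIncrement = 1UL << 18;
    static const scudo::s32 MinReleaseToOsIntervalMs = 1000;
    static const scudo::s32 MaxReleaseToOsIntervalMs = 1000;
  };
};
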
---
 .../lib/scudo/standalone/allocator_config.def |  14 +
 compiler-rt/lib/scudo/standalone/primary32.h  |   2 +-
 compiler-rt/lib/scudo/standalone/primary64.h  | 644 ++++++++++++++----
 compiler-rt/lib/scudo/standalone/release.h    |   6 +-
 .../scudo/standalone/tests/combined_test.cpp  |  36 +-
 .../scudo/standalone/tests/primary_test.cpp   |  27 +-
 6 files changed, 602 insertions(+), 127 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.def b/compiler-rt/lib/scudo/standalone/allocator_config.def
index ce37b1cfaedcc..a14541f519f7c 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_config.def
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.def
@@ -84,6 +84,20 @@ PRIMARY_OPTIONAL(const uptr, CompactPtrScale, SCUDO_MIN_ALIGNMENT_LOG)
 // Indicates support for offsetting the start of a region by a random number of
 // pages. This is only used if `EnableContiguousRegions` is enabled.
 PRIMARY_OPTIONAL(const bool, EnableRandomOffset, false)
+
+// This allows each size class to have multiple regions instead of one. Note
+// that this is an experimental option so it has a few constraints while using.
+//   a. Pointer compaction is diabled. Which means `CompactPtrT` needs to be the
+//      pointer integral type, i.e., uptr.
+//   b. `EnableRandomOffset` needs to be false. Pointer grouping requires
+//      the beginning of allocation address of a region to be aligned with
+//      `GroupSizeLog`. Without pointer compaction, it relies the region to be
+//      allocated with proper alignment and the random offset will break the
+//      assumption.
+//   c. Condition variable is not supported under this mode. This is still under
+//      developing.
+PRIMARY_OPTIONAL(const bool, EnableMultiRegions, false)
+
 PRIMARY_OPTIONAL(const s32, DefaultReleaseToOsIntervalMs, INT32_MIN)
 
 // When `EnableContiguousRegions` is true, all regions will be be arranged in
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index ebfb8dfe0a31f..57b762d1baf77 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -365,7 +365,7 @@ template <typename Config> class SizeClassAllocator32 {
   }
 
   const char *getRegionInfoArrayAddress() const { return nullptr; }
-  static uptr getRegionInfoArraySize() { return 0; }
+  uptr getRegionInfoArraySize() { return 0; }
 
   static BlockInfo findNearestBlock(UNUSED const char *RegionInfoData,
                                     UNUSED uptr Ptr) {
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 8a583bacb4a93..37b3ec2ddddb3 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -56,6 +56,17 @@ template <typename Config> class SizeClassAllocator64 {
   static_assert(RegionSizeLog >= GroupSizeLog,
                 "Group size shouldn't be greater than the region size");
   static const uptr GroupScale = GroupSizeLog - CompactPtrScale;
+  // The local cache stores pointers in their compacted form, where the
+  // compaction is done by computing the offset from the base address of a
+  // region. Currently, we don't support decompacting across multiple regions
+  // because of performance concerns, so the pointer compaction is disabled
+  // in this mode.
+  // TODO(chiahungduan): Let the local cache store raw pointers while keeping
+  // the compacted pointers in each region to save memory.
+  static const bool DisablePtrCompaction = Config::getEnableMultiRegions();
+  static_assert(!DisablePtrCompaction || sizeof(CompactPtrT) == sizeof(uptr),
+                "Pointer compaction is disabled, `CompactPtrT` needs to be the "
+                "same size of `uptr`");
   typedef SizeClassAllocator64<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef TransferBatch<ThisT> TransferBatchT;
@@ -117,35 +128,45 @@ template <typename Config> class SizeClassAllocator64 {
     SmallerBlockReleasePageDelta =
         PagesInGroup * (1 + MinSizeClass / 16U) / 100;
 
+    RegionInfoManager.init(RegionInfoAllocator);
+
     u32 Seed;
     const u64 Time = getMonotonicTimeFast();
     if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
       Seed = static_cast<u32>(Time ^ (reinterpret_cast<uptr>(&Seed) >> 12));
 
     for (uptr I = 0; I < NumClasses; I++)
-      getRegionInfo(I)->RandState = getRandomU32(&Seed);
+      RegionInfoManager.getCurRegionInfo(I)->RandState = getRandomU32(&Seed);
 
     if (Config::getEnableContiguousRegions()) {
       ReservedMemoryT ReservedMemory = {};
+      // Block grouping requires the base address of a Region to be aligned
+      // with GroupSize. Pointers are compacted by their offset from the base
+      // of a region, so that requirement is always met. As a result, when
+      // compaction is disabled, we rely on the base address being aligned.
+      const uptr Alignment =
+          DisablePtrCompaction ? (1UL << GroupSizeLog) : PageSize;
       // Reserve the space required for the Primary.
       CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses,
-                                  "scudo:primary_reserve"));
+                                  "scudo:primary_reserve", /*Flag=*/0,
+                                  Alignment));
       const uptr PrimaryBase = ReservedMemory.getBase();
 
       for (uptr I = 0; I < NumClasses; I++) {
         MemMapT RegionMemMap = ReservedMemory.dispatch(
             PrimaryBase + (I << RegionSizeLog), RegionSize);
-        RegionInfo *Region = getRegionInfo(I);
+        RegionInfo *Region = RegionInfoManager.getCurRegionInfo(I);
 
         initRegion(Region, I, RegionMemMap, Config::getEnableRandomOffset());
       }
-      shuffle(RegionInfoArray, NumClasses, &Seed);
+      RegionInfoManager.shuffle(&Seed);
     }
 
     // The binding should be done after region shuffling so that it won't bind
     // the FLLock from the wrong region.
     for (uptr I = 0; I < NumClasses; I++)
-      getRegionInfo(I)->FLLockCV.bindTestOnly(getRegionInfo(I)->FLLock);
+      RegionInfoManager.getCurRegionInfo(I)->FLLockCV.bindTestOnly(
+          RegionInfoManager.getCurRegionInfo(I)->FLLock);
 
     // The default value in the primary config has the higher priority.
     if (Config::getDefaultReleaseToOsIntervalMs() != INT32_MIN)
@@ -155,82 +176,111 @@ template <typename Config> class SizeClassAllocator64 {
 
   void unmapTestOnly() {
     for (uptr I = 0; I < NumClasses; I++) {
-      RegionInfo *Region = getRegionInfo(I);
-      {
-        ScopedLock ML(Region->MMLock);
-        MemMapT MemMap = Region->MemMapInfo.MemMap;
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(I);
+      do {
+        ScopedLock ML(RegionInfoIter->MMLock);
+        MemMapT MemMap = RegionInfoIter->MemMapInfo.MemMap;
         if (MemMap.isAllocated())
           MemMap.unmap(MemMap.getBase(), MemMap.getCapacity());
-      }
-      *Region = {};
+        RegionInfo *OldRegion = RegionInfoIter.get();
+        ++RegionInfoIter;
+        *OldRegion = {};
+      } while (!RegionInfoIter.end());
     }
   }
 
   // When all blocks are freed, it has to be the same size as `AllocatedUser`.
   void verifyAllBlocksAreReleasedTestOnly() {
+    uptr NumRegionInfo = 0;
+    // TODO: Verify that all pointers belong to the right region
     // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass.
     uptr BatchClassUsedInFreeLists = 0;
     for (uptr I = 0; I < NumClasses; I++) {
       // We have to count BatchClassUsedInFreeLists in other regions first.
       if (I == SizeClassMap::BatchClassId)
         continue;
-      RegionInfo *Region = getRegionInfo(I);
-      ScopedLock ML(Region->MMLock);
-      ScopedLock FL(Region->FLLock);
-      const uptr BlockSize = getSizeByClassId(I);
-      uptr TotalBlocks = 0;
-      for (BatchGroupT &BG : Region->FreeListInfo.BlockList) {
-        // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
-        BatchClassUsedInFreeLists += BG.Batches.size() + 1;
-        for (const auto &It : BG.Batches)
-          TotalBlocks += It.getCount();
-      }
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(I);
+
+      do {
+        ++NumRegionInfo;
+
+        ScopedLock ML(RegionInfoIter->MMLock);
+        ScopedLock FL(RegionInfoIter->FLLock);
+        const uptr BlockSize = getSizeByClassId(I);
+        uptr TotalBlocks = 0;
+        for (BatchGroupT &BG : RegionInfoIter->FreeListInfo.BlockList) {
+          // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
+          BatchClassUsedInFreeLists += BG.Batches.size() + 1;
+          for (const auto &It : BG.Batches)
+            TotalBlocks += It.getCount();
+        }
 
-      DCHECK_EQ(TotalBlocks, Region->MemMapInfo.AllocatedUser / BlockSize);
-      DCHECK_EQ(Region->FreeListInfo.PushedBlocks,
-                Region->FreeListInfo.PoppedBlocks);
+        DCHECK_EQ(TotalBlocks,
+                  RegionInfoIter->MemMapInfo.AllocatedUser / BlockSize);
+        DCHECK_EQ(RegionInfoIter->FreeListInfo.PushedBlocks,
+                  RegionInfoIter->FreeListInfo.PoppedBlocks);
+
+        ++RegionInfoIter;
+      } while (!RegionInfoIter.end());
     }
 
-    RegionInfo *Region = getRegionInfo(SizeClassMap::BatchClassId);
-    ScopedLock ML(Region->MMLock);
-    ScopedLock FL(Region->FLLock);
-    const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId);
-    uptr TotalBlocks = 0;
-    for (BatchGroupT &BG : Region->FreeListInfo.BlockList) {
-      if (LIKELY(!BG.Batches.empty())) {
-        for (const auto &It : BG.Batches)
-          TotalBlocks += It.getCount();
-      } else {
-        // `BatchGroup` with empty freelist doesn't have `TransferBatch` record
-        // itself.
-        ++TotalBlocks;
+    auto RegionInfoIter =
+        RegionInfoManager.getRegionInfoIter(SizeClassMap::BatchClassId);
+
+    do {
+      ++NumRegionInfo;
+
+      ScopedLock ML(RegionInfoIter->MMLock);
+      ScopedLock FL(RegionInfoIter->FLLock);
+      const uptr BlockSize = getSizeByClassId(SizeClassMap::BatchClassId);
+      uptr TotalBlocks = 0;
+      for (BatchGroupT &BG : RegionInfoIter->FreeListInfo.BlockList) {
+        if (LIKELY(!BG.Batches.empty())) {
+          for (const auto &It : BG.Batches)
+            TotalBlocks += It.getCount();
+        } else {
+          // `BatchGroup` with empty freelist doesn't have `TransferBatch`
+          // record itself.
+          ++TotalBlocks;
+        }
       }
-    }
-    DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists,
-              Region->MemMapInfo.AllocatedUser / BlockSize);
-    DCHECK_GE(Region->FreeListInfo.PoppedBlocks,
-              Region->FreeListInfo.PushedBlocks);
-    const uptr BlocksInUse =
-        Region->FreeListInfo.PoppedBlocks - Region->FreeListInfo.PushedBlocks;
-    DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
+      DCHECK_EQ(TotalBlocks + BatchClassUsedInFreeLists,
+                RegionInfoIter->MemMapInfo.AllocatedUser / BlockSize);
+      DCHECK_GE(RegionInfoIter->FreeListInfo.PoppedBlocks,
+                RegionInfoIter->FreeListInfo.PushedBlocks);
+      const uptr BlocksInUse = RegionInfoIter->FreeListInfo.PoppedBlocks -
+                               RegionInfoIter->FreeListInfo.PushedBlocks;
+      DCHECK_EQ(BlocksInUse, BatchClassUsedInFreeLists);
+      ++RegionInfoIter;
+    } while (!RegionInfoIter.end());
+
+    RegionInfoAllocator.verifyTheNumberOfAllocatedRegionInfo(NumRegionInfo);
   }
 
   u16 popBlocks(CacheT *C, uptr ClassId, CompactPtrT *ToArray,
                 const u16 MaxBlockCount) {
     DCHECK_LT(ClassId, NumClasses);
-    RegionInfo *Region = getRegionInfo(ClassId);
+    auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(ClassId);
     u16 PopCount = 0;
 
-    {
-      ScopedLock L(Region->FLLock);
-      PopCount = popBlocksImpl(C, ClassId, Region, ToArray, MaxBlockCount);
-      if (PopCount != 0U)
-        return PopCount;
-    }
+    do {
+      {
+        ScopedLock FL(RegionInfoIter->FLLock);
+        PopCount = popBlocksImpl(C, ClassId, RegionInfoIter.get(), ToArray,
+                                 MaxBlockCount);
+        if (PopCount != 0U)
+          return PopCount;
+      }
+
+      ++RegionInfoIter;
+    } while (!RegionInfoIter.end());
 
     bool ReportRegionExhausted = false;
 
-    if (conditionVariableEnabled()) {
+    RegionInfo *Region = RegionInfoManager.getCurRegionInfo(ClassId);
+
+    // TODO(chiahungduan): Support multiple-regions with condition variable.
+    if (conditionVariableEnabled() && !Config::getEnableMultiRegions()) {
       PopCount = popBlocksWithCV(C, ClassId, Region, ToArray, MaxBlockCount,
                                  ReportRegionExhausted);
     } else {
@@ -247,14 +297,37 @@ template <typename Config> class SizeClassAllocator64 {
         }
 
         const bool RegionIsExhausted = Region->Exhausted;
-        if (!RegionIsExhausted) {
+        if (!Config::getEnableMultiRegions()) {
+          if (!RegionIsExhausted) {
+            PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
+                                                    MaxBlockCount);
+          }
+          ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
+          break;
+        } else {
+          // When a region is exhausted, a new region will be created unless
+          // RegionInfoAllocator is OOM, in which case there's no way to
+          // create a new region.
+          if (RegionIsExhausted)
+            break;
           PopCount = populateFreeListAndPopBlocks(C, ClassId, Region, ToArray,
                                                   MaxBlockCount);
+          if (PopCount != 0)
+            break;
+
+          DCHECK(Region->Exhausted);
+          RegionInfo *NewRegion = populateNewRegion(Region, ClassId);
+          if (NewRegion == nullptr) {
+            ReportRegionExhausted = true;
+            break;
+          }
+
+          // Try to allocate from the new region in the next iteration so that
+          // we can release the `MMLock` of the previous region first.
+          Region = NewRegion;
         }
-        ReportRegionExhausted = !RegionIsExhausted && Region->Exhausted;
-        break;
       }
-    }
+    } // if (conditionVariableEnabled() && !Config::getEnableMultiRegions())
 
     if (UNLIKELY(ReportRegionExhausted)) {
       Printf("Can't populate more pages for size class %zu.\n",
@@ -274,8 +347,24 @@ template <typename Config> class SizeClassAllocator64 {
     DCHECK_LT(ClassId, NumClasses);
     DCHECK_GT(Size, 0);
 
-    RegionInfo *Region = getRegionInfo(ClassId);
-    if (ClassId == SizeClassMap::BatchClassId) {
+    auto IsPtrInRegion = [](RegionInfo *Region,
+                            uptr Ptr) NO_THREAD_SAFETY_ANALYSIS {
+      // Thread-safety annotation doesn't support lambda. Use a runtime check
+      // instead.
+      Region->MMLock.assertHeld();
+      const uptr RegionEnd = Region->MemMapInfo.MemMap.getBase() +
+                             Region->MemMapInfo.MemMap.getCapacity();
+      return Ptr >= Region->RegionBeg && Ptr < RegionEnd;
+    };
+
+    // When multiple regions are enabled, we need to sort the array to
+    // dispatch the blocks to different regions efficiently. Thus, even though
+    // we don't put BatchClass into groups, sorting is still necessary and
+    // it'll be handled later in the function.
+    // TODO: Reorder the use of variable
+    RegionInfo *Region = RegionInfoManager.getCurRegionInfo(ClassId);
+    if (ClassId == SizeClassMap::BatchClassId &&
+        !Config::getEnableMultiRegions()) {
       ScopedLock L(Region->FLLock);
       pushBatchClassBlocks(Region, Array, Size);
       if (conditionVariableEnabled())
@@ -287,7 +376,7 @@ template <typename Config> class SizeClassAllocator64 {
     // greater than the block size with a certain scale.
 
     bool SameGroup = true;
-    if (GroupSizeLog < RegionSizeLog) {
+    if (GroupSizeLog < RegionSizeLog || Config::getEnableMultiRegions()) {
       // Sort the blocks so that blocks belonging to the same group can be
       // pushed together.
       for (u32 I = 1; I < Size; ++I) {
@@ -303,11 +392,41 @@ template <typename Config> class SizeClassAllocator64 {
       }
     }
 
-    {
+    if (!Config::getEnableMultiRegions()) {
       ScopedLock L(Region->FLLock);
       pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup);
       if (conditionVariableEnabled())
         Region->FLLockCV.notifyAll(Region->FLLock);
+    } else {
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(ClassId);
+      DCHECK_NE(RegionInfoIter.get(), nullptr);
+
+      u32 I = 0;
+      u32 Shift = 0;
+      do {
+        ScopedLock MML(RegionInfoIter->MMLock);
+        while (I < Size && IsPtrInRegion(RegionInfoIter.get(),
+                                         reinterpret_cast<uptr>(decompactPtr(
+                                             ClassId, Array[I])))) {
+          ++I;
+        }
+        if (I > Shift) {
+          ScopedLock FL(RegionInfoIter->FLLock);
+          if (ClassId == SizeClassMap::BatchClassId) {
+            pushBatchClassBlocks(RegionInfoIter.get(), Array + Shift,
+                                 I - Shift);
+          } else {
+            pushBlocksImpl(C, ClassId, RegionInfoIter.get(), Array + Shift,
+                           I - Shift, SameGroup);
+          }
+          // TODO(chiahungduan): `notifyAll` when condition variable is
+          // supported.
+          Shift = I;
+        }
+
+        ++RegionInfoIter;
+      } while (!RegionInfoIter.end() && I < Size);
+      DCHECK_EQ(I, Size);
     }
   }
 
@@ -316,29 +435,37 @@ template <typename Config> class SizeClassAllocator64 {
     for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
       if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
         continue;
-      getRegionInfo(static_cast<uptr>(I))->MMLock.lock();
-      getRegionInfo(static_cast<uptr>(I))->FLLock.lock();
+      RegionInfoManager.getCurRegionInfo(static_cast<uptr>(I))->MMLock.lock();
+      RegionInfoManager.getCurRegionInfo(static_cast<uptr>(I))->FLLock.lock();
     }
-    getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock();
-    getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock();
+    RegionInfoManager.getCurRegionInfo(SizeClassMap::BatchClassId)
+        ->MMLock.lock();
+    RegionInfoManager.getCurRegionInfo(SizeClassMap::BatchClassId)
+        ->FLLock.lock();
   }
 
   void enable() NO_THREAD_SAFETY_ANALYSIS {
-    getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock();
-    getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock();
+    RegionInfoManager.getCurRegionInfo(SizeClassMap::BatchClassId)
+        ->FLLock.unlock();
+    RegionInfoManager.getCurRegionInfo(SizeClassMap::BatchClassId)
+        ->MMLock.unlock();
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
-      getRegionInfo(I)->FLLock.unlock();
-      getRegionInfo(I)->MMLock.unlock();
+      RegionInfoManager.getCurRegionInfo(I)->FLLock.unlock();
+      RegionInfoManager.getCurRegionInfo(I)->MMLock.unlock();
     }
   }
 
   template <typename F> void iterateOverBlocks(F Callback) {
+    if (Config::getEnableMultiRegions()) {
+      Printf("MultiRegions hasn't supported blocks iteration yet.\n");
+      return;
+    }
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
-      RegionInfo *Region = getRegionInfo(I);
+      RegionInfo *Region = RegionInfoManager.getCurRegionInfo(I);
       // TODO: The call of `iterateOverBlocks` requires disabling
       // SizeClassAllocator64. We may consider locking each region on demand
       // only.
@@ -358,16 +485,19 @@ template <typename Config> class SizeClassAllocator64 {
     uptr PoppedBlocks = 0;
     uptr PushedBlocks = 0;
     for (uptr I = 0; I < NumClasses; I++) {
-      RegionInfo *Region = getRegionInfo(I);
-      {
-        ScopedLock L(Region->MMLock);
-        TotalMapped += Region->MemMapInfo.MappedUser;
-      }
-      {
-        ScopedLock L(Region->FLLock);
-        PoppedBlocks += Region->FreeListInfo.PoppedBlocks;
-        PushedBlocks += Region->FreeListInfo.PushedBlocks;
-      }
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(I);
+      do {
+        {
+          ScopedLock L(RegionInfoIter->MMLock);
+          TotalMapped += RegionInfoIter->MemMapInfo.MappedUser;
+        }
+        {
+          ScopedLock L(RegionInfoIter->FLLock);
+          PoppedBlocks += RegionInfoIter->FreeListInfo.PoppedBlocks;
+          PushedBlocks += RegionInfoIter->FreeListInfo.PushedBlocks;
+        }
+        ++RegionInfoIter;
+      } while (!RegionInfoIter.end());
     }
     const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
     Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu "
@@ -376,10 +506,14 @@ template <typename Config> class SizeClassAllocator64 {
                 PoppedBlocks - PushedBlocks, IntervalMs >= 0 ? IntervalMs : -1);
 
     for (uptr I = 0; I < NumClasses; I++) {
-      RegionInfo *Region = getRegionInfo(I);
-      ScopedLock L1(Region->MMLock);
-      ScopedLock L2(Region->FLLock);
-      getStats(Str, I, Region);
+      // TODO: Consider adding indentation to the regions of the same size class.
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(I);
+      do {
+        ScopedLock L1(RegionInfoIter->MMLock);
+        ScopedLock L2(RegionInfoIter->FLLock);
+        getStats(Str, I, RegionInfoIter.get());
+        ++RegionInfoIter;
+      } while (!RegionInfoIter.end());
     }
   }
 
@@ -389,9 +523,12 @@ template <typename Config> class SizeClassAllocator64 {
         getPageSizeCached());
 
     for (uptr I = 1; I < NumClasses; I++) {
-      RegionInfo *Region = getRegionInfo(I);
-      ScopedLock L(Region->MMLock);
-      getRegionFragmentationInfo(Region, I, Str);
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(I);
+      do {
+        ScopedLock L(RegionInfoIter->MMLock);
+        getRegionFragmentationInfo(RegionInfoIter.get(), I, Str);
+        ++RegionInfoIter;
+      } while (!RegionInfoIter.end());
     }
   }
 
@@ -408,15 +545,20 @@ template <typename Config> class SizeClassAllocator64 {
   }
 
   uptr tryReleaseToOS(uptr ClassId, ReleaseToOS ReleaseType) {
-    RegionInfo *Region = getRegionInfo(ClassId);
-    // Note that the tryLock() may fail spuriously, given that it should rarely
-    // happen and page releasing is fine to skip, we don't take certain
-    // approaches to ensure one page release is done.
-    if (Region->MMLock.tryLock()) {
-      uptr BytesReleased = releaseToOSMaybe(Region, ClassId, ReleaseType);
-      Region->MMLock.unlock();
-      return BytesReleased;
-    }
+    auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(ClassId);
+
+    do {
+      // Note that the tryLock() may fail spuriously, given that it should
+      // rarely happen and page releasing is fine to skip, we don't take certain
+      // approaches to ensure one page release is done.
+      if (RegionInfoIter->MMLock.tryLock()) {
+        uptr BytesReleased =
+            releaseToOSMaybe(RegionInfoIter.get(), ClassId, ReleaseType);
+        RegionInfoIter->MMLock.unlock();
+        return BytesReleased;
+      }
+      ++RegionInfoIter;
+    } while (!RegionInfoIter.end());
     return 0;
   }
 
@@ -425,36 +567,54 @@ template <typename Config> class SizeClassAllocator64 {
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
-      RegionInfo *Region = getRegionInfo(I);
-      ScopedLock L(Region->MMLock);
-      TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType);
+      auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(I);
+
+      do {
+        ScopedLock L(RegionInfoIter->MMLock);
+        TotalReleasedBytes +=
+            releaseToOSMaybe(RegionInfoIter.get(), I, ReleaseType);
+        ++RegionInfoIter;
+      } while (!RegionInfoIter.end());
     }
     return TotalReleasedBytes;
   }
 
   const char *getRegionInfoArrayAddress() const {
-    return reinterpret_cast<const char *>(RegionInfoArray);
+    return reinterpret_cast<const char *>(
+        RegionInfoManager.getRawRegionInfoArray());
   }
 
-  static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }
+  uptr getRegionInfoArraySize() {
+    if (Config::getEnableMultiRegions())
+      return 0;
+    return RegionInfoManager.getRawRegionInfoArraySize();
+  }
 
   uptr getCompactPtrBaseByClassId(uptr ClassId) {
-    return getRegionInfo(ClassId)->RegionBeg;
+    return RegionInfoManager.getCurRegionInfo(ClassId)->RegionBeg;
   }
 
   CompactPtrT compactPtr(uptr ClassId, uptr Ptr) {
     DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
+    if (DisablePtrCompaction)
+      return static_cast<CompactPtrT>(Ptr);
     return compactPtrInternal(getCompactPtrBaseByClassId(ClassId), Ptr);
   }
 
   void *decompactPtr(uptr ClassId, CompactPtrT CompactPtr) {
     DCHECK_LE(ClassId, SizeClassMap::LargestClassId);
+    if (DisablePtrCompaction)
+      return reinterpret_cast<void *>(CompactPtr);
     return reinterpret_cast<void *>(
         decompactPtrInternal(getCompactPtrBaseByClassId(ClassId), CompactPtr));
   }
 
   static BlockInfo findNearestBlock(const char *RegionInfoData,
                                     uptr Ptr) NO_THREAD_SAFETY_ANALYSIS {
+    if (Config::getEnableMultiRegions()) {
+      Printf("MultiRegions hasn't supported finding nearest block yet.\n");
+      return {};
+    }
     const RegionInfo *RegionInfoArray =
         reinterpret_cast<const RegionInfo *>(RegionInfoData);
 
@@ -555,18 +715,208 @@ template <typename Config> class SizeClassAllocator64 {
     bool isPopulatingFreeList GUARDED_BY(FLLock) = false;
   };
   struct RegionInfo : UnpaddedRegionInfo {
+    // This is only used when `Config::getEnableMultiRegions` is enabled and is
+    // guarded by the mutex in `RegionInfoManager`.
+    RegionInfo *Next = nullptr;
     char Padding[SCUDO_CACHE_LINE_SIZE -
-                 (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)] = {};
+                 ((sizeof(UnpaddedRegionInfo) + sizeof(RegionInfo *)) %
+                  SCUDO_CACHE_LINE_SIZE)] = {};
   };
   static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");
 
-  RegionInfo *getRegionInfo(uptr ClassId) {
-    DCHECK_LT(ClassId, NumClasses);
-    return &RegionInfoArray[ClassId];
-  }
+  template <bool IsMultiRegions = false> struct RegionInfoAlloc {
+    RegionInfo *allocate() {
+      UNREACHABLE("RegionInfo is statically allocated");
+    }
 
-  uptr getRegionBaseByClassId(uptr ClassId) {
-    RegionInfo *Region = getRegionInfo(ClassId);
+    void verifyTheNumberOfAllocatedRegionInfo(uptr NumRegionInfo) {
+      DCHECK_EQ(NumRegionInfo, NumClasses);
+    }
+  };
+
+  template <> struct RegionInfoAlloc</*isMultiRegions=*/true> {
+    RegionInfo *allocate() {
+      ScopedLock L(M);
+      return S.pop();
+    }
+
+    void verifyTheNumberOfAllocatedRegionInfo(uptr NumRegionInfo) {
+      ScopedLock L(M);
+      DCHECK_EQ(NumRegionInfo, S.Size);
+    }
+
+    HybridMutex M;
+    // According to the following,
+    //   DR1351: If the brace-or-equal-initializer of a non-static data
+    //   member invokes a defaulted default constructor of its class or of an
+    //   enclosing class in a potentially evaluated subexpression, the program
+    //   is ill-formed.
+    // So we have to `outline` the `Size`/`Array` into another struct `Storage`.
+    struct Storage {
+      RegionInfo *pop() {
+        if (Size == NumEntries)
+          return nullptr;
+        return &Array[Size++];
+      }
+      // The amount of memory used by this allocator is about (NumEntries *
+      // RegionSize). For example, a region size of 256 KB will have 2 GB of
+      // space available.
+      // TODO(chiahungduan): Consider having this configurable.
+      static constexpr uptr NumEntries = 1UL << 13;
+      uptr Size = 0;
+      alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo Array[NumEntries];
+    } S GUARDED_BY(M);
+  };
+
+  template <bool IsMultiRegions = false> struct RegionInfoInterface {
+    struct RegionInfoIter {
+      RegionInfoIter(RegionInfo *Region) : CurRegionInfo(Region) {}
+      RegionInfo *operator->() { return CurRegionInfo; }
+      RegionInfoIter &operator++() {
+        CurRegionInfo = nullptr;
+        return *this;
+      }
+      RegionInfo *get() { return CurRegionInfo; }
+      bool end() { return CurRegionInfo == nullptr; }
+      RegionInfo *CurRegionInfo = nullptr;
+    };
+
+    void init(UNUSED RegionInfoAlloc<IsMultiRegions> &Allocator) {
+      // The RegionInfo storage is statically initialized.
+    }
+
+    ALWAYS_INLINE RegionInfo *getCurRegionInfo(uptr ClassId) {
+      DCHECK_LT(ClassId, NumClasses);
+      return &RegionInfoArray[ClassId];
+    }
+    ALWAYS_INLINE RegionInfoIter getRegionInfoIter(uptr ClassId) {
+      return RegionInfoIter(getCurRegionInfo(ClassId));
+    }
+
+    void pushRegionInfo(UNUSED RegionInfo *Region, UNUSED uptr ClassId) {
+      UNREACHABLE("Only MultiRegions supports this operation\n");
+    }
+    // TODO: Review the uses of `getRawRegionInfoArray` and
+    // `getRawRegionInfoArraySize` and see if we can deprecate them.
+    RegionInfo *getRawRegionInfoArray() { return RegionInfoArray; }
+    uptr getRawRegionInfoArraySize() {
+      return static_cast<uptr>(sizeof(RegionInfoArray));
+    }
+    void shuffle(u32 *Seed) {
+      scudo::shuffle(RegionInfoArray, NumClasses, Seed);
+    }
+
+    alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];
+  };
+
+  template <> struct RegionInfoInterface</*isMultiRegions=*/true> {
+    struct RegionInfoIter {
+      RegionInfoIter(RegionInfo *Region, HybridMutex &RegionInfoListLock)
+          : CurRegionInfo(Region), M(RegionInfoListLock) {}
+      RegionInfo *operator->() { return CurRegionInfo; }
+      RegionInfoIter &operator++() {
+        ScopedLock L(M);
+        CurRegionInfo = CurRegionInfo->Next;
+        return *this;
+      }
+      RegionInfo *get() { return CurRegionInfo; }
+      bool end() { return CurRegionInfo == nullptr; }
+      RegionInfo *CurRegionInfo = nullptr;
+      HybridMutex &M;
+    };
+
+    void init(RegionInfoAlloc</*isMultiRegions=*/true> &Allocator) {
+      for (uptr I = 0; I < NumClasses; I++) {
+        RegionInfo *Region = Allocator.allocate();
+        LowestAddrRegionInfo[I].P = Region;
+        CurrentRegionInfo[I].P = Region;
+      }
+    }
+
+    // Return the last pushed RegionInfo. For one size class, the current
+    // RegionInfo is responsible for the page mapping and the other RegionInfos
+    // will have been exhausted already.
+    ALWAYS_INLINE RegionInfo *getCurRegionInfo(uptr ClassId) {
+      DCHECK_LT(ClassId, NumClasses);
+      return CurrentRegionInfo[ClassId].P;
+    }
+
+    ALWAYS_INLINE RegionInfoIter getRegionInfoIter(uptr ClassId) {
+      return RegionInfoIter(LowestAddrRegionInfo[ClassId].P,
+                            RegionInfoLock[ClassId]);
+    }
+
+    // RegionInfos for the same size class are stored in order of base
+    // address, which means every RegionInfo traversal starts from the lowest
+    // address. This aligns with how pointer grouping works.
+    void pushRegionInfo(RegionInfo *Region, uptr ClassId)
+        REQUIRES(Region->MMLock) {
+      DCHECK_LT(ClassId, NumClasses);
+      DCHECK(Region->MemMapInfo.MemMap.isAllocated());
+
+      // The creation of a new region requires holding the MMLock of the
+      // current region to ensure only one thread is allocating the new region.
+      CurrentRegionInfo[ClassId].P->MMLock.assertHeld();
+
+      RegionInfo *RegionCursor = LowestAddrRegionInfo[ClassId].P;
+      DCHECK_NE(RegionCursor, nullptr);
+
+      ScopedLock L(RegionInfoLock[ClassId]);
+
+      if (Region->RegionBeg < RegionCursor->RegionBeg) {
+        Region->Next = RegionCursor;
+        LowestAddrRegionInfo[ClassId].P = Region;
+      } else {
+        while (RegionCursor->Next != nullptr &&
+               Region->RegionBeg > RegionCursor->Next->RegionBeg) {
+          RegionCursor = RegionCursor->Next;
+        }
+
+        Region->Next = RegionCursor->Next;
+        RegionCursor->Next = Region;
+      }
+
+      if (SCUDO_DEBUG) {
+        RegionInfo *R = LowestAddrRegionInfo[ClassId].P;
+        while (R->Next != nullptr) {
+          DCHECK_LT(R->RegionBeg, R->Next->RegionBeg);
+          R = R->Next;
+        }
+      }
+
+      CurrentRegionInfo[ClassId].P = Region;
+    }
+    // Multiple-Regions doesn't support this.
+    RegionInfo *getRawRegionInfoArray() { return nullptr; }
+    uptr getRawRegionInfoArraySize() { return 0; }
+
+    void shuffle(u32 *Seed) {
+      if (SCUDO_DEBUG) {
+        // We don't support shuffling two arrays with the same randomness. This
+        // is supposed to be done at the initialization stage so that we can
+        // simply update the `LowestAddrRegionInfo` by copying `CurrentRegionInfo`.
+        for (uptr I = 0; I < NumClasses; ++I)
+          CHECK_EQ(CurrentRegionInfo[I].P, LowestAddrRegionInfo[I].P);
+      }
+      scudo::shuffle(CurrentRegionInfo, NumClasses, Seed);
+      memcpy(LowestAddrRegionInfo, CurrentRegionInfo,
+             sizeof(RegionInfoPointer) * NumClasses);
+    }
+
+    // Scudo requires the data members to be constant initializable. An array
+    // of raw pointers doesn't meet that condition, so wrap the pointer in a
+    // struct to make it a compound type which is constant initializable.
+    struct RegionInfoPointer {
+      RegionInfo *P = nullptr;
+    };
+
+    alignas(SCUDO_CACHE_LINE_SIZE)
+        RegionInfoPointer CurrentRegionInfo[NumClasses];
+    RegionInfoPointer LowestAddrRegionInfo[NumClasses];
+    HybridMutex RegionInfoLock[NumClasses];
+  };
+
+  uptr getRegionBase(RegionInfo *Region) {
     Region->MMLock.assertHeld();
 
     if (!Config::getEnableContiguousRegions() &&
@@ -577,18 +927,23 @@ template <typename Config> class SizeClassAllocator64 {
   }
 
   static CompactPtrT compactPtrInternal(uptr Base, uptr Ptr) {
+    DCHECK(!DisablePtrCompaction);
     return static_cast<CompactPtrT>((Ptr - Base) >> CompactPtrScale);
   }
 
   static uptr decompactPtrInternal(uptr Base, CompactPtrT CompactPtr) {
+    DCHECK(!DisablePtrCompaction);
     return Base + (static_cast<uptr>(CompactPtr) << CompactPtrScale);
   }
 
   static uptr compactPtrGroup(CompactPtrT CompactPtr) {
-    const uptr Mask = (static_cast<uptr>(1) << GroupScale) - 1;
+    const uptr ShiftScale = DisablePtrCompaction ? GroupSizeLog : GroupScale;
+    const uptr Mask = (static_cast<uptr>(1) << ShiftScale) - 1;
     return static_cast<uptr>(CompactPtr) & ~Mask;
   }
   static uptr decompactGroupBase(uptr Base, uptr CompactPtrGroupBase) {
+    if (DisablePtrCompaction)
+      return CompactPtrGroupBase;
     DCHECK_EQ(CompactPtrGroupBase % (static_cast<uptr>(1) << (GroupScale)), 0U);
     return Base + (CompactPtrGroupBase << CompactPtrScale);
   }
@@ -629,7 +984,17 @@ template <typename Config> class SizeClassAllocator64 {
 
   void pushBatchClassBlocks(RegionInfo *Region, CompactPtrT *Array, u32 Size)
       REQUIRES(Region->FLLock) {
-    DCHECK_EQ(Region, getRegionInfo(SizeClassMap::BatchClassId));
+    if (SCUDO_DEBUG) {
+      auto RegionIter =
+          RegionInfoManager.getRegionInfoIter(SizeClassMap::BatchClassId);
+      bool IsBatchClass = false;
+      do {
+        if (RegionIter.get() == Region)
+          IsBatchClass = true;
+        ++RegionIter;
+      } while (!RegionIter.end() && !IsBatchClass);
+      CHECK(IsBatchClass);
+    }
 
     // Free blocks are recorded by TransferBatch in freelist for all
     // size-classes. In addition, TransferBatch is allocated from BatchClassId.
@@ -1010,6 +1375,36 @@ template <typename Config> class SizeClassAllocator64 {
     return PopCount;
   }
 
+  RegionInfo *populateNewRegion(RegionInfo *Region, uptr ClassId)
+      REQUIRES(Region->MMLock) {
+    // Allocating a new region is only feasible when the current `RegionInfo`
+    // is exhausted.
+    DCHECK_EQ(Region, RegionInfoManager.getCurRegionInfo(ClassId));
+    RegionInfo *NewRegion = RegionInfoAllocator.allocate();
+    if (NewRegion == nullptr)
+      return nullptr;
+
+    ReservedMemoryT ReservedMemory = {};
+    const uptr Alignment =
+        DisablePtrCompaction ? (1UL << GroupSizeLog) : getPageSizeCached();
+    if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize,
+                                        "scudo:primary_reserve", MAP_ALLOWNOMEM,
+                                        Alignment))) {
+      Printf("Can't populate a new region for size class %zu.\n",
+             getSizeByClassId(ClassId));
+      return nullptr;
+    }
+
+    ScopedLock MML(NewRegion->MMLock);
+    initRegion(NewRegion, ClassId,
+               ReservedMemory.dispatch(ReservedMemory.getBase(),
+                                       ReservedMemory.getCapacity()),
+               /*EnableRandomOffset=*/false);
+
+    RegionInfoManager.pushRegionInfo(NewRegion, ClassId);
+    return NewRegion;
+  }
+
   NOINLINE u16 populateFreeListAndPopBlocks(CacheT *C, uptr ClassId,
                                             RegionInfo *Region,
                                             CompactPtrT *ToArray,
@@ -1017,10 +1412,12 @@ template <typename Config> class SizeClassAllocator64 {
       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     if (!Config::getEnableContiguousRegions() &&
         !Region->MemMapInfo.MemMap.isAllocated()) {
+      const uptr Alignment =
+          DisablePtrCompaction ? (1UL << GroupSizeLog) : getPageSizeCached();
       ReservedMemoryT ReservedMemory;
       if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize,
                                           "scudo:primary_reserve",
-                                          MAP_ALLOWNOMEM))) {
+                                          MAP_ALLOWNOMEM, Alignment))) {
         Printf("Can't reserve pages for size class %zu.\n",
                getSizeByClassId(ClassId));
         return 0U;
@@ -1043,7 +1440,7 @@ template <typename Config> class SizeClassAllocator64 {
       // Do the mmap for the user memory.
       const uptr MapSize =
           roundUp(TotalUserBytes - MappedUser, MapSizeIncrement);
-      const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
+      const uptr RegionBase = RegionBeg - getRegionBase(Region);
       if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
         Region->Exhausted = true;
         return 0U;
@@ -1074,8 +1471,10 @@ template <typename Config> class SizeClassAllocator64 {
 
     const uptr CompactPtrBase = getCompactPtrBaseByClassId(ClassId);
     uptr P = RegionBeg + Region->MemMapInfo.AllocatedUser;
-    for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
-      ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P);
+    for (u32 I = 0; I < NumberOfBlocks; I++, P += Size) {
+      ShuffleArray[I] =
+          DisablePtrCompaction ? P : compactPtrInternal(CompactPtrBase, P);
+    }
 
     ScopedLock L(Region->FLLock);
 
@@ -1143,8 +1542,7 @@ template <typename Config> class SizeClassAllocator64 {
         Region->FreeListInfo.PushedBlocks, InUseBlocks, TotalChunks,
         Region->ReleaseInfo.RangesReleased,
         Region->ReleaseInfo.LastReleasedBytes >> 10,
-        RegionPushedBytesDelta >> 10, Region->RegionBeg,
-        getRegionBaseByClassId(ClassId));
+        RegionPushedBytesDelta >> 10, Region->RegionBeg, getRegionBase(Region));
   }
 
   void getRegionFragmentationInfo(RegionInfo *Region, uptr ClassId,
@@ -1259,7 +1657,7 @@ template <typename Config> class SizeClassAllocator64 {
     // ==================================================================== //
     // 4. Release the unused physical pages back to the OS.
     // ==================================================================== //
-    RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
+    RegionReleaseRecorder<MemMapT> Recorder(Region->MemMapInfo.MemMap,
                                             Region->RegionBeg,
                                             Context.getReleaseOffset());
     auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
@@ -1516,6 +1914,8 @@ template <typename Config> class SizeClassAllocator64 {
       REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr GroupSize = (1UL << GroupSizeLog);
     auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
+      if (DisablePtrCompaction)
+        return static_cast<uptr>(CompactPtr);
       return decompactPtrInternal(CompactPtrBase, CompactPtr);
     };
 
@@ -1599,7 +1999,8 @@ template <typename Config> class SizeClassAllocator64 {
     constexpr uptr MaxUnusedSize = 8;
     CompactPtrT Blocks[MaxUnusedSize];
     u32 Idx = 0;
-    RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId);
+    RegionInfo *BatchClassRegion =
+        RegionInfoManager.getCurRegionInfo(SizeClassMap::BatchClassId);
     // We can't call pushBatchClassBlocks() to recycle the unused `BatchGroup`s
     // when we are manipulating the freelist of `BatchClassRegion`. Instead, we
     // should just push it back to the freelist when we merge two `BatchGroup`s.
@@ -1729,7 +2130,8 @@ template <typename Config> class SizeClassAllocator64 {
   // that size class.
   uptr SmallerBlockReleasePageDelta = 0;
   atomic_s32 ReleaseToOsIntervalMs = {};
-  alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];
+  RegionInfoAlloc<Config::getEnableMultiRegions()> RegionInfoAllocator;
+  RegionInfoInterface<Config::getEnableMultiRegions()> RegionInfoManager;
 };
 
 } // namespace scudo
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index b6f76a4d20585..310eee3be27b2 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -19,7 +19,7 @@ namespace scudo {
 
 template <typename MemMapT> class RegionReleaseRecorder {
 public:
-  RegionReleaseRecorder(MemMapT *RegionMemMap, uptr Base, uptr Offset = 0)
+  RegionReleaseRecorder(MemMapT RegionMemMap, uptr Base, uptr Offset = 0)
       : RegionMemMap(RegionMemMap), Base(Base), Offset(Offset) {}
 
   uptr getReleasedRangesCount() const { return ReleasedRangesCount; }
@@ -32,7 +32,7 @@ template <typename MemMapT> class RegionReleaseRecorder {
   // are offseted from `Base` + Offset.
   void releasePageRangeToOS(uptr From, uptr To) {
     const uptr Size = To - From;
-    RegionMemMap->releasePagesToOS(getBase() + Offset + From, Size);
+    RegionMemMap.releasePagesToOS(getBase() + Offset + From, Size);
     ReleasedRangesCount++;
     ReleasedBytes += Size;
   }
@@ -40,7 +40,7 @@ template <typename MemMapT> class RegionReleaseRecorder {
 private:
   uptr ReleasedRangesCount = 0;
   uptr ReleasedBytes = 0;
-  MemMapT *RegionMemMap = nullptr;
+  MemMapT RegionMemMap = {};
   uptr Base = 0;
   // The release offset from Base. This is used when we know a given range after
   // Base will not be released.
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index 16b19e807e11b..76308d7915012 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -210,6 +210,39 @@ struct TestConditionVariableConfig {
   };
   template <typename Config> using SecondaryT = scudo::MapAllocator<Config>;
 };
+
+struct TestMultiRegionsConfig {
+  static const bool MaySupportMemoryTagging = true;
+  template <class A>
+  using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs.
+
+  struct Primary {
+    using SizeClassMap = AndroidSizeClassMap;
+    static const bool EnableMultiRegions = true;
+    static const bool EnableRandomOffset = false;
+    static const uptr RegionSizeLog = 18U;
+    static const uptr GroupSizeLog = 18U;
+    static const uptr MapSizeIncrement = 1UL << 18;
+    static const s32 MinReleaseToOsIntervalMs = 1000;
+    static const s32 MaxReleaseToOsIntervalMs = 1000;
+  };
+  template <typename Config> using PrimaryT = SizeClassAllocator64<Config>;
+
+  struct Secondary {
+    struct Cache {
+      static const u32 EntriesArraySize = 256U;
+      static const u32 QuarantineSize = 32U;
+      static const u32 DefaultMaxEntriesCount = 32U;
+      static const uptr DefaultMaxEntrySize = 2UL << 20;
+      static const s32 MinReleaseToOsIntervalMs = 0;
+      static const s32 MaxReleaseToOsIntervalMs = 1000;
+    };
+    template <typename Config> using CacheT = MapAllocatorCache<Config>;
+  };
+
+  template <typename Config> using SecondaryT = MapAllocator<Config>;
+};
+
 } // namespace scudo
 
 #if SCUDO_FUCHSIA
@@ -219,7 +252,8 @@ struct TestConditionVariableConfig {
 #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                              \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, DefaultConfig)                          \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidConfig)                          \
-  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConditionVariableConfig)
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConditionVariableConfig)            \
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestMultiRegionsConfig)
 #endif
 
 #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE)                             \
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index 1cf3bb51db0e7..5c72da4256004 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -150,6 +150,24 @@ template <typename SizeClassMapT> struct TestConfig5 {
   };
 };
 
+template <typename SizeClassMapT> struct MultiRegionsConfig {
+  static const bool MaySupportMemoryTagging = true;
+  template <typename> using TSDRegistryT = void;
+  template <typename> using PrimaryT = void;
+  template <typename> using SecondaryT = void;
+
+  struct Primary {
+    using SizeClassMap = SizeClassMapT;
+    static const bool EnableMultiRegions = true;
+    static const scudo::uptr RegionSizeLog = 26U;
+    static const scudo::uptr GroupSizeLog = 18U;
+    static const bool EnableRandomOffset = false;
+    static const scudo::uptr MapSizeIncrement = 1UL << 18;
+    static const scudo::s32 MinReleaseToOsIntervalMs = 1000;
+    static const scudo::s32 MaxReleaseToOsIntervalMs = 1000;
+  };
+};
+
 template <template <typename> class BaseConfig, typename SizeClassMapT>
 struct Config : public BaseConfig<SizeClassMapT> {};
 
@@ -168,6 +186,10 @@ struct TestAllocator : public SizeClassAllocator<BaseConfig, SizeClassMapT> {
     this->verifyAllBlocksAreReleasedTestOnly();
     this->unmapTestOnly();
   }
+  // TODO: Remove this when we support `iterateOverBlocks` with multiple-regions
+  // mode.
+  static const bool EnableMultiRegions =
+      scudo::PrimaryConfig<BaseConfig<SizeClassMapT>>::getEnableMultiRegions();
 
   void *operator new(size_t size) {
     void *p = nullptr;
@@ -191,7 +213,8 @@ struct ScudoPrimaryTest : public Test {};
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig2)                            \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig3)                            \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig4)                            \
-  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig5)
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TestConfig5)                            \
+  SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, MultiRegionsConfig)
 #endif
 
 #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE)                             \
@@ -297,6 +320,8 @@ TEST(ScudoPrimaryTest, Primary64OOM) {
 
 SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryIterate) {
   using Primary = TestAllocator<TypeParam, scudo::DefaultSizeClassMap>;
+  if (Primary::EnableMultiRegions)
+    return;
   std::unique_ptr<Primary> Allocator(new Primary);
   Allocator->init(/*ReleaseToOsInterval=*/-1);
   typename Primary::CacheT Cache;

>From 3d6077b648efab61f2fbd7b4245cd325173e76a2 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Tue, 10 Sep 2024 21:40:15 +0000
Subject: [PATCH 3/6] Fix the description in the comment and change the
 alignment to be in pages

---
 compiler-rt/lib/scudo/standalone/allocator_config.def | 10 +++++-----
 compiler-rt/lib/scudo/standalone/primary64.h          | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/allocator_config.def b/compiler-rt/lib/scudo/standalone/allocator_config.def
index a14541f519f7c..781f84292149e 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_config.def
+++ b/compiler-rt/lib/scudo/standalone/allocator_config.def
@@ -87,15 +87,15 @@ PRIMARY_OPTIONAL(const bool, EnableRandomOffset, false)
 
 // This allows each size class to have multiple regions instead of one. Note
 // that this is an experimental option so it has a few constraints while using.
-//   a. Pointer compaction is diabled. Which means `CompactPtrT` needs to be the
-//      pointer integral type, i.e., uptr.
+//   a. Pointer compaction is disabled. Which means `CompactPtrT` needs to be
+//      a pointer integral type, i.e., uptr.
 //   b. `EnableRandomOffset` needs to be false. Pointer grouping requires
 //      the beginning of allocation address of a region to be aligned with
-//      `GroupSizeLog`. Without pointer compaction, it relies the region to be
-//      allocated with proper alignment and the random offset will break the
+//      `GroupSizeLog`. Without pointer compaction, it requires the region to be
+//      allocated with proper alignment and a random offset will break the
 //      assumption.
 //   c. Condition variable is not supported under this mode. This is still under
-//      developing.
+//      development.
 PRIMARY_OPTIONAL(const bool, EnableMultiRegions, false)
 
 PRIMARY_OPTIONAL(const s32, DefaultReleaseToOsIntervalMs, INT32_MIN)
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 37b3ec2ddddb3..dfde8ec1238e2 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -149,7 +149,7 @@ template <typename Config> class SizeClassAllocator64 {
       // Reserve the space required for the Primary.
       CHECK(ReservedMemory.create(/*Addr=*/0U, RegionSize * NumClasses,
                                   "scudo:primary_reserve", /*Flag=*/0,
-                                  Alignment));
+                                  Alignment / getPageSizeCached()));
       const uptr PrimaryBase = ReservedMemory.getBase();
 
       for (uptr I = 0; I < NumClasses; I++) {
@@ -1389,7 +1389,7 @@ template <typename Config> class SizeClassAllocator64 {
         DisablePtrCompaction ? (1UL << GroupSizeLog) : getPageSizeCached();
     if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize,
                                         "scudo:primary_reserve", MAP_ALLOWNOMEM,
-                                        Alignment))) {
+                                        Alignment / getPageSizeCached()))) {
       Printf("Can't populate a new region for size class %zu.\n",
              getSizeByClassId(ClassId));
       return nullptr;
@@ -1415,9 +1415,9 @@ template <typename Config> class SizeClassAllocator64 {
       const uptr Alignment =
           DisablePtrCompaction ? (1UL << GroupSizeLog) : getPageSizeCached();
       ReservedMemoryT ReservedMemory;
-      if (UNLIKELY(!ReservedMemory.create(/*Addr=*/0U, RegionSize,
-                                          "scudo:primary_reserve",
-                                          MAP_ALLOWNOMEM, Alignment))) {
+      if (UNLIKELY(!ReservedMemory.create(
+              /*Addr=*/0U, RegionSize, "scudo:primary_reserve", MAP_ALLOWNOMEM,
+              Alignment / getPageSizeCached()))) {
         Printf("Can't reserve pages for size class %zu.\n",
                getSizeByClassId(ClassId));
         return 0U;
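
As a quick sanity check on the units after this change: create() now takes a
page count rather than a byte count. With the values below (both are
assumptions for the example, not fixed by the patch), the call site passes 256
instead of 1048576:

  const size_t PageSize = 4096;                       // Assumed 4 KiB pages.
  const size_t GroupSizeLog = 20;                     // Assumed 1 MiB groups.
  const size_t Alignment = size_t(1) << GroupSizeLog; // 1 MiB, in bytes.
  const size_t AlignmentPages = Alignment / PageSize; // 1048576 / 4096 == 256.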

>From 3762eea246a9f6323b9c1b6a136ddbac804060ab Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Tue, 10 Sep 2024 21:55:31 +0000
Subject: [PATCH 4/6] Change Alignment to AlignmentPages

---
 .../lib/scudo/standalone/mem_map_base.h       |  6 ++---
 .../lib/scudo/standalone/mem_map_fuchsia.h    |  2 +-
 .../lib/scudo/standalone/mem_map_linux.cpp    | 22 +++++++++++--------
 .../lib/scudo/standalone/mem_map_linux.h      |  2 +-
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/mem_map_base.h b/compiler-rt/lib/scudo/standalone/mem_map_base.h
index f4261f035d778..d65b6200b2ede 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_base.h
+++ b/compiler-rt/lib/scudo/standalone/mem_map_base.h
@@ -94,10 +94,10 @@ template <class Derived, typename MemMapTy> class ReservedMemory {
 
   // Reserve a chunk of memory at a suggested address.
   bool create(uptr Addr, uptr Size, const char *Name, uptr Flags = 0,
-              uptr Alignment = getPageSizeCached()) {
+              uptr AlignmentPages = 1) {
     DCHECK(!isCreated());
-    DCHECK_EQ(Alignment % getPageSizeCached(), 0U);
-    return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags, Alignment);
+    return invokeImpl(&Derived::createImpl, Addr, Size, Name, Flags,
+                      AlignmentPages);
   }
 
   // Release the entire reserved memory.
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h
index 3adab733645c4..d2272b31098ee 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h
+++ b/compiler-rt/lib/scudo/standalone/mem_map_fuchsia.h
@@ -58,7 +58,7 @@ class ReservedMemoryFuchsia final
   constexpr ReservedMemoryFuchsia() = default;
 
   bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags,
-                  uptr Alignment);
+                  uptr AlignmentPages);
   void releaseImpl();
   MemMapT dispatchImpl(uptr Addr, uptr Size);
   uptr getBaseImpl() { return Base; }
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp b/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp
index 943b528e379d0..3c5dccc37fe14 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp
+++ b/compiler-rt/lib/scudo/standalone/mem_map_linux.cpp
@@ -127,25 +127,29 @@ void MemMapLinux::releaseAndZeroPagesToOSImpl(uptr From, uptr Size) {
 }
 
 bool ReservedMemoryLinux::createImpl(uptr Addr, uptr Size, const char *Name,
-                                     uptr Flags, uptr Alignment) {
+                                     uptr Flags, uptr AlignmentPages) {
   ReservedMemoryLinux::MemMapT MemMap;
+  const bool NeedToAdjustAlignment = AlignmentPages != 1;
   uptr MapSize = Size;
-  if (Alignment != getPageSizeCached())
-    MapSize += Alignment;
-  if (!MemMap.map(Addr, MapSize, Name, Flags | MAP_NOACCESS))
-    return false;
 
-  if (Alignment != getPageSizeCached()) {
+  if (LIKELY(!NeedToAdjustAlignment)) {
+    if (!MemMap.map(Addr, MapSize, Name, Flags | MAP_NOACCESS))
+      return false;
+  } else {
+    const uptr Alignment = AlignmentPages * getPageSizeCached();
+    MapSize += Alignment;
+    if (!MemMap.map(Addr, MapSize, Name, Flags | MAP_NOACCESS))
+      return false;
     uptr Offset = MemMap.getBase() % Alignment;
     if (Offset != 0) {
       Offset = Alignment - Offset;
       MemMap.unmap(MemMap.getBase(), Offset);
     }
     MemMap.unmap(MemMap.getBase() + Size, MemMap.getCapacity() - Size);
-  }
 
-  DCHECK_EQ(MemMap.getBase() % Alignment, 0);
-  DCHECK_EQ(MemMap.getCapacity(), Size);
+    DCHECK_EQ(MemMap.getBase() % Alignment, 0);
+    DCHECK_EQ(MemMap.getCapacity(), Size);
+  }
 
   MapBase = MemMap.getBase();
   MapCapacity = MemMap.getCapacity();
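
The aligned path above is the usual over-reserve-and-trim technique: reserve
Size + Alignment bytes with no access permission, cut off the misaligned head,
then cut the tail down so exactly Size bytes remain. A standalone sketch of the
same address arithmetic with raw mmap/munmap (an illustration only, not scudo
code; error handling beyond the mmap check is omitted):

  #include <stddef.h>
  #include <stdint.h>
  #include <sys/mman.h>

  // Reserve `Size` bytes aligned to `Alignment` (a multiple of the page size).
  static void *reserveAligned(size_t Size, size_t Alignment) {
    const size_t MapSize = Size + Alignment;
    void *Map = mmap(nullptr, MapSize, PROT_NONE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
    if (Map == MAP_FAILED)
      return nullptr;
    uintptr_t Base = reinterpret_cast<uintptr_t>(Map);
    const uintptr_t End = Base + MapSize;
    const uintptr_t Offset = Base % Alignment;
    if (Offset != 0) {
      // Trim the misaligned head so the new base lands on the alignment.
      munmap(reinterpret_cast<void *>(Base), Alignment - Offset);
      Base += Alignment - Offset;
    }
    // Trim the tail so exactly `Size` bytes stay reserved.
    munmap(reinterpret_cast<void *>(Base + Size), End - (Base + Size));
    return reinterpret_cast<void *>(Base);  // Base % Alignment == 0 here.
  }
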
diff --git a/compiler-rt/lib/scudo/standalone/mem_map_linux.h b/compiler-rt/lib/scudo/standalone/mem_map_linux.h
index 9f61d8d1f47ef..10f0050fd6323 100644
--- a/compiler-rt/lib/scudo/standalone/mem_map_linux.h
+++ b/compiler-rt/lib/scudo/standalone/mem_map_linux.h
@@ -52,7 +52,7 @@ class ReservedMemoryLinux final
 
   // These threes are specific to `ReservedMemory`.
   bool createImpl(uptr Addr, uptr Size, const char *Name, uptr Flags,
-                  uptr Alignment);
+                  uptr AlignmentPages);
   void releaseImpl();
   MemMapT dispatchImpl(uptr Addr, uptr Size);
 

>From ecc261d4e971e976c43d1058683b59e0c05de8f0 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Tue, 17 Sep 2024 19:13:55 +0000
Subject: [PATCH 5/6] Address review comments

---
 compiler-rt/lib/scudo/standalone/primary64.h | 39 ++++++++++----------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index dfde8ec1238e2..349b9c650f97a 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -59,14 +59,13 @@ template <typename Config> class SizeClassAllocator64 {
   // Local cache stores the pointers in the type of compacted pointer and the
   // compaction is done by calculating the offset to the base address of a
   // region. Currently, we don't support decompacting through multiple regions
-  // because of the concern of performance and so we disable the pointer
-  // compaction.
+  // due to performance concerns, so we disable the pointer compaction.
   // TODO(chiahungduan): Allow local cache store the raw pointer and keep
   // storing the compacted pointers in each region to save memory.
   static const bool DisablePtrCompaction = Config::getEnableMultiRegions();
   static_assert(!DisablePtrCompaction || sizeof(CompactPtrT) == sizeof(uptr),
                 "Pointer compaction is disabled, `CompactPtrT` needs to be the "
-                "same size of `uptr`");
+                "same size as `uptr`");
   typedef SizeClassAllocator64<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
   typedef TransferBatch<ThisT> TransferBatchT;
@@ -143,7 +142,8 @@ template <typename Config> class SizeClassAllocator64 {
       // Block grouping requires the base address of a Region to be aligned
       // with GrouopSize and pointer is compacted according to the offset to the
       // base of a region so it always meets the requirement. As a result when
-      // the compaction is disabled, it relies the base address to be aligned.
+      // the compaction is disabled, it relies on the base address to be
+      // aligned.
       const uptr Alignment =
           DisablePtrCompaction ? (1UL << GroupSizeLog) : PageSize;
       // Reserve the space required for the Primary.
@@ -358,10 +358,9 @@ template <typename Config> class SizeClassAllocator64 {
     };
 
     // When multiple-regions is enabled, we need to sort the array to dispatch
-    // the blocks to different regions efficiently. Thus even we don't put
-    // BatchClass into groups, sorting is still necessary and it'll be handled
-    // later in the function.
-    // TODO: Reorder the use of variable
+    // the blocks to different regions efficiently. Thus, even if we don't
+    // put BatchClass into groups, sorting is still necessary and it'll be
+    // handled later in the function.
     RegionInfo *Region = RegionInfoManager.getCurRegionInfo(ClassId);
     if (ClassId == SizeClassMap::BatchClassId &&
         !Config::getEnableMultiRegions()) {
@@ -548,9 +547,9 @@ template <typename Config> class SizeClassAllocator64 {
     auto RegionInfoIter = RegionInfoManager.getRegionInfoIter(ClassId);
 
     do {
-      // Note that the tryLock() may fail spuriously, given that it should
-      // rarely happen and page releasing is fine to skip, we don't take certain
-      // approaches to ensure one page release is done.
+      // Note that the tryLock() can fail under certain circumstances. Since
+      // this should be a rare occurrence and skipping a page release is fine,
+      // we don't try to guarantee that at least one `releaseToOSMaybe()` runs.
       if (RegionInfoIter->MMLock.tryLock()) {
         uptr BytesReleased =
             releaseToOSMaybe(RegionInfoIter.get(), ClassId, ReleaseType);
@@ -612,7 +611,7 @@ template <typename Config> class SizeClassAllocator64 {
   static BlockInfo findNearestBlock(const char *RegionInfoData,
                                     uptr Ptr) NO_THREAD_SAFETY_ANALYSIS {
     if (Config::getEnableMultiRegions()) {
-      Printf("MultiRegions hasn't supported finding nearest block yet.\n");
+      Printf("MultiRegions doesn't supported finding nearest block yet.\n");
       return {};
     }
     const RegionInfo *RegionInfoArray =
@@ -758,8 +757,8 @@ template <typename Config> class SizeClassAllocator64 {
           return nullptr;
         return &Array[Size++];
       }
-      // The amount memory used by this allocator is about (NumEntries *
-      // RegionSize). For example, region with size 256 KB will have 2GB space
+      // The amount of memory used by this allocator is about (NumEntries *
+      // RegionSize). For example, a 256 KB region will have 2GB space
       // available.
       // TODO(chiahungduan): Consider having this configurable.
       static constexpr uptr NumEntries = 1UL << 13;
@@ -846,15 +845,15 @@ template <typename Config> class SizeClassAllocator64 {
                             RegionInfoLock[ClassId]);
     }
 
-    // RegionInfos for the same size class will be stored in the order of base
-    // address. Which means every RegionInfo visiting will be starting from
-    // lowest address and which aligns with how pointer grouping works.
+    // RegionInfos for the same size class are ordered by base address, which
+    // means every RegionInfo traversal starts from the lowest address. This
+    // aligns with how pointer grouping works.
     void pushRegionInfo(RegionInfo *Region, uptr ClassId)
         REQUIRES(Region->MMLock) {
       DCHECK_LT(ClassId, NumClasses);
       DCHECK(Region->MemMapInfo.MemMap.isAllocated());
 
-      // The creation of new region requires holding the MMLock of current
+      // The creation of a new region requires holding the MMLock of the current
       // region to ensure only one thread is allocating the new region.
       CurrentRegionInfo[ClassId].P->MMLock.assertHeld();
 
@@ -903,8 +902,8 @@ template <typename Config> class SizeClassAllocator64 {
              sizeof(RegionInfoPointer) * NumClasses);
     }
 
-    // Scudo requires the data member constant initializable. Array of raw
-    // pointers doesn't meet the condition. Therefore, wrap the pointer in the
+    // Scudo requires the data member to be constant initializable. An array
+    // of raw pointers doesn't meet this condition, so wrap the pointer in the
     // struct to make it a compound type which is constant intializable.
     struct RegionInfoPointer {
       RegionInfo *P = nullptr;

>From afe733cad38df32bd34d3d2dba0f0601c4dee8f7 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan <chiahungduan at google.com>
Date: Fri, 13 Jun 2025 20:32:22 +0000
Subject: [PATCH 6/6] [scudo] Make block storage in TransferBatch trailing
 objects

This allows us to change the number of blocks stored according to the
size of BatchClass.

Also change the name `TransferBatch` to `Batch` given that it's never
the unit of transferring blocks.
---
 .../lib/scudo/standalone/allocator_common.h   |  26 ++--
 compiler-rt/lib/scudo/standalone/primary32.h  | 100 +++++++-------
 compiler-rt/lib/scudo/standalone/primary64.h  | 125 +++++++++---------
 compiler-rt/lib/scudo/standalone/release.h    |   4 +-
 .../scudo/standalone/tests/primary_test.cpp   |   4 +-
 5 files changed, 131 insertions(+), 128 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/allocator_common.h b/compiler-rt/lib/scudo/standalone/allocator_common.h
index 2b77516ad11ca..c37dc7a263fba 100644
--- a/compiler-rt/lib/scudo/standalone/allocator_common.h
+++ b/compiler-rt/lib/scudo/standalone/allocator_common.h
@@ -14,7 +14,7 @@
 
 namespace scudo {
 
-template <class SizeClassAllocator> struct TransferBatch {
+template <class SizeClassAllocator> struct Batch {
   typedef typename SizeClassAllocator::SizeClassMap SizeClassMap;
   typedef typename SizeClassAllocator::CompactPtrT CompactPtrT;
 
@@ -22,19 +22,19 @@ template <class SizeClassAllocator> struct TransferBatch {
   void setFromArray(CompactPtrT *Array, u16 N) {
     DCHECK_LE(N, MaxNumCached);
     Count = N;
-    memcpy(Batch, Array, sizeof(Batch[0]) * Count);
+    memcpy(Blocks, Array, sizeof(Blocks[0]) * Count);
   }
   void appendFromArray(CompactPtrT *Array, u16 N) {
     DCHECK_LE(N, MaxNumCached - Count);
-    memcpy(Batch + Count, Array, sizeof(Batch[0]) * N);
+    memcpy(Blocks + Count, Array, sizeof(Blocks[0]) * N);
     // u16 will be promoted to int by arithmetic type conversion.
     Count = static_cast<u16>(Count + N);
   }
-  void appendFromTransferBatch(TransferBatch *B, u16 N) {
+  void appendFromBatch(Batch *B, u16 N) {
     DCHECK_LE(N, MaxNumCached - Count);
     DCHECK_GE(B->Count, N);
     // Append from the back of `B`.
-    memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
+    memcpy(Blocks + Count, B->Blocks + (B->Count - N), sizeof(Blocks[0]) * N);
     // u16 will be promoted to int by arithmetic type conversion.
     Count = static_cast<u16>(Count + N);
     B->Count = static_cast<u16>(B->Count - N);
@@ -43,29 +43,29 @@ template <class SizeClassAllocator> struct TransferBatch {
   bool empty() { return Count == 0; }
   void add(CompactPtrT P) {
     DCHECK_LT(Count, MaxNumCached);
-    Batch[Count++] = P;
+    Blocks[Count++] = P;
   }
   void moveToArray(CompactPtrT *Array) {
-    memcpy(Array, Batch, sizeof(Batch[0]) * Count);
+    memcpy(Array, Blocks, sizeof(Blocks[0]) * Count);
     clear();
   }
 
   void moveNToArray(CompactPtrT *Array, u16 N) {
     DCHECK_LE(N, Count);
-    memcpy(Array, Batch + Count - N, sizeof(Batch[0]) * N);
+    memcpy(Array, Blocks + Count - N, sizeof(Blocks[0]) * N);
     Count = static_cast<u16>(Count - N);
   }
   u16 getCount() const { return Count; }
   bool isEmpty() const { return Count == 0U; }
   CompactPtrT get(u16 I) const {
     DCHECK_LE(I, Count);
-    return Batch[I];
+    return Blocks[I];
   }
-  TransferBatch *Next;
+  Batch *Next;
 
 private:
-  CompactPtrT Batch[MaxNumCached];
   u16 Count;
+  CompactPtrT Blocks[];
 };
 
 // A BatchGroup is used to collect blocks. Each group has a group id to
@@ -83,8 +83,8 @@ template <class SizeClassAllocator> struct BatchGroup {
   // This is used to track how many bytes are not in-use since last time we
   // tried to release pages.
   uptr BytesInBGAtLastCheckpoint;
-  // Blocks are managed by TransferBatch in a list.
-  SinglyLinkedList<TransferBatch<SizeClassAllocator>> Batches;
+  // Blocks are managed by Batch in a list.
+  SinglyLinkedList<Batch<SizeClassAllocator>> Batches;
 };
 
 } // namespace scudo
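
With the storage turned into a trailing array, a Batch is a small header
(Next, Count) followed by MaxNumCached compact pointers, so the size of a
BatchClass block has to cover both parts; sizeof(Batch) alone no longer does.
A rough standalone approximation of the layout and the size computation (the
value of MaxNumCached is illustrative only, and scudo's real Batch is a
template over the allocator):

  #include <stddef.h>
  #include <stdint.h>

  struct BatchSketch {
    BatchSketch *Next;
    uint16_t Count;
    uintptr_t Blocks[];  // Trailing storage; sized by the BatchClass block.
  };

  constexpr size_t MaxNumCached = 14;  // Illustrative only.
  // Mirrors the intent of getSizeOfBatchClass() in primary32.h/primary64.h:
  // header size plus room for MaxNumCached block pointers.
  constexpr size_t BatchClassBlockSize =
      sizeof(BatchSketch) + sizeof(uintptr_t) * MaxNumCached;

In other words, a Batch is always placed in a BatchClass block obtained from
the allocator, and it is that block size, not sizeof(Batch), which guarantees
the trailing Blocks[] array has room for MaxNumCached entries.
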
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 57b762d1baf77..d5a6103619358 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -34,7 +34,7 @@ namespace scudo {
 // predictable address pattern (the predictability increases with the block
 // size).
 //
-// Regions for size class 0 are special and used to hold TransferBatches, which
+// Regions for size class 0 are special and used to hold Batches, which
 // allow to transfer arrays of pointers from the global size class freelist to
 // the thread specific freelist for said class, and back.
 //
@@ -53,15 +53,20 @@ template <typename Config> class SizeClassAllocator32 {
                 "");
   typedef SizeClassAllocator32<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
-  typedef TransferBatch<ThisT> TransferBatchT;
+  typedef Batch<ThisT> BatchT;
   typedef BatchGroup<ThisT> BatchGroupT;
 
-  static_assert(sizeof(BatchGroupT) <= sizeof(TransferBatchT),
-                "BatchGroupT uses the same class size as TransferBatchT");
+  static constexpr uptr getSizeOfBatchClass() {
+    const uptr HeaderSize = sizeof(BatchT);
+    return HeaderSize + sizeof(CompactPtrT) * BatchT::MaxNumCached;
+  }
+
+  static_assert(sizeof(BatchGroupT) <= getSizeOfBatchClass(),
+                "BatchGroupT also uses BatchClass");
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
-               ? sizeof(TransferBatchT)
+               ? getSizeOfBatchClass()
                : SizeClassMap::getSizeByClassId(ClassId);
   }
 
@@ -124,7 +129,7 @@ template <typename Config> class SizeClassAllocator32 {
 
   // When all blocks are freed, it has to be the same size as `AllocatedUser`.
   void verifyAllBlocksAreReleasedTestOnly() {
-    // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass.
+    // `BatchGroup` and `Batch` also use the blocks from BatchClass.
     uptr BatchClassUsedInFreeLists = 0;
     for (uptr I = 0; I < NumClasses; I++) {
       // We have to count BatchClassUsedInFreeLists in other regions first.
@@ -134,7 +139,7 @@ template <typename Config> class SizeClassAllocator32 {
       ScopedLock L1(Sci->Mutex);
       uptr TotalBlocks = 0;
       for (BatchGroupT &BG : Sci->FreeListInfo.BlockList) {
-        // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
+        // `BG::Batches` are `Batches`. +1 for `BatchGroup`.
         BatchClassUsedInFreeLists += BG.Batches.size() + 1;
         for (const auto &It : BG.Batches)
           TotalBlocks += It.getCount();
@@ -153,7 +158,7 @@ template <typename Config> class SizeClassAllocator32 {
         for (const auto &It : BG.Batches)
           TotalBlocks += It.getCount();
       } else {
-        // `BatchGroup` with empty freelist doesn't have `TransferBatch` record
+        // `BatchGroup` with empty freelist doesn't have `Batch` record
         // itself.
         ++TotalBlocks;
       }
@@ -478,13 +483,13 @@ template <typename Config> class SizeClassAllocator32 {
       REQUIRES(Sci->Mutex) {
     DCHECK_EQ(Sci, getSizeClassInfo(SizeClassMap::BatchClassId));
 
-    // Free blocks are recorded by TransferBatch in freelist for all
-    // size-classes. In addition, TransferBatch is allocated from BatchClassId.
+    // Free blocks are recorded by Batch in freelist for all
+    // size-classes. In addition, Batch is allocated from BatchClassId.
     // In order not to use additional block to record the free blocks in
-    // BatchClassId, they are self-contained. I.e., A TransferBatch records the
+    // BatchClassId, they are self-contained. I.e., A Batch records the
     // block address of itself. See the figure below:
     //
-    // TransferBatch at 0xABCD
+    // Batch at 0xABCD
     // +----------------------------+
     // | Free blocks' addr          |
     // | +------+------+------+     |
@@ -492,25 +497,25 @@ template <typename Config> class SizeClassAllocator32 {
     // | +------+------+------+     |
     // +----------------------------+
     //
-    // When we allocate all the free blocks in the TransferBatch, the block used
-    // by TransferBatch is also free for use. We don't need to recycle the
-    // TransferBatch. Note that the correctness is maintained by the invariant,
+    // When we allocate all the free blocks in the Batch, the block used
+    // by Batch is also free for use. We don't need to recycle the
+    // Batch. Note that the correctness is maintained by the invariant,
     //
-    //   Each popBlocks() request returns the entire TransferBatch. Returning
-    //   part of the blocks in a TransferBatch is invalid.
+    //   Each popBlocks() request returns the entire Batch. Returning
+    //   part of the blocks in a Batch is invalid.
     //
-    // This ensures that TransferBatch won't leak the address itself while it's
+    // This ensures that Batch won't leak the address itself while it's
     // still holding other valid data.
     //
     // Besides, BatchGroup is also allocated from BatchClassId and has its
-    // address recorded in the TransferBatch too. To maintain the correctness,
+    // address recorded in the Batch too. To maintain the correctness,
     //
-    //   The address of BatchGroup is always recorded in the last TransferBatch
+    //   The address of BatchGroup is always recorded in the last Batch
     //   in the freelist (also imply that the freelist should only be
-    //   updated with push_front). Once the last TransferBatch is popped,
+    //   updated with push_front). Once the last Batch is popped,
     //   the block used by BatchGroup is also free for use.
     //
-    // With this approach, the blocks used by BatchGroup and TransferBatch are
+    // With this approach, the blocks used by BatchGroup and Batch are
     // reusable and don't need additional space for them.
 
     Sci->FreeListInfo.PushedBlocks += Size;
@@ -542,12 +547,12 @@ template <typename Config> class SizeClassAllocator32 {
     //   1. just allocated a new `BatchGroup`.
     //   2. Only 1 block is pushed when the freelist is empty.
     if (BG->Batches.empty()) {
-      // Construct the `TransferBatch` on the last element.
-      TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
+      // Construct the `Batch` on the last element.
+      BatchT *TB = reinterpret_cast<BatchT *>(
           decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
       TB->clear();
-      // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
-      // recorded in the TransferBatch.
+      // As mentioned above, addresses of `Batch` and `BatchGroup` are
+      // recorded in the Batch.
       TB->add(Array[Size - 1]);
       TB->add(
           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
@@ -558,14 +563,14 @@ template <typename Config> class SizeClassAllocator32 {
       BG->Batches.push_front(TB);
     }
 
-    TransferBatchT *CurBatch = BG->Batches.front();
+    BatchT *CurBatch = BG->Batches.front();
     DCHECK_NE(CurBatch, nullptr);
 
     for (u32 I = 0; I < Size;) {
       u16 UnusedSlots =
           static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
       if (UnusedSlots == 0) {
-        CurBatch = reinterpret_cast<TransferBatchT *>(
+        CurBatch = reinterpret_cast<BatchT *>(
             decompactPtr(SizeClassMap::BatchClassId, Array[I]));
         CurBatch->clear();
         // Self-contained
@@ -595,7 +600,7 @@ template <typename Config> class SizeClassAllocator32 {
   //                            TB
   //
   // Each BlockGroup(BG) will associate with unique group id and the free blocks
-  // are managed by a list of TransferBatch(TB). To reduce the time of inserting
+  // are managed by a list of Batch(TB). To reduce the time of inserting
   // blocks, BGs are sorted and the input `Array` are supposed to be sorted so
   // that we can get better performance of maintaining sorted property.
   // Use `SameGroup=true` to indicate that all blocks in the array are from the
@@ -612,22 +617,21 @@ template <typename Config> class SizeClassAllocator32 {
       BatchGroupT *BG =
           reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock());
       BG->Batches.clear();
-      TransferBatchT *TB =
-          reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
+      BatchT *TB = reinterpret_cast<BatchT *>(C->getBatchClassBlock());
       TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+      BG->MaxCachedPerBatch = BatchT::MaxNumCached;
 
       return BG;
     };
 
     auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) {
-      SinglyLinkedList<TransferBatchT> &Batches = BG->Batches;
-      TransferBatchT *CurBatch = Batches.front();
+      SinglyLinkedList<BatchT> &Batches = BG->Batches;
+      BatchT *CurBatch = Batches.front();
       DCHECK_NE(CurBatch, nullptr);
 
       for (u32 I = 0; I < Size;) {
@@ -635,8 +639,7 @@ template <typename Config> class SizeClassAllocator32 {
         u16 UnusedSlots =
             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
         if (UnusedSlots == 0) {
-          CurBatch =
-              reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
+          CurBatch = reinterpret_cast<BatchT *>(C->getBatchClassBlock());
           CurBatch->clear();
           Batches.push_front(CurBatch);
           UnusedSlots = BG->MaxCachedPerBatch;
@@ -718,7 +721,7 @@ template <typename Config> class SizeClassAllocator32 {
     if (Sci->FreeListInfo.BlockList.empty())
       return 0U;
 
-    SinglyLinkedList<TransferBatchT> &Batches =
+    SinglyLinkedList<BatchT> &Batches =
         Sci->FreeListInfo.BlockList.front()->Batches;
 
     if (Batches.empty()) {
@@ -727,8 +730,8 @@ template <typename Config> class SizeClassAllocator32 {
       Sci->FreeListInfo.BlockList.pop_front();
 
       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
-      // `TransferBatch` with single block.
-      TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
+      // `Batch` with single block.
+      BatchT *TB = reinterpret_cast<BatchT *>(BG);
       ToArray[0] =
           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
       Sci->FreeListInfo.PoppedBlocks += 1;
@@ -736,18 +739,18 @@ template <typename Config> class SizeClassAllocator32 {
     }
 
     // So far, instead of always filling the blocks to `MaxBlockCount`, we only
-    // examine single `TransferBatch` to minimize the time spent on the primary
-    // allocator. Besides, the sizes of `TransferBatch` and
+    // examine single `Batch` to minimize the time spent on the primary
+    // allocator. Besides, the sizes of `Batch` and
     // `CacheT::getMaxCached()` may also impact the time spent on accessing the
     // primary allocator.
     // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
-    // blocks and/or adjust the size of `TransferBatch` according to
+    // blocks and/or adjust the size of `Batch` according to
     // `CacheT::getMaxCached()`.
-    TransferBatchT *B = Batches.front();
+    BatchT *B = Batches.front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    // BachClassId should always take all blocks in the TransferBatch. Read the
+    // BatchClassId should always take all blocks in the Batch. Read the
     // comment in `pushBatchClassBlocks()` for more details.
     const u16 PopCount = ClassId == SizeClassMap::BatchClassId
                              ? B->getCount()
@@ -758,7 +761,7 @@ template <typename Config> class SizeClassAllocator32 {
     // done without holding `Mutex`.
     if (B->empty()) {
       Batches.pop_front();
-      // `TransferBatch` of BatchClassId is self-contained, no need to
+      // `Batch` of BatchClassId is self-contained, no need to
       // deallocate. Read the comment in `pushBatchClassBlocks()` for more
       // details.
       if (ClassId != SizeClassMap::BatchClassId)
@@ -771,7 +774,7 @@ template <typename Config> class SizeClassAllocator32 {
         // We don't keep BatchGroup with zero blocks to avoid empty-checking
         // while allocating. Note that block used for constructing BatchGroup is
         // recorded as free blocks in the last element of BatchGroup::Batches.
-        // Which means, once we pop the last TransferBatch, the block is
+        // Which means, once we pop the last Batch, the block is
         // implicitly deallocated.
         if (ClassId != SizeClassMap::BatchClassId)
           C->deallocate(SizeClassMap::BatchClassId, BG);
@@ -817,8 +820,7 @@ template <typename Config> class SizeClassAllocator32 {
             static_cast<u32>((RegionSize - Offset) / Size));
     DCHECK_GT(NumberOfBlocks, 0U);
 
-    constexpr u32 ShuffleArraySize =
-        MaxNumBatches * TransferBatchT::MaxNumCached;
+    constexpr u32 ShuffleArraySize = MaxNumBatches * BatchT::MaxNumCached;
     // Fill the transfer batches and put them in the size-class freelist. We
     // need to randomize the blocks for security purposes, so we first fill a
     // local array that we then shuffle before populating the batches.
@@ -1098,7 +1100,7 @@ template <typename Config> class SizeClassAllocator32 {
       if (AllocatedGroupSize == 0)
         continue;
 
-      // TransferBatches are pushed in front of BG.Batches. The first one may
+      // Batches are pushed in front of BG.Batches. The first one may
       // not have all caches used.
       const uptr NumBlocks = (BG.Batches.size() - 1) * BG.MaxCachedPerBatch +
                              BG.Batches.front()->getCount();
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 349b9c650f97a..ddfed9a7939d2 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -38,7 +38,7 @@ namespace scudo {
 // they belong to. The Blocks created are shuffled to prevent predictable
 // address patterns (the predictability increases with the size of the Blocks).
 //
-// The 1st Region (for size class 0) holds the TransferBatches. This is a
+// The 1st Region (for size class 0) holds the Batches. This is a
 // structure used to transfer arrays of available pointers from the class size
 // freelist to the thread specific freelist, and back.
 //
@@ -68,15 +68,21 @@ template <typename Config> class SizeClassAllocator64 {
                 "same size as `uptr`");
   typedef SizeClassAllocator64<Config> ThisT;
   typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
-  typedef TransferBatch<ThisT> TransferBatchT;
+  typedef Batch<ThisT> BatchT;
   typedef BatchGroup<ThisT> BatchGroupT;
 
-  static_assert(sizeof(BatchGroupT) <= sizeof(TransferBatchT),
-                "BatchGroupT uses the same class size as TransferBatchT");
+  static constexpr uptr getSizeOfBatchClass() {
+    const uptr HeaderSize = sizeof(BatchT);
+    return roundUp(HeaderSize + sizeof(CompactPtrT) * BatchT::MaxNumCached,
+                   1U << CompactPtrScale);
+  }
+
+  static_assert(sizeof(BatchGroupT) <= getSizeOfBatchClass(),
+                "BatchGroupT also uses BatchClass");
 
   static uptr getSizeByClassId(uptr ClassId) {
     return (ClassId == SizeClassMap::BatchClassId)
-               ? roundUp(sizeof(TransferBatchT), 1U << CompactPtrScale)
+               ? getSizeOfBatchClass()
                : SizeClassMap::getSizeByClassId(ClassId);
   }
 
@@ -193,7 +199,7 @@ template <typename Config> class SizeClassAllocator64 {
   void verifyAllBlocksAreReleasedTestOnly() {
     uptr NumRegionInfo = 0;
     // TODO: Verify all pointers are belong to the right region
-    // `BatchGroup` and `TransferBatch` also use the blocks from BatchClass.
+    // `BatchGroup` and `Batch` also use the blocks from BatchClass.
     uptr BatchClassUsedInFreeLists = 0;
     for (uptr I = 0; I < NumClasses; I++) {
       // We have to count BatchClassUsedInFreeLists in other regions first.
@@ -209,7 +215,7 @@ template <typename Config> class SizeClassAllocator64 {
         const uptr BlockSize = getSizeByClassId(I);
         uptr TotalBlocks = 0;
         for (BatchGroupT &BG : RegionInfoIter->FreeListInfo.BlockList) {
-          // `BG::Batches` are `TransferBatches`. +1 for `BatchGroup`.
+          // `BG::Batches` are `Batches`. +1 for `BatchGroup`.
           BatchClassUsedInFreeLists += BG.Batches.size() + 1;
           for (const auto &It : BG.Batches)
             TotalBlocks += It.getCount();
@@ -239,7 +245,7 @@ template <typename Config> class SizeClassAllocator64 {
           for (const auto &It : BG.Batches)
             TotalBlocks += It.getCount();
         } else {
-          // `BatchGroup` with empty freelist doesn't have `TransferBatch`
+          // `BatchGroup` with empty freelist doesn't have `Batch`
           // record itself.
           ++TotalBlocks;
         }
@@ -995,13 +1001,13 @@ template <typename Config> class SizeClassAllocator64 {
       CHECK(IsBatchClass);
     }
 
-    // Free blocks are recorded by TransferBatch in freelist for all
-    // size-classes. In addition, TransferBatch is allocated from BatchClassId.
+    // Free blocks are recorded by Batch in freelist for all
+    // size-classes. In addition, Batch is allocated from BatchClassId.
     // In order not to use additional block to record the free blocks in
-    // BatchClassId, they are self-contained. I.e., A TransferBatch records the
+    // BatchClassId, they are self-contained. I.e., A Batch records the
     // block address of itself. See the figure below:
     //
-    // TransferBatch at 0xABCD
+    // Batch at 0xABCD
     // +----------------------------+
     // | Free blocks' addr          |
     // | +------+------+------+     |
@@ -1009,25 +1015,25 @@ template <typename Config> class SizeClassAllocator64 {
     // | +------+------+------+     |
     // +----------------------------+
     //
-    // When we allocate all the free blocks in the TransferBatch, the block used
-    // by TransferBatch is also free for use. We don't need to recycle the
-    // TransferBatch. Note that the correctness is maintained by the invariant,
+    // When we allocate all the free blocks in the Batch, the block used
+    // by Batch is also free for use. We don't need to recycle the
+    // Batch. Note that the correctness is maintained by the invariant,
     //
-    //   Each popBlocks() request returns the entire TransferBatch. Returning
-    //   part of the blocks in a TransferBatch is invalid.
+    //   Each popBlocks() request returns the entire Batch. Returning
+    //   part of the blocks in a Batch is invalid.
     //
-    // This ensures that TransferBatch won't leak the address itself while it's
+    // This ensures that Batch won't leak the address itself while it's
     // still holding other valid data.
     //
     // Besides, BatchGroup is also allocated from BatchClassId and has its
-    // address recorded in the TransferBatch too. To maintain the correctness,
+    // address recorded in the Batch too. To maintain the correctness,
     //
-    //   The address of BatchGroup is always recorded in the last TransferBatch
+    //   The address of BatchGroup is always recorded in the last Batch
     //   in the freelist (also imply that the freelist should only be
-    //   updated with push_front). Once the last TransferBatch is popped,
+    //   updated with push_front). Once the last Batch is popped,
     //   the block used by BatchGroup is also free for use.
     //
-    // With this approach, the blocks used by BatchGroup and TransferBatch are
+    // With this approach, the blocks used by BatchGroup and Batch are
     // reusable and don't need additional space for them.
 
     Region->FreeListInfo.PushedBlocks += Size;
@@ -1059,12 +1065,12 @@ template <typename Config> class SizeClassAllocator64 {
     //   1. just allocated a new `BatchGroup`.
     //   2. Only 1 block is pushed when the freelist is empty.
     if (BG->Batches.empty()) {
-      // Construct the `TransferBatch` on the last element.
-      TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(
+      // Construct the `Batch` on the last element.
+      BatchT *TB = reinterpret_cast<BatchT *>(
           decompactPtr(SizeClassMap::BatchClassId, Array[Size - 1]));
       TB->clear();
-      // As mentioned above, addresses of `TransferBatch` and `BatchGroup` are
-      // recorded in the TransferBatch.
+      // As mentioned above, addresses of `Batch` and `BatchGroup` are
+      // recorded in the Batch.
       TB->add(Array[Size - 1]);
       TB->add(
           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(BG)));
@@ -1075,14 +1081,14 @@ template <typename Config> class SizeClassAllocator64 {
       BG->Batches.push_front(TB);
     }
 
-    TransferBatchT *CurBatch = BG->Batches.front();
+    BatchT *CurBatch = BG->Batches.front();
     DCHECK_NE(CurBatch, nullptr);
 
     for (u32 I = 0; I < Size;) {
       u16 UnusedSlots =
           static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
       if (UnusedSlots == 0) {
-        CurBatch = reinterpret_cast<TransferBatchT *>(
+        CurBatch = reinterpret_cast<BatchT *>(
             decompactPtr(SizeClassMap::BatchClassId, Array[I]));
         CurBatch->clear();
         // Self-contained
@@ -1113,7 +1119,7 @@ template <typename Config> class SizeClassAllocator64 {
   //                            TB
   //
   // Each BlockGroup(BG) will associate with unique group id and the free blocks
-  // are managed by a list of TransferBatch(TB). To reduce the time of inserting
+  // are managed by a list of Batch(TB). To reduce the time of inserting
   // blocks, BGs are sorted and the input `Array` are supposed to be sorted so
   // that we can get better performance of maintaining sorted property.
   // Use `SameGroup=true` to indicate that all blocks in the array are from the
@@ -1128,22 +1134,21 @@ template <typename Config> class SizeClassAllocator64 {
       BatchGroupT *BG =
           reinterpret_cast<BatchGroupT *>(C->getBatchClassBlock());
       BG->Batches.clear();
-      TransferBatchT *TB =
-          reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
+      BatchT *TB = reinterpret_cast<BatchT *>(C->getBatchClassBlock());
       TB->clear();
 
       BG->CompactPtrGroupBase = CompactPtrGroupBase;
       BG->Batches.push_front(TB);
       BG->PushedBlocks = 0;
       BG->BytesInBGAtLastCheckpoint = 0;
-      BG->MaxCachedPerBatch = TransferBatchT::MaxNumCached;
+      BG->MaxCachedPerBatch = BatchT::MaxNumCached;
 
       return BG;
     };
 
     auto InsertBlocks = [&](BatchGroupT *BG, CompactPtrT *Array, u32 Size) {
-      SinglyLinkedList<TransferBatchT> &Batches = BG->Batches;
-      TransferBatchT *CurBatch = Batches.front();
+      SinglyLinkedList<BatchT> &Batches = BG->Batches;
+      BatchT *CurBatch = Batches.front();
       DCHECK_NE(CurBatch, nullptr);
 
       for (u32 I = 0; I < Size;) {
@@ -1151,8 +1156,7 @@ template <typename Config> class SizeClassAllocator64 {
         u16 UnusedSlots =
             static_cast<u16>(BG->MaxCachedPerBatch - CurBatch->getCount());
         if (UnusedSlots == 0) {
-          CurBatch =
-              reinterpret_cast<TransferBatchT *>(C->getBatchClassBlock());
+          CurBatch = reinterpret_cast<BatchT *>(C->getBatchClassBlock());
           CurBatch->clear();
           Batches.push_front(CurBatch);
           UnusedSlots = BG->MaxCachedPerBatch;
@@ -1309,7 +1313,7 @@ template <typename Config> class SizeClassAllocator64 {
     if (Region->FreeListInfo.BlockList.empty())
       return 0U;
 
-    SinglyLinkedList<TransferBatchT> &Batches =
+    SinglyLinkedList<BatchT> &Batches =
         Region->FreeListInfo.BlockList.front()->Batches;
 
     if (Batches.empty()) {
@@ -1318,8 +1322,8 @@ template <typename Config> class SizeClassAllocator64 {
       Region->FreeListInfo.BlockList.pop_front();
 
       // Block used by `BatchGroup` is from BatchClassId. Turn the block into
-      // `TransferBatch` with single block.
-      TransferBatchT *TB = reinterpret_cast<TransferBatchT *>(BG);
+      // `Batch` with single block.
+      BatchT *TB = reinterpret_cast<BatchT *>(BG);
       ToArray[0] =
           compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(TB));
       Region->FreeListInfo.PoppedBlocks += 1;
@@ -1327,18 +1331,18 @@ template <typename Config> class SizeClassAllocator64 {
     }
 
     // So far, instead of always filling blocks to `MaxBlockCount`, we only
-    // examine single `TransferBatch` to minimize the time spent in the primary
-    // allocator. Besides, the sizes of `TransferBatch` and
+    // examine single `Batch` to minimize the time spent in the primary
+    // allocator. Besides, the sizes of `Batch` and
     // `CacheT::getMaxCached()` may also impact the time spent on accessing the
     // primary allocator.
     // TODO(chiahungduan): Evaluate if we want to always prepare `MaxBlockCount`
-    // blocks and/or adjust the size of `TransferBatch` according to
+    // blocks and/or adjust the size of `Batch` according to
     // `CacheT::getMaxCached()`.
-    TransferBatchT *B = Batches.front();
+    BatchT *B = Batches.front();
     DCHECK_NE(B, nullptr);
     DCHECK_GT(B->getCount(), 0U);
 
-    // BachClassId should always take all blocks in the TransferBatch. Read the
+    // BatchClassId should always take all blocks in the Batch. Read the
     // comment in `pushBatchClassBlocks()` for more details.
     const u16 PopCount = ClassId == SizeClassMap::BatchClassId
                              ? B->getCount()
@@ -1349,7 +1353,7 @@ template <typename Config> class SizeClassAllocator64 {
     // done without holding `FLLock`.
     if (B->empty()) {
       Batches.pop_front();
-      // `TransferBatch` of BatchClassId is self-contained, no need to
+      // `Batch` of BatchClassId is self-contained, no need to
       // deallocate. Read the comment in `pushBatchClassBlocks()` for more
       // details.
       if (ClassId != SizeClassMap::BatchClassId)
@@ -1362,7 +1366,7 @@ template <typename Config> class SizeClassAllocator64 {
         // We don't keep BatchGroup with zero blocks to avoid empty-checking
         // while allocating. Note that block used for constructing BatchGroup is
         // recorded as free blocks in the last element of BatchGroup::Batches.
-        // Which means, once we pop the last TransferBatch, the block is
+        // Which means, once we pop the last Batch, the block is
         // implicitly deallocated.
         if (ClassId != SizeClassMap::BatchClassId)
           C->deallocate(SizeClassMap::BatchClassId, BG);
@@ -1463,8 +1467,7 @@ template <typename Config> class SizeClassAllocator64 {
                              Size));
     DCHECK_GT(NumberOfBlocks, 0);
 
-    constexpr u32 ShuffleArraySize =
-        MaxNumBatches * TransferBatchT::MaxNumCached;
+    constexpr u32 ShuffleArraySize = MaxNumBatches * BatchT::MaxNumCached;
     CompactPtrT ShuffleArray[ShuffleArraySize];
     DCHECK_LE(NumberOfBlocks, ShuffleArraySize);
 
@@ -1776,7 +1779,7 @@ template <typename Config> class SizeClassAllocator64 {
       DCHECK_LE(Region->RegionBeg, BatchGroupBase);
       DCHECK_GE(AllocatedUserEnd, BatchGroupBase);
       DCHECK_EQ((Region->RegionBeg - BatchGroupBase) % GroupSize, 0U);
-      // TransferBatches are pushed in front of BG.Batches. The first one may
+      // Batches are pushed in front of BG.Batches. The first one may
       // not have all caches used.
       const uptr NumBlocks = (BG->Batches.size() - 1) * BG->MaxCachedPerBatch +
                              BG->Batches.front()->getCount();
@@ -2029,7 +2032,7 @@ template <typename Config> class SizeClassAllocator64 {
       }
 
       BatchGroupT *Cur = GroupsToRelease.front();
-      TransferBatchT *UnusedTransferBatch = nullptr;
+      BatchT *UnusedBatch = nullptr;
       GroupsToRelease.pop_front();
 
       if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) {
@@ -2039,36 +2042,35 @@ template <typename Config> class SizeClassAllocator64 {
         BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint;
         const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch;
 
-        // Note that the first TransferBatches in both `Batches` may not be
-        // full and only the first TransferBatch can have non-full blocks. Thus
+        // Note that the first Batches in both `Batches` may not be
+        // full and only the first Batch can have non-full blocks. Thus
         // we have to merge them before appending one to another.
         if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) {
           BG->Batches.append_back(&Cur->Batches);
         } else {
-          TransferBatchT *NonFullBatch = Cur->Batches.front();
+          BatchT *NonFullBatch = Cur->Batches.front();
           Cur->Batches.pop_front();
           const u16 NonFullBatchCount = NonFullBatch->getCount();
           // The remaining Batches in `Cur` are full.
           BG->Batches.append_back(&Cur->Batches);
 
           if (BG->Batches.front()->getCount() == MaxCachedPerBatch) {
-            // Only 1 non-full TransferBatch, push it to the front.
+            // Only 1 non-full Batch, push it to the front.
             BG->Batches.push_front(NonFullBatch);
           } else {
             const u16 NumBlocksToMove = static_cast<u16>(
                 Min(static_cast<u16>(MaxCachedPerBatch -
                                      BG->Batches.front()->getCount()),
                     NonFullBatchCount));
-            BG->Batches.front()->appendFromTransferBatch(NonFullBatch,
-                                                         NumBlocksToMove);
+            BG->Batches.front()->appendFromBatch(NonFullBatch, NumBlocksToMove);
             if (NonFullBatch->isEmpty())
-              UnusedTransferBatch = NonFullBatch;
+              UnusedBatch = NonFullBatch;
             else
               BG->Batches.push_front(NonFullBatch);
           }
         }
 
-        const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U;
+        const u32 NeededSlots = UnusedBatch == nullptr ? 1U : 2U;
         if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
           ScopedLock L(BatchClassRegion->FLLock);
           pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
@@ -2078,10 +2080,9 @@ template <typename Config> class SizeClassAllocator64 {
         }
         Blocks[Idx++] =
             compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur));
-        if (UnusedTransferBatch) {
-          Blocks[Idx++] =
-              compactPtr(SizeClassMap::BatchClassId,
-                         reinterpret_cast<uptr>(UnusedTransferBatch));
+        if (UnusedBatch) {
+          Blocks[Idx++] = compactPtr(SizeClassMap::BatchClassId,
+                                     reinterpret_cast<uptr>(UnusedBatch));
         }
         Prev = BG;
         BG = BG->Next;
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index 310eee3be27b2..d3afdee67b77d 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -536,8 +536,8 @@ struct PageReleaseContext {
     return true;
   }
 
-  template <class TransferBatchT, typename DecompactPtrT>
-  bool markFreeBlocksInRegion(const IntrusiveList<TransferBatchT> &FreeList,
+  template <class BatchT, typename DecompactPtrT>
+  bool markFreeBlocksInRegion(const IntrusiveList<BatchT> &FreeList,
                               DecompactPtrT DecompactPtr, const uptr Base,
                               const uptr RegionIndex, const uptr RegionSize,
                               bool MayContainLastBlockInRegion) {
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index 5c72da4256004..073dd2fee9cd6 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -392,8 +392,8 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) {
         auto Pair = V.back();
         Cache.deallocate(Pair.first, Pair.second);
         V.pop_back();
-        // This increases the chance of having non-full TransferBatches and it
-        // will jump into the code path of merging TransferBatches.
+        // This increases the chance of having non-full Batches and it
+        // will jump into the code path of merging Batches.
         if (std::rand() % 8 == 0)
           Cache.drain();
       }


