[compiler-rt] 19c26a7 - [scudo] Finer lock granularity in Region of SizeClassAllocator64

Chia-hung Duan via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 20 11:37:09 PDT 2023


Author: Chia-hung Duan
Date: 2023-06-20T18:34:48Z
New Revision: 19c26a7c03559a6c7ab1968fcf98e39fd4a86714

URL: https://github.com/llvm/llvm-project/commit/19c26a7c03559a6c7ab1968fcf98e39fd4a86714
DIFF: https://github.com/llvm/llvm-project/commit/19c26a7c03559a6c7ab1968fcf98e39fd4a86714.diff

LOG: [scudo] Finer lock granularity in Region of SizeClassAllocator64

In this CL, we introduce two new locks: MMLock for MemMap operations and
FLLock for freelist operations.

MMLock is held when manipulating pages, for example, when mapping more
pages in populateFreeList() or releasing them in releaseToOSMaybe().

FLLock is held when accessing the freelist, for example, in pushBlocks()
and popBatch().

The new locks increase the parallelism of the operations mentioned
above. For example, populateFreeList() no longer blocks pushBlocks()
while it is still in the system call mapping more pages.
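
As a sketch of the idea (simplified standalone C++, not the actual scudo
code; Region, mapMorePages() and std::mutex here are illustrative
stand-ins for scudo's real types and its page-mapping path):

  #include <mutex>
  #include <vector>

  struct Region {
    std::mutex MMLock;            // guards page (MemMap) operations
    std::mutex FLLock;            // guards the freelist
    std::vector<void *> FreeList; // guarded by FLLock
  };

  // Hypothetical stand-in for the slow syscall path that maps more pages.
  std::vector<void *> mapMorePages() {
    return std::vector<void *>(64, nullptr);
  }

  void populateFreeList(Region &R) {
    // The slow mapping work holds only MMLock; FLLock is taken just long
    // enough to splice the new blocks into the freelist.
    std::lock_guard<std::mutex> ML(R.MMLock);
    std::vector<void *> NewBlocks = mapMorePages();
    std::lock_guard<std::mutex> FL(R.FLLock);
    R.FreeList.insert(R.FreeList.end(), NewBlocks.begin(), NewBlocks.end());
  }

  void pushBlock(Region &R, void *P) {
    // Needs only FLLock, so it is never blocked by an in-flight mapping.
    std::lock_guard<std::mutex> FL(R.FLLock);
    R.FreeList.push_back(P);
  }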

We also enforce a lock hierarchy to avoid deadlock: MMLock must be
acquired before FLLock whenever both need to be held. We don't store
the lock owner, so we rely on static thread-safety annotations to
detect any violation.
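
For instance, with Clang's thread-safety attributes (a minimal sketch;
the macros below use the standard Clang attribute spellings, scudo
defines its own equivalents, and checking ACQUIRED_BEFORE currently
requires -Wthread-safety-beta), taking the locks in the wrong order is
flagged at compile time:

  // Build with: clang++ -c -Wthread-safety -Wthread-safety-beta sketch.cpp
  #define CAPABILITY(x) __attribute__((capability(x)))
  #define ACQUIRED_BEFORE(...) __attribute__((acquired_before(__VA_ARGS__)))
  #define ACQUIRE() __attribute__((acquire_capability()))
  #define RELEASE() __attribute__((release_capability()))

  struct CAPABILITY("mutex") Mutex {
    void lock() ACQUIRE() {}
    void unlock() RELEASE() {}
  };

  struct Region {
    Mutex FLLock;
    Mutex MMLock ACQUIRED_BEFORE(FLLock); // the hierarchy from this CL
  };

  void ok(Region *R) {
    R->MMLock.lock(); // MMLock first: respects the hierarchy
    R->FLLock.lock();
    R->FLLock.unlock();
    R->MMLock.unlock();
  }

  void bad(Region *R) {
    R->FLLock.lock();
    R->MMLock.lock(); // warning: MMLock must be acquired before FLLock
    R->MMLock.unlock();
    R->FLLock.unlock();
  }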

Differential Revision: https://reviews.llvm.org/D149140

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/primary64.h

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index e28213ac4c278..d01d31b0f8768 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -151,26 +151,44 @@ template <typename Config> class SizeClassAllocator64 {
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     RegionInfo *Region = getRegionInfo(ClassId);
-    bool PrintStats = false;
+
     {
-      ScopedLock L(Region->Mutex);
+      ScopedLock L(Region->FLLock);
       TransferBatch *B = popBatchImpl(C, ClassId, Region);
       if (LIKELY(B)) {
         Region->FreeListInfo.PoppedBlocks += B->getCount();
         return B;
       }
+    }
+
+    bool PrintStats = false;
+
+    while (true) {
+      // When two threads compete for `Region->MMLock`, we only want one of them
+      // to call populateFreeList(). To avoid both of them doing that, always
+      // check the freelist before mapping new pages.
+      //
+      // TODO(chiahungduan): Use a condition variable so that we don't need to
+      // hold `Region->MMLock` here.
+      ScopedLock ML(Region->MMLock);
+      {
+        ScopedLock FL(Region->FLLock);
+        TransferBatch *B = popBatchImpl(C, ClassId, Region);
+        if (LIKELY(B)) {
+          Region->FreeListInfo.PoppedBlocks += B->getCount();
+          return B;
+        }
+      }
 
       const bool RegionIsExhausted = Region->Exhausted;
-      if (UNLIKELY(RegionIsExhausted ||
-                   !populateFreeList(C, ClassId, Region))) {
-        PrintStats = !RegionIsExhausted && Region->Exhausted;
-      } else {
-        B = popBatchImpl(C, ClassId, Region);
-        // if `populateFreeList` succeeded, we are supposed to get free blocks.
-        DCHECK_NE(B, nullptr);
-        Region->FreeListInfo.PoppedBlocks += B->getCount();
-        return B;
+      if (!RegionIsExhausted) {
+        // TODO: Make sure the thread that does populateFreeList() gets one
+        // Batch here.
+        populateFreeList(C, ClassId, Region);
       }
+      PrintStats = !RegionIsExhausted && Region->Exhausted;
+      if (Region->Exhausted)
+        break;
     }
 
     // Note that `getStats()` requires locking each region so we can't call it
@@ -183,6 +201,7 @@ template <typename Config> class SizeClassAllocator64 {
           RegionSize >> 20, getSizeByClassId(ClassId));
       Str.output();
     }
+
     return nullptr;
   }
 
@@ -193,45 +212,44 @@ template <typename Config> class SizeClassAllocator64 {
 
     RegionInfo *Region = getRegionInfo(ClassId);
     if (ClassId == SizeClassMap::BatchClassId) {
-      bool PrintStats = false;
-      {
-        ScopedLock L(Region->Mutex);
-        // Constructing a batch group in the free list will use two blocks in
-        // BatchClassId. If we are pushing BatchClassId blocks, we will use the
-        // blocks in the array directly (can't delegate local cache which will
-        // cause a recursive allocation). However, The number of free blocks may
-        // be less than two. Therefore, populate the free list before inserting
-        // the blocks.
-        const bool NeedToRefill =
-            Size == 1U && Region->FreeListInfo.BlockList.empty();
-        // If BatchClass has been exhausted, the program should have been
-        // aborted.
-        DCHECK(!Region->Exhausted);
-
-        if (UNLIKELY(
-                NeedToRefill &&
-                !populateFreeList(C, SizeClassMap::BatchClassId, Region))) {
-          PrintStats = true;
-        } else {
-          pushBlocksImpl(C, SizeClassMap::BatchClassId, Region, Array, Size);
-          Region->FreeListInfo.PushedBlocks += Size;
+      // Constructing a batch group in the free list will use two blocks in
+      // BatchClassId. If we are pushing BatchClassId blocks, we will use the
+      // blocks in the array directly (can't delegate local cache which will
+      // cause a recursive allocation). However, the number of free blocks may
+      // be less than two. Therefore, populate the freelist before inserting the
+      // blocks.
+      while (true) {
+        // TODO(chiahungduan): Move the lock right before the call of
+        // populateFreeList() by using a condition variable. See more details in
+        // the comment of popBatch().
+        ScopedLock L(Region->MMLock);
+        {
+          ScopedLock L(Region->FLLock);
+          const bool NeedToRefill =
+              Size == 1U && Region->FreeListInfo.BlockList.empty();
+          if (!NeedToRefill) {
+            pushBlocksImpl(C, SizeClassMap::BatchClassId, Region, Array, Size);
+            Region->FreeListInfo.PushedBlocks += Size;
+            return;
+          }
         }
-      }
 
-      // Note that `getStats()` requires the lock of each region so we can't
-      // call it while locking the Region->Mutex in the above.
-      if (UNLIKELY(PrintStats)) {
-        ScopedString Str;
-        getStats(&Str);
-        Str.append(
-            "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
-            RegionSize >> 20, getSizeByClassId(ClassId));
-        Str.output();
-        // Theoretically, BatchClass shouldn't be used up. Abort immediately
-        // when it happens.
-        reportOutOfBatchClass();
+        if (UNLIKELY(!populateFreeList(C, SizeClassMap::BatchClassId, Region)))
+          break;
       }
 
+      // Reaching here means that the BatchClass region has been used up. Dump
+      // the region stats before the program shuts down.
+      ScopedString Str;
+      getStats(&Str);
+      Str.append(
+          "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
+          RegionSize >> 20, getSizeByClassId(ClassId));
+      Str.output();
+      // Theoretically, BatchClass shouldn't be used up. Abort immediately
+      // when it happens.
+      reportOutOfBatchClass();
+
       return;
     }
 
@@ -253,12 +271,21 @@ template <typename Config> class SizeClassAllocator64 {
       Array[J] = Cur;
     }
 
-    ScopedLock L(Region->Mutex);
-    pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup);
+    {
+      ScopedLock L(Region->FLLock);
+      pushBlocksImpl(C, ClassId, Region, Array, Size, SameGroup);
+      Region->FreeListInfo.PushedBlocks += Size;
+    }
 
-    Region->FreeListInfo.PushedBlocks += Size;
-    if (ClassId != SizeClassMap::BatchClassId)
+    // Only non-BatchClass blocks get here; try to release the region's pages.
+
+    // Note that tryLock() may fail spuriously. Given that this should be rare
+    // and that skipping a page release is fine, we don't take extra measures
+    // to ensure the release is done.
+    if (Region->MMLock.tryLock()) {
       releaseToOSMaybe(Region, ClassId);
+      Region->MMLock.unlock();
+    }
   }
 
   void disable() NO_THREAD_SAFETY_ANALYSIS {
@@ -266,17 +293,21 @@ template <typename Config> class SizeClassAllocator64 {
     for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
       if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
         continue;
-      getRegionInfo(static_cast<uptr>(I))->Mutex.lock();
+      getRegionInfo(static_cast<uptr>(I))->MMLock.lock();
+      getRegionInfo(static_cast<uptr>(I))->FLLock.lock();
     }
-    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock();
+    getRegionInfo(SizeClassMap::BatchClassId)->MMLock.lock();
+    getRegionInfo(SizeClassMap::BatchClassId)->FLLock.lock();
   }
 
   void enable() NO_THREAD_SAFETY_ANALYSIS {
-    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
+    getRegionInfo(SizeClassMap::BatchClassId)->FLLock.unlock();
+    getRegionInfo(SizeClassMap::BatchClassId)->MMLock.unlock();
     for (uptr I = 0; I < NumClasses; I++) {
       if (I == SizeClassMap::BatchClassId)
         continue;
-      getRegionInfo(I)->Mutex.unlock();
+      getRegionInfo(I)->FLLock.unlock();
+      getRegionInfo(I)->MMLock.unlock();
     }
   }
 
@@ -288,7 +319,8 @@ template <typename Config> class SizeClassAllocator64 {
       // TODO: The call of `iterateOverBlocks` requires disabling
       // SizeClassAllocator64. We may consider locking each region on demand
       // only.
-      Region->Mutex.assertHeld();
+      Region->FLLock.assertHeld();
+      Region->MMLock.assertHeld();
       const uptr BlockSize = getSizeByClassId(I);
       const uptr From = Region->RegionBeg;
       const uptr To = From + Region->MemMapInfo.AllocatedUser;
@@ -304,11 +336,15 @@ template <typename Config> class SizeClassAllocator64 {
     uptr PushedBlocks = 0;
     for (uptr I = 0; I < NumClasses; I++) {
       RegionInfo *Region = getRegionInfo(I);
-      ScopedLock L(Region->Mutex);
-      if (Region->MemMapInfo.MappedUser)
+      {
+        ScopedLock L(Region->MMLock);
         TotalMapped += Region->MemMapInfo.MappedUser;
-      PoppedBlocks += Region->FreeListInfo.PoppedBlocks;
-      PushedBlocks += Region->FreeListInfo.PushedBlocks;
+      }
+      {
+        ScopedLock L(Region->FLLock);
+        PoppedBlocks += Region->FreeListInfo.PoppedBlocks;
+        PushedBlocks += Region->FreeListInfo.PushedBlocks;
+      }
     }
     Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu "
                 "allocations; remains %zu\n",
@@ -317,7 +353,8 @@ template <typename Config> class SizeClassAllocator64 {
 
     for (uptr I = 0; I < NumClasses; I++) {
       RegionInfo *Region = getRegionInfo(I);
-      ScopedLock L(Region->Mutex);
+      ScopedLock L1(Region->MMLock);
+      ScopedLock L2(Region->FLLock);
       getStats(Str, I, Region, 0);
     }
   }
@@ -340,7 +377,7 @@ template <typename Config> class SizeClassAllocator64 {
       if (I == SizeClassMap::BatchClassId)
         continue;
       RegionInfo *Region = getRegionInfo(I);
-      ScopedLock L(Region->Mutex);
+      ScopedLock L(Region->MMLock);
       TotalReleasedBytes += releaseToOSMaybe(Region, I, ReleaseType);
     }
     return TotalReleasedBytes;
@@ -378,7 +415,7 @@ template <typename Config> class SizeClassAllocator64 {
       if (I == SizeClassMap::BatchClassId)
         continue;
       uptr Begin = RegionInfoArray[I].RegionBeg;
-      // TODO(chiahungduan): In fact, We need to lock the RegionInfo::Mutex.
+      // TODO(chiahungduan): In fact, we need to lock the RegionInfo::MMLock.
       // However, the RegionInfoData is passed with const qualifier and lock the
       // mutex requires modifying RegionInfoData, which means we need to remove
       // the const qualifier. This may lead to another undefined behavior (The
@@ -453,16 +490,19 @@ template <typename Config> class SizeClassAllocator64 {
   };
 
   struct UnpaddedRegionInfo {
-    HybridMutex Mutex;
-    // This is initialized before thread creation.
+    // Mutex for operations on the freelist
+    HybridMutex FLLock;
+    // Mutex for MemMap operations
+    HybridMutex MMLock ACQUIRED_BEFORE(FLLock);
+    // `RegionBeg` is initialized before thread creation and won't be changed.
     uptr RegionBeg = 0;
-    u32 RandState GUARDED_BY(Mutex) = 0;
-    BlocksInfo FreeListInfo GUARDED_BY(Mutex);
-    PagesInfo MemMapInfo GUARDED_BY(Mutex);
+    u32 RandState GUARDED_BY(MMLock) = 0;
+    BlocksInfo FreeListInfo GUARDED_BY(FLLock);
+    PagesInfo MemMapInfo GUARDED_BY(MMLock);
     // The minimum size of pushed blocks to trigger page release.
-    uptr TryReleaseThreshold GUARDED_BY(Mutex) = 0;
-    ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex) = {};
-    bool Exhausted GUARDED_BY(Mutex) = false;
+    uptr TryReleaseThreshold GUARDED_BY(MMLock) = 0;
+    ReleaseToOsInfo ReleaseInfo GUARDED_BY(MMLock) = {};
+    bool Exhausted GUARDED_BY(MMLock) = false;
   };
   struct RegionInfo : UnpaddedRegionInfo {
     char Padding[SCUDO_CACHE_LINE_SIZE -
@@ -538,7 +578,7 @@ template <typename Config> class SizeClassAllocator64 {
   // The region mutex needs to be held while calling this method.
   void pushBlocksImpl(CacheT *C, uptr ClassId, RegionInfo *Region,
                       CompactPtrT *Array, u32 Size, bool SameGroup = false)
-      REQUIRES(Region->Mutex) {
+      REQUIRES(Region->FLLock) {
     DCHECK_GT(Size, 0U);
 
     auto CreateGroup = [&](uptr CompactPtrGroupBase) {
@@ -713,7 +753,7 @@ template <typename Config> class SizeClassAllocator64 {
   //
   // The region mutex needs to be held while calling this method.
   TransferBatch *popBatchImpl(CacheT *C, uptr ClassId, RegionInfo *Region)
-      REQUIRES(Region->Mutex) {
+      REQUIRES(Region->FLLock) {
     if (Region->FreeListInfo.BlockList.empty())
       return nullptr;
 
@@ -743,7 +783,7 @@ template <typename Config> class SizeClassAllocator64 {
   }
 
   NOINLINE bool populateFreeList(CacheT *C, uptr ClassId, RegionInfo *Region)
-      REQUIRES(Region->Mutex) {
+      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
     const uptr Size = getSizeByClassId(ClassId);
     const u16 MaxCount = TransferBatch::getMaxCached(Size);
 
@@ -796,6 +836,8 @@ template <typename Config> class SizeClassAllocator64 {
     for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
       ShuffleArray[I] = compactPtrInternal(CompactPtrBase, P);
 
+    ScopedLock L(Region->FLLock);
+
     if (ClassId != SizeClassMap::BatchClassId) {
       u32 N = 1;
       uptr CurGroup = compactPtrGroup(ShuffleArray[0]);
@@ -827,7 +869,7 @@ template <typename Config> class SizeClassAllocator64 {
   }
 
   void getStats(ScopedString *Str, uptr ClassId, RegionInfo *Region, uptr Rss)
-      REQUIRES(Region->Mutex) {
+      REQUIRES(Region->MMLock, Region->FLLock) {
     if (Region->MemMapInfo.MappedUser == 0)
       return;
     const uptr InUse =
@@ -848,7 +890,10 @@ template <typename Config> class SizeClassAllocator64 {
 
   NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                  ReleaseToOS ReleaseType = ReleaseToOS::Normal)
-      REQUIRES(Region->Mutex) {
+      REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
+    // TODO(chiahungduan): Release `FLLock` when doing releaseFreeMemoryToOS().
+    ScopedLock L(Region->FLLock);
+
     const uptr BlockSize = getSizeByClassId(ClassId);
     const uptr PageSize = getPageSizeCached();
 

