[compiler-rt] 5172beb - [scudo] Release pages of larger block more frequently

Chia-hung Duan via llvm-commits llvm-commits at lists.llvm.org
Wed May 31 13:14:42 PDT 2023


Author: Chia-hung Duan
Date: 2023-05-31T20:14:04Z
New Revision: 5172beb4fa204ebbfda831cca60c39b160faf2a6

URL: https://github.com/llvm/llvm-project/commit/5172beb4fa204ebbfda831cca60c39b160faf2a6
DIFF: https://github.com/llvm/llvm-project/commit/5172beb4fa204ebbfda831cca60c39b160faf2a6.diff

LOG: [scudo] Release pages of larger block more frequently

Releasing pages of a large block (size greater than a page) is faster than
releasing those of small blocks. Besides, larger blocks are not expected
to be used as often as smaller blocks, which means we may hold several
pages used by large blocks and rarely get a chance to release them if
there's no explicit M_PURGE call. Therefore, relax the release-interval
condition for large blocks.

This also fixes the code so that it honors the assumption that FORCE_ALL
should always attempt page release.

Differential Revision: https://reviews.llvm.org/D151290
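
In effect, a large-block size class may now skip the release-interval
check when enough bytes have been pushed back since the last checkpoint.
Below is a minimal sketch of that gate; the function name and parameters
are illustrative stand-ins for the allocator's actual state, assuming
scudo's uptr/u64/s32 types and the getPageSizeCached() /
getMonotonicTimeFast() helpers visible in the diffs below.

  // Sketch of the new interval gate (not the actual scudo source).
  static bool isLargeBlock(uptr BlockSize) {
    return BlockSize > getPageSizeCached();
  }

  // Returns true if this release attempt should be skipped because memory
  // was returned within the last IntervalMs milliseconds.
  static bool skipForReleaseInterval(uptr BlockSize, uptr PushedBytesDelta,
                                     u64 LastReleaseAtNs, s32 IntervalMs) {
    // Large blocks with more than 8 pages' worth of pushed bytes bypass
    // the interval check entirely; 8 is half of the ~16 unreleased pages
    // observed while profiling (see the in-source comment below).
    if (isLargeBlock(BlockSize) &&
        PushedBytesDelta > 8 * getPageSizeCached()) {
      return false;
    }
    return LastReleaseAtNs + static_cast<u64>(IntervalMs) * 1000000 >
           getMonotonicTimeFast();
  }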

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/tests/primary_test.cpp

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index b3d6e53dfca26..6d306effe34ee 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -131,6 +131,16 @@ template <typename Config> class SizeClassAllocator32 {
     return CompactPtrGroupBase;
   }
 
+  ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize < PageSize / 16U;
+  }
+
+  ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize > PageSize;
+  }
+
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
     DCHECK_LT(ClassId, NumClasses);
     SizeClassInfo *Sci = getSizeClassInfo(ClassId);
@@ -740,12 +750,8 @@ template <typename Config> class SizeClassAllocator32 {
     if (UNLIKELY(BytesInFreeList == 0))
       return 0;
 
-    bool MaySkip = false;
-
-    if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
+    if (BytesInFreeList <= Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint)
       Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
-      MaySkip = true;
-    }
 
     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
     // so that we won't underestimate the releasable pages. For example, the
@@ -765,34 +771,38 @@ template <typename Config> class SizeClassAllocator32 {
     // (BytesInFreeListAtLastCheckpoint - BytesInFreeList).
     const uptr PushedBytesDelta =
         BytesInFreeList - Sci->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
-    if (PushedBytesDelta < PageSize)
-      MaySkip = true;
+    if (PushedBytesDelta < PageSize && ReleaseType != ReleaseToOS::ForceAll)
+      return 0;
 
     const bool CheckDensity =
-        BlockSize < PageSize / 16U && ReleaseType != ReleaseToOS::ForceAll;
+        isSmallBlock(BlockSize) && ReleaseType != ReleaseToOS::ForceAll;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (CheckDensity) {
-      if (ReleaseType == ReleaseToOS::Normal &&
-          PushedBytesDelta < Sci->AllocatedUser / 16U) {
-        MaySkip = true;
-      }
-    }
-
-    if (MaySkip && ReleaseType != ReleaseToOS::ForceAll)
-      return 0;
+    if (CheckDensity && ReleaseType == ReleaseToOS::Normal)
+      if (PushedBytesDelta < Sci->AllocatedUser / 16U)
+        return 0;
 
     if (ReleaseType == ReleaseToOS::Normal) {
       const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
       if (IntervalMs < 0)
         return 0;
-      if (Sci->ReleaseInfo.LastReleaseAtNs +
-              static_cast<u64>(IntervalMs) * 1000000 >
-          getMonotonicTimeFast()) {
-        return 0; // Memory was returned recently.
+
+      // The constant 8 here was selected by profiling some apps, where the
+      // number of unreleased pages in the large size classes was around 16 or
+      // more. Choose half of that as the heuristic; this also avoids a page
+      // release on every pushBlocks() attempt by large blocks.
+      const bool ByPassReleaseInterval =
+          isLargeBlock(BlockSize) && PushedBytesDelta > 8 * PageSize;
+      if (!ByPassReleaseInterval) {
+        if (Sci->ReleaseInfo.LastReleaseAtNs +
+                static_cast<u64>(IntervalMs) * 1000000 >
+            getMonotonicTimeFast()) {
+          // Memory was returned recently.
+          return 0;
+        }
       }
-    }
+    } // if (ReleaseType == ReleaseToOS::Normal)
 
     const uptr First = Sci->MinRegionIndex;
     const uptr Last = Sci->MaxRegionIndex;
@@ -835,7 +845,7 @@ template <typename Config> class SizeClassAllocator32 {
         continue;
       }
       const uptr PushedBytesDelta = BytesInBG - BG.BytesInBGAtLastCheckpoint;
-      if (PushedBytesDelta < PageSize)
+      if (ReleaseType != ReleaseToOS::ForceAll && PushedBytesDelta < PageSize)
         continue;
 
       // Given the randomness property, we try to release the pages only if the
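
For concreteness, with a typical 4 KiB page the thresholds above work out
as follows (a worked example assuming getPageSizeCached() == 4096, not
values taken from the source):

  // isSmallBlock: BlockSize < 4096 / 16  -> blocks smaller than 256 bytes
  // isLargeBlock: BlockSize > 4096       -> blocks larger than one page
  // Interval bypass: PushedBytesDelta > 8 * 4096, i.e. more than 32 KiB
  // pushed to the freelist since the last checkpoint.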

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index d3a1aea740033..2c76c5f6e5c69 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -506,6 +506,11 @@ template <typename Config> class SizeClassAllocator64 {
     return BlockSize < PageSize / 16U;
   }
 
+  ALWAYS_INLINE static bool isLargeBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize > PageSize;
+  }
+
   // Push the blocks to their batch group. The layout will be like,
   //
   // FreeList - > BG -> BG -> BG
@@ -848,8 +853,6 @@ template <typename Config> class SizeClassAllocator64 {
     if (UNLIKELY(BytesInFreeList == 0))
       return 0;
 
-    bool MaySkip = false;
-
     // Always update `BytesInFreeListAtLastCheckpoint` with the smallest value
     // so that we won't underestimate the releasable pages. For example, the
     // following is the region usage,
@@ -869,38 +872,43 @@ template <typename Config> class SizeClassAllocator64 {
     if (BytesInFreeList <=
         Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint) {
       Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
-      MaySkip = true;
     }
 
     const uptr RegionPushedBytesDelta =
         BytesInFreeList - Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint;
-    if (RegionPushedBytesDelta < PageSize)
-      MaySkip = true;
+    if (RegionPushedBytesDelta < PageSize &&
+        ReleaseType != ReleaseToOS::ForceAll) {
+      return 0;
+    }
 
     const bool CheckDensity = isSmallBlock(BlockSize);
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
-    if (CheckDensity) {
-      if (ReleaseType == ReleaseToOS::Normal &&
-          RegionPushedBytesDelta < Region->TryReleaseThreshold) {
-        MaySkip = true;
-      }
-    }
-
-    if (MaySkip && ReleaseType != ReleaseToOS::ForceAll)
-      return 0;
+    if (CheckDensity && ReleaseType == ReleaseToOS::Normal)
+      if (RegionPushedBytesDelta < Region->TryReleaseThreshold)
+        return 0;
 
     if (ReleaseType == ReleaseToOS::Normal) {
       const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
       if (IntervalMs < 0)
         return 0;
-      if (Region->ReleaseInfo.LastReleaseAtNs +
-              static_cast<u64>(IntervalMs) * 1000000 >
-          getMonotonicTimeFast()) {
-        return 0; // Memory was returned recently.
+
+      // The constant 8 here was selected by profiling some apps, where the
+      // number of unreleased pages in the large size classes was around 16 or
+      // more. Choose half of that as the heuristic; this also avoids a page
+      // release on every pushBlocks() attempt by large blocks.
+      const bool ByPassReleaseInterval =
+          isLargeBlock(BlockSize) && RegionPushedBytesDelta > 8 * PageSize;
+      if (!ByPassReleaseInterval) {
+        if (Region->ReleaseInfo.LastReleaseAtNs +
+                static_cast<u64>(IntervalMs) * 1000000 >
+            getMonotonicTimeFast()) {
+          // Memory was returned recently.
+          return 0;
+        }
       }
-    }
+    } // if (ReleaseType == ReleaseToOS::Normal)
 
     const uptr GroupSize = (1U << GroupSizeLog);
     const uptr AllocatedUserEnd = Region->AllocatedUser + Region->RegionBeg;

diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index 51a7038ac7870..fbb0cfe28344b 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -322,7 +322,7 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, ReleaseToOS) {
   EXPECT_NE(P, nullptr);
   Cache.deallocate(ClassId, P);
   Cache.destroy(nullptr);
-  EXPECT_GT(Allocator->releaseToOS(scudo::ReleaseToOS::Force), 0U);
+  EXPECT_GT(Allocator->releaseToOS(scudo::ReleaseToOS::ForceAll), 0U);
 }
 
 SCUDO_TYPED_TEST(ScudoPrimaryTest, MemoryGroup) {
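
For context, after this change the release modes in the paths above are
gated roughly as follows (a summary inferred from the diffs, not
authoritative documentation of the ReleaseToOS enum):

  enum class ReleaseToOS {
    Normal,   // subject to the pushed-bytes, density, and interval checks
              // (the interval is now bypassable for large blocks once more
              // than 8 pages' worth of bytes have been pushed)
    Force,    // subject to the pushed-bytes checks, but not the interval
    ForceAll, // always attempts page release, which is why the test now
              // uses it in place of Force
  };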
