[compiler-rt] [scudo] Add partial chunk heuristic to retrieval algorithm. (PR #104807)

Mon Aug 19 08:52:34 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Joshua Baehring (JoshuaMBa)

<details>
<summary>Changes</summary>

Previously the secondary cache retrieval algorithm would not allow retrievals of memory chunks where the number of unused bytes would be greater than than `MaxUnusedCachePages
* PageSize` bytes. This meant that even if a memory chunk satisfied the requirements of the optimal fit algorithm, it may not be returned. This remains true if memory tagging is enabled. However, if memory tagging is disabled, a new heuristic has been put in place. Specifically, If a memory chunk is a non-optimal fit, the cache retrieval algorithm will attempt to release the excess memory to force a cache hit while keeping RSS down.

In the event that a memory chunk is a non-optimal fit, the retrieval algorithm will release excess memory as long as the amount of memory to be released is less than or equal to 16 KB. If the amount of memory to be released exceeds 16 KB, the retrieval algorithm will not consider that cached memory chunk valid for retrieval.

---
Full diff: https://github.com/llvm/llvm-project/pull/104807.diff


2 Files Affected:

- (modified) compiler-rt/lib/scudo/standalone/secondary.h (+86-28) 
- (modified) compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp (+4-2) 


``````````diff

diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 27f8697db7838f..f6a0edbf5b5ecb 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -72,6 +72,7 @@ namespace {
 struct CachedBlock {
   static constexpr u16 CacheIndexMax = UINT16_MAX;
   static constexpr u16 InvalidEntry = CacheIndexMax;
+  static constexpr uptr ReleaseMemoryUpperBound = 1 << 14;
 
   uptr CommitBase = 0;
   uptr CommitSize = 0;
@@ -90,8 +91,9 @@ struct CachedBlock {
 template <typename Config> class MapAllocatorNoCache {
 public:
   void init(UNUSED s32 ReleaseToOsInterval) {}
-  CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
-                       UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+  CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+                       UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+                       UNUSED uptr &EntryHeaderPos) {
     return {};
   }
   void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,13 +336,14 @@ class MapAllocatorCache {
     }
   }
 
-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
@@ -348,47 +351,95 @@ class MapAllocatorCache {
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 OptimalFitIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      //  Since allocation sizes don't always match cached memory chunk sizes
+      //  we allow some memory to be unused (called fragmented bytes). The
+      //  amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //        CommitBase                CommitBase + CommitSize
+      //          V                              V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^                ^                     ^
+      //    Guard         EntryHeaderPos          Guard-page-end
+      //    page-begin
+      //
+      //  [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      //  well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      //  MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      //  valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
-          continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        }
-        Found = true;
+
         const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
-            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
-          break;
-        }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
           continue;
-        OptimalFitIndex = I;
+
         MinDiff = Diff;
+        OptimalFitIndex = I;
         EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThesholdBytes =
+            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
+        if (Diff <= OptimalFitThesholdBytes) {
+          FoundOptimalFit = true;
+          break;
+        }
       }
-      if (Found) {
+      if (OptimalFitIndex != CachedBlock::InvalidEntry) {
         Entry = Entries[OptimalFitIndex];
         remove(OptimalFitIndex);
         SuccessfulRetrieves++;
       }
     }
 
+    //  The difference between the retrieved memory chunk and the request
+    //  size is at most MaxAllowedFragmentedBytes
+    //
+    //  /     MaxAllowedFragmentedBytes      \
+    // +--------------------------+-----------+
+    // |                          |           |
+    // +--------------------------+-----------+
+    //  \ Bytes to be released   /      ^
+    //                                  |
+    //                         (may or may not have commited)
+    //
+    //   The maximum number of bytes released to the OS is capped by
+    //   ReleaseMemoryUpperBound
+    //
+    //   * ReleaseMemoryUpperBound default is currently 16 KB
+    //        - We arrived at this value after noticing that mapping
+    //        in larger memory regions performs better than releasing
+    //        memory and forcing a cache hit. According to the data,
+    //        it suggests that beyond 16KB, the release execution time is
+    //        longer than the map execution time. In this way, the default
+    //        is dependent on the platform.
+    //
+    //   TODO : Considering to make ReleaseMemoryUpperBound configurable since
+    //   the release to OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes = EntryHeaderPos - Entry.CommitBase;
+      const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+      if (FragmentedBytes > MaxUnusedCacheBytes) {
+        uptr BytesToRelease =
+            roundUp(Min<uptr>(CachedBlock::ReleaseMemoryUpperBound,
+                              FragmentedBytes - MaxUnusedCacheBytes),
+                    PageSize);
+        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+      }
+    }
+
     return Entry;
   }
 
@@ -659,8 +710,15 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
                                            FillContentsMode FillContents) {
   CachedBlock Entry;
   uptr EntryHeaderPos;
+  uptr MaxAllowedFragmentedBytes;
+  const uptr PageSize = getPageSizeCached();
+
+  MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+  if (LIKELY(!useMemoryTagging<Config>(Options)))
+    MaxAllowedFragmentedBytes += CachedBlock::ReleaseMemoryUpperBound;
 
-  Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+  Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+                         getHeadersSize(), EntryHeaderPos);
   if (!Entry.isValid())
     return nullptr;
 
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
index e85b6abdb36d22..d7a31673ce5702 100644
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -327,7 +327,8 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
   for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
     scudo::uptr EntryHeaderPos;
     scudo::CachedBlock Entry =
-        Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
+        Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+                        PageSize, 0, EntryHeaderPos);
     EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
   }
 
@@ -351,7 +352,8 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
   for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
     scudo::uptr EntryHeaderPos;
     RetrievedEntries.push_back(
-        Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
+        Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+                        PageSize, 0, EntryHeaderPos));
     EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
   }
 

``````````

</details>


https://github.com/llvm/llvm-project/pull/104807