[compiler-rt] [scudo] Add partial chunk heuristic to retrieval algorithm. (PR #105009)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 08:25:11 PDT 2024


llvmbot wrote:


@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Joshua Baehring (JoshuaMBa)

Changes:

Previously, the secondary cache retrieval algorithm would not allow retrieval of memory chunks where the number of unused bytes would exceed `MaxUnusedCachePages * PageSize` bytes. This meant that even if a memory chunk satisfied the requirements of the optimal-fit algorithm, it might not be returned. This remains true when memory tagging is enabled. However, when memory tagging is disabled, a new heuristic is in place: if a memory chunk is a non-optimal fit, the cache retrieval algorithm will attempt to release the excess memory to force a cache hit while keeping RSS down.

Concretely, when a memory chunk is a non-optimal fit, the retrieval algorithm releases the excess memory as long as the amount to be released is at most 4 pages. If more than 4 pages would have to be released, the retrieval algorithm does not consider that cached memory chunk valid for retrieval (see the sketch below).
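
Below is a minimal standalone sketch of that decision, assuming 4 KiB pages and the PR's default cap of 4 releasable pages; the constant `kMaxUnusedCachePages` and the helper `bytesToRelease()` are illustrative stand-ins, not scudo identifiers:

```cpp
#include <cstdint>

constexpr std::uintptr_t kPageSize = 4096;            // assumed page size
constexpr std::uintptr_t kMaxUnusedCachePages = 4;    // assumed unused-bytes budget
constexpr std::uintptr_t kMaxCachePagesToRelease = 4; // the PR's default cap

// Given the unused ("fragmented") bytes at the front of a cached chunk,
// returns how many bytes to release to the OS to force a cache hit, or
// UINTPTR_MAX if releasing would exceed the cap and the chunk must be
// skipped.
std::uintptr_t bytesToRelease(std::uintptr_t FragmentedBytes) {
  const std::uintptr_t MaxUnusedBytes = kMaxUnusedCachePages * kPageSize;
  const std::uintptr_t MaxReleaseBytes = kMaxCachePagesToRelease * kPageSize;
  if (FragmentedBytes <= MaxUnusedBytes)
    return 0; // already within budget; nothing to release
  const std::uintptr_t Excess = FragmentedBytes - MaxUnusedBytes;
  if (Excess > MaxReleaseBytes)
    return UINTPTR_MAX; // more than 4 pages; not valid for retrieval
  // Release whole pages, rounding the excess up to a page boundary.
  return (Excess + kPageSize - 1) / kPageSize * kPageSize;
}
```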

---
Full diff: https://github.com/llvm/llvm-project/pull/105009.diff


2 Files Affected:

- (modified) compiler-rt/lib/scudo/standalone/secondary.h (+95-30) 
- (modified) compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp (+34-4) 


``````````diff
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 27f8697db7838f..2adea34c13b426 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -72,6 +72,14 @@ namespace {
 struct CachedBlock {
   static constexpr u16 CacheIndexMax = UINT16_MAX;
   static constexpr u16 InvalidEntry = CacheIndexMax;
+  //   * MaxCachePagesToRelease default is currently 4
+  //        - We arrived at this value after noticing that mapping
+  //        in larger memory regions performs better than releasing
+  //        memory and forcing a cache hit. The data suggest that
+  //        beyond 16KB, the release execution time exceeds the map
+  //        execution time, so the optimal default is dependent on
+  //        the platform.
+  static constexpr uptr MaxCachePagesToRelease = 4U;
 
   uptr CommitBase = 0;
   uptr CommitSize = 0;
@@ -90,8 +98,9 @@ struct CachedBlock {
 template <typename Config> class MapAllocatorNoCache {
 public:
   void init(UNUSED s32 ReleaseToOsInterval) {}
-  CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
-                       UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+  CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+                       UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+                       UNUSED uptr &EntryHeaderPos) {
     return {};
   }
   void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,13 +343,14 @@ class MapAllocatorCache {
     }
   }
 
-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
@@ -348,47 +358,90 @@ class MapAllocatorCache {
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 RetrievedIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      //  Since allocation sizes don't always match cached memory chunk sizes,
+      //  we allow some memory to be unused (called fragmented bytes). The
+      //  amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //        CommitBase                CommitBase + CommitSize
+      //          V                              V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^                ^                     ^
+      //    Guard         EntryHeaderPos          Guard-page-end
+      //    page-begin
+      //
+      //  [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      //  well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      //  MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      //  valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+
+        const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
           continue;
-        }
-        Found = true;
-        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
+
+        MinDiff = Diff;
+        RetrievedIndex = I;
+        EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThresholdBytes =
             (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
+        if (Diff <= OptimalFitThresholdBytes) {
+          FoundOptimalFit = true;
           break;
         }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
-          continue;
-        OptimalFitIndex = I;
-        MinDiff = Diff;
-        EntryHeaderPos = HeaderPos;
       }
-      if (Found) {
-        Entry = Entries[OptimalFitIndex];
-        remove(OptimalFitIndex);
+      if (RetrievedIndex != CachedBlock::InvalidEntry) {
+        Entry = Entries[RetrievedIndex];
+        remove(RetrievedIndex);
         SuccessfulRetrieves++;
       }
     }
 
+    //  The difference between the retrieved memory chunk size and the
+    //  requested size is at most MaxAllowedFragmentedBytes.
+    //
+    //  /     MaxAllowedFragmentedBytes      \
+    // +--------------------------+-----------+
+    // |                          |           |
+    // +--------------------------+-----------+
+    //  \ Bytes to be released   /      ^
+    //                                  |
+    //                         (may or may not be committed)
+    //
+    //   The number of bytes released to the OS is capped at
+    //   MaxCachePagesToRelease pages.
+    //
+    //   TODO: Consider making MaxCachePagesToRelease configurable since
+    //   the release to OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes =
+          roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
+      const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+      if (FragmentedBytes > MaxUnusedCacheBytes) {
+        const uptr MaxCacheBytesToRelease =
+            CachedBlock::MaxCachePagesToRelease * PageSize;
+        uptr BytesToRelease =
+            roundUp(Min<uptr>(MaxCacheBytesToRelease,
+                              FragmentedBytes - MaxUnusedCacheBytes),
+                    PageSize);
+        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+      }
+    }
+
     return Entry;
   }
 
@@ -659,8 +712,20 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
                                            FillContentsMode FillContents) {
   CachedBlock Entry;
   uptr EntryHeaderPos;
+  uptr MaxAllowedFragmentedBytes;
+  const uptr PageSize = getPageSizeCached();
+
+  if (LIKELY(!useMemoryTagging<Config>(Options))) {
+    const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+    const uptr MaxCacheBytesToRelease =
+        CachedBlock::MaxCachePagesToRelease * PageSize;
+    MaxAllowedFragmentedBytes = MaxUnusedCacheBytes + MaxCacheBytesToRelease;
+  } else {
+    MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+  }
 
-  Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+  Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+                         getHeadersSize(), EntryHeaderPos);
   if (!Entry.isValid())
     return nullptr;
 
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
index e85b6abdb36d22..1f4cab26550cce 100644
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test {
   std::unique_ptr<CacheT> Cache = std::make_unique<CacheT>();
 
   const scudo::uptr PageSize = scudo::getPageSizeCached();
-  // The current test allocation size is set to the minimum size
-  // needed for the scudo allocator to fall back to the secondary allocator
+  // The current test allocation size is set to the maximum
+  // cache entry size.
   static constexpr scudo::uptr TestAllocSize =
       CacheConfig::getDefaultMaxEntrySize();
 
@@ -327,7 +327,8 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
   for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
     scudo::uptr EntryHeaderPos;
     scudo::CachedBlock Entry =
-        Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
+        Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+                        PageSize, 0, EntryHeaderPos);
     EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
   }
 
@@ -336,6 +337,34 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
     MemMap.unmap();
 }
 
+TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) {
+  const scudo::uptr MaxUnusedCacheBytes = PageSize;
+  const scudo::uptr MaxCacheBytesToRelease =
+      scudo::CachedBlock::MaxCachePagesToRelease * PageSize;
+  const scudo::uptr FragmentedBytes =
+      MaxUnusedCacheBytes + MaxCacheBytesToRelease;
+
+  scudo::uptr EntryHeaderPos;
+  scudo::CachedBlock Entry;
+  scudo::MemMapT MemMap = allocate(PageSize + FragmentedBytes);
+  Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(),
+               MemMap.getBase(), MemMap);
+
+  // FragmentedBytes > MaxAllowedFragmentedBytes, so a PageSize-sized
+  // allocation cannot be retrieved from the cache.
+  Entry = Cache->retrieve(/*MaxAllowedFragmentedBytes=*/0, PageSize, PageSize,
+                          0, EntryHeaderPos);
+  EXPECT_FALSE(Entry.isValid());
+
+  // FragmentedBytes == MaxAllowedFragmentedBytes, so a PageSize-sized
+  // allocation can be retrieved from the cache.
+  Entry =
+      Cache->retrieve(FragmentedBytes, PageSize, PageSize, 0, EntryHeaderPos);
+  EXPECT_TRUE(Entry.isValid());
+
+  MemMap.unmap();
+}
+
 TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
   std::vector<scudo::MemMapT> MemMaps;
   // Fill the cache above MaxEntriesCount to force an eviction
@@ -351,7 +380,8 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
   for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
     scudo::uptr EntryHeaderPos;
     RetrievedEntries.push_back(
-        Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
+        Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+                        PageSize, 0, EntryHeaderPos));
     EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
   }
 

``````````



https://github.com/llvm/llvm-project/pull/105009

