[compiler-rt] [scudo] Add partial chunk heuristic to retrieval algorithm. (PR #104807)

Joshua Baehring via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 11:05:04 PDT 2024


================
@@ -334,61 +336,110 @@ class MapAllocatorCache {
     }
   }
 
-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
       ScopedLock L(Mutex);
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 OptimalFitIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      //  Since allocation sizes don't always match cached memory chunk sizes,
+      //  we allow some memory to be unused (called fragmented bytes). The
+      //  amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //        CommitBase                CommitBase + CommitSize
+      //          V                              V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^                ^                     ^
+      //    Guard         EntryHeaderPos          Guard-page-end
+      //    page-begin
+      //
+      //  [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      //  well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      //  MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+      //  valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
-          continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        }
-        Found = true;
+
         const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
-            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
-          break;
-        }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
+
+        if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
           continue;
-        OptimalFitIndex = I;
+
         MinDiff = Diff;
+        OptimalFitIndex = I;
         EntryHeaderPos = HeaderPos;
+
+        const uptr OptimalFitThresholdBytes =
+            (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
+        if (Diff <= OptimalFitThresholdBytes) {
+          FoundOptimalFit = true;
+          break;
+        }
       }
-      if (Found) {
+      if (OptimalFitIndex != CachedBlock::InvalidEntry) {
         Entry = Entries[OptimalFitIndex];
         remove(OptimalFitIndex);
         SuccessfulRetrieves++;
       }
     }
 
+    //  The difference between the retrieved memory chunk and the requested
+    //  size is at most MaxAllowedFragmentedBytes.
+    //
+    //  /     MaxAllowedFragmentedBytes      \
+    // +--------------------------+-----------+
+    // |                          |           |
+    // +--------------------------+-----------+
+    //  \ Bytes to be released   /      ^
+    //                                  |
+    //                   (may or may not have been committed)
+    //
+    //   The maximum number of bytes released to the OS is capped by
+    //   ReleaseMemoryUpperBound.
+    //
+    //   * ReleaseMemoryUpperBound currently defaults to 16 KB.
+    //        - We arrived at this value after noticing that mapping in
+    //        larger memory regions performs better than releasing memory
+    //        and forcing a cache hit. The data suggests that beyond 16 KB,
+    //        the release execution time exceeds the map execution time,
+    //        which makes the default inherently platform-dependent.
+    //
+    //   TODO: Consider making ReleaseMemoryUpperBound configurable since
+    //   the release-to-OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
+      const uptr FragmentedBytes = EntryHeaderPos - Entry.CommitBase;
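
The hunk is cut off at the review anchor here. Purely as a hedged sketch of
how the release step described in the comment above could continue (this is
not the committed code; the Min-style cap and the releaseAndZeroPagesToOS
call are assumptions inferred from the comment, and the first two lines
restate the quoted ones for context):

    if (!FoundOptimalFit && Entry.Time != 0) {
      const uptr FragmentedBytes = EntryHeaderPos - Entry.CommitBase;
      // Assumption: cap the amount handed back to the OS at
      // ReleaseMemoryUpperBound, so a cache hit never pays more for the
      // release than a fresh map would cost.
      const uptr BytesToRelease =
          Min(FragmentedBytes, ReleaseMemoryUpperBound);
      if (BytesToRelease != 0)
        Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase,
                                             BytesToRelease);
    }
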
----------------
JoshuaMBa wrote:

Ok, sounds good to me.
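
As a side note for readers following the selection loop quoted above, here
is a small self-contained example (all values hypothetical) that reproduces
the fit computation and the 10% optimal-fit threshold outside of scudo:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    using uptr = std::uintptr_t;

    int main() {
      // Hypothetical numbers: a 1 MiB cached chunk serving a 960 KiB request.
      const uptr CommitBase = 0x200000; // start of the committed region
      const uptr CommitSize = 1 << 20;  // 1 MiB cached chunk
      const uptr Size = 960 << 10;      // 960 KiB requested
      const uptr Alignment = 1 << 12;   // 4 KiB
      const uptr HeadersSize = 64;      // hypothetical header footprint

      // Mirrors the loop body: place the allocation at the end of the chunk.
      const uptr AllocPos = (CommitBase + CommitSize - Size) & ~(Alignment - 1);
      const uptr HeaderPos = AllocPos - HeadersSize;
      const uptr Diff = HeaderPos - CommitBase; // fragmented (unused) bytes

      // "Optimal fit": fragmented bytes within 10% of the used span,
      // mirroring FragmentedBytesDivisor = 10.
      const uptr Threshold = (CommitBase + CommitSize - HeaderPos) / 10;
      std::printf("fragmented: %" PRIuPTR " threshold: %" PRIuPTR " optimal: %s\n",
                  Diff, Threshold, Diff <= Threshold ? "yes" : "no");
      return 0;
    }

With these numbers Diff is 65472 bytes against a threshold of 98310, so the
block would be taken immediately rather than scanning for a tighter fit.
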

https://github.com/llvm/llvm-project/pull/104807

