[compiler-rt] [scudo] Add partial chunk heuristic to retrieval algorithm. (PR #105009)

Joshua Baehring via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 22 09:23:56 PDT 2024


================
@@ -334,61 +345,107 @@ class MapAllocatorCache {
     }
   }
 
-  CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
-                       uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+  CachedBlock retrieve(uptr MaxAllowedFragmentedPages, uptr Size,
+                       uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+      EXCLUDES(Mutex) {
     const uptr PageSize = getPageSizeCached();
     // 10% of the requested size proved to be the optimal choice for
     // retrieving cached blocks after testing several options.
     constexpr u32 FragmentedBytesDivisor = 10;
-    bool Found = false;
+    bool FoundOptimalFit = false;
     CachedBlock Entry;
     EntryHeaderPos = 0;
     {
       ScopedLock L(Mutex);
       CallsToRetrieve++;
       if (EntriesCount == 0)
         return {};
-      u32 OptimalFitIndex = 0;
+      u16 RetrievedIndex = CachedBlock::InvalidEntry;
       uptr MinDiff = UINTPTR_MAX;
-      for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+      //  Since allocation sizes don't always match cached memory chunk
+      //  sizes, we allow some memory to be unused (called fragmented bytes).
+      //  The number of unused bytes is exactly EntryHeaderPos - CommitBase.
+      //
+      //        CommitBase                CommitBase + CommitSize
+      //          V                              V
+      //      +---+------------+-----------------+---+
+      //      |   |            |                 |   |
+      //      +---+------------+-----------------+---+
+      //      ^                ^                     ^
+      //    Guard         EntryHeaderPos          Guard-page-end
+      //    page-begin
+      //
+      //  [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+      //  well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+      //  MaxAllowedFragmentedPages * PageSize, the cached memory chunk is
+      //  not considered valid for retrieval.
+      for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
            I = Entries[I].Next) {
         const uptr CommitBase = Entries[I].CommitBase;
         const uptr CommitSize = Entries[I].CommitSize;
         const uptr AllocPos =
             roundDown(CommitBase + CommitSize - Size, Alignment);
         const uptr HeaderPos = AllocPos - HeadersSize;
-        if (HeaderPos > CommitBase + CommitSize)
+        if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
           continue;
-        if (HeaderPos < CommitBase ||
-            AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+
+        const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+        if (Diff > MaxAllowedFragmentedPages * PageSize || Diff >= MinDiff)
           continue;
-        }
-        Found = true;
-        const uptr Diff = HeaderPos - CommitBase;
-        // immediately use a cached block if it's size is close enough to the
-        // requested size.
-        const uptr MaxAllowedFragmentedBytes =
+
+        MinDiff = Diff;
+        RetrievedIndex = I;
+        EntryHeaderPos = HeaderPos;
+
+        // Immediately use a cached block if its size is close enough to the
+        // requested size.
+        const uptr OptimalFitThresholdBytes =
             (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
-        if (Diff <= MaxAllowedFragmentedBytes) {
-          OptimalFitIndex = I;
-          EntryHeaderPos = HeaderPos;
+        if (Diff <= OptimalFitThresholdBytes) {
+          FoundOptimalFit = true;
           break;
         }
-        // keep track of the smallest cached block
-        // that is greater than (AllocSize + HeaderSize)
-        if (Diff > MinDiff)
-          continue;
-        OptimalFitIndex = I;
-        MinDiff = Diff;
-        EntryHeaderPos = HeaderPos;
       }
-      if (Found) {
-        Entry = Entries[OptimalFitIndex];
-        remove(OptimalFitIndex);
+      if (RetrievedIndex != CachedBlock::InvalidEntry) {
+        Entry = Entries[RetrievedIndex];
+        remove(RetrievedIndex);
         SuccessfulRetrieves++;
       }
     }
 
+    //  The difference between the retrieved memory chunk and the requested
+    //  size is at most MaxAllowedFragmentedPages * PageSize
+    //
+    //  / MaxAllowedFragmentedPages * PageSize \
+    // +--------------------------+-------------+
+    // |                          |             |
+    // +--------------------------+-------------+
+    //  \ Bytes to be released   /        ^
+    //                                    |
+    //                           (may or may not be committed)
+    //
+    //   The maximum number of bytes released to the OS is capped at
+    //   MaxReleasedCachePages * PageSize
+    //
+    //   TODO: Consider making MaxReleasedCachePages configurable since
+    //   the release to OS API can vary across systems.
+    if (!FoundOptimalFit && Entry.Time != 0) {
----------------
JoshuaMBa wrote:

Before we merge this, I just wanted to highlight the `!FoundOptimalFit` condition here. We still enforce the `MaxAllowedFragmentedPages` limit, but because optimal fits skip the release step, whenever 10% of the request size exceeds `MaxUnreleasedCacheBytes`, the amount of unused, potentially committed memory may slightly exceed `MaxUnreleasedCacheBytes`. If we want to play it safe, I can remove the `!FoundOptimalFit` condition and it can be added back in a future pull request.
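
For anyone skimming the thread, here is a minimal standalone sketch of that scenario. The constants are invented for illustration only and are not scudo's actual values:

// Hypothetical numbers; scudo's real constants live in the secondary
// cache and differ from these.
#include <cstdio>

using uptr = unsigned long;

int main() {
  const uptr PageSize = 4096;
  // Assumed cap on unused-but-committed bytes kept after retrieval.
  const uptr MaxUnreleasedCacheBytes = 4 * PageSize; // 16 KiB

  // A 1 MiB request: the optimal-fit threshold is 10% of the block size.
  const uptr Size = 1 << 20;
  const uptr OptimalFitThresholdBytes = Size / 10; // 104857 bytes

  // 80 KiB of fragmented bytes is under the 10% threshold, so the entry
  // counts as an optimal fit and the release step is skipped, leaving
  // 80 KiB committed -- well above the assumed 16 KiB cap.
  const uptr FragmentedBytes = 80 * 1024;
  if (FragmentedBytes <= OptimalFitThresholdBytes &&
      FragmentedBytes > MaxUnreleasedCacheBytes)
    std::printf("optimal fit keeps %lu unused bytes committed (cap %lu)\n",
                FragmentedBytes, MaxUnreleasedCacheBytes);
  return 0;
}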

https://github.com/llvm/llvm-project/pull/105009

