[compiler-rt] c753a30 - [scudo][standalone] Various improvements wrt RSS

Kostya Kortchinsky via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 12:26:42 PST 2020


Author: Kostya Kortchinsky
Date: 2020-02-26T12:25:43-08:00
New Revision: c753a306fd1ad4a7e5c61367225abce86ac29cd7

URL: https://github.com/llvm/llvm-project/commit/c753a306fd1ad4a7e5c61367225abce86ac29cd7
DIFF: https://github.com/llvm/llvm-project/commit/c753a306fd1ad4a7e5c61367225abce86ac29cd7.diff

LOG: [scudo][standalone] Various improvements wrt RSS

Summary:
This patch includes several changes to reduce the overall footprint
of the allocator:
- for realloc'd chunks: only keep the same chunk when lowering the size
  if the delta is within a page worth of bytes;
- when draining a cache: drain the beginning, not the end; we add pointers
  at the end, so that meant we were draining the most recently added
  pointers;
- change the release code to account for a freed up last page: when
  scanning the pages, we were looking for pages fully covered by blocks;
  in the event of the last page, if it's only partially covered, we
  wouldn't mark it as releasable - even though what follows the last chunk
  is all 0s. So now mark the rest of the page as releasable, and adapt the
  test;
- add a missing `setReleaseToOsIntervalMs` to the cacheless secondary;
- adjust the Android classes based on more captures thanks to pcc@'s
  tool.

Reviewers: pcc, cferris, hctim, eugenis

Subscribers: #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D75142

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/combined.h
    compiler-rt/lib/scudo/standalone/local_cache.h
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h
    compiler-rt/lib/scudo/standalone/release.h
    compiler-rt/lib/scudo/standalone/secondary.h
    compiler-rt/lib/scudo/standalone/size_class_map.h
    compiler-rt/lib/scudo/standalone/tests/release_test.cpp
    compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index f49fc9aac84c..8456dc82d20e 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -449,6 +449,12 @@ class Allocator {
   void *reallocate(void *OldPtr, uptr NewSize, uptr Alignment = MinAlignment) {
     initThreadMaybe();
 
+    if (UNLIKELY(NewSize >= MaxAllowedMallocSize)) {
+      if (Options.MayReturnNull)
+        return nullptr;
+      reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize);
+    }
+
     void *OldTaggedPtr = OldPtr;
     OldPtr = untagPointerMaybe(OldPtr);
 
@@ -502,9 +508,7 @@ class Allocator {
     // reasonable delta), we just keep the old block, and update the chunk
     // header to reflect the size change.
     if (reinterpret_cast<uptr>(OldPtr) + NewSize <= BlockEnd) {
-      const uptr Delta =
-          OldSize < NewSize ? NewSize - OldSize : OldSize - NewSize;
-      if (Delta <= SizeClassMap::MaxSize / 2) {
+      if (NewSize > OldSize || (OldSize - NewSize) < getPageSizeCached()) {
         Chunk::UnpackedHeader NewHeader = OldHeader;
         NewHeader.SizeOrUnusedBytes =
             (ClassId ? NewSize

diff  --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
index b08abd3e5d9b..a6425fc6d1ea 100644
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -165,13 +165,14 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
 
   NOINLINE void drain(PerClass *C, uptr ClassId) {
     const u32 Count = Min(C->MaxCount / 2, C->Count);
-    const uptr FirstIndexToDrain = C->Count - Count;
-    TransferBatch *B = createBatch(ClassId, C->Chunks[FirstIndexToDrain]);
+    TransferBatch *B = createBatch(ClassId, C->Chunks[0]);
     if (UNLIKELY(!B))
       reportOutOfMemory(
           SizeClassAllocator::getSizeByClassId(SizeClassMap::BatchClassId));
-    B->setFromArray(&C->Chunks[FirstIndexToDrain], Count);
+    B->setFromArray(&C->Chunks[0], Count);
     C->Count -= Count;
+    for (uptr I = 0; I < C->Count; I++)
+      C->Chunks[I] = C->Chunks[I + Count];
     Allocator->pushBatch(ClassId, B);
   }
 };

diff  --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index b50f91d492ed..e3376e746ebd 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -386,11 +386,11 @@ class SizeClassAllocator32 {
         (Sci->Stats.PoppedBlocks - Sci->Stats.PushedBlocks) * BlockSize;
     if (BytesInFreeList < PageSize)
       return 0; // No chance to release anything.
-    if ((Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) *
-            BlockSize <
-        PageSize) {
+    const uptr BytesPushed =
+        (Sci->Stats.PushedBlocks - Sci->ReleaseInfo.PushedBlocksAtLastRelease) *
+        BlockSize;
+    if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
-    }
 
     if (!Force) {
       const s32 IntervalMs = getReleaseToOsIntervalMs();
@@ -407,12 +407,13 @@ class SizeClassAllocator32 {
     // iterate multiple times over the same freelist if a ClassId spans multiple
     // regions. But it will have to do for now.
     uptr TotalReleasedBytes = 0;
+    const uptr Size = (RegionSize / BlockSize) * BlockSize;
     for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) {
       if (PossibleRegions[I] - 1U == ClassId) {
         const uptr Region = I * RegionSize;
         ReleaseRecorder Recorder(Region);
-        releaseFreeMemoryToOS(Sci->FreeList, Region, RegionSize / PageSize,
-                              BlockSize, &Recorder);
+        releaseFreeMemoryToOS(Sci->FreeList, Region, Size, BlockSize,
+                              &Recorder);
         if (Recorder.getReleasedRangesCount() > 0) {
           Sci->ReleaseInfo.PushedBlocksAtLastRelease = Sci->Stats.PushedBlocks;
           Sci->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();

diff  --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 188f3082aee2..e1560001f7f2 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -395,12 +395,11 @@ class SizeClassAllocator64 {
         (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
     if (BytesInFreeList < PageSize)
       return 0; // No chance to release anything.
-    if ((Region->Stats.PushedBlocks -
-         Region->ReleaseInfo.PushedBlocksAtLastRelease) *
-            BlockSize <
-        PageSize) {
+    const uptr BytesPushed = (Region->Stats.PushedBlocks -
+                              Region->ReleaseInfo.PushedBlocksAtLastRelease) *
+                             BlockSize;
+    if (BytesPushed < PageSize)
       return 0; // Nothing new to release.
-    }
 
     if (!Force) {
       const s32 IntervalMs = getReleaseToOsIntervalMs();
@@ -415,8 +414,7 @@ class SizeClassAllocator64 {
 
     ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
     releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
-                          roundUpTo(Region->AllocatedUser, PageSize) / PageSize,
-                          BlockSize, &Recorder);
+                          Region->AllocatedUser, BlockSize, &Recorder);
 
     if (Recorder.getReleasedRangesCount() > 0) {
       Region->ReleaseInfo.PushedBlocksAtLastRelease =

diff  --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index c4f679711073..323bf9db6dca 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -107,7 +107,8 @@ class PackedCounterArray {
 
   void incRange(uptr From, uptr To) const {
     DCHECK_LE(From, To);
-    for (uptr I = From; I <= To; I++)
+    const uptr Top = Min(To + 1, N);
+    for (uptr I = From; I < Top; I++)
       inc(I);
   }
 
@@ -166,8 +167,7 @@ template <class ReleaseRecorderT> class FreePagesRangeTracker {
 template <class TransferBatchT, class ReleaseRecorderT>
 NOINLINE void
 releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
-                      uptr AllocatedPagesCount, uptr BlockSize,
-                      ReleaseRecorderT *Recorder) {
+                      uptr Size, uptr BlockSize, ReleaseRecorderT *Recorder) {
   const uptr PageSize = getPageSizeCached();
 
   // Figure out the number of chunks per page and whether we can take a fast
@@ -204,12 +204,13 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
     }
   }
 
-  PackedCounterArray Counters(AllocatedPagesCount, FullPagesBlockCountMax);
+  const uptr PagesCount = roundUpTo(Size, PageSize) / PageSize;
+  PackedCounterArray Counters(PagesCount, FullPagesBlockCountMax);
   if (!Counters.isAllocated())
     return;
 
   const uptr PageSizeLog = getLog2(PageSize);
-  const uptr End = Base + AllocatedPagesCount * PageSize;
+  const uptr RoundedSize = PagesCount << PageSizeLog;
 
   // Iterate over free chunks and count how many free chunks affect each
   // allocated page.
@@ -223,11 +224,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
           (It.getCount() != 0) &&
           (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
       for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
-        const uptr P = reinterpret_cast<uptr>(It.get(I));
-        if (P >= Base && P < End)
-          Counters.inc((P - Base) >> PageSizeLog);
+        const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
+        // This takes care of P < Base and P >= Base + RoundedSize.
+        if (P < RoundedSize)
+          Counters.inc(P >> PageSizeLog);
       }
     }
+    for (uptr P = Size; P < RoundedSize; P += BlockSize)
+      Counters.inc(P >> PageSizeLog);
   } else {
     // In all other cases chunks might affect more than one page.
     for (const auto &It : FreeList) {
@@ -236,12 +240,15 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
           (It.getCount() != 0) &&
           (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
       for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
-        const uptr P = reinterpret_cast<uptr>(It.get(I));
-        if (P >= Base && P < End)
-          Counters.incRange((P - Base) >> PageSizeLog,
-                            (P - Base + BlockSize - 1) >> PageSizeLog);
+        const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
+        // This takes care of P < Base and P >= Base + RoundedSize.
+        if (P < RoundedSize)
+          Counters.incRange(P >> PageSizeLog,
+                            (P + BlockSize - 1) >> PageSizeLog);
       }
     }
+    for (uptr P = Size; P < RoundedSize; P += BlockSize)
+      Counters.incRange(P >> PageSizeLog, (P + BlockSize - 1) >> PageSizeLog);
   }
 
   // Iterate over pages detecting ranges of pages with chunk Counters equal

diff  --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 8ae8108b2eaa..9d5f130f2d45 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -60,6 +60,7 @@ class MapAllocatorNoCache {
   void disable() {}
   void enable() {}
   void releaseToOS() {}
+  void setReleaseToOsIntervalMs(UNUSED s32 Interval) {}
 };
 
 template <uptr MaxEntriesCount = 32U, uptr MaxEntrySize = 1UL << 19,

diff  --git a/compiler-rt/lib/scudo/standalone/size_class_map.h b/compiler-rt/lib/scudo/standalone/size_class_map.h
index 3bbd165289e6..5ed8e2845b38 100644
--- a/compiler-rt/lib/scudo/standalone/size_class_map.h
+++ b/compiler-rt/lib/scudo/standalone/size_class_map.h
@@ -170,39 +170,37 @@ class TableSizeClassMap : public SizeClassMapBase<Config> {
 
 struct AndroidSizeClassConfig {
 #if SCUDO_WORDSIZE == 64U
-  // Measured using a system_server profile.
   static const uptr NumBits = 7;
   static const uptr MinSizeLog = 4;
   static const uptr MidSizeLog = 6;
   static const uptr MaxSizeLog = 16;
   static const u32 MaxNumCachedHint = 14;
-  static const uptr MaxBytesCachedLog = 14;
+  static const uptr MaxBytesCachedLog = 13;
 
   static constexpr u32 Classes[] = {
-      0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000a0,
-      0x000b0, 0x000e0, 0x00110, 0x00130, 0x001a0, 0x00240, 0x00320, 0x00430,
-      0x00640, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01150, 0x01ad0, 0x02190,
-      0x03610, 0x04010, 0x04510, 0x04d10, 0x05a10, 0x07310, 0x09610, 0x10010,
+      0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00090, 0x000b0,
+      0x000c0, 0x000e0, 0x00120, 0x00160, 0x001c0, 0x00250, 0x00320, 0x00450,
+      0x00670, 0x00830, 0x00a10, 0x00c30, 0x01010, 0x01210, 0x01bd0, 0x02210,
+      0x02d90, 0x03790, 0x04010, 0x04810, 0x05a10, 0x07310, 0x08210, 0x10010,
   };
   static const uptr SizeDelta = 16;
 #else
-  // Measured using a dex2oat profile.
   static const uptr NumBits = 8;
   static const uptr MinSizeLog = 4;
-  static const uptr MidSizeLog = 8;
+  static const uptr MidSizeLog = 7;
   static const uptr MaxSizeLog = 16;
   static const u32 MaxNumCachedHint = 14;
-  static const uptr MaxBytesCachedLog = 14;
+  static const uptr MaxBytesCachedLog = 13;
 
   static constexpr u32 Classes[] = {
       0x00020, 0x00030, 0x00040, 0x00050, 0x00060, 0x00070, 0x00080, 0x00090,
-      0x000a0, 0x000b0, 0x000c0, 0x000d0, 0x000e0, 0x000f0, 0x00100, 0x00110,
-      0x00120, 0x00140, 0x00150, 0x00170, 0x00190, 0x001c0, 0x001f0, 0x00220,
-      0x00240, 0x00260, 0x002a0, 0x002e0, 0x00310, 0x00340, 0x00380, 0x003b0,
-      0x003e0, 0x00430, 0x00490, 0x00500, 0x00570, 0x005f0, 0x00680, 0x00720,
-      0x007d0, 0x00890, 0x00970, 0x00a50, 0x00b80, 0x00cb0, 0x00e30, 0x00fb0,
-      0x011b0, 0x01310, 0x01470, 0x01790, 0x01b50, 0x01fd0, 0x02310, 0x02690,
-      0x02b10, 0x02fd0, 0x03610, 0x03e10, 0x04890, 0x05710, 0x06a90, 0x10010,
+      0x000a0, 0x000b0, 0x000c0, 0x000e0, 0x000f0, 0x00110, 0x00120, 0x00130,
+      0x00150, 0x00160, 0x00170, 0x00190, 0x001d0, 0x00210, 0x00240, 0x002a0,
+      0x00330, 0x00370, 0x003a0, 0x00400, 0x00430, 0x004a0, 0x00530, 0x00610,
+      0x00730, 0x00840, 0x00910, 0x009c0, 0x00a60, 0x00b10, 0x00ca0, 0x00e00,
+      0x00fb0, 0x01030, 0x01130, 0x011f0, 0x01490, 0x01650, 0x01930, 0x02010,
+      0x02190, 0x02490, 0x02850, 0x02d50, 0x03010, 0x03210, 0x03c90, 0x04090,
+      0x04510, 0x04810, 0x05c10, 0x06f10, 0x07310, 0x08010, 0x0c010, 0x10010,
   };
   static const uptr SizeDelta = 16;
 #endif

diff  --git a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
index 22d73d09d53d..a7478f47479d 100644
--- a/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/release_test.cpp
@@ -147,14 +147,14 @@ template <class SizeClassMap> struct FreeBatch {
 
 template <class SizeClassMap> void testReleaseFreeMemoryToOS() {
   typedef FreeBatch<SizeClassMap> Batch;
-  const scudo::uptr AllocatedPagesCount = 1024;
+  const scudo::uptr PagesCount = 1024;
   const scudo::uptr PageSize = scudo::getPageSizeCached();
   std::mt19937 R;
   scudo::u32 RandState = 42;
 
   for (scudo::uptr I = 1; I <= SizeClassMap::LargestClassId; I++) {
     const scudo::uptr BlockSize = SizeClassMap::getSizeByClassId(I);
-    const scudo::uptr MaxBlocks = AllocatedPagesCount * PageSize / BlockSize;
+    const scudo::uptr MaxBlocks = PagesCount * PageSize / BlockSize;
 
     // Generate the random free list.
     std::vector<scudo::uptr> FreeArray;
@@ -190,7 +190,7 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() {
 
     // Release the memory.
     ReleasedPagesRecorder Recorder;
-    releaseFreeMemoryToOS(FreeList, 0, AllocatedPagesCount, BlockSize,
+    releaseFreeMemoryToOS(FreeList, 0, MaxBlocks * BlockSize, BlockSize,
                           &Recorder);
 
     // Verify that there are no released pages touched by used chunks and all
@@ -202,7 +202,7 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() {
     scudo::uptr CurrentBlock = 0;
     InFreeRange = false;
     scudo::uptr CurrentFreeRangeStart = 0;
-    for (scudo::uptr I = 0; I <= MaxBlocks; I++) {
+    for (scudo::uptr I = 0; I < MaxBlocks; I++) {
       const bool IsFreeBlock =
           FreeBlocks.find(CurrentBlock) != FreeBlocks.end();
       if (IsFreeBlock) {
@@ -238,6 +238,19 @@ template <class SizeClassMap> void testReleaseFreeMemoryToOS() {
       CurrentBlock += BlockSize;
     }
 
+    if (InFreeRange) {
+      scudo::uptr P = scudo::roundUpTo(CurrentFreeRangeStart, PageSize);
+      const scudo::uptr EndPage =
+          scudo::roundUpTo(MaxBlocks * BlockSize, PageSize);
+      while (P + PageSize <= EndPage) {
+        const bool PageReleased =
+            Recorder.ReportedPages.find(P) != Recorder.ReportedPages.end();
+        EXPECT_EQ(true, PageReleased);
+        VerifiedReleasedPages++;
+        P += PageSize;
+      }
+    }
+
     EXPECT_EQ(Recorder.ReportedPages.size(), VerifiedReleasedPages);
 
     while (!FreeList.empty()) {

diff  --git a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
index c9e173f8e539..88859ded5b27 100644
--- a/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp
@@ -28,7 +28,6 @@ TEST(ScudoSizeClassMapTest, AndroidSizeClassMap) {
   testSizeClassMap<scudo::AndroidSizeClassMap>();
 }
 
-
 struct OneClassSizeClassConfig {
   static const scudo::uptr NumBits = 1;
   static const scudo::uptr MinSizeLog = 5;


        


More information about the llvm-commits mailing list