[compiler-rt] a0e8642 - [scudo][standalone] Do not fill 32b regions at once

Kostya Kortchinsky via llvm-commits <llvm-commits at lists.llvm.org>
Wed Mar 4 14:23:27 PST 2020


Author: Kostya Kortchinsky
Date: 2020-03-04T14:22:24-08:00
New Revision: a0e86420ae72e4dc38c3a341943d4f14139e1e4b

URL: https://github.com/llvm/llvm-project/commit/a0e86420ae72e4dc38c3a341943d4f14139e1e4b
DIFF: https://github.com/llvm/llvm-project/commit/a0e86420ae72e4dc38c3a341943d4f14139e1e4b.diff

LOG: [scudo][standalone] Do not fill 32b regions at once

Summary:
For the 32b primary, whenever we created a region, we would fill it
all at once (e.g., creating all the transfer batches for all the
blocks in that region). This wasn't ideal, as all the potential
blocks in a newly created region might not be consumed right away,
and it used extra memory (and release cycles) to keep all those free
blocks around.
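
As a rough illustration (with assumed numbers, not the actual scudo
config): for a 2^18-byte region and 64-byte blocks, eager filling meant
creating transfer batches for all 4096 blocks up front; with batches
holding, say, 13 blocks each, that is over 300 batches allocated even
if the caller only needed one.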

So now we keep track of the current region for a given class, and of
how much of it has been filled, carving out at most `MaxNumBatches`
worth of blocks at a time.
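
The gist of the carving computation, as a minimal C++ sketch (the
`RegionSize` value and the `blocksToCarve` helper are simplified
stand-ins for what `populateFreeList` does in primary32.h, not a
drop-in excerpt):

#include <algorithm>
#include <cstdint>

using uptr = std::uintptr_t;           // Simplified stand-in for scudo's uptr.
constexpr uptr RegionSize = 1UL << 18; // Hypothetical region size.
constexpr uint32_t MaxNumBatches = 4U; // Android value after this change.

struct SizeClassInfo {
  uptr CurrentRegion = 0;          // 0 means no region is associated yet.
  uptr CurrentRegionAllocated = 0; // Bytes already carved from that region.
};

// At most MaxNumBatches batches of MaxCount blocks get carved per call,
// bounded by the space remaining in the current region.
uint32_t blocksToCarve(const SizeClassInfo &Sci, uptr Size,
                       uint32_t MaxCount) {
  const uptr Offset = Sci.CurrentRegionAllocated;
  return std::min<uint32_t>(
      MaxNumBatches * MaxCount,
      static_cast<uint32_t>((RegionSize - Offset) / Size));
}

After carving, CurrentRegionAllocated advances by the carved amount,
and both fields are reset once the remainder of the region can no
longer hold a single block, as the primary32.h hunk below shows.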

Additionally, lower `MaxNumBatches` on Android from 8 to 4. This
reduces the randomness of block placement, which isn't ideal for
security, but keeps things more clumped together for PSS/RSS
accounting purposes.

Subscribers: #sanitizers, llvm-commits

Tags: #sanitizers, #llvm

Differential Revision: https://reviews.llvm.org/D75551

Added: 
    

Modified: 
    compiler-rt/lib/scudo/standalone/primary32.h
    compiler-rt/lib/scudo/standalone/primary64.h

Removed: 
    


################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index e3376e746ebd..9dac49899913 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -72,10 +72,10 @@ class SizeClassAllocator32 {
     MinRegionIndex = NumRegions; // MaxRegionIndex is already initialized to 0.
 
     u32 Seed;
+    const u64 Time = getMonotonicTime();
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
-      Seed =
-          static_cast<u32>(getMonotonicTime() ^
-                           (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
+      Seed = static_cast<u32>(
+          Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
     const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
       SizeClassInfo *Sci = getSizeClassInfo(I);
@@ -83,6 +83,8 @@ class SizeClassAllocator32 {
       // See comment in the 64-bit primary about releasing smaller size classes.
       Sci->CanRelease = (I != SizeClassMap::BatchClassId) &&
                         (getSizeByClassId(I) >= (PageSize / 32));
+      if (Sci->CanRelease)
+        Sci->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setReleaseToOsIntervalMs(ReleaseToOsInterval);
   }
@@ -208,6 +210,7 @@ class SizeClassAllocator32 {
   static const uptr NumClasses = SizeClassMap::NumClasses;
   static const uptr RegionSize = 1UL << RegionSizeLog;
   static const uptr NumRegions = SCUDO_MMAP_RANGE_SIZE >> RegionSizeLog;
+  static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;
   typedef FlatByteMap<NumRegions> ByteMap;
 
   struct SizeClassStats {
@@ -225,6 +228,8 @@ class SizeClassAllocator32 {
   struct ALIGNED(SCUDO_CACHE_LINE_SIZE) SizeClassInfo {
     HybridMutex Mutex;
     SinglyLinkedList<TransferBatch> FreeList;
+    uptr CurrentRegion;
+    uptr CurrentRegionAllocated;
     SizeClassStats Stats;
     bool CanRelease;
     u32 RandState;
@@ -315,21 +320,50 @@ class SizeClassAllocator32 {
 
   NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
                                            SizeClassInfo *Sci) {
-    const uptr Region = allocateRegion(ClassId);
-    if (UNLIKELY(!Region))
-      return nullptr;
-    C->getStats().add(StatMapped, RegionSize);
+    uptr Region;
+    uptr Offset;
+    // If the size-class currently has a region associated to it, use it. The
+    // newly created blocks will be located after the currently allocated memory
+    // for that region (up to RegionSize). Otherwise, create a new region, where
+    // the new blocks will be carved from the beginning.
+    if (Sci->CurrentRegion) {
+      Region = Sci->CurrentRegion;
+      DCHECK_GT(Sci->CurrentRegionAllocated, 0U);
+      Offset = Sci->CurrentRegionAllocated;
+    } else {
+      DCHECK_EQ(Sci->CurrentRegionAllocated, 0U);
+      Region = allocateRegion(ClassId);
+      if (UNLIKELY(!Region))
+        return nullptr;
+      C->getStats().add(StatMapped, RegionSize);
+      Sci->CurrentRegion = Region;
+      Offset = 0;
+    }
+
     const uptr Size = getSizeByClassId(ClassId);
     const u32 MaxCount = TransferBatch::getMaxCached(Size);
-    DCHECK_GT(MaxCount, 0);
-    const uptr NumberOfBlocks = RegionSize / Size;
-    DCHECK_GT(NumberOfBlocks, 0);
+    DCHECK_GT(MaxCount, 0U);
+    // The maximum number of blocks we should carve in the region is dictated
+    // by the maximum number of batches we want to fill, and the amount of
+    // memory left in the current region (we use the lowest of the two). This
+    // will not be 0 as we ensure that a region can at least hold one block (via
+    // static_assert and at the end of this function).
+    const u32 NumberOfBlocks =
+        Min(MaxNumBatches * MaxCount,
+            static_cast<u32>((RegionSize - Offset) / Size));
+    DCHECK_GT(NumberOfBlocks, 0U);
+
     TransferBatch *B = nullptr;
-    constexpr u32 ShuffleArraySize = 8U * TransferBatch::MaxNumCached;
+    constexpr u32 ShuffleArraySize =
+        MaxNumBatches * TransferBatch::MaxNumCached;
+    // Fill the transfer batches and put them in the size-class freelist. We
+    // need to randomize the blocks for security purposes, so we first fill a
+    // local array that we then shuffle before populating the batches.
     void *ShuffleArray[ShuffleArraySize];
     u32 Count = 0;
     const uptr AllocatedUser = Size * NumberOfBlocks;
-    for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
+    for (uptr I = Region + Offset; I < Region + Offset + AllocatedUser;
+         I += Size) {
       ShuffleArray[Count++] = reinterpret_cast<void *>(I);
       if (Count == ShuffleArraySize) {
         if (UNLIKELY(!populateBatches(C, Sci, ClassId, &B, MaxCount,
@@ -352,9 +386,18 @@ class SizeClassAllocator32 {
     DCHECK_GT(B->getCount(), 0);
 
     C->getStats().add(StatFree, AllocatedUser);
+    DCHECK_LE(Sci->CurrentRegionAllocated + AllocatedUser, RegionSize);
+    // If there is not enough room in the region currently associated to fit
+    // more blocks, we deassociate the region by resetting CurrentRegion and
+    // CurrentRegionAllocated. Otherwise, update the allocated amount.
+    if (RegionSize - (Sci->CurrentRegionAllocated + AllocatedUser) < Size) {
+      Sci->CurrentRegion = 0;
+      Sci->CurrentRegionAllocated = 0;
+    } else {
+      Sci->CurrentRegionAllocated += AllocatedUser;
+    }
     Sci->AllocatedUser += AllocatedUser;
-    if (Sci->CanRelease)
-      Sci->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
+
     return B;
   }
 
@@ -407,10 +450,15 @@ class SizeClassAllocator32 {
     // iterate multiple times over the same freelist if a ClassId spans multiple
     // regions. But it will have to do for now.
     uptr TotalReleasedBytes = 0;
-    const uptr Size = (RegionSize / BlockSize) * BlockSize;
+    const uptr MaxSize = (RegionSize / BlockSize) * BlockSize;
     for (uptr I = MinRegionIndex; I <= MaxRegionIndex; I++) {
       if (PossibleRegions[I] - 1U == ClassId) {
         const uptr Region = I * RegionSize;
+        // If the region is the one currently associated to the size-class, we
+        // only need to release up to CurrentRegionAllocated, MaxSize otherwise.
+        const uptr Size = (Region == Sci->CurrentRegion)
+                              ? Sci->CurrentRegionAllocated
+                              : MaxSize;
         ReleaseRecorder Recorder(Region);
         releaseFreeMemoryToOS(Sci->FreeList, Region, Size, BlockSize,
                               &Recorder);

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index e1560001f7f2..8e0224e0450c 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -69,14 +69,16 @@ class SizeClassAllocator64 {
         map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data));
 
     u32 Seed;
+    const u64 Time = getMonotonicTime();
     if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
-      Seed = static_cast<u32>(getMonotonicTime() ^ (PrimaryBase >> 12));
+      Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12));
     const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
       RegionInfo *Region = getRegionInfo(I);
       // The actual start of a region is offset by a random number of pages.
       Region->RegionBeg =
           getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
+      Region->RandState = getRandomU32(&Seed);
       // Releasing smaller size classes doesn't necessarily yield a
       // meaningful RSS impact: there are more blocks per page, they are
       // randomized around, and thus pages are less likely to be entirely empty.
@@ -86,7 +88,8 @@ class SizeClassAllocator64 {
       // TODO(kostyak): make the lower limit a runtime option
       Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
                            (getSizeByClassId(I) >= (PageSize / 32));
-      Region->RandState = getRandomU32(&Seed);
+      if (Region->CanRelease)
+        Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setReleaseToOsIntervalMs(ReleaseToOsInterval);
 
@@ -214,7 +217,7 @@ class SizeClassAllocator64 {
   // Call map for user memory with at least this size.
   static const uptr MapSizeIncrement = 1UL << 18;
   // Fill at most this number of batches from the newly map'd memory.
-  static const u32 MaxNumBatches = 8U;
+  static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;
 
   struct RegionStats {
     uptr PoppedBlocks;
@@ -357,8 +360,6 @@ class SizeClassAllocator64 {
     C->getStats().add(StatFree, AllocatedUser);
     Region->AllocatedUser += AllocatedUser;
     Region->Exhausted = false;
-    if (Region->CanRelease)
-      Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
 
     return B;
   }

