[llvm-branch-commits] [compiler-rt] 1dbf2c9 - [scudo][standalone] Allow the release of smaller sizes
Kostya Kortchinsky via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Dec 17 10:06:19 PST 2020
Author: Kostya Kortchinsky
Date: 2020-12-17T10:01:57-08:00
New Revision: 1dbf2c96bce93e0a954806d9bdcafcb702a06672
URL: https://github.com/llvm/llvm-project/commit/1dbf2c96bce93e0a954806d9bdcafcb702a06672
DIFF: https://github.com/llvm/llvm-project/commit/1dbf2c96bce93e0a954806d9bdcafcb702a06672.diff
LOG: [scudo][standalone] Allow the release of smaller sizes
Initially we avoided releasing smaller size classes because it was an
expensive operation, particularly on 32-bit platforms: with a lot of
batches, and a lot of blocks per page, it was a lengthy operation that
yielded little.

There have been some improvements to the 32-bit release since then,
and we still have criteria in place to avoid wasting time (e.g., 9x%
of the blocks in the size class must be free, etc).

Allowing blocks < 128 bytes to be released helps in situations where a
lot of small chunks would not have been reclaimed if not for a forced
reclaiming.

Additionally, change some `CHECK`s to `DCHECK`s and rearrange the code
a bit.

I didn't experience any regressions in my benchmarks.
Differential Revision: https://reviews.llvm.org/D93141
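As a rough illustration of the kind of non-forced release criterion
mentioned above, the sketch below gates the attempt on most of the size
class being back in the free list. The function name, the 90% threshold
and the parameters are assumptions for illustration, not the actual
releaseToOSMaybe logic:

// Hypothetical sketch of a "mostly free" gating criterion; the names and
// the 90% threshold are illustrative, not the scudo implementation.
#include <cstdint>

bool shouldAttemptRelease(uint64_t AllocatedUser, uint64_t BytesInFreeList,
                          bool Force) {
  if (Force)
    return true; // A forced release skips the heuristic entirely.
  // Only scan for releasable pages when at least ~90% of the bytes handed
  // out for this size class are back in the free list, so the scan is
  // likely to find entirely free pages.
  return BytesInFreeList * 100 >= AllocatedUser * 90;
}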
Added:
Modified:
compiler-rt/lib/scudo/standalone/primary32.h
compiler-rt/lib/scudo/standalone/primary64.h
compiler-rt/lib/scudo/standalone/release.h
Removed:
################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 016a96bd2dfa..0db95d2e1f11 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -76,17 +76,12 @@ class SizeClassAllocator32 {
if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
Seed = static_cast<u32>(
Time ^ (reinterpret_cast<uptr>(SizeClassInfoArray) >> 6));
- const uptr PageSize = getPageSizeCached();
for (uptr I = 0; I < NumClasses; I++) {
SizeClassInfo *Sci = getSizeClassInfo(I);
Sci->RandState = getRandomU32(&Seed);
// Sci->MaxRegionIndex is already initialized to 0.
Sci->MinRegionIndex = NumRegions;
- // See comment in the 64-bit primary about releasing smaller size classes.
- Sci->CanRelease = (I != SizeClassMap::BatchClassId) &&
- (getSizeByClassId(I) >= (PageSize / 32));
- if (Sci->CanRelease)
- Sci->ReleaseInfo.LastReleaseAtNs = Time;
+ Sci->ReleaseInfo.LastReleaseAtNs = Time;
}
setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
}
@@ -137,7 +132,7 @@ class SizeClassAllocator32 {
ScopedLock L(Sci->Mutex);
Sci->FreeList.push_front(B);
Sci->Stats.PushedBlocks += B->getCount();
- if (Sci->CanRelease)
+ if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Sci, ClassId);
}
@@ -217,6 +212,8 @@ class SizeClassAllocator32 {
uptr releaseToOS() {
uptr TotalReleasedBytes = 0;
for (uptr I = 0; I < NumClasses; I++) {
+ if (I == SizeClassMap::BatchClassId)
+ continue;
SizeClassInfo *Sci = getSizeClassInfo(I);
ScopedLock L(Sci->Mutex);
TotalReleasedBytes += releaseToOSMaybe(Sci, I, /*Force=*/true);
@@ -262,7 +259,6 @@ class SizeClassAllocator32 {
uptr CurrentRegion;
uptr CurrentRegionAllocated;
SizeClassStats Stats;
- bool CanRelease;
u32 RandState;
uptr AllocatedUser;
// Lowest & highest region index allocated for this size class, to avoid
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index f6c4d8cf8bb0..f9854cbfd4d6 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -80,17 +80,7 @@ class SizeClassAllocator64 {
Region->RegionBeg =
getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
Region->RandState = getRandomU32(&Seed);
- // Releasing smaller size classes doesn't necessarily yield to a
- // meaningful RSS impact: there are more blocks per page, they are
- // randomized around, and thus pages are less likely to be entirely empty.
- // On top of this, attempting to release those require more iterations and
- // memory accesses which ends up being fairly costly. The current lower
- // limit is mostly arbitrary and based on empirical observations.
- // TODO(kostyak): make the lower limit a runtime option
- Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
- (getSizeByClassId(I) >= (PageSize / 32));
- if (Region->CanRelease)
- Region->ReleaseInfo.LastReleaseAtNs = Time;
+ Region->ReleaseInfo.LastReleaseAtNs = Time;
}
setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
@@ -129,7 +119,7 @@ class SizeClassAllocator64 {
ScopedLock L(Region->Mutex);
Region->FreeList.push_front(B);
Region->Stats.PushedBlocks += B->getCount();
- if (Region->CanRelease)
+ if (ClassId != SizeClassMap::BatchClassId)
releaseToOSMaybe(Region, ClassId);
}
@@ -201,6 +191,8 @@ class SizeClassAllocator64 {
uptr releaseToOS() {
uptr TotalReleasedBytes = 0;
for (uptr I = 0; I < NumClasses; I++) {
+ if (I == SizeClassMap::BatchClassId)
+ continue;
RegionInfo *Region = getRegionInfo(I);
ScopedLock L(Region->Mutex);
TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
@@ -291,7 +283,6 @@ class SizeClassAllocator64 {
HybridMutex Mutex;
SinglyLinkedList<TransferBatch> FreeList;
RegionStats Stats;
- bool CanRelease;
bool Exhausted;
u32 RandState;
uptr RegionBeg;
@@ -417,7 +408,7 @@ class SizeClassAllocator64 {
const uptr BlockSize = getSizeByClassId(ClassId);
const uptr PageSize = getPageSizeCached();
- CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
+ DCHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
const uptr BytesInFreeList =
Region->AllocatedUser -
(Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
diff --git a/compiler-rt/lib/scudo/standalone/release.h b/compiler-rt/lib/scudo/standalone/release.h
index cd9e66d63b36..5c11da2200e9 100644
--- a/compiler-rt/lib/scudo/standalone/release.h
+++ b/compiler-rt/lib/scudo/standalone/release.h
@@ -52,20 +52,20 @@ class PackedCounterArray {
PackedCounterArray(uptr NumberOfRegions, uptr CountersPerRegion,
uptr MaxValue)
: Regions(NumberOfRegions), NumCounters(CountersPerRegion) {
- CHECK_GT(Regions, 0);
- CHECK_GT(NumCounters, 0);
- CHECK_GT(MaxValue, 0);
+ DCHECK_GT(Regions, 0);
+ DCHECK_GT(NumCounters, 0);
+ DCHECK_GT(MaxValue, 0);
constexpr uptr MaxCounterBits = sizeof(*Buffer) * 8UL;
// Rounding counter storage size up to the power of two allows for using
// bit shifts calculating particular counter's Index and offset.
const uptr CounterSizeBits =
roundUpToPowerOfTwo(getMostSignificantSetBitIndex(MaxValue) + 1);
- CHECK_LE(CounterSizeBits, MaxCounterBits);
+ DCHECK_LE(CounterSizeBits, MaxCounterBits);
CounterSizeBitsLog = getLog2(CounterSizeBits);
CounterMask = ~(static_cast<uptr>(0)) >> (MaxCounterBits - CounterSizeBits);
const uptr PackingRatio = MaxCounterBits >> CounterSizeBitsLog;
- CHECK_GT(PackingRatio, 0);
+ DCHECK_GT(PackingRatio, 0);
PackingRatioLog = getLog2(PackingRatio);
BitOffsetMask = PackingRatio - 1;
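To make the bit-shift arithmetic above concrete (the MaxValue figure is
just an illustration): with MaxValue = 100 blocks per page,
getMostSignificantSetBitIndex(100) is 6, so 7 bits are needed per counter,
rounded up to CounterSizeBits = 8 (CounterSizeBitsLog = 3). With 64-bit
words, PackingRatio = 64 / 8 = 8 counters per word (PackingRatioLog = 3,
BitOffsetMask = 7), so counter I lives in Buffer[I >> 3] at bit offset
(I & 7) << 3.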
@@ -235,20 +235,14 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
if (BlockSize <= PageSize && PageSize % BlockSize == 0) {
// Each chunk affects one page only.
for (const auto &It : FreeList) {
- // If dealing with a TransferBatch, the first pointer of the batch will
- // point to the batch itself, we do not want to mark this for release as
- // the batch is in use, so skip the first entry.
- const bool IsTransferBatch =
- (It.getCount() != 0) &&
- (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
- for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+ for (u32 I = 0; I < It.getCount(); I++) {
const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
// This takes care of P < Base and P >= Base + RoundedSize.
- if (P < RoundedSize) {
- const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
- const uptr PInRegion = P - RegionIndex * RegionSize;
- Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
- }
+ if (UNLIKELY(P >= RoundedSize))
+ continue;
+ const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+ const uptr PInRegion = P - RegionIndex * RegionSize;
+ Counters.inc(RegionIndex, PInRegion >> PageSizeLog);
}
}
} else {
@@ -256,27 +250,23 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
DCHECK_GE(RegionSize, BlockSize);
const uptr LastBlockInRegion = ((RegionSize / BlockSize) - 1U) * BlockSize;
for (const auto &It : FreeList) {
- // See TransferBatch comment above.
- const bool IsTransferBatch =
- (It.getCount() != 0) &&
- (reinterpret_cast<uptr>(It.get(0)) == reinterpret_cast<uptr>(&It));
- for (u32 I = IsTransferBatch ? 1 : 0; I < It.getCount(); I++) {
+ for (u32 I = 0; I < It.getCount(); I++) {
const uptr P = reinterpret_cast<uptr>(It.get(I)) - Base;
// This takes care of P < Base and P >= Base + RoundedSize.
- if (P < RoundedSize) {
- const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
- uptr PInRegion = P - RegionIndex * RegionSize;
- Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
- (PInRegion + BlockSize - 1) >> PageSizeLog);
- // The last block in a region might straddle a page, so if it's
- // free, we mark the following "pretend" memory block(s) as free.
- if (PInRegion == LastBlockInRegion) {
+ if (UNLIKELY(P >= RoundedSize))
+ continue;
+ const uptr RegionIndex = NumberOfRegions == 1U ? 0 : P / RegionSize;
+ uptr PInRegion = P - RegionIndex * RegionSize;
+ Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+ (PInRegion + BlockSize - 1) >> PageSizeLog);
+ // The last block in a region might straddle a page, so if it's
+ // free, we mark the following "pretend" memory block(s) as free.
+ if (PInRegion == LastBlockInRegion) {
+ PInRegion += BlockSize;
+ while (PInRegion < RoundedRegionSize) {
+ Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
+ (PInRegion + BlockSize - 1) >> PageSizeLog);
PInRegion += BlockSize;
- while (PInRegion < RoundedRegionSize) {
- Counters.incRange(RegionIndex, PInRegion >> PageSizeLog,
- (PInRegion + BlockSize - 1) >> PageSizeLog);
- PInRegion += BlockSize;
- }
}
}
}
@@ -327,7 +317,6 @@ releaseFreeMemoryToOS(const IntrusiveList<TransferBatchT> &FreeList, uptr Base,
}
}
PrevPageBoundary = PageBoundary;
-
RangeTracker.processNextPage(Counters.get(I, J) == BlocksPerPage);
}
}
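For context on what the counters above feed into: a page can only be
released when every block overlapping it is free, which is what the
Counters.get(I, J) == BlocksPerPage check at the end expresses. The
following is a minimal, self-contained sketch of that idea for the simple
case where BlockSize divides PageSize; the plain arrays, names and
single-region layout are assumptions for illustration, not the scudo API:

// Simplified model: one region, BlockSize divides PageSize, one counter
// per page. A page can be returned to the OS only when every block on it
// is in the free list.
#include <cstdint>
#include <vector>

std::vector<bool> fullyFreePages(const std::vector<uint64_t> &FreeBlockOffsets,
                                 uint64_t RegionSize, uint64_t BlockSize,
                                 uint64_t PageSize) {
  const uint64_t NumPages = RegionSize / PageSize;
  const uint64_t BlocksPerPage = PageSize / BlockSize;
  std::vector<uint64_t> Counters(NumPages, 0);
  for (uint64_t Offset : FreeBlockOffsets)
    Counters[Offset / PageSize]++; // Each free block covers exactly one page.
  std::vector<bool> Releasable(NumPages, false);
  for (uint64_t I = 0; I < NumPages; I++)
    Releasable[I] = (Counters[I] == BlocksPerPage);
  return Releasable;
}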