[compiler-rt] 643bf6c - [scudo] Add partial chunk heuristic to retrieval algorithm. (#105009)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 26 10:44:42 PDT 2024
Author: Joshua Baehring
Date: 2024-08-26T10:44:39-07:00
New Revision: 643bf6cb01bc3faa54c5510c904ea3bdcb4bf42f
URL: https://github.com/llvm/llvm-project/commit/643bf6cb01bc3faa54c5510c904ea3bdcb4bf42f
DIFF: https://github.com/llvm/llvm-project/commit/643bf6cb01bc3faa54c5510c904ea3bdcb4bf42f.diff
LOG: [scudo] Add partial chunk heuristic to retrieval algorithm. (#105009)
Previously, the secondary cache retrieval algorithm would not allow
retrievals of memory chunks where the number of unused bytes would be
greater than `MaxUnreleasedCachePages * PageSize` bytes. This meant
that even if a memory chunk satisfied the requirements of the optimal
fit algorithm, it may not be returned. This remains true if memory
tagging is enabled. However, if memory tagging is disabled, a new
heuristic has been put in place: if a memory chunk is a non-optimal
fit, the cache retrieval algorithm will attempt to release the excess
memory to force a cache hit while keeping RSS down.
In the event that a memory chunk is a non-optimal fit, the retrieval
algorithm will release excess memory as long as the amount of memory to
be released is less than or equal to 4 pages. If the amount of memory
to be released exceeds 4 pages, the retrieval algorithm will not
consider that cached memory chunk valid for retrieval.
This change also addresses an alignment issue in a test case submitted
in #104807.
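To make the heuristic above concrete, the following is a minimal,
self-contained C++ sketch of the release decision. All names in it
(PageSize, MaxUnreleasedCachePages, MaxReleasedCachePages, Decision,
evaluateCachedChunk) are illustrative placeholders rather than the
actual scudo identifiers; the real logic lives in the retrieve()
changes in the diff below.

#include <algorithm>
#include <cstdint>

constexpr uint64_t PageSize = 4096;              // assumed page size
constexpr uint64_t MaxUnreleasedCachePages = 4;  // unused pages tolerated as-is
constexpr uint64_t MaxReleasedCachePages = 4;    // cap on pages released to the OS

struct Decision {
  bool Retrieve = false;        // is the cached chunk acceptable at all?
  uint64_t BytesToRelease = 0;  // excess bytes to hand back to the OS
};

// FragmentedBytes: page-aligned gap between the chunk's commit base and the
// header position, i.e. the memory the allocation would leave unused.
Decision evaluateCachedChunk(uint64_t FragmentedBytes) {
  Decision D;
  const uint64_t MaxUnreleasedBytes = MaxUnreleasedCachePages * PageSize;
  const uint64_t MaxReleasedBytes = MaxReleasedCachePages * PageSize;
  // Reject the chunk if even releasing the maximum allowed number of pages
  // would still leave more than MaxUnreleasedBytes unused.
  if (FragmentedBytes > MaxUnreleasedBytes + MaxReleasedBytes)
    return D;
  D.Retrieve = true;
  // Release only the excess beyond the tolerated amount, capped at
  // MaxReleasedBytes, keeping RSS down while still scoring a cache hit.
  if (FragmentedBytes > MaxUnreleasedBytes)
    D.BytesToRelease =
        std::min(FragmentedBytes - MaxUnreleasedBytes, MaxReleasedBytes);
  return D;
}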
Added:
Modified:
compiler-rt/lib/scudo/standalone/secondary.h
compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
Removed:
################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 27f8697db7838f..985e2392641ae2 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -72,6 +72,15 @@ namespace {
struct CachedBlock {
static constexpr u16 CacheIndexMax = UINT16_MAX;
static constexpr u16 InvalidEntry = CacheIndexMax;
+ // * MaxReleasedCachePages default is currently 4
+ // - We arrived at this value after noticing that mapping
+ // in larger memory regions performs better than releasing
+ // memory and forcing a cache hit. The data suggests that beyond
+ // 4 pages, the release execution time is longer than the map
+ // execution time, so the default is platform dependent.
+ // TODO: set MaxReleasedCachePages back to 4U
+ static constexpr uptr MaxReleasedCachePages = 0U;
uptr CommitBase = 0;
uptr CommitSize = 0;
@@ -90,8 +99,9 @@ struct CachedBlock {
template <typename Config> class MapAllocatorNoCache {
public:
void init(UNUSED s32 ReleaseToOsInterval) {}
- CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
- UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+ CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+ UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+ UNUSED uptr &EntryHeaderPos) {
return {};
}
void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -121,7 +131,7 @@ template <typename Config> class MapAllocatorNoCache {
}
};
-static const uptr MaxUnusedCachePages = 4U;
+static const uptr MaxUnreleasedCachePages = 4U;
template <typename Config>
bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize,
@@ -151,9 +161,11 @@ bool mapSecondary(const Options &Options, uptr CommitBase, uptr CommitSize,
}
}
- const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
- if (useMemoryTagging<Config>(Options) && CommitSize > MaxUnusedCacheBytes) {
- const uptr UntaggedPos = Max(AllocPos, CommitBase + MaxUnusedCacheBytes);
+ const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize;
+ if (useMemoryTagging<Config>(Options) &&
+ CommitSize > MaxUnreleasedCacheBytes) {
+ const uptr UntaggedPos =
+ Max(AllocPos, CommitBase + MaxUnreleasedCacheBytes);
return MemMap.remap(CommitBase, UntaggedPos - CommitBase, "scudo:secondary",
MAP_MEMTAG | Flags) &&
MemMap.remap(UntaggedPos, CommitBase + CommitSize - UntaggedPos,
@@ -334,13 +346,13 @@ class MapAllocatorCache {
}
}
- CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
- uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+ CachedBlock retrieve(uptr MaxAllowedFragmentedPages, uptr Size,
+ uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+ EXCLUDES(Mutex) {
const uptr PageSize = getPageSizeCached();
// 10% of the requested size proved to be the optimal choice for
// retrieving cached blocks after testing several options.
constexpr u32 FragmentedBytesDivisor = 10;
- bool Found = false;
CachedBlock Entry;
EntryHeaderPos = 0;
{
@@ -348,47 +360,100 @@ class MapAllocatorCache {
CallsToRetrieve++;
if (EntriesCount == 0)
return {};
- u32 OptimalFitIndex = 0;
+ u16 RetrievedIndex = CachedBlock::InvalidEntry;
uptr MinDiff = UINTPTR_MAX;
- for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+ // Since allocation sizes don't always match cached memory chunk sizes
+ // we allow some memory to be unused (called fragmented bytes). The
+ // amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+ //
+ // CommitBase CommitBase + CommitSize
+ // V V
+ // +---+------------+-----------------+---+
+ // | | | | |
+ // +---+------------+-----------------+---+
+ // ^ ^ ^
+ // Guard EntryHeaderPos Guard-page-end
+ // page-begin
+ //
+ // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+ // well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+ // MaxAllowedFragmentedPages * PageSize, the cached memory chunk is
+ // not considered valid for retrieval.
+ for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
I = Entries[I].Next) {
const uptr CommitBase = Entries[I].CommitBase;
const uptr CommitSize = Entries[I].CommitSize;
const uptr AllocPos =
roundDown(CommitBase + CommitSize - Size, Alignment);
const uptr HeaderPos = AllocPos - HeadersSize;
+ const uptr MaxAllowedFragmentedBytes =
+ MaxAllowedFragmentedPages * PageSize;
if (HeaderPos > CommitBase + CommitSize)
continue;
+ // TODO: Remove AllocPos > CommitBase + MaxAllowedFragmentedBytes
+ // and replace with Diff > MaxAllowedFragmentedBytes
if (HeaderPos < CommitBase ||
- AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+ AllocPos > CommitBase + MaxAllowedFragmentedBytes) {
continue;
}
- Found = true;
- const uptr Diff = HeaderPos - CommitBase;
- // immediately use a cached block if it's size is close enough to the
- // requested size.
- const uptr MaxAllowedFragmentedBytes =
- (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
- if (Diff <= MaxAllowedFragmentedBytes) {
- OptimalFitIndex = I;
- EntryHeaderPos = HeaderPos;
- break;
- }
- // keep track of the smallest cached block
+
+ const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+ // Keep track of the smallest cached block
// that is greater than (AllocSize + HeaderSize)
- if (Diff > MinDiff)
+ if (Diff >= MinDiff)
continue;
- OptimalFitIndex = I;
+
MinDiff = Diff;
+ RetrievedIndex = I;
EntryHeaderPos = HeaderPos;
+
+ // Immediately use a cached block if its size is close enough to the
+ // requested size
+ const uptr OptimalFitThresholdBytes =
+ (CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
+ if (Diff <= OptimalFitThresholdBytes)
+ break;
}
- if (Found) {
- Entry = Entries[OptimalFitIndex];
- remove(OptimalFitIndex);
+ if (RetrievedIndex != CachedBlock::InvalidEntry) {
+ Entry = Entries[RetrievedIndex];
+ remove(RetrievedIndex);
SuccessfulRetrieves++;
}
}
+ // The difference between the retrieved memory chunk and the request
+ // size is at most MaxAllowedFragmentedPages
+ //
+ // / MaxAllowedFragmentedPages * PageSize \
+ // +--------------------------+-------------+
+ // | | |
+ // +--------------------------+-------------+
+ // \ Bytes to be released / ^
+ // |
+ // (may or may not be committed)
+ //
+ // The maximum number of bytes released to the OS is capped by
+ // MaxReleasedCachePages
+ //
+ // TODO: Consider making MaxReleasedCachePages configurable since
+ // the release to OS API can vary across systems.
+ if (Entry.Time != 0) {
+ const uptr FragmentedBytes =
+ roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
+ const uptr MaxUnreleasedCacheBytes = MaxUnreleasedCachePages * PageSize;
+ if (FragmentedBytes > MaxUnreleasedCacheBytes) {
+ const uptr MaxReleasedCacheBytes =
+ CachedBlock::MaxReleasedCachePages * PageSize;
+ uptr BytesToRelease =
+ roundUp(Min<uptr>(MaxReleasedCacheBytes,
+ FragmentedBytes - MaxUnreleasedCacheBytes),
+ PageSize);
+ Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+ }
+ }
+
return Entry;
}
@@ -659,8 +724,13 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
FillContentsMode FillContents) {
CachedBlock Entry;
uptr EntryHeaderPos;
+ uptr MaxAllowedFragmentedPages = MaxUnreleasedCachePages;
+
+ if (UNLIKELY(useMemoryTagging<Config>(Options)))
+ MaxAllowedFragmentedPages += CachedBlock::MaxReleasedCachePages;
- Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+ Entry = Cache.retrieve(MaxAllowedFragmentedPages, Size, Alignment,
+ getHeadersSize(), EntryHeaderPos);
if (!Entry.isValid())
return nullptr;
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
index e85b6abdb36d22..3638f1c36ddd9b 100644
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test {
std::unique_ptr<CacheT> Cache = std::make_unique<CacheT>();
const scudo::uptr PageSize = scudo::getPageSizeCached();
- // The current test allocation size is set to the minimum size
- // needed for the scudo allocator to fall back to the secondary allocator
+ // The current test allocation size is set to the maximum
+ // cache entry size
static constexpr scudo::uptr TestAllocSize =
CacheConfig::getDefaultMaxEntrySize();
@@ -327,7 +327,7 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
scudo::uptr EntryHeaderPos;
scudo::CachedBlock Entry =
- Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
+ Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos);
EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
}
@@ -336,6 +336,30 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
MemMap.unmap();
}
+TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) {
+ const scudo::uptr FragmentedPages =
+ 1 + scudo::CachedBlock::MaxReleasedCachePages;
+ scudo::uptr EntryHeaderPos;
+ scudo::CachedBlock Entry;
+ scudo::MemMapT MemMap = allocate(PageSize + FragmentedPages * PageSize);
+ Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(),
+ MemMap.getBase(), MemMap);
+
+ // FragmentedPages > MaxAllowedFragmentedPages so PageSize
+ // cannot be retrieved from the cache
+ Entry = Cache->retrieve(/*MaxAllowedFragmentedPages=*/0, PageSize, PageSize,
+ 0, EntryHeaderPos);
+ EXPECT_FALSE(Entry.isValid());
+
+ // FragmentedPages == MaxAllowedFragmentedPages so PageSize
+ // can be retrieved from the cache
+ Entry =
+ Cache->retrieve(FragmentedPages, PageSize, PageSize, 0, EntryHeaderPos);
+ EXPECT_TRUE(Entry.isValid());
+
+ MemMap.unmap();
+}
+
TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
std::vector<scudo::MemMapT> MemMaps;
// Fill the cache above MaxEntriesCount to force an eviction
@@ -351,7 +375,7 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
scudo::uptr EntryHeaderPos;
RetrievedEntries.push_back(
- Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
+ Cache->retrieve(0, TestAllocSize, PageSize, 0, EntryHeaderPos));
EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
}