[compiler-rt] [scudo] Add partial chunk heuristic to retrieval algorithm. (PR #105009)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 08:25:11 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-compiler-rt-sanitizer
Author: Joshua Baehring (JoshuaMBa)
Changes:
Previously, the secondary cache retrieval algorithm would not allow retrieval of memory chunks where the number of unused bytes would be greater than `MaxUnusedCachePages * PageSize` bytes. This meant that even if a memory chunk satisfied the requirements of the optimal fit algorithm, it might not be returned. This remains true if memory tagging is enabled. However, if memory tagging is disabled, a new heuristic has been put in place: if a memory chunk is a non-optimal fit, the cache retrieval algorithm will attempt to release the excess memory to force a cache hit while keeping RSS down.
In the event that a memory chunk is a non-optimal fit, the retrieval algorithm will release the excess memory as long as the amount to be released is at most 4 pages. If the amount of memory to be released exceeds 4 pages, the retrieval algorithm will not consider that cached memory chunk valid for retrieval.
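Below is a minimal, self-contained sketch of the heuristic described above, written against assumed values (the page size, `MaxUnusedCachePages`, and the helper names are illustrative stand-ins, not the actual Scudo identifiers): with memory tagging enabled the old eligibility limit is kept, while without it the limit is relaxed by `MaxCachePagesToRelease` pages and the excess is released back to the OS.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

using uptr = uintptr_t;

// Illustrative values; the real constants live in scudo's secondary.h.
constexpr uptr PageSize = 4096;
constexpr uptr MaxUnusedCachePages = 4;
constexpr uptr MaxCachePagesToRelease = 4; // new cap introduced by this PR

// Maximum number of unused ("fragmented") bytes a cached chunk may carry
// and still be eligible for retrieval.
uptr maxAllowedFragmentedBytes(bool MemoryTaggingEnabled) {
  const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
  if (MemoryTaggingEnabled)
    return MaxUnusedCacheBytes; // previous behavior is kept under MTE
  // Without tagging, tolerate a few extra pages; they get released below.
  return MaxUnusedCacheBytes + MaxCachePagesToRelease * PageSize;
}

// For a non-optimal fit, how many bytes would be released back to the OS to
// keep RSS down? Returns 0 when the chunk is already within the unused limit.
uptr bytesToRelease(uptr FragmentedBytes) {
  const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
  if (FragmentedBytes <= MaxUnusedCacheBytes)
    return 0;
  const uptr Excess = FragmentedBytes - MaxUnusedCacheBytes;
  const uptr Capped = std::min(Excess, MaxCachePagesToRelease * PageSize);
  // Round the released amount up to a page boundary.
  return ((Capped + PageSize - 1) / PageSize) * PageSize;
}

int main() {
  // A chunk with 6 pages of slack: eligible only when tagging is disabled,
  // and the 2 pages above MaxUnusedCachePages are released on retrieval.
  const uptr Fragmented = 6 * PageSize;
  std::printf("eligible without MTE: %d\n",
              Fragmented <= maxAllowedFragmentedBytes(false));
  std::printf("eligible with MTE:    %d\n",
              Fragmented <= maxAllowedFragmentedBytes(true));
  std::printf("bytes released:       %zu\n",
              static_cast<size_t>(bytesToRelease(Fragmented)));
  return 0;
}
```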
---
Full diff: https://github.com/llvm/llvm-project/pull/105009.diff
2 Files Affected:
- (modified) compiler-rt/lib/scudo/standalone/secondary.h (+95-30)
- (modified) compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp (+34-4)
``````````diff
diff --git a/compiler-rt/lib/scudo/standalone/secondary.h b/compiler-rt/lib/scudo/standalone/secondary.h
index 27f8697db7838f..2adea34c13b426 100644
--- a/compiler-rt/lib/scudo/standalone/secondary.h
+++ b/compiler-rt/lib/scudo/standalone/secondary.h
@@ -72,6 +72,14 @@ namespace {
struct CachedBlock {
static constexpr u16 CacheIndexMax = UINT16_MAX;
static constexpr u16 InvalidEntry = CacheIndexMax;
+ // * MaxCachePagesToRelease default is currently 4
+ // - We arrived at this value after noticing that mapping
+ // in larger memory regions performs better than releasing
+ // memory and forcing a cache hit. The data suggests that
+ // beyond 16KB, releasing memory takes longer than mapping it,
+ // so the appropriate default is platform dependent.
+ static constexpr uptr MaxCachePagesToRelease = 4U;
uptr CommitBase = 0;
uptr CommitSize = 0;
@@ -90,8 +98,9 @@ struct CachedBlock {
template <typename Config> class MapAllocatorNoCache {
public:
void init(UNUSED s32 ReleaseToOsInterval) {}
- CachedBlock retrieve(UNUSED uptr Size, UNUSED uptr Alignment,
- UNUSED uptr HeadersSize, UNUSED uptr &EntryHeaderPos) {
+ CachedBlock retrieve(UNUSED uptr MaxAllowedFragmentedBytes, UNUSED uptr Size,
+ UNUSED uptr Alignment, UNUSED uptr HeadersSize,
+ UNUSED uptr &EntryHeaderPos) {
return {};
}
void store(UNUSED Options Options, UNUSED uptr CommitBase,
@@ -334,13 +343,14 @@ class MapAllocatorCache {
}
}
- CachedBlock retrieve(uptr Size, uptr Alignment, uptr HeadersSize,
- uptr &EntryHeaderPos) EXCLUDES(Mutex) {
+ CachedBlock retrieve(uptr MaxAllowedFragmentedBytes, uptr Size,
+ uptr Alignment, uptr HeadersSize, uptr &EntryHeaderPos)
+ EXCLUDES(Mutex) {
const uptr PageSize = getPageSizeCached();
// 10% of the requested size proved to be the optimal choice for
// retrieving cached blocks after testing several options.
constexpr u32 FragmentedBytesDivisor = 10;
- bool Found = false;
+ bool FoundOptimalFit = false;
CachedBlock Entry;
EntryHeaderPos = 0;
{
@@ -348,47 +358,90 @@ class MapAllocatorCache {
CallsToRetrieve++;
if (EntriesCount == 0)
return {};
- u32 OptimalFitIndex = 0;
+ u16 RetrievedIndex = CachedBlock::InvalidEntry;
uptr MinDiff = UINTPTR_MAX;
- for (u32 I = LRUHead; I != CachedBlock::InvalidEntry;
+
+ // Since allocation sizes don't always match cached memory chunk sizes,
+ // we allow some memory to be unused (called fragmented bytes). The
+ // amount of unused bytes is exactly EntryHeaderPos - CommitBase.
+ //
+ // CommitBase CommitBase + CommitSize
+ // V V
+ // +---+------------+-----------------+---+
+ // | | | | |
+ // +---+------------+-----------------+---+
+ // ^ ^ ^
+ // Guard EntryHeaderPos Guard-page-end
+ // page-begin
+ //
+ // [EntryHeaderPos, CommitBase + CommitSize) contains the user data as
+ // well as the header metadata. If EntryHeaderPos - CommitBase exceeds
+ // MaxAllowedFragmentedBytes, the cached memory chunk is not considered
+ // valid for retrieval.
+ for (u16 I = LRUHead; I != CachedBlock::InvalidEntry;
I = Entries[I].Next) {
const uptr CommitBase = Entries[I].CommitBase;
const uptr CommitSize = Entries[I].CommitSize;
const uptr AllocPos =
roundDown(CommitBase + CommitSize - Size, Alignment);
const uptr HeaderPos = AllocPos - HeadersSize;
- if (HeaderPos > CommitBase + CommitSize)
+ if (HeaderPos > CommitBase + CommitSize || HeaderPos < CommitBase)
continue;
- if (HeaderPos < CommitBase ||
- AllocPos > CommitBase + PageSize * MaxUnusedCachePages) {
+
+ const uptr Diff = roundDown(HeaderPos, PageSize) - CommitBase;
+
+ if (Diff > MaxAllowedFragmentedBytes || Diff >= MinDiff)
continue;
- }
- Found = true;
- const uptr Diff = HeaderPos - CommitBase;
- // immediately use a cached block if it's size is close enough to the
- // requested size.
- const uptr MaxAllowedFragmentedBytes =
+
+ MinDiff = Diff;
+ RetrievedIndex = I;
+ EntryHeaderPos = HeaderPos;
+
+ const uptr OptimalFitThresholdBytes =
(CommitBase + CommitSize - HeaderPos) / FragmentedBytesDivisor;
- if (Diff <= MaxAllowedFragmentedBytes) {
- OptimalFitIndex = I;
- EntryHeaderPos = HeaderPos;
+ if (Diff <= OptimalFitThresholdBytes) {
+ FoundOptimalFit = true;
break;
}
- // keep track of the smallest cached block
- // that is greater than (AllocSize + HeaderSize)
- if (Diff > MinDiff)
- continue;
- OptimalFitIndex = I;
- MinDiff = Diff;
- EntryHeaderPos = HeaderPos;
}
- if (Found) {
- Entry = Entries[OptimalFitIndex];
- remove(OptimalFitIndex);
+ if (RetrievedIndex != CachedBlock::InvalidEntry) {
+ Entry = Entries[RetrievedIndex];
+ remove(RetrievedIndex);
SuccessfulRetrieves++;
}
}
+ // The difference between the size of the retrieved memory chunk and the
+ // requested size is at most MaxAllowedFragmentedBytes.
+ //
+ // / MaxAllowedFragmentedBytes \
+ // +--------------------------+-----------+
+ // | | |
+ // +--------------------------+-----------+
+ // \ Bytes to be released / ^
+ // |
+ // (may or may not be committed)
+ //
+ // The number of bytes released to the OS is capped at
+ // MaxCachePagesToRelease pages.
+ //
+ // TODO: Consider making MaxCachePagesToRelease configurable since
+ // the release-to-OS API can vary across systems.
+ if (!FoundOptimalFit && Entry.Time != 0) {
+ const uptr FragmentedBytes =
+ roundDown(EntryHeaderPos, PageSize) - Entry.CommitBase;
+ const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+ if (FragmentedBytes > MaxUnusedCacheBytes) {
+ const uptr MaxCacheBytesToRelease =
+ CachedBlock::MaxCachePagesToRelease * PageSize;
+ uptr BytesToRelease =
+ roundUp(Min<uptr>(MaxCacheBytesToRelease,
+ FragmentedBytes - MaxUnusedCacheBytes),
+ PageSize);
+ Entry.MemMap.releaseAndZeroPagesToOS(Entry.CommitBase, BytesToRelease);
+ }
+ }
+
return Entry;
}
@@ -659,8 +712,20 @@ MapAllocator<Config>::tryAllocateFromCache(const Options &Options, uptr Size,
FillContentsMode FillContents) {
CachedBlock Entry;
uptr EntryHeaderPos;
+ uptr MaxAllowedFragmentedBytes;
+ const uptr PageSize = getPageSizeCached();
+
+ if (LIKELY(!useMemoryTagging<Config>(Options))) {
+ const uptr MaxUnusedCacheBytes = MaxUnusedCachePages * PageSize;
+ const uptr MaxCacheBytesToRelease =
+ CachedBlock::MaxCachePagesToRelease * PageSize;
+ MaxAllowedFragmentedBytes = MaxUnusedCacheBytes + MaxCacheBytesToRelease;
+ } else {
+ MaxAllowedFragmentedBytes = MaxUnusedCachePages * PageSize;
+ }
- Entry = Cache.retrieve(Size, Alignment, getHeadersSize(), EntryHeaderPos);
+ Entry = Cache.retrieve(MaxAllowedFragmentedBytes, Size, Alignment,
+ getHeadersSize(), EntryHeaderPos);
if (!Entry.isValid())
return nullptr;
diff --git a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
index e85b6abdb36d22..1f4cab26550cce 100644
--- a/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp
@@ -281,8 +281,8 @@ struct MapAllocatorCacheTest : public Test {
std::unique_ptr<CacheT> Cache = std::make_unique<CacheT>();
const scudo::uptr PageSize = scudo::getPageSizeCached();
- // The current test allocation size is set to the minimum size
- // needed for the scudo allocator to fall back to the secondary allocator
+ // The current test allocation size is set to the maximum
+ // cache entry size
static constexpr scudo::uptr TestAllocSize =
CacheConfig::getDefaultMaxEntrySize();
@@ -327,7 +327,8 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
for (scudo::uptr I = CacheConfig::getEntriesArraySize(); I > 0; I--) {
scudo::uptr EntryHeaderPos;
scudo::CachedBlock Entry =
- Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos);
+ Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+ PageSize, 0, EntryHeaderPos);
EXPECT_EQ(Entry.MemMap.getBase(), MemMaps[I - 1].getBase());
}
@@ -336,6 +337,34 @@ TEST_F(MapAllocatorCacheTest, CacheOrder) {
MemMap.unmap();
}
+TEST_F(MapAllocatorCacheTest, PartialChunkHeuristicRetrievalTest) {
+ const scudo::uptr MaxUnusedCacheBytes = PageSize;
+ const scudo::uptr MaxCacheBytesToRelease =
+ scudo::CachedBlock::MaxCachePagesToRelease * PageSize;
+ const scudo::uptr FragmentedBytes =
+ MaxUnusedCacheBytes + MaxCacheBytesToRelease;
+
+ scudo::uptr EntryHeaderPos;
+ scudo::CachedBlock Entry;
+ scudo::MemMapT MemMap = allocate(PageSize + FragmentedBytes);
+ Cache->store(Options, MemMap.getBase(), MemMap.getCapacity(),
+ MemMap.getBase(), MemMap);
+
+ // FragmentedBytes > MaxAllowedFragmentedBytes, so a PageSize-sized
+ // allocation cannot be retrieved from the cache.
+ Entry = Cache->retrieve(/*MaxAllowedFragmentedBytes=*/0, PageSize, PageSize,
+ 0, EntryHeaderPos);
+ EXPECT_FALSE(Entry.isValid());
+
+ // FragmentedBytes == MaxAllowedFragmentedBytes, so a PageSize-sized
+ // allocation can be retrieved from the cache.
+ Entry =
+ Cache->retrieve(FragmentedBytes, PageSize, PageSize, 0, EntryHeaderPos);
+ EXPECT_TRUE(Entry.isValid());
+
+ MemMap.unmap();
+}
+
TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
std::vector<scudo::MemMapT> MemMaps;
// Fill the cache above MaxEntriesCount to force an eviction
@@ -351,7 +380,8 @@ TEST_F(MapAllocatorCacheTest, MemoryLeakTest) {
for (scudo::uptr I = CacheConfig::getDefaultMaxEntriesCount(); I > 0; I--) {
scudo::uptr EntryHeaderPos;
RetrievedEntries.push_back(
- Cache->retrieve(TestAllocSize, PageSize, 0, EntryHeaderPos));
+ Cache->retrieve(scudo::MaxUnusedCachePages * PageSize, TestAllocSize,
+ PageSize, 0, EntryHeaderPos));
EXPECT_EQ(MemMaps[I].getBase(), RetrievedEntries.back().MemMap.getBase());
}
``````````
https://github.com/llvm/llvm-project/pull/105009