[compiler-rt] 1a4bc11 - Reland "[scudo] Support partial concurrent page release in SizeClassAllocator64"
Chia-hung Duan via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 12 11:28:10 PDT 2023
Author: Chia-hung Duan
Date: 2023-07-12T18:25:01Z
New Revision: 1a4bc114ec8a80550259b5b3b1c7ab0d5c5fa4da
URL: https://github.com/llvm/llvm-project/commit/1a4bc114ec8a80550259b5b3b1c7ab0d5c5fa4da
DIFF: https://github.com/llvm/llvm-project/commit/1a4bc114ec8a80550259b5b3b1c7ab0d5c5fa4da.diff
LOG: Reland "[scudo] Support partial concurrent page release in SizeClassAllocator64"
This reverts commit 2f04b688aadef747172a8f4a7d1658dc049b1e24.
Reviewed By: cferris
Differential Revision: https://reviews.llvm.org/D155015
Added:
Modified:
compiler-rt/lib/scudo/standalone/local_cache.h
compiler-rt/lib/scudo/standalone/primary64.h
compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
Removed:
################################################################################
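A brief summary before the diff: previously, releaseToOSMaybe() held the
per-region freelist lock (FLLock) across all five steps of a page release.
With this reland, the expensive middle steps 3 and 4 (marking free blocks and
returning pages to the OS) run with FLLock temporarily dropped, so concurrent
pushBlocks()/popBatch() on the same region can proceed during most of a
release. A minimal model of the locking shape, with illustrative names
(std::mutex stands in for scudo's annotated HybridMutex; this is a sketch, not
the implementation):

  #include <list>
  #include <mutex>

  std::mutex FLLock;
  std::list<int> FreeList;        // stands in for Region->FreeListInfo.BlockList
  std::list<int> GroupsToRelease; // stands in for the detached BatchGroups

  void releaseToOSMaybeModel() {
    std::unique_lock<std::mutex> L(FLLock);
    // Steps 1-2: under the lock, detach the release candidates.
    GroupsToRelease.splice(GroupsToRelease.begin(), FreeList);
    L.unlock();
    // Steps 3-4: pushBlocks()/popBatch() may run concurrently here; they
    // touch only FreeList, never the detached GroupsToRelease.
    L.lock();
    // Step 5: merge the detached groups back under the lock.
    FreeList.splice(FreeList.end(), GroupsToRelease);
  }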
diff --git a/compiler-rt/lib/scudo/standalone/local_cache.h b/compiler-rt/lib/scudo/standalone/local_cache.h
index 6d36a1c399ff15..1095eb5f186d1e 100644
--- a/compiler-rt/lib/scudo/standalone/local_cache.h
+++ b/compiler-rt/lib/scudo/standalone/local_cache.h
@@ -35,6 +35,15 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
// u16 will be promoted to int by arithmetic type conversion.
Count = static_cast<u16>(Count + N);
}
+ void appendFromTransferBatch(TransferBatch *B, u16 N) {
+ DCHECK_LE(N, MaxNumCached - Count);
+ DCHECK_GE(B->Count, N);
+ // Append from the back of `B`.
+ memcpy(Batch + Count, B->Batch + (B->Count - N), sizeof(Batch[0]) * N);
+ // u16 will be promoted to int by arithmetic type conversion.
+ Count = static_cast<u16>(Count + N);
+ B->Count = static_cast<u16>(B->Count - N);
+ }
void clear() { Count = 0; }
void add(CompactPtrT P) {
DCHECK_LT(Count, MaxNumCached);
@@ -44,7 +53,7 @@ template <class SizeClassAllocator> struct SizeClassAllocatorLocalCache {
memcpy(Array, Batch, sizeof(Batch[0]) * Count);
}
u16 getCount() const { return Count; }
- CompactPtrT *getRawArray() { return Batch; }
+ bool isEmpty() const { return Count == 0U; }
CompactPtrT get(u16 I) const {
DCHECK_LE(I, Count);
return Batch[I];
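The new appendFromTransferBatch() moves the last N entries of one batch's
array into another and adjusts both counts, so the donor keeps a contiguous
prefix. A simplified standalone model (plain integers in place of compact
pointers, asserts in place of DCHECKs; a sketch, not scudo's actual types):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  struct ToyBatch {
    static constexpr uint16_t MaxNumCached = 8;
    uint64_t Batch[MaxNumCached];
    uint16_t Count = 0;

    void appendFrom(ToyBatch *B, uint16_t N) {
      assert(N <= MaxNumCached - Count);
      assert(B->Count >= N);
      // Take the last N entries of `B` so that `B` keeps a contiguous prefix.
      std::memcpy(Batch + Count, B->Batch + (B->Count - N),
                  sizeof(Batch[0]) * N);
      Count = static_cast<uint16_t>(Count + N);
      B->Count = static_cast<uint16_t>(B->Count - N);
    }
  };

Copying from the back is what lets mergeGroupsToReleaseBack() below top up a
partially-filled front batch without shifting the donor's remaining entries.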
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 00bc2c4d89fb35..fd7a1f9e80cdec 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -984,8 +984,6 @@ template <typename Config> class SizeClassAllocator64 {
NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
ReleaseToOS ReleaseType = ReleaseToOS::Normal)
REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
- // TODO(chiahungduan): Release `FLLock` in step 3 & 4 described in the
- // comment below.
ScopedLock L(Region->FLLock);
const uptr BlockSize = getSizeByClassId(ClassId);
@@ -1010,10 +1008,6 @@ template <typename Config> class SizeClassAllocator64 {
return 0;
}
- // This is only used for debugging to ensure the consistency of the number
- // of groups.
- uptr NumberOfBatchGroups = Region->FreeListInfo.BlockList.size();
-
// ====================================================================== //
// 2. Determine which groups can release the pages. Use a heuristic to
// gather groups that are candidates for doing a release.
@@ -1029,39 +1023,71 @@ template <typename Config> class SizeClassAllocator64 {
if (GroupsToRelease.empty())
return 0;
- // ====================================================================== //
- // 3. Mark the free blocks in `GroupsToRelease` in the `PageReleaseContext`.
- // Then we can tell which pages are in-use by querying
- // `PageReleaseContext`.
- // ====================================================================== //
- PageReleaseContext Context = markFreeBlocks(
- Region, BlockSize, AllocatedUserEnd, CompactPtrBase, GroupsToRelease);
- if (UNLIKELY(!Context.hasBlockMarked())) {
- mergeGroupsToReleaseBack(Region, GroupsToRelease, NumberOfBatchGroups);
- return 0;
- }
+ // Ideally, we should use a class like `ScopedUnlock`. However, this form of
+ // unlocking is not supported by the thread-safety analysis. See
+ // https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#no-alias-analysis
+ // for more details.
+    // Defining it as a local class lets us mark the ctor/dtor with the
+    // proper annotations for the target lock. Note that referencing a
+    // function-local variable from a local class only works in thread-safety
+    // annotations.
+ // TODO: Implement general `ScopedUnlock` when it's supported.
+ class FLLockScopedUnlock {
+ public:
+ FLLockScopedUnlock(RegionInfo *Region) RELEASE(Region->FLLock)
+ : R(Region) {
+ R->FLLock.assertHeld();
+ R->FLLock.unlock();
+ }
+ ~FLLockScopedUnlock() ACQUIRE(Region->FLLock) { R->FLLock.lock(); }
- // ====================================================================== //
- // 4. Release the unused physical pages back to the OS.
- // ====================================================================== //
- RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
- Region->RegionBeg,
- Context.getReleaseOffset());
- auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
- releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
- if (Recorder.getReleasedRangesCount() > 0) {
- Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
- Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
- Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+ private:
+ RegionInfo *R;
+ };
+
+    // Note that we have extracted the `GroupsToRelease` from the region
+    // freelist. It's safe to let pushBlocks()/popBatches() access the
+    // remaining region freelist. In steps 3 and 4, we will temporarily
+    // release the FLLock and lock it again before step 5.
+
+ uptr ReleasedBytes = 0;
+ {
+ FLLockScopedUnlock UL(Region);
+ // ==================================================================== //
+ // 3. Mark the free blocks in `GroupsToRelease` in the
+ // `PageReleaseContext`. Then we can tell which pages are in-use by
+ // querying `PageReleaseContext`.
+ // ==================================================================== //
+ PageReleaseContext Context = markFreeBlocks(
+ Region, BlockSize, AllocatedUserEnd, CompactPtrBase, GroupsToRelease);
+ if (UNLIKELY(!Context.hasBlockMarked())) {
+ ScopedLock L(Region->FLLock);
+ mergeGroupsToReleaseBack(Region, GroupsToRelease);
+ return 0;
+ }
+
+ // ==================================================================== //
+ // 4. Release the unused physical pages back to the OS.
+ // ==================================================================== //
+ RegionReleaseRecorder<MemMapT> Recorder(&Region->MemMapInfo.MemMap,
+ Region->RegionBeg,
+ Context.getReleaseOffset());
+ auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
+ releaseFreeMemoryToOS(Context, Recorder, SkipRegion);
+ if (Recorder.getReleasedRangesCount() > 0) {
+ Region->ReleaseInfo.BytesInFreeListAtLastCheckpoint = BytesInFreeList;
+ Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
+ Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
+ }
+ Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
+ ReleasedBytes = Recorder.getReleasedBytes();
}
- Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTimeFast();
// ====================================================================== //
// 5. Merge the `GroupsToRelease` back to the freelist.
// ====================================================================== //
- mergeGroupsToReleaseBack(Region, GroupsToRelease, NumberOfBatchGroups);
+ mergeGroupsToReleaseBack(Region, GroupsToRelease);
- return Recorder.getReleasedBytes();
+ return ReleasedBytes;
}
bool hasChanceToReleasePages(RegionInfo *Region, uptr BlockSize,
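The FLLockScopedUnlock trick above, reduced to a self-contained sketch. The
macro spellings below follow the Clang thread-safety docs (scudo defines its
own equivalents), so treat this as an approximation rather than the exact
scudo declarations:

  #define CAPABILITY(x) __attribute__((capability(x)))
  #define ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__)))
  #define RELEASE(...) __attribute__((release_capability(__VA_ARGS__)))
  #define REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__)))

  class CAPABILITY("mutex") Mutex {
  public:
    void lock() ACQUIRE() {}
    void unlock() RELEASE() {}
  };

  struct RegionInfo { Mutex FLLock; };

  void stepsThreeAndFour(RegionInfo *Region) REQUIRES(Region->FLLock) {
    // Annotations on a local class may reference the enclosing function's
    // variables (here `Region`), so the analysis tracks the exact capability
    // `Region->FLLock`. A generic, reusable ScopedUnlock cannot express this
    // because the analysis performs no alias analysis.
    class ScopedUnlockFL {
    public:
      ScopedUnlockFL(RegionInfo *Region) RELEASE(Region->FLLock) : R(Region) {
        R->FLLock.unlock();
      }
      ~ScopedUnlockFL() ACQUIRE(Region->FLLock) { R->FLLock.lock(); }
    private:
      RegionInfo *R;
    };
    ScopedUnlockFL U(Region);
    // FLLock is released in this window; concurrent pushBlocks()/popBatch()
    // on this region can take it.
  } // ~ScopedUnlockFL re-acquires FLLock before returning to the caller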
@@ -1298,7 +1324,7 @@ template <typename Config> class SizeClassAllocator64 {
markFreeBlocks(RegionInfo *Region, const uptr BlockSize,
const uptr AllocatedUserEnd, const uptr CompactPtrBase,
SinglyLinkedList<BatchGroup> &GroupsToRelease)
- REQUIRES(Region->MMLock, Region->FLLock) {
+ REQUIRES(Region->MMLock) EXCLUDES(Region->FLLock) {
const uptr GroupSize = (1U << GroupSizeLog);
auto DecompactPtr = [CompactPtrBase](CompactPtrT CompactPtr) {
return decompactPtrInternal(CompactPtrBase, CompactPtr);
@@ -1347,9 +1373,12 @@ template <typename Config> class SizeClassAllocator64 {
BG.Batches.front()->getCount();
if (NumBlocks == MaxContainedBlocks) {
- for (const auto &It : BG.Batches)
+ for (const auto &It : BG.Batches) {
+ if (&It != BG.Batches.front())
+ DCHECK_EQ(It.getCount(), BG.MaxCachedPerBatch);
for (u16 I = 0; I < It.getCount(); ++I)
DCHECK_EQ(compactPtrGroup(It.get(I)), BG.CompactPtrGroupBase);
+ }
Context.markRangeAsAllCounted(BatchGroupBase, BatchGroupUsedEnd,
Region->RegionBeg, /*RegionIndex=*/0,
@@ -1371,9 +1400,22 @@ template <typename Config> class SizeClassAllocator64 {
}
void mergeGroupsToReleaseBack(RegionInfo *Region,
- SinglyLinkedList<BatchGroup> &GroupsToRelease,
- const uptr NumberOfBatchGroups)
+ SinglyLinkedList<BatchGroup> &GroupsToRelease)
REQUIRES(Region->MMLock, Region->FLLock) {
+    // After merging two freelists, we may have redundant `BatchGroup`s that
+    // need to be recycled. The number of unused `BatchGroup`s is expected to
+    // be small; the constant below is inferred from real programs.
+ constexpr uptr MaxUnusedSize = 8;
+ CompactPtrT Blocks[MaxUnusedSize];
+ u32 Idx = 0;
+ RegionInfo *BatchClassRegion = getRegionInfo(SizeClassMap::BatchClassId);
+    // We can't call pushBatchClassBlocks() to recycle the unused
+    // `BatchGroup`s while we are manipulating the freelist of
+    // `BatchClassRegion`. Instead, we would have to push them back to the
+    // freelist as the two `BatchGroup`s are merged. That logic hasn't been
+    // implemented because we don't yet support releasing pages in
+    // `BatchClassRegion`.
+ DCHECK_NE(BatchClassRegion, Region);
+
// Merge GroupsToRelease back to the Region::FreeListInfo.BlockList. Note
// that both `Region->FreeListInfo.BlockList` and `GroupsToRelease` are
// sorted.
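The buffer declared above drives a simple batched-recycling pattern: unused
TransferBatch/BatchGroup blocks are collected into a small fixed array and
pushed to the batch-class region's freelist in bulk, so that region's FLLock
is taken at most once per eight recycled blocks, plus one final flush. A
minimal model with illustrative names (std::mutex and std::vector in place of
scudo's types):

  #include <cstdint>
  #include <mutex>
  #include <vector>

  std::mutex BatchClassFLLock;
  std::vector<uint64_t> BatchClassFreeList;

  // Callers must hold BatchClassFLLock, as pushBatchClassBlocks() requires.
  void pushBatchClassBlocksModel(const uint64_t *Blocks, uint32_t N) {
    BatchClassFreeList.insert(BatchClassFreeList.end(), Blocks, Blocks + N);
  }

  void recycle(const uint64_t *Unused, uint32_t Count) {
    constexpr uint32_t MaxUnusedSize = 8;
    uint64_t Blocks[MaxUnusedSize];
    uint32_t Idx = 0;
    for (uint32_t I = 0; I < Count; ++I) {
      if (Idx == MaxUnusedSize) { // buffer full: flush under the lock
        std::scoped_lock L(BatchClassFLLock);
        pushBatchClassBlocksModel(Blocks, Idx);
        Idx = 0;
      }
      Blocks[Idx++] = Unused[I];
    }
    if (Idx != 0) { // flush the remainder, mirroring the final push below
      std::scoped_lock L(BatchClassFLLock);
      pushBatchClassBlocksModel(Blocks, Idx);
    }
  }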
@@ -1386,8 +1428,7 @@ template <typename Config> class SizeClassAllocator64 {
break;
}
- DCHECK_NE(BG->CompactPtrGroupBase,
- GroupsToRelease.front()->CompactPtrGroupBase);
+ DCHECK(!BG->Batches.empty());
if (BG->CompactPtrGroupBase <
GroupsToRelease.front()->CompactPtrGroupBase) {
@@ -1396,6 +1437,64 @@ template <typename Config> class SizeClassAllocator64 {
continue;
}
+ BatchGroup *Cur = GroupsToRelease.front();
+ TransferBatch *UnusedTransferBatch = nullptr;
+ GroupsToRelease.pop_front();
+
+ if (BG->CompactPtrGroupBase == Cur->CompactPtrGroupBase) {
+ BG->PushedBlocks += Cur->PushedBlocks;
+ // We have updated `BatchGroup::BytesInBGAtLastCheckpoint` while
+ // collecting the `GroupsToRelease`.
+ BG->BytesInBGAtLastCheckpoint = Cur->BytesInBGAtLastCheckpoint;
+ const uptr MaxCachedPerBatch = BG->MaxCachedPerBatch;
+
+        // Note that the front TransferBatch in each `Batches` list may not
+        // be full, and only the front TransferBatch is allowed to be
+        // partially filled. Thus we have to merge the two fronts before
+        // appending one list to the other.
+ if (Cur->Batches.front()->getCount() == MaxCachedPerBatch) {
+ BG->Batches.append_back(&Cur->Batches);
+ } else {
+ TransferBatch *NonFullBatch = Cur->Batches.front();
+ Cur->Batches.pop_front();
+ const u16 NonFullBatchCount = NonFullBatch->getCount();
+ // The remaining Batches in `Cur` are full.
+ BG->Batches.append_back(&Cur->Batches);
+
+ if (BG->Batches.front()->getCount() == MaxCachedPerBatch) {
+ // Only 1 non-full TransferBatch, push it to the front.
+ BG->Batches.push_front(NonFullBatch);
+ } else {
+ const u16 NumBlocksToMove = static_cast<u16>(
+ Min(static_cast<u16>(MaxCachedPerBatch -
+ BG->Batches.front()->getCount()),
+ NonFullBatchCount));
+ BG->Batches.front()->appendFromTransferBatch(NonFullBatch,
+ NumBlocksToMove);
+ if (NonFullBatch->isEmpty())
+ UnusedTransferBatch = NonFullBatch;
+ else
+ BG->Batches.push_front(NonFullBatch);
+ }
+ }
+
+ const u32 NeededSlots = UnusedTransferBatch == nullptr ? 1U : 2U;
+ if (UNLIKELY(Idx + NeededSlots > MaxUnusedSize)) {
+ ScopedLock L(BatchClassRegion->FLLock);
+ pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
+ Idx = 0;
+ }
+ Blocks[Idx++] =
+ compactPtr(SizeClassMap::BatchClassId, reinterpret_cast<uptr>(Cur));
+ if (UnusedTransferBatch) {
+ Blocks[Idx++] =
+ compactPtr(SizeClassMap::BatchClassId,
+ reinterpret_cast<uptr>(UnusedTransferBatch));
+ }
+ Prev = BG;
+ BG = BG->Next;
+ continue;
+ }
+
// At here, the `BG` is the first BatchGroup with CompactPtrGroupBase
// larger than the first element in `GroupsToRelease`. We need to insert
// `GroupsToRelease::front()` (which is `Cur` below) before `BG`.
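To make the front-batch merge arithmetic above concrete: only the front
TransferBatch of a group may be partially filled, so when two groups merge,
blocks are moved from the incoming non-full batch into the destination's
front batch. A worked instance with hypothetical counts:

  #include <algorithm>
  #include <cstdint>

  // Capacity 12; destination front batch holds 7; incoming non-full batch
  // holds 9 (all numbers are made up for illustration).
  constexpr uint16_t MaxCachedPerBatch = 12;
  constexpr uint16_t FrontCount = 7;
  constexpr uint16_t NonFullBatchCount = 9;

  constexpr uint16_t NumBlocksToMove =
      std::min<uint16_t>(MaxCachedPerBatch - FrontCount, NonFullBatchCount);
  static_assert(NumBlocksToMove == 5,
                "5 blocks top up the front batch to 12; NonFullBatch keeps 4 "
                "and is pushed to the front as the only partial batch");

Had NonFullBatch held 5 or fewer blocks, it would have been drained empty and
handed to `UnusedTransferBatch` for recycling.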
@@ -1406,8 +1505,6 @@ template <typename Config> class SizeClassAllocator64 {
//
// Afterwards, we don't need to advance `BG` because the order between
// `BG` and the new `GroupsToRelease::front()` hasn't been checked.
- BatchGroup *Cur = GroupsToRelease.front();
- GroupsToRelease.pop_front();
if (Prev == nullptr)
Region->FreeListInfo.BlockList.push_front(Cur);
else
@@ -1416,8 +1513,10 @@ template <typename Config> class SizeClassAllocator64 {
Prev = Cur;
}
- DCHECK_EQ(Region->FreeListInfo.BlockList.size(), NumberOfBatchGroups);
- (void)NumberOfBatchGroups;
+ if (Idx != 0) {
+ ScopedLock L(BatchClassRegion->FLLock);
+ pushBatchClassBlocks(BatchClassRegion, Blocks, Idx);
+ }
if (SCUDO_DEBUG) {
BatchGroup *Prev = Region->FreeListInfo.BlockList.front();
diff --git a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
index c129198918b094..c875a10587b92b 100644
--- a/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/combined_test.cpp
@@ -500,6 +500,10 @@ SCUDO_TYPED_TEST(ScudoCombinedTest, ThreadedCombined) {
if (P)
V.push_back(std::make_pair(P, Size));
}
+
+ // Try to interleave pushBlocks(), popBatch() and releaseToOS().
+ Allocator->releaseToOS(scudo::ReleaseToOS::Force);
+
while (!V.empty()) {
auto Pair = V.back();
Allocator->deallocate(Pair.first, Origin, Pair.second);
diff --git a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
index a17fd58ac604d3..004f39a77227b4 100644
--- a/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
+++ b/compiler-rt/lib/scudo/standalone/tests/primary_test.cpp
@@ -308,10 +308,18 @@ SCUDO_TYPED_TEST(ScudoPrimaryTest, PrimaryThreaded) {
if (P)
V.push_back(std::make_pair(ClassId, P));
}
+
+ // Try to interleave pushBlocks(), popBatch() and releaseToOS().
+ Allocator->releaseToOS(scudo::ReleaseToOS::Force);
+
while (!V.empty()) {
auto Pair = V.back();
Cache.deallocate(Pair.first, Pair.second);
V.pop_back();
+      // This increases the chance of having non-full TransferBatches, which
+      // exercises the code path that merges TransferBatches.
+ if (std::rand() % 8 == 0)
+ Cache.drain();
}
Cache.destroy(nullptr);
});
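The `std::rand() % 8` drain above (and the forced releaseToOS() in both
threaded tests) deliberately leaves caches holding partial TransferBatches:
with a drain probability of 1/8 per deallocation, a cache is flushed roughly
every eight frees, well before it accumulates only full batches, which steers
execution into the new merge path in mergeGroupsToReleaseBack(). One optional
refinement, not part of this commit, is to seed the RNG explicitly and log
the seed; this makes the rand() sequence itself repeatable, though thread
interleaving naturally remains nondeterministic:

  #include <cstdio>
  #include <cstdlib>
  #include <ctime>

  void seedStressTest() {
    const unsigned Seed = static_cast<unsigned>(std::time(nullptr));
    std::printf("stress test RNG seed: %u\n", Seed);
    std::srand(Seed);
  }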