[compiler-rt] 46240c3 - [scudo][standalone] Minor optimization & improvements
Kostya Kortchinsky via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 21 10:05:53 PST 2019
Author: Kostya Kortchinsky
Date: 2019-11-21T10:05:39-08:00
New Revision: 46240c38721fe9919f9c63277bec7bbf3e62073b
URL: https://github.com/llvm/llvm-project/commit/46240c38721fe9919f9c63277bec7bbf3e62073b
DIFF: https://github.com/llvm/llvm-project/commit/46240c38721fe9919f9c63277bec7bbf3e62073b.diff
LOG: [scudo][standalone] Minor optimization & improvements
Summary:
A few small improvements and optimizations:
- when refilling the free list, push back the last batch and return
the front one: this allows keeping the allocations towards the front
of the region;
- instead of using 48 entries in the shuffle array, use a multiple of
`MaxNumCached`;
- make the maximum number of batches to create on refill a constant;
ultimately it should be configurable, but that's for later;
- `initCache` doesn't need to zero out the cache, it's already done.
- it turns out that when using `||` or `&&`, the compiler is adamant
about adding a short circuit for every part of the expression, which
ends up producing somewhat annoying asm with lots of tests and
conditional jumps. I am changing that to bitwise `|` or `&` in two
places so that the generated code looks better. Added comments since
it might feel weird to people.
This yields some small performance gains overall, nothing drastic
though.
Reviewers: hctim, morehouse, cferris, eugenis
Subscribers: #sanitizers, llvm-commits
Tags: #sanitizers, #llvm
Differential Revision: https://reviews.llvm.org/D70452
Added:
Modified:
compiler-rt/lib/scudo/standalone/combined.h
compiler-rt/lib/scudo/standalone/primary32.h
compiler-rt/lib/scudo/standalone/primary64.h
Removed:
################################################################################
diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h
index f4fa5d4b99ad..0a05857a20d6 100644
--- a/compiler-rt/lib/scudo/standalone/combined.h
+++ b/compiler-rt/lib/scudo/standalone/combined.h
@@ -144,7 +144,10 @@ template <class Params> class Allocator {
TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
- void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
+ // The Cache must be provided zero-initialized.
+ void initCache(CacheT *Cache) {
+ Cache->initLinkerInitialized(&Stats, &Primary);
+ }
// Release the resources used by a TSD, which involves:
// - draining the local quarantine cache to the global quarantine;
@@ -161,7 +164,7 @@ template <class Params> class Allocator {
uptr Alignment = MinAlignment,
bool ZeroContents = false) {
initThreadMaybe();
- ZeroContents = ZeroContents || Options.ZeroContents;
+ ZeroContents |= static_cast<bool>(Options.ZeroContents);
if (UNLIKELY(Alignment > MaxAlignment)) {
if (Options.MayReturnNull)
@@ -181,12 +184,13 @@ template <class Params> class Allocator {
((Alignment > MinAlignment) ? Alignment : Chunk::getHeaderSize());
// Takes care of extravagantly large sizes as well as integer overflows.
- if (UNLIKELY(Size >= MaxAllowedMallocSize ||
- NeededSize >= MaxAllowedMallocSize)) {
+ COMPILER_CHECK(MaxAllowedMallocSize < UINTPTR_MAX - MaxAlignment);
+ if (UNLIKELY(Size >= MaxAllowedMallocSize)) {
if (Options.MayReturnNull)
return nullptr;
reportAllocationSizeTooBig(Size, NeededSize, MaxAllowedMallocSize);
}
+ DCHECK_LE(Size, NeededSize);
void *Block;
uptr ClassId;
@@ -541,7 +545,9 @@ template <class Params> class Allocator {
Chunk::UnpackedHeader NewHeader = *Header;
// If the quarantine is disabled, the actual size of a chunk is 0 or larger
// than the maximum allowed, we return a chunk directly to the backend.
- const bool BypassQuarantine = !Quarantine.getCacheSize() || !Size ||
+ // Logical Or can be short-circuited, which introduces unnecessary
+ // conditional jumps, so use bitwise Or and let the compiler be clever.
+ const bool BypassQuarantine = !Quarantine.getCacheSize() | !Size |
(Size > Options.QuarantineMaxChunkSize);
if (BypassQuarantine) {
NewHeader.State = Chunk::State::Available;
diff --git a/compiler-rt/lib/scudo/standalone/primary32.h b/compiler-rt/lib/scudo/standalone/primary32.h
index 453b06ee5549..a0d8560c3f6c 100644
--- a/compiler-rt/lib/scudo/standalone/primary32.h
+++ b/compiler-rt/lib/scudo/standalone/primary32.h
@@ -300,10 +300,10 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator32 {
const uptr NumberOfBlocks = RegionSize / Size;
DCHECK_GT(NumberOfBlocks, 0);
TransferBatch *B = nullptr;
- constexpr uptr ShuffleArraySize = 48;
+ constexpr u32 ShuffleArraySize = 8U * TransferBatch::MaxNumCached;
void *ShuffleArray[ShuffleArraySize];
u32 Count = 0;
- const uptr AllocatedUser = NumberOfBlocks * Size;
+ const uptr AllocatedUser = Size * NumberOfBlocks;
for (uptr I = Region; I < Region + AllocatedUser; I += Size) {
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
if (Count == ShuffleArraySize) {
@@ -319,6 +319,11 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator32 {
return nullptr;
}
DCHECK(B);
+ if (!Sci->FreeList.empty()) {
+ Sci->FreeList.push_back(B);
+ B = Sci->FreeList.front();
+ Sci->FreeList.pop_front();
+ }
DCHECK_GT(B->getCount(), 0);
C->getStats().add(StatFree, AllocatedUser);
diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 409472c87776..559742d05ad9 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -187,6 +187,8 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
// Call map for user memory with at least this size.
static const uptr MapSizeIncrement = 1UL << 17;
+ // Fill at most this number of batches from the newly map'd memory.
+ static const u32 MaxNumBatches = 8U;
struct RegionStats {
uptr PoppedBlocks;
@@ -289,16 +291,18 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
C->getStats().add(StatMapped, UserMapSize);
}
- const uptr NumberOfBlocks = Min(
- 8UL * MaxCount, (Region->MappedUser - Region->AllocatedUser) / Size);
+ const u32 NumberOfBlocks = Min(
+ MaxNumBatches * MaxCount,
+ static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size));
DCHECK_GT(NumberOfBlocks, 0);
TransferBatch *B = nullptr;
- constexpr uptr ShuffleArraySize = 48;
+ constexpr u32 ShuffleArraySize =
+ MaxNumBatches * TransferBatch::MaxNumCached;
void *ShuffleArray[ShuffleArraySize];
u32 Count = 0;
const uptr P = RegionBeg + Region->AllocatedUser;
- const uptr AllocatedUser = NumberOfBlocks * Size;
+ const uptr AllocatedUser = Size * NumberOfBlocks;
for (uptr I = P; I < P + AllocatedUser; I += Size) {
ShuffleArray[Count++] = reinterpret_cast<void *>(I);
if (Count == ShuffleArraySize) {
@@ -314,6 +318,11 @@ template <class SizeClassMapT, uptr RegionSizeLog> class SizeClassAllocator64 {
return nullptr;
}
DCHECK(B);
+ if (!Region->FreeList.empty()) {
+ Region->FreeList.push_back(B);
+ B = Region->FreeList.front();
+ Region->FreeList.pop_front();
+ }
DCHECK_GT(B->getCount(), 0);
C->getStats().add(StatFree, AllocatedUser);
More information about the llvm-commits
mailing list