[compiler-rt] 1243cef - [memprof] Replace the block cache with a hashmap.
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 11 11:31:39 PST 2021
Author: Snehasish Kumar
Date: 2021-11-11T11:29:36-08:00
New Revision: 1243cef245f6131c093d65235e87319e8e124e7f
URL: https://github.com/llvm/llvm-project/commit/1243cef245f6131c093d65235e87319e8e124e7f
DIFF: https://github.com/llvm/llvm-project/commit/1243cef245f6131c093d65235e87319e8e124e7f.diff
LOG: [memprof] Replace the block cache with a hashmap.
The existing implementation uses a cache + eviction based scheme to
record heap profile information. This design was adopted to ensure a
constant memory overhead (due to fixed number of cache entries) along
with incremental write-to-disk for evictions. We find that since the
number to entries to track is O(unique-allocation-contexts) the overhead
of keeping all contexts in memory is not very high. On a clang workload,
the max number of unique allocation contexts was ~35K, median ~11K.
For each context, we (currently) store 64 bytes of data - this amounts
to 5.5MB (max). Given the low overheads for a complex workload, we can
simplify the implementation by using a hashmap without eviction.
Other changes:
* Memory map is dumped at the end rather than startup. The relative
order in the profile dump is unchanged since we no longer have evicted
entries at runtime.
* Added a test to check meminfoblocks are merged.
Differential Revision: https://reviews.llvm.org/D111676
Added:
compiler-rt/lib/memprof/memprof_mibmap.cpp
compiler-rt/lib/memprof/memprof_mibmap.h
compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp
Modified:
compiler-rt/lib/memprof/CMakeLists.txt
compiler-rt/lib/memprof/memprof_allocator.cpp
compiler-rt/lib/memprof/memprof_flags.inc
Removed:
compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
################################################################################
diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt
index cb0816f47c600..9e8e370acbbd2 100644
--- a/compiler-rt/lib/memprof/CMakeLists.txt
+++ b/compiler-rt/lib/memprof/CMakeLists.txt
@@ -8,6 +8,7 @@ set(MEMPROF_SOURCES
memprof_interceptors_memintrinsics.cpp
memprof_linux.cpp
memprof_malloc_linux.cpp
+ memprof_mibmap.cpp
memprof_posix.cpp
memprof_rtl.cpp
memprof_shadow_setup.cpp
@@ -36,6 +37,7 @@ SET(MEMPROF_HEADERS
memprof_internal.h
memprof_mapping.h
memprof_meminfoblock.h
+ memprof_mibmap.h
memprof_stack.h
memprof_stats.h
memprof_thread.h
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 3ec3c1ebd6f50..38b68c334cbaa 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -16,6 +16,7 @@
#include "memprof_allocator.h"
#include "memprof_mapping.h"
#include "memprof_meminfoblock.h"
+#include "memprof_mibmap.h"
#include "memprof_stack.h"
#include "memprof_thread.h"
#include "sanitizer_common/sanitizer_allocator_checks.h"
@@ -29,7 +30,6 @@
#include "sanitizer_common/sanitizer_stackdepot.h"
#include <sched.h>
-#include <stdlib.h>
#include <time.h>
namespace __memprof {
@@ -167,146 +167,6 @@ AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
return &ms->allocator_cache;
}
-struct SetEntry {
- SetEntry() : id(0), MIB() {}
- bool Empty() { return id == 0; }
- void Print() {
- CHECK(!Empty());
- MIB.Print(id, flags()->print_terse);
- }
- // The stack id
- u64 id;
- MemInfoBlock MIB;
-};
-
-struct CacheSet {
- enum { kSetSize = 4 };
-
- void PrintAll() {
- for (int i = 0; i < kSetSize; i++) {
- if (Entries[i].Empty())
- continue;
- Entries[i].Print();
- }
- }
- void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
- SpinMutexLock l(&SetMutex);
- AccessCount++;
-
- for (int i = 0; i < kSetSize; i++) {
- auto id = Entries[i].id;
- // Check if this is a hit or an empty entry. Since we always move any
- // filled locations to the front of the array (see below), we don't need
- // to look after finding the first empty entry.
- if (id == new_id || !id) {
- if (id == 0) {
- Entries[i].id = new_id;
- Entries[i].MIB = newMIB;
- } else {
- Entries[i].MIB.Merge(newMIB);
- }
- // Assuming some id locality, we try to swap the matching entry
- // into the first set position.
- if (i != 0) {
- auto tmp = Entries[0];
- Entries[0] = Entries[i];
- Entries[i] = tmp;
- }
- return;
- }
- }
-
- // Miss
- MissCount++;
-
- // We try to find the entries with the lowest alloc count to be evicted:
- int min_idx = 0;
- u64 min_count = Entries[0].MIB.alloc_count;
- for (int i = 1; i < kSetSize; i++) {
- CHECK(!Entries[i].Empty());
- if (Entries[i].MIB.alloc_count < min_count) {
- min_idx = i;
- min_count = Entries[i].MIB.alloc_count;
- }
- }
-
- // Print the evicted entry profile information
- if (!flags()->print_terse)
- Printf("Evicted:\n");
- Entries[min_idx].Print();
-
- // Similar to the hit case, put new MIB in first set position.
- if (min_idx != 0)
- Entries[min_idx] = Entries[0];
- Entries[0].id = new_id;
- Entries[0].MIB = newMIB;
- }
-
- void PrintMissRate(int i) {
- u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
- Printf("Set %d miss rate: %d / %d = %5llu.%02llu%%\n", i, MissCount,
- AccessCount, p / 100, p % 100);
- }
-
- SetEntry Entries[kSetSize];
- u32 AccessCount = 0;
- u32 MissCount = 0;
- SpinMutex SetMutex;
-};
-
-struct MemInfoBlockCache {
- MemInfoBlockCache() {
- if (common_flags()->print_module_map)
- DumpProcessMap();
- if (flags()->print_terse)
- MemInfoBlock::printHeader();
- Sets =
- (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
- Constructed = true;
- }
-
- ~MemInfoBlockCache() { free(Sets); }
-
- void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
- u64 hv = new_id;
-
- // Use mod method where number of entries should be a prime close to power
- // of 2.
- hv %= flags()->mem_info_cache_entries;
-
- return Sets[hv].insertOrMerge(new_id, newMIB);
- }
-
- void PrintAll() {
- for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
- Sets[i].PrintAll();
- }
- }
-
- void PrintMissRate() {
- if (!flags()->print_mem_info_cache_miss_rate)
- return;
- u64 MissCountSum = 0;
- u64 AccessCountSum = 0;
- for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
- MissCountSum += Sets[i].MissCount;
- AccessCountSum += Sets[i].AccessCount;
- }
- u64 p = AccessCountSum ? MissCountSum * 10000ULL / AccessCountSum : 0;
- Printf("Overall miss rate: %llu / %llu = %5llu.%02llu%%\n", MissCountSum,
- AccessCountSum, p / 100, p % 100);
- if (flags()->print_mem_info_cache_miss_rate_details)
- for (int i = 0; i < flags()->mem_info_cache_entries; i++)
- Sets[i].PrintMissRate(i);
- }
-
- CacheSet *Sets;
- // Flag when the Sets have been allocated, in case a deallocation is called
- // very early before the static init of the Allocator and therefore this table
- // have completed.
- bool Constructed = false;
-};
-
// Accumulates the access count from the shadow for the given pointer and size.
u64 GetShadowCount(uptr p, u32 size) {
u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
@@ -357,24 +217,35 @@ struct Allocator {
uptr max_user_defined_malloc_size;
atomic_uint8_t rss_limit_exceeded;
- MemInfoBlockCache MemInfoBlockTable;
+ // Holds the mapping of stack ids to MemInfoBlocks.
+ MIBMapTy MIBMap;
+
bool destructing;
+ bool constructed = false;
// ------------------- Initialization ------------------------
- explicit Allocator(LinkerInitialized) : destructing(false) {}
-
+ explicit Allocator(LinkerInitialized)
+ : destructing(false), constructed(true) {}
~Allocator() { FinishAndPrint(); }
+ static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value,
+ void *Arg) {
+ SpinMutexLock(&Value->mutex);
+ Value->mib.Print(Key, bool(Arg));
+ }
+
void FinishAndPrint() {
+ if (common_flags()->print_module_map)
+ DumpProcessMap();
if (!flags()->print_terse)
Printf("Live on exit:\n");
allocator.ForceLock();
allocator.ForEachChunk(
[](uptr chunk, void *alloc) {
u64 user_requested_size;
+ Allocator *A = (Allocator *)alloc;
MemprofChunk *m =
- ((Allocator *)alloc)
- ->GetMemprofChunk((void *)chunk, user_requested_size);
+ A->GetMemprofChunk((void *)chunk, user_requested_size);
if (!m)
return;
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
@@ -382,16 +253,15 @@ struct Allocator {
long curtime = GetTimestamp();
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
m->cpu_id, GetCpuId());
- ((Allocator *)alloc)
- ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+ InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
},
this);
- allocator.ForceUnlock();
destructing = true;
- MemInfoBlockTable.PrintMissRate();
- MemInfoBlockTable.PrintAll();
+ MIBMap.ForEach(PrintCallback,
+ reinterpret_cast<void *>(flags()->print_terse));
StackDepotPrintAll();
+ allocator.ForceUnlock();
}
void InitLinkerInitialized() {
@@ -523,14 +393,13 @@ struct Allocator {
u64 user_requested_size =
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
- if (memprof_inited && memprof_init_done && !destructing &&
- MemInfoBlockTable.Constructed) {
+ if (memprof_inited && memprof_init_done && constructed && !destructing) {
u64 c = GetShadowCount(p, user_requested_size);
long curtime = GetTimestamp();
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
m->cpu_id, GetCpuId());
- MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+ InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
}
MemprofStats &thread_stats = GetCurrentThreadStats();
diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
index 035fd15b9288e..15e6bbf50df31 100644
--- a/compiler-rt/lib/memprof/memprof_flags.inc
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -37,13 +37,3 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
"pointer to an allocated space which can not be used.")
MEMPROF_FLAG(bool, print_terse, false,
"If set, prints memory profile in a terse format.")
-
-MEMPROF_FLAG(
- int, mem_info_cache_entries, 16381,
- "Size in entries of the mem info block cache, should be closest prime"
- " number to a power of two for best hashing.")
-MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false,
- "If set, prints the miss rate of the mem info block cache.")
-MEMPROF_FLAG(
- bool, print_mem_info_cache_miss_rate_details, false,
- "If set, prints detailed miss rates of the mem info block cache sets.")
diff --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp
new file mode 100644
index 0000000000000..47449cf9612b0
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp
@@ -0,0 +1,35 @@
+//===-- memprof_mibmap.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "memprof_mibmap.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __memprof {
+
+void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
+ MIBMapTy::Handle h(&Map, static_cast<uptr>(Id), /*remove=*/false,
+ /*create=*/true);
+ if (h.created()) {
+ LockedMemInfoBlock *lmib =
+ (LockedMemInfoBlock *)InternalAlloc(sizeof(LockedMemInfoBlock));
+ lmib->mutex.Init();
+ lmib->mib = Block;
+ *h = lmib;
+ } else {
+ LockedMemInfoBlock *lmib = *h;
+ SpinMutexLock lock(&lmib->mutex);
+ lmib->mib.Merge(Block);
+ }
+}
+
+} // namespace __memprof
diff --git a/compiler-rt/lib/memprof/memprof_mibmap.h b/compiler-rt/lib/memprof/memprof_mibmap.h
new file mode 100644
index 0000000000000..ed5dda174fe5b
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mibmap.h
@@ -0,0 +1,24 @@
+#ifndef MEMPROF_MIBMAP_H_
+#define MEMPROF_MIBMAP_H_
+
+#include "memprof_meminfoblock.h"
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __memprof {
+
+struct LockedMemInfoBlock {
+ __sanitizer::StaticSpinMutex mutex;
+ MemInfoBlock mib;
+};
+
+// The MIB map stores a mapping from stack ids to MemInfoBlocks.
+typedef __sanitizer::AddrHashMap<LockedMemInfoBlock *, 200003> MIBMapTy;
+
+// Insert a new MemInfoBlock or merge with an existing block identified by the
+// stack id.
+void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map);
+
+} // namespace __memprof
+
+#endif // MEMPROF_MIBMAP_H_
diff --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
deleted file mode 100644
index c253855fbf033..0000000000000
--- a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// Check mem_info_cache_entries option.
-
-// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=log_path=stderr:mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s
-
-// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00%
-// CHECK-NOT: Set
-
-int main() {
- return 0;
-}
diff --git a/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp b/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp
new file mode 100644
index 0000000000000..793c2aa2c977a
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp
@@ -0,0 +1,24 @@
+// RUN: %clangxx_memprof %s -o %t
+
+// RUN: %env_memprof_opts=log_path=stdout %run %t | FileCheck %s
+
+#include <sanitizer/memprof_interface.h>
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+ for (int i = 0; i < 3; i++) {
+ char *x = (char *)malloc(10);
+ if (i % 2)
+ memset(x, 0, 10);
+ else
+ memset(x, 2, 10);
+ free(x);
+ }
+ return 0;
+}
+// We should get one allocation site with alloc_count = loop trip count = 3
+// CHECK: Memory allocation stack id = [[ID:[0-9]+]]
+// CHECK-NEXT-COUNT-1: alloc_count 3
+// CHECK-COUNT-1: Stack for id {{.*}}[[ID]]
+// CHECK-NEXT-COUNT-1: memprof_malloc_linux.cpp
+// CHECK-NEXT-COUNT-1: memprof_merge_mib.cpp
diff --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
deleted file mode 100644
index e32a0de0d913d..0000000000000
--- a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// Check print_mem_info_cache_miss_rate and
-// print_mem_info_cache_miss_rate_details options.
-
-// RUN: %clangxx_memprof -O0 %s -o %t
-// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s
-// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS
-
-// CHECK: Overall miss rate: 0 / {{.*}} = 0.00%
-// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00%
-// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00%
-
-int main() {
- return 0;
-}
More information about the llvm-commits
mailing list