[compiler-rt] 1243cef - [memprof] Replace the block cache with a hashmap.

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 11 11:31:39 PST 2021


Author: Snehasish Kumar
Date: 2021-11-11T11:29:36-08:00
New Revision: 1243cef245f6131c093d65235e87319e8e124e7f

URL: https://github.com/llvm/llvm-project/commit/1243cef245f6131c093d65235e87319e8e124e7f
DIFF: https://github.com/llvm/llvm-project/commit/1243cef245f6131c093d65235e87319e8e124e7f.diff

LOG: [memprof] Replace the block cache with a hashmap.

The existing implementation uses a cache + eviction based scheme to
record heap profile information. This design was adopted to ensure a
constant memory overhead (due to fixed number of cache entries) along
with incremental write-to-disk for evictions. We find that since the
number to entries to track is O(unique-allocation-contexts) the overhead
of keeping all contexts in memory is not very high. On a clang workload,
the max number of unique allocation contexts was ~35K, median ~11K.
For each context, we (currently) store 64 bytes of data - this amounts
to 5.5MB (max). Given the low overheads for a complex workload, we can
simplify the implementation by using a hashmap without eviction.

Other changes:
* Memory map is dumped at the end rather than startup. The relative
order in the profile dump is unchanged since we no longer have evicted
entries at runtime.
* Added a test to check meminfoblocks are merged.

Differential Revision: https://reviews.llvm.org/D111676

Added: 
    compiler-rt/lib/memprof/memprof_mibmap.cpp
    compiler-rt/lib/memprof/memprof_mibmap.h
    compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp

Modified: 
    compiler-rt/lib/memprof/CMakeLists.txt
    compiler-rt/lib/memprof/memprof_allocator.cpp
    compiler-rt/lib/memprof/memprof_flags.inc

Removed: 
    compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
    compiler-rt/test/memprof/TestCases/print_miss_rate.cpp


################################################################################
diff  --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt
index cb0816f47c600..9e8e370acbbd2 100644
--- a/compiler-rt/lib/memprof/CMakeLists.txt
+++ b/compiler-rt/lib/memprof/CMakeLists.txt
@@ -8,6 +8,7 @@ set(MEMPROF_SOURCES
   memprof_interceptors_memintrinsics.cpp
   memprof_linux.cpp
   memprof_malloc_linux.cpp
+  memprof_mibmap.cpp
   memprof_posix.cpp
   memprof_rtl.cpp
   memprof_shadow_setup.cpp
@@ -36,6 +37,7 @@ SET(MEMPROF_HEADERS
   memprof_internal.h
   memprof_mapping.h
   memprof_meminfoblock.h
+  memprof_mibmap.h
   memprof_stack.h
   memprof_stats.h
   memprof_thread.h

diff  --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 3ec3c1ebd6f50..38b68c334cbaa 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -16,6 +16,7 @@
 #include "memprof_allocator.h"
 #include "memprof_mapping.h"
 #include "memprof_meminfoblock.h"
+#include "memprof_mibmap.h"
 #include "memprof_stack.h"
 #include "memprof_thread.h"
 #include "sanitizer_common/sanitizer_allocator_checks.h"
@@ -29,7 +30,6 @@
 #include "sanitizer_common/sanitizer_stackdepot.h"
 
 #include <sched.h>
-#include <stdlib.h>
 #include <time.h>
 
 namespace __memprof {
@@ -167,146 +167,6 @@ AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
   return &ms->allocator_cache;
 }
 
-struct SetEntry {
-  SetEntry() : id(0), MIB() {}
-  bool Empty() { return id == 0; }
-  void Print() {
-    CHECK(!Empty());
-    MIB.Print(id, flags()->print_terse);
-  }
-  // The stack id
-  u64 id;
-  MemInfoBlock MIB;
-};
-
-struct CacheSet {
-  enum { kSetSize = 4 };
-
-  void PrintAll() {
-    for (int i = 0; i < kSetSize; i++) {
-      if (Entries[i].Empty())
-        continue;
-      Entries[i].Print();
-    }
-  }
-  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
-    SpinMutexLock l(&SetMutex);
-    AccessCount++;
-
-    for (int i = 0; i < kSetSize; i++) {
-      auto id = Entries[i].id;
-      // Check if this is a hit or an empty entry. Since we always move any
-      // filled locations to the front of the array (see below), we don't need
-      // to look after finding the first empty entry.
-      if (id == new_id || !id) {
-        if (id == 0) {
-          Entries[i].id = new_id;
-          Entries[i].MIB = newMIB;
-        } else {
-          Entries[i].MIB.Merge(newMIB);
-        }
-        // Assuming some id locality, we try to swap the matching entry
-        // into the first set position.
-        if (i != 0) {
-          auto tmp = Entries[0];
-          Entries[0] = Entries[i];
-          Entries[i] = tmp;
-        }
-        return;
-      }
-    }
-
-    // Miss
-    MissCount++;
-
-    // We try to find the entries with the lowest alloc count to be evicted:
-    int min_idx = 0;
-    u64 min_count = Entries[0].MIB.alloc_count;
-    for (int i = 1; i < kSetSize; i++) {
-      CHECK(!Entries[i].Empty());
-      if (Entries[i].MIB.alloc_count < min_count) {
-        min_idx = i;
-        min_count = Entries[i].MIB.alloc_count;
-      }
-    }
-
-    // Print the evicted entry profile information
-    if (!flags()->print_terse)
-      Printf("Evicted:\n");
-    Entries[min_idx].Print();
-
-    // Similar to the hit case, put new MIB in first set position.
-    if (min_idx != 0)
-      Entries[min_idx] = Entries[0];
-    Entries[0].id = new_id;
-    Entries[0].MIB = newMIB;
-  }
-
-  void PrintMissRate(int i) {
-    u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
-    Printf("Set %d miss rate: %d / %d = %5llu.%02llu%%\n", i, MissCount,
-           AccessCount, p / 100, p % 100);
-  }
-
-  SetEntry Entries[kSetSize];
-  u32 AccessCount = 0;
-  u32 MissCount = 0;
-  SpinMutex SetMutex;
-};
-
-struct MemInfoBlockCache {
-  MemInfoBlockCache() {
-    if (common_flags()->print_module_map)
-      DumpProcessMap();
-    if (flags()->print_terse)
-      MemInfoBlock::printHeader();
-    Sets =
-        (CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
-    Constructed = true;
-  }
-
-  ~MemInfoBlockCache() { free(Sets); }
-
-  void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
-    u64 hv = new_id;
-
-    // Use mod method where number of entries should be a prime close to power
-    // of 2.
-    hv %= flags()->mem_info_cache_entries;
-
-    return Sets[hv].insertOrMerge(new_id, newMIB);
-  }
-
-  void PrintAll() {
-    for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
-      Sets[i].PrintAll();
-    }
-  }
-
-  void PrintMissRate() {
-    if (!flags()->print_mem_info_cache_miss_rate)
-      return;
-    u64 MissCountSum = 0;
-    u64 AccessCountSum = 0;
-    for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
-      MissCountSum += Sets[i].MissCount;
-      AccessCountSum += Sets[i].AccessCount;
-    }
-    u64 p = AccessCountSum ? MissCountSum * 10000ULL / AccessCountSum : 0;
-    Printf("Overall miss rate: %llu / %llu = %5llu.%02llu%%\n", MissCountSum,
-           AccessCountSum, p / 100, p % 100);
-    if (flags()->print_mem_info_cache_miss_rate_details)
-      for (int i = 0; i < flags()->mem_info_cache_entries; i++)
-        Sets[i].PrintMissRate(i);
-  }
-
-  CacheSet *Sets;
-  // Flag when the Sets have been allocated, in case a deallocation is called
-  // very early before the static init of the Allocator and therefore this table
-  // have completed.
-  bool Constructed = false;
-};
-
 // Accumulates the access count from the shadow for the given pointer and size.
 u64 GetShadowCount(uptr p, u32 size) {
   u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
@@ -357,24 +217,35 @@ struct Allocator {
   uptr max_user_defined_malloc_size;
   atomic_uint8_t rss_limit_exceeded;
 
-  MemInfoBlockCache MemInfoBlockTable;
+  // Holds the mapping of stack ids to MemInfoBlocks.
+  MIBMapTy MIBMap;
+
   bool destructing;
+  bool constructed = false;
 
   // ------------------- Initialization ------------------------
-  explicit Allocator(LinkerInitialized) : destructing(false) {}
-
+  explicit Allocator(LinkerInitialized)
+      : destructing(false), constructed(true) {}
   ~Allocator() { FinishAndPrint(); }
 
+  static void PrintCallback(const uptr Key, LockedMemInfoBlock *const &Value,
+                            void *Arg) {
+    SpinMutexLock(&Value->mutex);
+    Value->mib.Print(Key, bool(Arg));
+  }
+
   void FinishAndPrint() {
+    if (common_flags()->print_module_map)
+      DumpProcessMap();
     if (!flags()->print_terse)
       Printf("Live on exit:\n");
     allocator.ForceLock();
     allocator.ForEachChunk(
         [](uptr chunk, void *alloc) {
           u64 user_requested_size;
+          Allocator *A = (Allocator *)alloc;
           MemprofChunk *m =
-              ((Allocator *)alloc)
-                  ->GetMemprofChunk((void *)chunk, user_requested_size);
+              A->GetMemprofChunk((void *)chunk, user_requested_size);
           if (!m)
             return;
           uptr user_beg = ((uptr)m) + kChunkHeaderSize;
@@ -382,16 +253,15 @@ struct Allocator {
           long curtime = GetTimestamp();
           MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                               m->cpu_id, GetCpuId());
-          ((Allocator *)alloc)
-              ->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+          InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
         },
         this);
-    allocator.ForceUnlock();
 
     destructing = true;
-    MemInfoBlockTable.PrintMissRate();
-    MemInfoBlockTable.PrintAll();
+    MIBMap.ForEach(PrintCallback,
+                   reinterpret_cast<void *>(flags()->print_terse));
     StackDepotPrintAll();
+    allocator.ForceUnlock();
   }
 
   void InitLinkerInitialized() {
@@ -523,14 +393,13 @@ struct Allocator {
 
     u64 user_requested_size =
         atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
-    if (memprof_inited && memprof_init_done && !destructing &&
-        MemInfoBlockTable.Constructed) {
+    if (memprof_inited && memprof_init_done && constructed && !destructing) {
       u64 c = GetShadowCount(p, user_requested_size);
       long curtime = GetTimestamp();
 
       MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
                           m->cpu_id, GetCpuId());
-        MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
+      InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
     }
 
     MemprofStats &thread_stats = GetCurrentThreadStats();

diff  --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
index 035fd15b9288e..15e6bbf50df31 100644
--- a/compiler-rt/lib/memprof/memprof_flags.inc
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -37,13 +37,3 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
              "pointer to an allocated space which can not be used.")
 MEMPROF_FLAG(bool, print_terse, false,
              "If set, prints memory profile in a terse format.")
-
-MEMPROF_FLAG(
-    int, mem_info_cache_entries, 16381,
-    "Size in entries of the mem info block cache, should be closest prime"
-    " number to a power of two for best hashing.")
-MEMPROF_FLAG(bool, print_mem_info_cache_miss_rate, false,
-             "If set, prints the miss rate of the mem info block cache.")
-MEMPROF_FLAG(
-    bool, print_mem_info_cache_miss_rate_details, false,
-    "If set, prints detailed miss rates of the mem info block cache sets.")

diff  --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp
new file mode 100644
index 0000000000000..47449cf9612b0
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp
@@ -0,0 +1,35 @@
+//===-- memprof_mibmap.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of MemProfiler, a memory profiler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "memprof_mibmap.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __memprof {
+
+void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
+  MIBMapTy::Handle h(&Map, static_cast<uptr>(Id), /*remove=*/false,
+                     /*create=*/true);
+  if (h.created()) {
+    LockedMemInfoBlock *lmib =
+        (LockedMemInfoBlock *)InternalAlloc(sizeof(LockedMemInfoBlock));
+    lmib->mutex.Init();
+    lmib->mib = Block;
+    *h = lmib;
+  } else {
+    LockedMemInfoBlock *lmib = *h;
+    SpinMutexLock lock(&lmib->mutex);
+    lmib->mib.Merge(Block);
+  }
+}
+
+} // namespace __memprof

diff  --git a/compiler-rt/lib/memprof/memprof_mibmap.h b/compiler-rt/lib/memprof/memprof_mibmap.h
new file mode 100644
index 0000000000000..ed5dda174fe5b
--- /dev/null
+++ b/compiler-rt/lib/memprof/memprof_mibmap.h
@@ -0,0 +1,24 @@
+#ifndef MEMPROF_MIBMAP_H_
+#define MEMPROF_MIBMAP_H_
+
+#include "memprof_meminfoblock.h"
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __memprof {
+
+struct LockedMemInfoBlock {
+  __sanitizer::StaticSpinMutex mutex;
+  MemInfoBlock mib;
+};
+
+// The MIB map stores a mapping from stack ids to MemInfoBlocks.
+typedef __sanitizer::AddrHashMap<LockedMemInfoBlock *, 200003> MIBMapTy;
+
+// Insert a new MemInfoBlock or merge with an existing block identified by the
+// stack id.
+void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map);
+
+} // namespace __memprof
+
+#endif // MEMPROF_MIBMAP_H_

diff  --git a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp b/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
deleted file mode 100644
index c253855fbf033..0000000000000
--- a/compiler-rt/test/memprof/TestCases/mem_info_cache_entries.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-// Check mem_info_cache_entries option.
-
-// RUN: %clangxx_memprof -O0 %s -o %t && %env_memprof_opts=log_path=stderr:mem_info_cache_entries=15:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s
-
-// CHECK: Set 14 miss rate: 0 / {{.*}} = 0.00%
-// CHECK-NOT: Set
-
-int main() {
-  return 0;
-}

diff  --git a/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp b/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp
new file mode 100644
index 0000000000000..793c2aa2c977a
--- /dev/null
+++ b/compiler-rt/test/memprof/TestCases/memprof_merge_mib.cpp
@@ -0,0 +1,24 @@
+// RUN: %clangxx_memprof  %s -o %t
+
+// RUN: %env_memprof_opts=log_path=stdout %run %t | FileCheck %s
+
+#include <sanitizer/memprof_interface.h>
+#include <stdlib.h>
+#include <string.h>
+int main(int argc, char **argv) {
+  for (int i = 0; i < 3; i++) {
+    char *x = (char *)malloc(10);
+    if (i % 2)
+      memset(x, 0, 10);
+    else
+      memset(x, 2, 10);
+    free(x);
+  }
+  return 0;
+}
+// We should get one allocation site with alloc_count = loop trip count = 3
+// CHECK: Memory allocation stack id = [[ID:[0-9]+]]
+// CHECK-NEXT-COUNT-1: alloc_count 3
+// CHECK-COUNT-1: Stack for id {{.*}}[[ID]]
+// CHECK-NEXT-COUNT-1: memprof_malloc_linux.cpp
+// CHECK-NEXT-COUNT-1: memprof_merge_mib.cpp

diff  --git a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp b/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
deleted file mode 100644
index e32a0de0d913d..0000000000000
--- a/compiler-rt/test/memprof/TestCases/print_miss_rate.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// Check print_mem_info_cache_miss_rate and
-// print_mem_info_cache_miss_rate_details options.
-
-// RUN: %clangxx_memprof -O0 %s -o %t
-// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1 %run %t 2>&1 | FileCheck %s
-// RUN: %env_memprof_opts=log_path=stderr:print_mem_info_cache_miss_rate=1:print_mem_info_cache_miss_rate_details=1 %run %t 2>&1 | FileCheck %s --check-prefix=DETAILS
-
-// CHECK: Overall miss rate: 0 / {{.*}} = 0.00%
-// DETAILS: Set 0 miss rate: 0 / {{.*}} = 0.00%
-// DETAILS: Set 16380 miss rate: 0 / {{.*}} = 0.00%
-
-int main() {
-  return 0;
-}


        


More information about the llvm-commits mailing list