[compiler-rt] [llvm] [Memprof] Adds the option to collect AccessCountHistograms for memprof. (PR #94264)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 3 10:40:06 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Matthew Weingarten (mattweingarten)

<details>
<summary>Changes</summary>

Adds the compile-time flag -mllvm -memprof-histogram and the runtime flag histogram=true|false to turn histogram collection on and off. The -memprof-histogram flag relies on -memprof-use-callbacks=true to work.

Updates the shadow mapping logic in histogram mode from one 8-byte counter per 64 bytes of memory to one 1-byte counter per 8 bytes of memory, with each counter capped at 255. Only this granularity is supported for now.

Updates the RawMemprofReader and the serialization of MemoryInfoBlocks to the binary format, including bumping the raw binary format version from 3 to 4.

Updates the creation of MemoryInfoBlocks, both with and without histograms. When two MemoryInfoBlocks are merged, the values of the shorter histogram are added into the longer one, and the shorter histogram is removed.

Adds a memprof_histogram test case.

Initial commit for adding AccessCountHistograms up until RawProfile for memprof

---

Patch is 65.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94264.diff


30 Files Affected:

- (modified) compiler-rt/include/profile/MIBEntryDef.inc (+2) 
- (modified) compiler-rt/include/profile/MemProfData.inc (+137-102) 
- (modified) compiler-rt/lib/memprof/memprof_allocator.cpp (+80-10) 
- (modified) compiler-rt/lib/memprof/memprof_flags.inc (+7-2) 
- (modified) compiler-rt/lib/memprof/memprof_mapping.h (+17-2) 
- (modified) compiler-rt/lib/memprof/memprof_mibmap.cpp (+8) 
- (modified) compiler-rt/lib/memprof/memprof_rawprofile.cpp (+51-11) 
- (modified) llvm/include/llvm/ProfileData/MIBEntryDef.inc (+2) 
- (modified) llvm/include/llvm/ProfileData/MemProf.h (+7) 
- (modified) llvm/include/llvm/ProfileData/MemProfData.inc (+132-101) 
- (modified) llvm/include/llvm/ProfileData/MemProfReader.h (+1-1) 
- (modified) llvm/lib/ProfileData/MemProfReader.cpp (+39-4) 
- (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+10-2) 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.exe () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw () 
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.cc (+138) 
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.exe () 
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.memprofraw () 
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.yaml (+270) 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw () 
- (modified) llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh (+149-1) 
- (added) llvm/test/Transforms/PGOProfile/memprof_histogram.ll (+33) 
- (modified) llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll (+3-3) 


``````````diff
diff --git a/compiler-rt/include/profile/MIBEntryDef.inc b/compiler-rt/include/profile/MIBEntryDef.inc
index 794163ae10386..58c1fc4de4aba 100644
--- a/compiler-rt/include/profile/MIBEntryDef.inc
+++ b/compiler-rt/include/profile/MIBEntryDef.inc
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
 MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
 MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
 MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
+MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
+MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)
\ No newline at end of file
diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc
index b82a4baf6dd74..d4356275d1609 100644
--- a/compiler-rt/include/profile/MemProfData.inc
+++ b/compiler-rt/include/profile/MemProfData.inc
@@ -22,24 +22,27 @@
 #include <string.h>
 
 #ifdef _MSC_VER
-#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
+#define PACKED(...) __pragma(pack(push, 1)) __VA_ARGS__ __pragma(pack(pop))
 #else
 #define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
 #endif
 
-// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
-#define MEMPROF_RAW_MAGIC_64                                                                        \
-  ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 |          \
-   (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
+// A 64-bit magic number to uniquely identify the raw binary memprof profile
+// file.
+#define MEMPROF_RAW_MAGIC_64                                                   \
+  ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 |           \
+   (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | (uint64_t)'f' << 16 |           \
+   (uint64_t)'r' << 8 | (uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 3ULL
+#define MEMPROF_RAW_VERSION 4ULL
 
 #define MEMPROF_BUILDID_MAX_SIZE 32ULL
 
 namespace llvm {
 namespace memprof {
-// A struct describing the header used for the raw binary memprof profile format.
+// A struct describing the header used for the raw binary memprof profile
+// format.
 PACKED(struct Header {
   uint64_t Magic;
   uint64_t Version;
@@ -62,7 +65,7 @@ PACKED(struct SegmentEntry {
   SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
       : Start(S), End(E), Offset(O), BuildIdSize(0) {}
 
-  SegmentEntry(const SegmentEntry& S) {
+  SegmentEntry(const SegmentEntry &S) {
     Start = S.Start;
     End = S.End;
     Offset = S.Offset;
@@ -70,7 +73,7 @@ PACKED(struct SegmentEntry {
     memcpy(BuildId, S.BuildId, S.BuildIdSize);
   }
 
-  SegmentEntry& operator=(const SegmentEntry& S) {
+  SegmentEntry &operator=(const SegmentEntry &S) {
     Start = S.Start;
     End = S.End;
     Offset = S.Offset;
@@ -79,7 +82,7 @@ PACKED(struct SegmentEntry {
     return *this;
   }
 
-  bool operator==(const SegmentEntry& S) const {
+  bool operator==(const SegmentEntry &S) const {
     return Start == S.Start && End == S.End && Offset == S.Offset &&
            BuildIdSize == S.BuildIdSize &&
            memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
@@ -90,111 +93,143 @@ PACKED(struct SegmentEntry {
 // MemProfData.inc since it would mean we are embedding a directive (the
 // #include for MIBEntryDef) into the macros which is undefined behaviour.
 #ifdef _MSC_VER
-__pragma(pack(push,1))
+__pragma(pack(push, 1))
 #endif
 
-// A struct representing the heap allocation characteristics of a particular
-// runtime context. This struct is shared between the compiler-rt runtime and
-// the raw profile reader. The indexed format uses a separate, self-describing
-// backwards compatible format.
-struct MemInfoBlock{
+    // A struct representing the heap allocation characteristics of a particular
+    // runtime context. This struct is shared between the compiler-rt runtime
+    // and the raw profile reader. The indexed format uses a separate,
+    // self-describing backwards compatible format.
+    struct MemInfoBlock {
 
 #define MIBEntryDef(NameTag, Name, Type) Type Name;
 #include "MIBEntryDef.inc"
 #undef MIBEntryDef
 
-bool operator==(const MemInfoBlock& Other) const {
-  bool IsEqual = true;
-#define MIBEntryDef(NameTag, Name, Type) \
+  bool operator==(const MemInfoBlock &Other) const {
+    bool IsEqual = true;
+#define MIBEntryDef(NameTag, Name, Type)                                       \
   IsEqual = (IsEqual && Name == Other.Name);
 #include "MIBEntryDef.inc"
 #undef MIBEntryDef
-  return IsEqual;
-}
+    return IsEqual;
+  }
 
-MemInfoBlock() {
+  MemInfoBlock() {
 #define MIBEntryDef(NameTag, Name, Type) Name = Type();
 #include "MIBEntryDef.inc"
 #undef MIBEntryDef
-}
-
-MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
-             uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
-    : MemInfoBlock() {
-  AllocCount = 1U;
-  TotalAccessCount = AccessCount;
-  MinAccessCount = AccessCount;
-  MaxAccessCount = AccessCount;
-  TotalSize = Size;
-  MinSize = Size;
-  MaxSize = Size;
-  AllocTimestamp = AllocTs;
-  DeallocTimestamp = DeallocTs;
-  TotalLifetime = DeallocTimestamp - AllocTimestamp;
-  MinLifetime = TotalLifetime;
-  MaxLifetime = TotalLifetime;
-  // Access density is accesses per byte. Multiply by 100 to include the
-  // fractional part.
-  TotalAccessDensity = AccessCount * 100 / Size;
-  MinAccessDensity = TotalAccessDensity;
-  MaxAccessDensity = TotalAccessDensity;
-  // Lifetime access density is the access density per second of lifetime.
-  // Multiply by 1000 to convert denominator lifetime to seconds (using a
-  // minimum lifetime of 1ms to avoid divide by 0. Do the multiplication first
-  // to reduce truncations to 0.
-  TotalLifetimeAccessDensity =
-      TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1);
-  MinLifetimeAccessDensity = TotalLifetimeAccessDensity;
-  MaxLifetimeAccessDensity = TotalLifetimeAccessDensity;
-  AllocCpuId = AllocCpu;
-  DeallocCpuId = DeallocCpu;
-  NumMigratedCpu = AllocCpuId != DeallocCpuId;
-}
-
-void Merge(const MemInfoBlock &newMIB) {
-  AllocCount += newMIB.AllocCount;
-
-  TotalAccessCount += newMIB.TotalAccessCount;
-  MinAccessCount = newMIB.MinAccessCount < MinAccessCount ? newMIB.MinAccessCount : MinAccessCount;
-  MaxAccessCount = newMIB.MaxAccessCount > MaxAccessCount ? newMIB.MaxAccessCount : MaxAccessCount;
-
-  TotalSize += newMIB.TotalSize;
-  MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
-  MaxSize = newMIB.MaxSize > MaxSize ? newMIB.MaxSize : MaxSize;
-
-  TotalLifetime += newMIB.TotalLifetime;
-  MinLifetime = newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
-  MaxLifetime = newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
-
-  TotalAccessDensity += newMIB.TotalAccessDensity;
-  MinAccessDensity = newMIB.MinAccessDensity < MinAccessDensity
-                         ? newMIB.MinAccessDensity
-                         : MinAccessDensity;
-  MaxAccessDensity = newMIB.MaxAccessDensity > MaxAccessDensity
-                         ? newMIB.MaxAccessDensity
-                         : MaxAccessDensity;
-
-  TotalLifetimeAccessDensity += newMIB.TotalLifetimeAccessDensity;
-  MinLifetimeAccessDensity =
-      newMIB.MinLifetimeAccessDensity < MinLifetimeAccessDensity
-          ? newMIB.MinLifetimeAccessDensity
-          : MinLifetimeAccessDensity;
-  MaxLifetimeAccessDensity =
-      newMIB.MaxLifetimeAccessDensity > MaxLifetimeAccessDensity
-          ? newMIB.MaxLifetimeAccessDensity
-          : MaxLifetimeAccessDensity;
-
-  // We know newMIB was deallocated later, so just need to check if it was
-  // allocated before last one deallocated.
-  NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
-  AllocTimestamp = newMIB.AllocTimestamp;
-  DeallocTimestamp = newMIB.DeallocTimestamp;
-
-  NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
-  NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
-  AllocCpuId = newMIB.AllocCpuId;
-  DeallocCpuId = newMIB.DeallocCpuId;
-}
+  }
+
+  MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
+               uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
+               uintptr_t Histogram, uint32_t HistogramSize)
+      : MemInfoBlock() {
+    AllocCount = 1U;
+    TotalAccessCount = AccessCount;
+    MinAccessCount = AccessCount;
+    MaxAccessCount = AccessCount;
+    TotalSize = Size;
+    MinSize = Size;
+    MaxSize = Size;
+    AllocTimestamp = AllocTs;
+    DeallocTimestamp = DeallocTs;
+    TotalLifetime = DeallocTimestamp - AllocTimestamp;
+    MinLifetime = TotalLifetime;
+    MaxLifetime = TotalLifetime;
+    // Access density is accesses per byte. Multiply by 100 to include the
+    // fractional part.
+    TotalAccessDensity = AccessCount * 100 / Size;
+    MinAccessDensity = TotalAccessDensity;
+    MaxAccessDensity = TotalAccessDensity;
+    // Lifetime access density is the access density per second of lifetime.
+    // Multiply by 1000 to convert denominator lifetime to seconds (using a
+    // minimum lifetime of 1ms to avoid divide by 0. Do the multiplication first
+    // to reduce truncations to 0.
+    TotalLifetimeAccessDensity =
+        TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1);
+    MinLifetimeAccessDensity = TotalLifetimeAccessDensity;
+    MaxLifetimeAccessDensity = TotalLifetimeAccessDensity;
+    AllocCpuId = AllocCpu;
+    DeallocCpuId = DeallocCpu;
+    NumMigratedCpu = AllocCpuId != DeallocCpuId;
+    // For now we assume HistogramSize is the same as user requested size
+    AccessHistogramSize = HistogramSize;
+    AccessHistogram = Histogram;
+  }
+
+  // Merge cannot free the AccessHistogram pointer, since we need to free either
+  // with InternalFree or free depending on where the allocation is made
+  // (runtime or profdata tool). The merge function expects the Histogram
+  // pointer with the smaller size to be freed.
+  void Merge(const MemInfoBlock &newMIB) {
+    AllocCount += newMIB.AllocCount;
+
+    TotalAccessCount += newMIB.TotalAccessCount;
+    MinAccessCount = newMIB.MinAccessCount < MinAccessCount
+                         ? newMIB.MinAccessCount
+                         : MinAccessCount;
+    MaxAccessCount = newMIB.MaxAccessCount > MaxAccessCount
+                         ? newMIB.MaxAccessCount
+                         : MaxAccessCount;
+
+    TotalSize += newMIB.TotalSize;
+    MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
+    MaxSize = newMIB.MaxSize > MaxSize ? newMIB.MaxSize : MaxSize;
+
+    TotalLifetime += newMIB.TotalLifetime;
+    MinLifetime =
+        newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
+    MaxLifetime =
+        newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
+
+    TotalAccessDensity += newMIB.TotalAccessDensity;
+    MinAccessDensity = newMIB.MinAccessDensity < MinAccessDensity
+                           ? newMIB.MinAccessDensity
+                           : MinAccessDensity;
+    MaxAccessDensity = newMIB.MaxAccessDensity > MaxAccessDensity
+                           ? newMIB.MaxAccessDensity
+                           : MaxAccessDensity;
+
+    TotalLifetimeAccessDensity += newMIB.TotalLifetimeAccessDensity;
+    MinLifetimeAccessDensity =
+        newMIB.MinLifetimeAccessDensity < MinLifetimeAccessDensity
+            ? newMIB.MinLifetimeAccessDensity
+            : MinLifetimeAccessDensity;
+    MaxLifetimeAccessDensity =
+        newMIB.MaxLifetimeAccessDensity > MaxLifetimeAccessDensity
+            ? newMIB.MaxLifetimeAccessDensity
+            : MaxLifetimeAccessDensity;
+
+    // We know newMIB was deallocated later, so just need to check if it was
+    // allocated before last one deallocated.
+    NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
+    AllocTimestamp = newMIB.AllocTimestamp;
+    DeallocTimestamp = newMIB.DeallocTimestamp;
+
+    NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
+    NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
+    AllocCpuId = newMIB.AllocCpuId;
+    DeallocCpuId = newMIB.DeallocCpuId;
+
+    // For merging histograms, we always keep the longer histogram, and add
+    // values of shorter histogram to larger one.
+    uintptr_t ShorterHistogram;
+    uint32_t ShorterHistogramSize;
+    if (newMIB.AccessHistogramSize > AccessHistogramSize) {
+      ShorterHistogram = AccessHistogram;
+      ShorterHistogramSize = AccessHistogramSize;
+      // Swap histogram of current to larger histogram
+      AccessHistogram = newMIB.AccessHistogram;
+      AccessHistogramSize = newMIB.AccessHistogramSize;
+    } else {
+      ShorterHistogram = newMIB.AccessHistogram;
+      ShorterHistogramSize = newMIB.AccessHistogramSize;
+    }
+    for (size_t i = 0; i < ShorterHistogramSize; ++i) {
+      ((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
+    }
+  }
 
 #ifdef _MSC_VER
 } __pragma(pack(pop));
@@ -205,4 +240,4 @@ void Merge(const MemInfoBlock &newMIB) {
 } // namespace memprof
 } // namespace llvm
 
-#endif
+#endif
\ No newline at end of file
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 35e941228525a..f93f633f0182f 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -34,6 +34,8 @@
 #include <sched.h>
 #include <time.h>
 
+#define MAX_HISTOGRAM_PRINT_SIZE 32U
+
 namespace __memprof {
 namespace {
 using ::llvm::memprof::MemInfoBlock;
@@ -68,6 +70,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
            "cpu: %u, num same dealloc_cpu: %u\n",
            M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
            M.NumSameDeallocCpu);
+    Printf("AcccessCountHistogram[%u]: ", M.AccessHistogramSize);
+    uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
+                             ? MAX_HISTOGRAM_PRINT_SIZE
+                             : M.AccessHistogramSize;
+    for (size_t i = 0; i < PrintSize; ++i) {
+      Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
+    }
+    Printf("\n");
   }
 }
 } // namespace
@@ -216,6 +226,32 @@ u64 GetShadowCount(uptr p, u32 size) {
   return count;
 }
 
+// Accumulates the access count from the shadow for the given pointer and size.
+u64 GetShadowCountHistogram(uptr p, u32 size) {
+  u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
+  u8 *shadow_end = (u8 *)MEM_TO_SHADOW(p + size);
+  u64 count = 0;
+  for (; shadow <= shadow_end; shadow++)
+    count += *shadow;
+  return count;
+}
+
+// If we use the normal approach in clearCountersWithoutHistogram, the histogram
+// will clear too much data and may overwrite shadow counters that are in use.
+void clearCountersHistogram(uptr addr, uptr size) {
+  u8 *shadow_8 = (u8 *)HISTOGRAM_MEM_TO_SHADOW(addr);
+  u8 *shadow_end_8 = (u8 *)HISTOGRAM_MEM_TO_SHADOW(addr + size);
+  for (; shadow_8 < shadow_end_8; shadow_8++) {
+    *shadow_8 = 0;
+  }
+}
+
+void clearCountersWithoutHistogram(uptr addr, uptr size) {
+  uptr shadow_beg = MEM_TO_SHADOW(addr);
+  uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
+  REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
+}
+
 // Clears the shadow counters (when memory is allocated).
 void ClearShadow(uptr addr, uptr size) {
   CHECK(AddrIsAlignedByGranularity(addr));
@@ -226,7 +262,11 @@ void ClearShadow(uptr addr, uptr size) {
   uptr shadow_beg = MEM_TO_SHADOW(addr);
   uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
   if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
-    REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
+    if (flags()->histogram) {
+      clearCountersHistogram(addr, size);
+    } else {
+      clearCountersWithoutHistogram(addr, size);
+    }
   } else {
     uptr page_size = GetPageSizeCached();
     uptr page_beg = RoundUpTo(shadow_beg, page_size);
@@ -279,6 +319,43 @@ struct Allocator {
     Print(Value->mib, Key, bool(Arg));
   }
 
+  static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
+    if (flags()->histogram) {
+      return CreateNewMIBWithHistogram(p, m, user_size);
+    } else {
+      return CreateNewMIBWithoutHistogram(p, m, user_size);
+    }
+  }
+
+  static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
+                                                u64 user_size) {
+
+    u64 c = GetShadowCountHistogram(p, user_size);
+    long curtime = GetTimestamp();
+    uint32_t HistogramSize =
+        RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
+    uintptr_t Histogram =
+        (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
+    memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
+    for (size_t i = 0; i < HistogramSize; ++i) {
+      u8 Counter =
+          *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
+      ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
+    }
+    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+                        GetCpuId(), Histogram, HistogramSize);
+    return newMIB;
+  }
+
+  static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
+                                                   u64 user_size) {
+    u64 c = GetShadowCount(p, user_size);
+    long curtime = GetTimestamp();
+    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+                        GetCpuId(), 0, 0);
+    return newMIB;
+  }
+
   void FinishAndWrite() {
     if (print_text && common_flags()->print_module_map)
       DumpProcessMap();
@@ -319,10 +396,7 @@ struct Allocator {
           if (!m)
             return;
           uptr user_beg = ((uptr)m) + kChunkHeaderSize;
-          u64 c = GetShadowCount(user_beg, user_requested_size);
-          long curtime = GetTimestamp();
-          MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
-                              m->cpu_id, GetCpuId());
+          MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
           InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
         },
         this);
@@ -451,11 +525,7 @@ struct Allocator {
         atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
     if (memprof_inited && atomic_load_relaxed(&constructed) &&
         !atomic_load_relaxed(&destructing)) {
-      u64 c = GetShadowCount(p, user_requested_size);
-      long curtime = GetTimestamp();
-
-      MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
-                          m->cpu_id, GetCpuId());
+      MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
       InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
     }
 
diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
index ee0760ddc302a..8d8d539ea752f 100644
--- a/compiler-rt/lib/memprof/memprof_flags.inc
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -36,6 +36,11 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
              "POSIX standard). If set to false, realloc(p, 0) will return a "
              "pointer to an allocated space which can not be used.")
 MEMPROF_FLAG(bool, print_text, false,
-  "If set, prints the heap profile in text format. Else use the raw binary serialization format.")
+             "If set, prints the heap profile in text format. Else use the raw "
+             "binary serialization format.")
 MEMPROF_FLAG(bool, print_terse, false,
-             "If set, prints memory profile in a terse format. Only applicable if print_text = true.")
+             "If set, prints memory profile in a terse format. Only applicable "
+             "if print_text = true.")
+MEMPROF_FLAG(bool, histogram, false,
+             "If set, collects a histogram in memory info blocks alongside one "
+             "large counter.")
\ No newline at end of file
diff --git a/compiler-rt/lib/memprof/memprof_mapping.h b/compiler-rt/lib/memprof/memprof_mapping.h
index 1cc0836834cdf..658ed9d0e74dd 100644
--- a/compiler-rt/lib/memprof/mempr...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/94264


More information about the llvm-commits mailing list