[compiler-rt] [llvm] [Memprof] Adds the option to collect AccessCountHistograms for memprof. (PR #94264)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 10:40:06 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Matthew Weingarten (mattweingarten)
<details>
<summary>Changes</summary>
Adds the compile-time flag -mllvm -memprof-histogram and the runtime flag histogram=true|false to turn histogram collection on and off. The -memprof-histogram flag requires -memprof-use-callbacks=true.
In histogram mode, changes the shadow mapping from one 8-byte counter per 64 bytes of memory to one 1-byte counter per 8 bytes of memory, with each counter capped at 255. This is the only granularity supported for now.
Updates the RawMemprofReader and the serialization of MemoryInfoBlocks to the binary format, bumping the raw binary format from version 3 to version 4.
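Updates the creation of MemoryInfoBlocks so that they can be built with or without histograms. When two MemoryInfoBlocks are merged, the longer histogram is kept, the counts of the shorter histogram are added into it, and the shorter histogram is then expected to be freed by the caller.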
Adds a memprof_histogram test case.
Initial commit for adding AccessCountHistograms up until RawProfile for memprof
---
Patch is 65.75 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94264.diff
30 Files Affected:
- (modified) compiler-rt/include/profile/MIBEntryDef.inc (+2)
- (modified) compiler-rt/include/profile/MemProfData.inc (+137-102)
- (modified) compiler-rt/lib/memprof/memprof_allocator.cpp (+80-10)
- (modified) compiler-rt/lib/memprof/memprof_flags.inc (+7-2)
- (modified) compiler-rt/lib/memprof/memprof_mapping.h (+17-2)
- (modified) compiler-rt/lib/memprof/memprof_mibmap.cpp (+8)
- (modified) compiler-rt/lib/memprof/memprof_rawprofile.cpp (+51-11)
- (modified) llvm/include/llvm/ProfileData/MIBEntryDef.inc (+2)
- (modified) llvm/include/llvm/ProfileData/MemProf.h (+7)
- (modified) llvm/include/llvm/ProfileData/MemProfData.inc (+132-101)
- (modified) llvm/include/llvm/ProfileData/MemProfReader.h (+1-1)
- (modified) llvm/lib/ProfileData/MemProfReader.cpp (+39-4)
- (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+10-2)
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.exe ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw ()
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.cc (+138)
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.exe ()
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.memprofraw ()
- (added) llvm/test/Transforms/PGOProfile/Inputs/memprof_histogram.yaml (+270)
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw ()
- (modified) llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh (+149-1)
- (added) llvm/test/Transforms/PGOProfile/memprof_histogram.ll (+33)
- (modified) llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll (+3-3)
``````````diff
diff --git a/compiler-rt/include/profile/MIBEntryDef.inc b/compiler-rt/include/profile/MIBEntryDef.inc
index 794163ae10386..58c1fc4de4aba 100644
--- a/compiler-rt/include/profile/MIBEntryDef.inc
+++ b/compiler-rt/include/profile/MIBEntryDef.inc
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
+MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
+MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)
\ No newline at end of file
diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc
index b82a4baf6dd74..d4356275d1609 100644
--- a/compiler-rt/include/profile/MemProfData.inc
+++ b/compiler-rt/include/profile/MemProfData.inc
@@ -22,24 +22,27 @@
#include <string.h>
#ifdef _MSC_VER
-#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
+#define PACKED(...) __pragma(pack(push, 1)) __VA_ARGS__ __pragma(pack(pop))
#else
#define PACKED(...) __VA_ARGS__ __attribute__((__packed__))
#endif
-// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
-#define MEMPROF_RAW_MAGIC_64 \
- ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \
- (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
+// A 64-bit magic number to uniquely identify the raw binary memprof profile
+// file.
+#define MEMPROF_RAW_MAGIC_64 \
+ ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | \
+ (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | \
+ (uint64_t)'r' << 8 | (uint64_t)129)
// The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 3ULL
+#define MEMPROF_RAW_VERSION 4ULL
#define MEMPROF_BUILDID_MAX_SIZE 32ULL
namespace llvm {
namespace memprof {
-// A struct describing the header used for the raw binary memprof profile format.
+// A struct describing the header used for the raw binary memprof profile
+// format.
PACKED(struct Header {
uint64_t Magic;
uint64_t Version;
@@ -62,7 +65,7 @@ PACKED(struct SegmentEntry {
SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
: Start(S), End(E), Offset(O), BuildIdSize(0) {}
- SegmentEntry(const SegmentEntry& S) {
+ SegmentEntry(const SegmentEntry &S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
@@ -70,7 +73,7 @@ PACKED(struct SegmentEntry {
memcpy(BuildId, S.BuildId, S.BuildIdSize);
}
- SegmentEntry& operator=(const SegmentEntry& S) {
+ SegmentEntry &operator=(const SegmentEntry &S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
@@ -79,7 +82,7 @@ PACKED(struct SegmentEntry {
return *this;
}
- bool operator==(const SegmentEntry& S) const {
+ bool operator==(const SegmentEntry &S) const {
return Start == S.Start && End == S.End && Offset == S.Offset &&
BuildIdSize == S.BuildIdSize &&
memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
@@ -90,111 +93,143 @@ PACKED(struct SegmentEntry {
// MemProfData.inc since it would mean we are embedding a directive (the
// #include for MIBEntryDef) into the macros which is undefined behaviour.
#ifdef _MSC_VER
-__pragma(pack(push,1))
+__pragma(pack(push, 1))
#endif
-// A struct representing the heap allocation characteristics of a particular
-// runtime context. This struct is shared between the compiler-rt runtime and
-// the raw profile reader. The indexed format uses a separate, self-describing
-// backwards compatible format.
-struct MemInfoBlock{
+ // A struct representing the heap allocation characteristics of a particular
+ // runtime context. This struct is shared between the compiler-rt runtime
+ // and the raw profile reader. The indexed format uses a separate,
+ // self-describing backwards compatible format.
+ struct MemInfoBlock {
#define MIBEntryDef(NameTag, Name, Type) Type Name;
#include "MIBEntryDef.inc"
#undef MIBEntryDef
-bool operator==(const MemInfoBlock& Other) const {
- bool IsEqual = true;
-#define MIBEntryDef(NameTag, Name, Type) \
+ bool operator==(const MemInfoBlock &Other) const {
+ bool IsEqual = true;
+#define MIBEntryDef(NameTag, Name, Type) \
IsEqual = (IsEqual && Name == Other.Name);
#include "MIBEntryDef.inc"
#undef MIBEntryDef
- return IsEqual;
-}
+ return IsEqual;
+ }
-MemInfoBlock() {
+ MemInfoBlock() {
#define MIBEntryDef(NameTag, Name, Type) Name = Type();
#include "MIBEntryDef.inc"
#undef MIBEntryDef
-}
-
-MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
- uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
- : MemInfoBlock() {
- AllocCount = 1U;
- TotalAccessCount = AccessCount;
- MinAccessCount = AccessCount;
- MaxAccessCount = AccessCount;
- TotalSize = Size;
- MinSize = Size;
- MaxSize = Size;
- AllocTimestamp = AllocTs;
- DeallocTimestamp = DeallocTs;
- TotalLifetime = DeallocTimestamp - AllocTimestamp;
- MinLifetime = TotalLifetime;
- MaxLifetime = TotalLifetime;
- // Access density is accesses per byte. Multiply by 100 to include the
- // fractional part.
- TotalAccessDensity = AccessCount * 100 / Size;
- MinAccessDensity = TotalAccessDensity;
- MaxAccessDensity = TotalAccessDensity;
- // Lifetime access density is the access density per second of lifetime.
- // Multiply by 1000 to convert denominator lifetime to seconds (using a
- // minimum lifetime of 1ms to avoid divide by 0. Do the multiplication first
- // to reduce truncations to 0.
- TotalLifetimeAccessDensity =
- TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1);
- MinLifetimeAccessDensity = TotalLifetimeAccessDensity;
- MaxLifetimeAccessDensity = TotalLifetimeAccessDensity;
- AllocCpuId = AllocCpu;
- DeallocCpuId = DeallocCpu;
- NumMigratedCpu = AllocCpuId != DeallocCpuId;
-}
-
-void Merge(const MemInfoBlock &newMIB) {
- AllocCount += newMIB.AllocCount;
-
- TotalAccessCount += newMIB.TotalAccessCount;
- MinAccessCount = newMIB.MinAccessCount < MinAccessCount ? newMIB.MinAccessCount : MinAccessCount;
- MaxAccessCount = newMIB.MaxAccessCount > MaxAccessCount ? newMIB.MaxAccessCount : MaxAccessCount;
-
- TotalSize += newMIB.TotalSize;
- MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
- MaxSize = newMIB.MaxSize > MaxSize ? newMIB.MaxSize : MaxSize;
-
- TotalLifetime += newMIB.TotalLifetime;
- MinLifetime = newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
- MaxLifetime = newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
-
- TotalAccessDensity += newMIB.TotalAccessDensity;
- MinAccessDensity = newMIB.MinAccessDensity < MinAccessDensity
- ? newMIB.MinAccessDensity
- : MinAccessDensity;
- MaxAccessDensity = newMIB.MaxAccessDensity > MaxAccessDensity
- ? newMIB.MaxAccessDensity
- : MaxAccessDensity;
-
- TotalLifetimeAccessDensity += newMIB.TotalLifetimeAccessDensity;
- MinLifetimeAccessDensity =
- newMIB.MinLifetimeAccessDensity < MinLifetimeAccessDensity
- ? newMIB.MinLifetimeAccessDensity
- : MinLifetimeAccessDensity;
- MaxLifetimeAccessDensity =
- newMIB.MaxLifetimeAccessDensity > MaxLifetimeAccessDensity
- ? newMIB.MaxLifetimeAccessDensity
- : MaxLifetimeAccessDensity;
-
- // We know newMIB was deallocated later, so just need to check if it was
- // allocated before last one deallocated.
- NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
- AllocTimestamp = newMIB.AllocTimestamp;
- DeallocTimestamp = newMIB.DeallocTimestamp;
-
- NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
- NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
- AllocCpuId = newMIB.AllocCpuId;
- DeallocCpuId = newMIB.DeallocCpuId;
-}
+ }
+
+ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
+ uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
+ uintptr_t Histogram, uint32_t HistogramSize)
+ : MemInfoBlock() {
+ AllocCount = 1U;
+ TotalAccessCount = AccessCount;
+ MinAccessCount = AccessCount;
+ MaxAccessCount = AccessCount;
+ TotalSize = Size;
+ MinSize = Size;
+ MaxSize = Size;
+ AllocTimestamp = AllocTs;
+ DeallocTimestamp = DeallocTs;
+ TotalLifetime = DeallocTimestamp - AllocTimestamp;
+ MinLifetime = TotalLifetime;
+ MaxLifetime = TotalLifetime;
+ // Access density is accesses per byte. Multiply by 100 to include the
+ // fractional part.
+ TotalAccessDensity = AccessCount * 100 / Size;
+ MinAccessDensity = TotalAccessDensity;
+ MaxAccessDensity = TotalAccessDensity;
+ // Lifetime access density is the access density per second of lifetime.
+ // Multiply by 1000 to convert denominator lifetime to seconds (using a
+ // minimum lifetime of 1ms to avoid divide by 0. Do the multiplication first
+ // to reduce truncations to 0.
+ TotalLifetimeAccessDensity =
+ TotalAccessDensity * 1000 / (TotalLifetime ? TotalLifetime : 1);
+ MinLifetimeAccessDensity = TotalLifetimeAccessDensity;
+ MaxLifetimeAccessDensity = TotalLifetimeAccessDensity;
+ AllocCpuId = AllocCpu;
+ DeallocCpuId = DeallocCpu;
+ NumMigratedCpu = AllocCpuId != DeallocCpuId;
+ // For now we assume HistogramSize is the same as user requested size
+ AccessHistogramSize = HistogramSize;
+ AccessHistogram = Histogram;
+ }
+
+ // Merge cannot free the AccessHistogram pointer, since we need to free either
+ // with InternalFree or free depending on where the allocation is made
+ // (runtime or profdata tool). The merge function expects the Histogram
+ // pointer with the smaller size to be freed.
+ void Merge(const MemInfoBlock &newMIB) {
+ AllocCount += newMIB.AllocCount;
+
+ TotalAccessCount += newMIB.TotalAccessCount;
+ MinAccessCount = newMIB.MinAccessCount < MinAccessCount
+ ? newMIB.MinAccessCount
+ : MinAccessCount;
+ MaxAccessCount = newMIB.MaxAccessCount > MaxAccessCount
+ ? newMIB.MaxAccessCount
+ : MaxAccessCount;
+
+ TotalSize += newMIB.TotalSize;
+ MinSize = newMIB.MinSize < MinSize ? newMIB.MinSize : MinSize;
+ MaxSize = newMIB.MaxSize > MaxSize ? newMIB.MaxSize : MaxSize;
+
+ TotalLifetime += newMIB.TotalLifetime;
+ MinLifetime =
+ newMIB.MinLifetime < MinLifetime ? newMIB.MinLifetime : MinLifetime;
+ MaxLifetime =
+ newMIB.MaxLifetime > MaxLifetime ? newMIB.MaxLifetime : MaxLifetime;
+
+ TotalAccessDensity += newMIB.TotalAccessDensity;
+ MinAccessDensity = newMIB.MinAccessDensity < MinAccessDensity
+ ? newMIB.MinAccessDensity
+ : MinAccessDensity;
+ MaxAccessDensity = newMIB.MaxAccessDensity > MaxAccessDensity
+ ? newMIB.MaxAccessDensity
+ : MaxAccessDensity;
+
+ TotalLifetimeAccessDensity += newMIB.TotalLifetimeAccessDensity;
+ MinLifetimeAccessDensity =
+ newMIB.MinLifetimeAccessDensity < MinLifetimeAccessDensity
+ ? newMIB.MinLifetimeAccessDensity
+ : MinLifetimeAccessDensity;
+ MaxLifetimeAccessDensity =
+ newMIB.MaxLifetimeAccessDensity > MaxLifetimeAccessDensity
+ ? newMIB.MaxLifetimeAccessDensity
+ : MaxLifetimeAccessDensity;
+
+ // We know newMIB was deallocated later, so just need to check if it was
+ // allocated before last one deallocated.
+ NumLifetimeOverlaps += newMIB.AllocTimestamp < DeallocTimestamp;
+ AllocTimestamp = newMIB.AllocTimestamp;
+ DeallocTimestamp = newMIB.DeallocTimestamp;
+
+ NumSameAllocCpu += AllocCpuId == newMIB.AllocCpuId;
+ NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
+ AllocCpuId = newMIB.AllocCpuId;
+ DeallocCpuId = newMIB.DeallocCpuId;
+
+ // For merging histograms, we always keep the longer histogram, and add
+ // values of shorter histogram to larger one.
+ uintptr_t ShorterHistogram;
+ uint32_t ShorterHistogramSize;
+ if (newMIB.AccessHistogramSize > AccessHistogramSize) {
+ ShorterHistogram = AccessHistogram;
+ ShorterHistogramSize = AccessHistogramSize;
+ // Swap histogram of current to larger histogram
+ AccessHistogram = newMIB.AccessHistogram;
+ AccessHistogramSize = newMIB.AccessHistogramSize;
+ } else {
+ ShorterHistogram = newMIB.AccessHistogram;
+ ShorterHistogramSize = newMIB.AccessHistogramSize;
+ }
+ for (size_t i = 0; i < ShorterHistogramSize; ++i) {
+ ((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
+ }
+ }
#ifdef _MSC_VER
} __pragma(pack(pop));
@@ -205,4 +240,4 @@ void Merge(const MemInfoBlock &newMIB) {
} // namespace memprof
} // namespace llvm
-#endif
+#endif
\ No newline at end of file
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 35e941228525a..f93f633f0182f 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -34,6 +34,8 @@
#include <sched.h>
#include <time.h>
+#define MAX_HISTOGRAM_PRINT_SIZE 32U
+
namespace __memprof {
namespace {
using ::llvm::memprof::MemInfoBlock;
@@ -68,6 +70,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
"cpu: %u, num same dealloc_cpu: %u\n",
M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
M.NumSameDeallocCpu);
+ Printf("AcccessCountHistogram[%u]: ", M.AccessHistogramSize);
+ uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
+ ? MAX_HISTOGRAM_PRINT_SIZE
+ : M.AccessHistogramSize;
+ for (size_t i = 0; i < PrintSize; ++i) {
+ Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
+ }
+ Printf("\n");
}
}
} // namespace
@@ -216,6 +226,32 @@ u64 GetShadowCount(uptr p, u32 size) {
return count;
}
+// Accumulates the access count from the shadow for the given pointer and size.
+u64 GetShadowCountHistogram(uptr p, u32 size) {
+ u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
+ u8 *shadow_end = (u8 *)MEM_TO_SHADOW(p + size);
+ u64 count = 0;
+ for (; shadow <= shadow_end; shadow++)
+ count += *shadow;
+ return count;
+}
+
+// If we use the normal approach in clearCountersWithoutHistogram, the histogram
+// will clear to much data and may overwrite shadow counters that are in use.
+void clearCountersHistogram(uptr addr, uptr size) {
+ u8 *shadow_8 = (u8 *)HISTOGRAM_MEM_TO_SHADOW(addr);
+ u8 *shadow_end_8 = (u8 *)HISTOGRAM_MEM_TO_SHADOW(addr + size);
+ for (; shadow_8 < shadow_end_8; shadow_8++) {
+ *shadow_8 = 0;
+ }
+}
+
+void clearCountersWithoutHistogram(uptr addr, uptr size) {
+ uptr shadow_beg = MEM_TO_SHADOW(addr);
+ uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
+ REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
+}
+
// Clears the shadow counters (when memory is allocated).
void ClearShadow(uptr addr, uptr size) {
CHECK(AddrIsAlignedByGranularity(addr));
@@ -226,7 +262,11 @@ void ClearShadow(uptr addr, uptr size) {
uptr shadow_beg = MEM_TO_SHADOW(addr);
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
- REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
+ if (flags()->histogram) {
+ clearCountersHistogram(addr, size);
+ } else {
+ clearCountersWithoutHistogram(addr, size);
+ }
} else {
uptr page_size = GetPageSizeCached();
uptr page_beg = RoundUpTo(shadow_beg, page_size);
@@ -279,6 +319,43 @@ struct Allocator {
Print(Value->mib, Key, bool(Arg));
}
+ static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
+ if (flags()->histogram) {
+ return CreateNewMIBWithHistogram(p, m, user_size);
+ } else {
+ return CreateNewMIBWithoutHistogram(p, m, user_size);
+ }
+ }
+
+ static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
+ u64 user_size) {
+
+ u64 c = GetShadowCountHistogram(p, user_size);
+ long curtime = GetTimestamp();
+ uint32_t HistogramSize =
+ RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
+ uintptr_t Histogram =
+ (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
+ memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
+ for (size_t i = 0; i < HistogramSize; ++i) {
+ u8 Counter =
+ *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
+ ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
+ }
+ MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+ GetCpuId(), Histogram, HistogramSize);
+ return newMIB;
+ }
+
+ static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
+ u64 user_size) {
+ u64 c = GetShadowCount(p, user_size);
+ long curtime = GetTimestamp();
+ MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+ GetCpuId(), 0, 0);
+ return newMIB;
+ }
+
void FinishAndWrite() {
if (print_text && common_flags()->print_module_map)
DumpProcessMap();
@@ -319,10 +396,7 @@ struct Allocator {
if (!m)
return;
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
- u64 c = GetShadowCount(user_beg, user_requested_size);
- long curtime = GetTimestamp();
- MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
- m->cpu_id, GetCpuId());
+ MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
},
this);
@@ -451,11 +525,7 @@ struct Allocator {
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
if (memprof_inited && atomic_load_relaxed(&constructed) &&
!atomic_load_relaxed(&destructing)) {
- u64 c = GetShadowCount(p, user_requested_size);
- long curtime = GetTimestamp();
-
- MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
- m->cpu_id, GetCpuId());
+ MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
}
diff --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
index ee0760ddc302a..8d8d539ea752f 100644
--- a/compiler-rt/lib/memprof/memprof_flags.inc
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -36,6 +36,11 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
"POSIX standard). If set to false, realloc(p, 0) will return a "
"pointer to an allocated space which can not be used.")
MEMPROF_FLAG(bool, print_text, false,
- "If set, prints the heap profile in text format. Else use the raw binary serialization format.")
+ "If set, prints the heap profile in text format. Else use the raw "
+ "binary serialization format.")
MEMPROF_FLAG(bool, print_terse, false,
- "If set, prints memory profile in a terse format. Only applicable if print_text = true.")
+ "If set, prints memory profile in a terse format. Only applicable "
+ "if print_text = true.")
+MEMPROF_FLAG(bool, histogram, false,
+ "If set, collects a histogram in memory info blocks alongside one "
+ "large counter.")
\ No newline at end of file
diff --git a/compiler-rt/lib/memprof/memprof_mapping.h b/compiler-rt/lib/memprof/memprof_mapping.h
index 1cc0836834cdf..658ed9d0e74dd 100644
--- a/compiler-rt/lib/memprof/mempr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/94264
More information about the llvm-commits mailing list