[compiler-rt] 30b93db - [Memprof] Adds the option to collect AccessCountHistograms for memprof. (#94264)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 26 08:37:26 PDT 2024


Author: Matthew Weingarten
Date: 2024-06-26T08:37:22-07:00
New Revision: 30b93db5476e3ae2efdaba25fb53fcc3c081da77

URL: https://github.com/llvm/llvm-project/commit/30b93db5476e3ae2efdaba25fb53fcc3c081da77
DIFF: https://github.com/llvm/llvm-project/commit/30b93db5476e3ae2efdaba25fb53fcc3c081da77.diff

LOG: [Memprof] Adds the option to collect AccessCountHistograms for memprof. (#94264)

Adds compile time flag -mllvm -memprof-histogram and runtime flag
histogram=true|false to turn Histogram collection on and off. The
-memprof-histogram flag relies on -memprof-use-callbacks=true to work.

Updates shadow mapping logic in histogram mode from having one 8 byte
counter for 64 bytes, to 1 byte for 8 bytes, capped at 255. Only
supports this granularity as of now.

Updates the RawMemprofReader and serializing MemoryInfoBlocks to binary
format, including changing to a new version of the raw binary format
from version 3 to version 4.

Updates creating MemoryInfoBlocks with and without Histograms. When two
MemoryInfoBlocks are merged, AccessCounts are summed up and the shorter
Histogram is removed.

Adds a memprof_histogram test case.

Initial commit for adding AccessCountHistograms up until RawProfile for
memprof

Added: 
    llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw
    llvm/test/tools/llvm-profdata/memprof-basic-histogram.test
    llvm/test/tools/llvm-profdata/memprof-basic_v3.test
    llvm/test/tools/llvm-profdata/memprof-padding-histogram.test

Modified: 
    compiler-rt/include/profile/MIBEntryDef.inc
    compiler-rt/include/profile/MemProfData.inc
    compiler-rt/lib/memprof/memprof_allocator.cpp
    compiler-rt/lib/memprof/memprof_flags.inc
    compiler-rt/lib/memprof/memprof_mapping.h
    compiler-rt/lib/memprof/memprof_mibmap.cpp
    compiler-rt/lib/memprof/memprof_rawprofile.cpp
    compiler-rt/lib/memprof/memprof_rtl.cpp
    compiler-rt/lib/memprof/tests/rawprofile.cpp
    llvm/include/llvm/ProfileData/MIBEntryDef.inc
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/include/llvm/ProfileData/MemProfData.inc
    llvm/include/llvm/ProfileData/MemProfReader.h
    llvm/lib/ProfileData/MemProfReader.cpp
    llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
    llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
    llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw
    llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw
    llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw
    llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
    llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll
    llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh
    llvm/test/tools/llvm-profdata/memprof-basic.test
    llvm/test/tools/llvm-profdata/memprof-inline.test
    llvm/test/tools/llvm-profdata/memprof-multi.test
    llvm/test/tools/llvm-profdata/memprof-pic.test
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/include/profile/MIBEntryDef.inc b/compiler-rt/include/profile/MIBEntryDef.inc
index 794163ae10386..58c1fc4de4aba 100644
--- a/compiler-rt/include/profile/MIBEntryDef.inc
+++ b/compiler-rt/include/profile/MIBEntryDef.inc
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
 MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
 MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
 MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
+MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
+MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)
\ No newline at end of file

diff  --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc
index b82a4baf6dd74..3dc88478fb93a 100644
--- a/compiler-rt/include/profile/MemProfData.inc
+++ b/compiler-rt/include/profile/MemProfData.inc
@@ -33,7 +33,11 @@
    (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 3ULL
+#define MEMPROF_RAW_VERSION 4ULL
+
+// Currently supported versions.
+#define MEMPROF_RAW_SUPPORTED_VERSIONS                                         \
+  { 3ULL, 4ULL }
 
 #define MEMPROF_BUILDID_MAX_SIZE 32ULL
 
@@ -119,7 +123,8 @@ MemInfoBlock() {
 }
 
 MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
-             uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
+             uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
+             uintptr_t Histogram, uint32_t HistogramSize)
     : MemInfoBlock() {
   AllocCount = 1U;
   TotalAccessCount = AccessCount;
@@ -149,6 +154,8 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
   AllocCpuId = AllocCpu;
   DeallocCpuId = DeallocCpu;
   NumMigratedCpu = AllocCpuId != DeallocCpuId;
+  AccessHistogramSize = HistogramSize;
+  AccessHistogram = Histogram;
 }
 
 void Merge(const MemInfoBlock &newMIB) {
@@ -194,6 +201,24 @@ void Merge(const MemInfoBlock &newMIB) {
   NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
   AllocCpuId = newMIB.AllocCpuId;
   DeallocCpuId = newMIB.DeallocCpuId;
+
+  // For merging histograms, we always keep the longer histogram, and add
+  // values of shorter histogram to larger one.
+  uintptr_t ShorterHistogram;
+  uint32_t ShorterHistogramSize;
+  if (newMIB.AccessHistogramSize > AccessHistogramSize) {
+    ShorterHistogram = AccessHistogram;
+    ShorterHistogramSize = AccessHistogramSize;
+    // Swap histogram of current to larger histogram
+    AccessHistogram = newMIB.AccessHistogram;
+    AccessHistogramSize = newMIB.AccessHistogramSize;
+  } else {
+    ShorterHistogram = newMIB.AccessHistogram;
+    ShorterHistogramSize = newMIB.AccessHistogramSize;
+  }
+  for (size_t i = 0; i < ShorterHistogramSize; ++i) {
+    ((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
+  }
 }
 
 #ifdef _MSC_VER

diff  --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 35e941228525a..19b2b90106824 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -34,6 +34,10 @@
 #include <sched.h>
 #include <time.h>
 
+#define MAX_HISTOGRAM_PRINT_SIZE 32U
+
+extern bool __memprof_histogram;
+
 namespace __memprof {
 namespace {
 using ::llvm::memprof::MemInfoBlock;
@@ -68,6 +72,14 @@ void Print(const MemInfoBlock &M, const u64 id, bool print_terse) {
            "cpu: %u, num same dealloc_cpu: %u\n",
            M.NumMigratedCpu, M.NumLifetimeOverlaps, M.NumSameAllocCpu,
            M.NumSameDeallocCpu);
+    Printf("AccessCountHistogram[%u]: ", M.AccessHistogramSize);
+    uint32_t PrintSize = M.AccessHistogramSize > MAX_HISTOGRAM_PRINT_SIZE
+                             ? MAX_HISTOGRAM_PRINT_SIZE
+                             : M.AccessHistogramSize;
+    for (size_t i = 0; i < PrintSize; ++i) {
+      Printf("%llu ", ((uint64_t *)M.AccessHistogram)[i]);
+    }
+    Printf("\n");
   }
 }
 } // namespace
@@ -216,6 +228,17 @@ u64 GetShadowCount(uptr p, u32 size) {
   return count;
 }
 
+// Accumulates the access count from the shadow for the given pointer and size.
+// See memprof_mapping.h for an overview on histogram counters.
+u64 GetShadowCountHistogram(uptr p, u32 size) {
+  u8 *shadow = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p);
+  u8 *shadow_end = (u8 *)HISTOGRAM_MEM_TO_SHADOW(p + size);
+  u64 count = 0;
+  for (; shadow <= shadow_end; shadow++)
+    count += *shadow;
+  return count;
+}
+
 // Clears the shadow counters (when memory is allocated).
 void ClearShadow(uptr addr, uptr size) {
   CHECK(AddrIsAlignedByGranularity(addr));
@@ -223,8 +246,16 @@ void ClearShadow(uptr addr, uptr size) {
   CHECK(AddrIsAlignedByGranularity(addr + size));
   CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
   CHECK(REAL(memset));
-  uptr shadow_beg = MEM_TO_SHADOW(addr);
-  uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
+  uptr shadow_beg;
+  uptr shadow_end;
+  if (__memprof_histogram) {
+    shadow_beg = HISTOGRAM_MEM_TO_SHADOW(addr);
+    shadow_end = HISTOGRAM_MEM_TO_SHADOW(addr + size);
+  } else {
+    shadow_beg = MEM_TO_SHADOW(addr);
+    shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
+  }
+
   if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
     REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
   } else {
@@ -279,6 +310,44 @@ struct Allocator {
     Print(Value->mib, Key, bool(Arg));
   }
 
+  // See memprof_mapping.h for an overview on histogram counters.
+  static MemInfoBlock CreateNewMIB(uptr p, MemprofChunk *m, u64 user_size) {
+    if (__memprof_histogram) {
+      return CreateNewMIBWithHistogram(p, m, user_size);
+    } else {
+      return CreateNewMIBWithoutHistogram(p, m, user_size);
+    }
+  }
+
+  static MemInfoBlock CreateNewMIBWithHistogram(uptr p, MemprofChunk *m,
+                                                u64 user_size) {
+
+    u64 c = GetShadowCountHistogram(p, user_size);
+    long curtime = GetTimestamp();
+    uint32_t HistogramSize =
+        RoundUpTo(user_size, HISTOGRAM_GRANULARITY) / HISTOGRAM_GRANULARITY;
+    uintptr_t Histogram =
+        (uintptr_t)InternalAlloc(HistogramSize * sizeof(uint64_t));
+    memset((void *)Histogram, 0, HistogramSize * sizeof(uint64_t));
+    for (size_t i = 0; i < HistogramSize; ++i) {
+      u8 Counter =
+          *((u8 *)HISTOGRAM_MEM_TO_SHADOW(p + HISTOGRAM_GRANULARITY * i));
+      ((uint64_t *)Histogram)[i] = (uint64_t)Counter;
+    }
+    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+                        GetCpuId(), Histogram, HistogramSize);
+    return newMIB;
+  }
+
+  static MemInfoBlock CreateNewMIBWithoutHistogram(uptr p, MemprofChunk *m,
+                                                   u64 user_size) {
+    u64 c = GetShadowCount(p, user_size);
+    long curtime = GetTimestamp();
+    MemInfoBlock newMIB(user_size, c, m->timestamp_ms, curtime, m->cpu_id,
+                        GetCpuId(), 0, 0);
+    return newMIB;
+  }
+
   void FinishAndWrite() {
     if (print_text && common_flags()->print_module_map)
       DumpProcessMap();
@@ -319,10 +388,7 @@ struct Allocator {
           if (!m)
             return;
           uptr user_beg = ((uptr)m) + kChunkHeaderSize;
-          u64 c = GetShadowCount(user_beg, user_requested_size);
-          long curtime = GetTimestamp();
-          MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
-                              m->cpu_id, GetCpuId());
+          MemInfoBlock newMIB = CreateNewMIB(user_beg, m, user_requested_size);
           InsertOrMerge(m->alloc_context_id, newMIB, A->MIBMap);
         },
         this);
@@ -451,11 +517,7 @@ struct Allocator {
         atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
     if (memprof_inited && atomic_load_relaxed(&constructed) &&
         !atomic_load_relaxed(&destructing)) {
-      u64 c = GetShadowCount(p, user_requested_size);
-      long curtime = GetTimestamp();
-
-      MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
-                          m->cpu_id, GetCpuId());
+      MemInfoBlock newMIB = this->CreateNewMIB(p, m, user_requested_size);
       InsertOrMerge(m->alloc_context_id, newMIB, MIBMap);
     }
 

diff  --git a/compiler-rt/lib/memprof/memprof_flags.inc b/compiler-rt/lib/memprof/memprof_flags.inc
index ee0760ddc302a..7c5dc091f7935 100644
--- a/compiler-rt/lib/memprof/memprof_flags.inc
+++ b/compiler-rt/lib/memprof/memprof_flags.inc
@@ -38,4 +38,4 @@ MEMPROF_FLAG(bool, allocator_frees_and_returns_null_on_realloc_zero, true,
 MEMPROF_FLAG(bool, print_text, false,
   "If set, prints the heap profile in text format. Else use the raw binary serialization format.")
 MEMPROF_FLAG(bool, print_terse, false,
-             "If set, prints memory profile in a terse format. Only applicable if print_text = true.")
+             "If set, prints memory profile in a terse format. Only applicable if print_text = true.")
\ No newline at end of file

diff  --git a/compiler-rt/lib/memprof/memprof_mapping.h b/compiler-rt/lib/memprof/memprof_mapping.h
index 1cc0836834cdf..fef8acfcfc921 100644
--- a/compiler-rt/lib/memprof/memprof_mapping.h
+++ b/compiler-rt/lib/memprof/memprof_mapping.h
@@ -22,7 +22,6 @@ static const u64 kDefaultShadowScale = 3;
 
 #define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
 #define MEMPROF_ALIGNMENT 32
-
 namespace __memprof {
 
 extern uptr kHighMemEnd; // Initialized in __memprof_init.
@@ -37,6 +36,34 @@ extern uptr kHighMemEnd; // Initialized in __memprof_init.
 #define MEM_TO_SHADOW(mem)                                                     \
   ((((mem) & SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
 
+// Histogram shadow memory is laid different to the standard configuration:
+
+//             8 bytes
+//         +---+---+---+  +---+---+---+  +---+---+---+
+//  Memory |     a     |  |     b     |  |     c     |
+//         +---+---+---+  +---+---+---+  +---+---+---+
+
+//             +---+          +---+          +---+
+//  Shadow     | a |          | b |          | c |
+//             +---+          +---+          +---+
+//            1 byte
+//
+// Where we have a 1 byte counter for each 8 bytes. HISTOGRAM_MEM_TO_SHADOW
+// translates a memory address to the address of its corresponding shadow
+// counter memory address. The same data is still provided in MIB whether
+// histograms are used or not. Total access counts per allocations are
+// computed by summing up all individual 1 byte counters. This can incur an
+// accuracy penalty.
+
+#define HISTOGRAM_GRANULARITY 8U
+
+#define HISTOGRAM_MAX_COUNTER 255U
+
+#define HISTOGRAM_SHADOW_MASK ~(HISTOGRAM_GRANULARITY - 1)
+
+#define HISTOGRAM_MEM_TO_SHADOW(mem)                                           \
+  ((((mem) & HISTOGRAM_SHADOW_MASK) >> SHADOW_SCALE) + (SHADOW_OFFSET))
+
 #define SHADOW_ENTRY_SIZE (MEM_GRANULARITY >> SHADOW_SCALE)
 
 #define kLowMemBeg 0
@@ -108,6 +135,14 @@ inline void RecordAccess(uptr a) {
   (*shadow_address)++;
 }
 
+inline void RecordAccessHistogram(uptr a) {
+  CHECK_EQ(SHADOW_ENTRY_SIZE, 8);
+  u8 *shadow_address = (u8 *)HISTOGRAM_MEM_TO_SHADOW(a);
+  if (*shadow_address < HISTOGRAM_MAX_COUNTER) {
+    (*shadow_address)++;
+  }
+}
+
 } // namespace __memprof
 
 #endif // MEMPROF_MAPPING_H

diff  --git a/compiler-rt/lib/memprof/memprof_mibmap.cpp b/compiler-rt/lib/memprof/memprof_mibmap.cpp
index 32f0796c8f241..a49ed8bf4fd64 100644
--- a/compiler-rt/lib/memprof/memprof_mibmap.cpp
+++ b/compiler-rt/lib/memprof/memprof_mibmap.cpp
@@ -30,7 +30,18 @@ void InsertOrMerge(const uptr Id, const MemInfoBlock &Block, MIBMapTy &Map) {
   } else {
     LockedMemInfoBlock *lmib = *h;
     SpinMutexLock lock(&lmib->mutex);
+    uintptr_t ShorterHistogram;
+    if (Block.AccessHistogramSize > lmib->mib.AccessHistogramSize)
+      ShorterHistogram = lmib->mib.AccessHistogram;
+    else
+      ShorterHistogram = Block.AccessHistogram;
+
     lmib->mib.Merge(Block);
+    // The larger histogram is kept and the shorter histogram is discarded after
+    // adding the counters to the larger historam. Free only the shorter
+    // Histogram
+    if (Block.AccessHistogramSize > 0 || lmib->mib.AccessHistogramSize > 0)
+      InternalFree((void *)ShorterHistogram);
   }
 }
 

diff  --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index fa92fa0e4b53e..a897648584828 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -146,24 +146,38 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds,
 // ---------- MIB Entry 0
 // Alloc Count
 // ...
+//       ---- AccessHistogram Entry 0
+//            ...
+//       ---- AccessHistogram Entry AccessHistogramSize - 1
 // ---------- MIB Entry 1
 // Alloc Count
 // ...
+//       ---- AccessHistogram Entry 0
+//            ...
+//       ---- AccessHistogram Entry AccessHistogramSize - 1
 // ----------
 void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
                               const u64 ExpectedNumBytes, char *&Buffer) {
   char *Ptr = Buffer;
   const u64 NumEntries = StackIds.Size();
   Ptr = WriteBytes(NumEntries, Ptr);
-
   for (u64 i = 0; i < NumEntries; i++) {
     const u64 Key = StackIds[i];
     MIBMapTy::Handle h(&MIBMap, Key, /*remove=*/true, /*create=*/false);
     CHECK(h.exists());
     Ptr = WriteBytes(Key, Ptr);
+    // FIXME: We unnecessarily serialize the AccessHistogram pointer. Adding a
+    // serialization schema will fix this issue. See also FIXME in
+    // deserialization.
     Ptr = WriteBytes((*h)->mib, Ptr);
+    for (u64 j = 0; j < (*h)->mib.AccessHistogramSize; ++j) {
+      u64 HistogramEntry = ((u64 *)((*h)->mib.AccessHistogram))[j];
+      Ptr = WriteBytes(HistogramEntry, Ptr);
+    }
+    if ((*h)->mib.AccessHistogramSize > 0) {
+      InternalFree((void *)((*h)->mib.AccessHistogram));
+    }
   }
-
   CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
         "Expected num bytes != actual bytes written");
 }
@@ -192,7 +206,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
 // ---------- MIB Entry
 // Alloc Count
 // ...
-// ----------
+//       ---- AccessHistogram Entry 0
+//            ...
+//       ---- AccessHistogram Entry AccessHistogramSize - 1
+// ---------- MIB Entry 1
+// Alloc Count
+// ...
+//       ---- AccessHistogram Entry 0
+//            ...
+//       ---- AccessHistogram Entry AccessHistogramSize - 1
 // Optional Padding Bytes
 // ---------- Stack Info
 // Num Entries
@@ -218,13 +240,26 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
   const u64 NumMIBInfoBytes = RoundUpTo(
       sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8);
 
+  // Get Number of AccessHistogram entries in total
+  u64 TotalAccessHistogramEntries = 0;
+  MIBMap.ForEach(
+      [](const uptr Key, UNUSED LockedMemInfoBlock *const &MIB, void *Arg) {
+        u64 *TotalAccessHistogramEntries = (u64 *)Arg;
+        *TotalAccessHistogramEntries += MIB->mib.AccessHistogramSize;
+      },
+      reinterpret_cast<void *>(&TotalAccessHistogramEntries));
+  const u64 NumHistogramBytes =
+      RoundUpTo(TotalAccessHistogramEntries * sizeof(uint64_t), 8);
+
   const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
 
   // Ensure that the profile is 8b aligned. We allow for some optional padding
   // at the end so that any subsequent profile serialized to the same file does
   // not incur unaligned accesses.
-  const u64 TotalSizeBytes = RoundUpTo(
-      sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8);
+  const u64 TotalSizeBytes =
+      RoundUpTo(sizeof(Header) + NumSegmentBytes + NumStackBytes +
+                    NumMIBInfoBytes + NumHistogramBytes,
+                8);
 
   // Allocate the memory for the entire buffer incl. info blocks.
   Buffer = (char *)InternalAlloc(TotalSizeBytes);
@@ -235,14 +270,16 @@ u64 SerializeToRawProfile(MIBMapTy &MIBMap, ArrayRef<LoadedModule> Modules,
                 static_cast<u64>(TotalSizeBytes),
                 sizeof(Header),
                 sizeof(Header) + NumSegmentBytes,
-                sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes};
+                sizeof(Header) + NumSegmentBytes + NumMIBInfoBytes +
+                    NumHistogramBytes};
   Ptr = WriteBytes(header, Ptr);
 
   SerializeSegmentsToBuffer(Modules, NumSegmentBytes, Ptr);
   Ptr += NumSegmentBytes;
 
-  SerializeMIBInfoToBuffer(MIBMap, StackIds, NumMIBInfoBytes, Ptr);
-  Ptr += NumMIBInfoBytes;
+  SerializeMIBInfoToBuffer(MIBMap, StackIds,
+                           NumMIBInfoBytes + NumHistogramBytes, Ptr);
+  Ptr += NumMIBInfoBytes + NumHistogramBytes;
 
   SerializeStackToBuffer(StackIds, NumStackBytes, Ptr);
 

diff  --git a/compiler-rt/lib/memprof/memprof_rtl.cpp b/compiler-rt/lib/memprof/memprof_rtl.cpp
index 0a63f813848ee..a87e8cefd6c40 100644
--- a/compiler-rt/lib/memprof/memprof_rtl.cpp
+++ b/compiler-rt/lib/memprof/memprof_rtl.cpp
@@ -32,6 +32,9 @@ uptr __memprof_shadow_memory_dynamic_address; // Global interface symbol.
 // Allow the user to specify a profile output file via the binary.
 SANITIZER_WEAK_ATTRIBUTE char __memprof_profile_filename[1];
 
+// Share ClHistogram compiler flag with runtime.
+SANITIZER_WEAK_ATTRIBUTE bool __memprof_histogram;
+
 namespace __memprof {
 
 static void MemprofDie() {
@@ -75,12 +78,23 @@ uptr kHighMemEnd;
 // exported functions
 
 #define MEMPROF_MEMORY_ACCESS_CALLBACK_BODY() __memprof::RecordAccess(addr);
+#define MEMPROF_MEMORY_ACCESS_CALLBACK_BODY_HIST()                             \
+  __memprof::RecordAccessHistogram(addr);
 
 #define MEMPROF_MEMORY_ACCESS_CALLBACK(type)                                   \
   extern "C" NOINLINE INTERFACE_ATTRIBUTE void __memprof_##type(uptr addr) {   \
     MEMPROF_MEMORY_ACCESS_CALLBACK_BODY()                                      \
   }
 
+#define MEMPROF_MEMORY_ACCESS_CALLBACK_HIST(type)                              \
+  extern "C" NOINLINE INTERFACE_ATTRIBUTE void __memprof_hist_##type(          \
+      uptr addr) {                                                             \
+    MEMPROF_MEMORY_ACCESS_CALLBACK_BODY_HIST()                                 \
+  }
+
+MEMPROF_MEMORY_ACCESS_CALLBACK_HIST(load)
+MEMPROF_MEMORY_ACCESS_CALLBACK_HIST(store)
+
 MEMPROF_MEMORY_ACCESS_CALLBACK(load)
 MEMPROF_MEMORY_ACCESS_CALLBACK(store)
 
@@ -260,11 +274,20 @@ void __memprof_record_access(void const volatile *addr) {
   __memprof::RecordAccess((uptr)addr);
 }
 
+void __memprof_record_access_hist(void const volatile *addr) {
+  __memprof::RecordAccessHistogram((uptr)addr);
+}
+
 void __memprof_record_access_range(void const volatile *addr, uptr size) {
   for (uptr a = (uptr)addr; a < (uptr)addr + size; a += kWordSize)
     __memprof::RecordAccess(a);
 }
 
+void __memprof_record_access_range_hist(void const volatile *addr, uptr size) {
+  for (uptr a = (uptr)addr; a < (uptr)addr + size; a += kWordSize)
+    __memprof::RecordAccessHistogram(a);
+}
+
 extern "C" SANITIZER_INTERFACE_ATTRIBUTE u16
 __sanitizer_unaligned_load16(const uu16 *p) {
   __memprof_record_access(p);

diff  --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp
index c5dfdca890be9..5764af9ce8afb 100644
--- a/compiler-rt/lib/memprof/tests/rawprofile.cpp
+++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp
@@ -95,7 +95,7 @@ TEST(MemProf, Basic) {
   // sizeof(MemInfoBlock) contains stack id + MeminfoBlock.
   EXPECT_EQ(StackOffset - MIBOffset, 8 + 2 * (8 + sizeof(MemInfoBlock)));
 
-  EXPECT_EQ(StackOffset, 408ULL);
+  EXPECT_EQ(StackOffset, 432ULL);
   // We expect 2 stack entries, with 5 frames - 8b for total count,
   // 2 * (8b for id, 8b for frame count and 5*8b for fake frames).
   // Since this is the last section, there may be additional padding at the end

diff  --git a/llvm/include/llvm/ProfileData/MIBEntryDef.inc b/llvm/include/llvm/ProfileData/MIBEntryDef.inc
index 794163ae10386..58c1fc4de4aba 100644
--- a/llvm/include/llvm/ProfileData/MIBEntryDef.inc
+++ b/llvm/include/llvm/ProfileData/MIBEntryDef.inc
@@ -51,3 +51,5 @@ MIBEntryDef(MaxAccessDensity = 22, MaxAccessDensity, uint32_t)
 MIBEntryDef(TotalLifetimeAccessDensity = 23, TotalLifetimeAccessDensity, uint64_t)
 MIBEntryDef(MinLifetimeAccessDensity = 24, MinLifetimeAccessDensity, uint32_t)
 MIBEntryDef(MaxLifetimeAccessDensity = 25, MaxLifetimeAccessDensity, uint32_t)
+MIBEntryDef(AccessHistogramSize = 26, AccessHistogramSize, uint32_t)
+MIBEntryDef(AccessHistogram = 27, AccessHistogram, uintptr_t)
\ No newline at end of file

diff  --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 5cd74be9dc8c4..cee8e5ef4c25d 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -124,6 +124,13 @@ struct PortableMemInfoBlock {
   OS << "        " << #Name << ": " << Name << "\n";
 #include "llvm/ProfileData/MIBEntryDef.inc"
 #undef MIBEntryDef
+    if (AccessHistogramSize > 0) {
+      OS << "        " << "AccessHistogramValues" << ":";
+      for (uint32_t I = 0; I < AccessHistogramSize; ++I) {
+        OS << " " << ((uint64_t *)AccessHistogram)[I];
+      }
+      OS << "\n";
+    }
   }
 
   // Return the schema, only for unit tests.

diff  --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index b82a4baf6dd74..3f785bd23fce3 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -33,7 +33,13 @@
    (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
 
 // The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 3ULL
+#define MEMPROF_RAW_VERSION 4ULL
+
+// Currently supported versions.
+#define MEMPROF_RAW_SUPPORTED_VERSIONS                                         \
+  { 3ULL, 4ULL }
+
+#define MEMPROF_V3_MIB_SIZE 132ULL;
 
 #define MEMPROF_BUILDID_MAX_SIZE 32ULL
 
@@ -119,7 +125,8 @@ MemInfoBlock() {
 }
 
 MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
-             uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu)
+             uint32_t DeallocTs, uint32_t AllocCpu, uint32_t DeallocCpu,
+             uintptr_t Histogram, uint32_t HistogramSize)
     : MemInfoBlock() {
   AllocCount = 1U;
   TotalAccessCount = AccessCount;
@@ -149,6 +156,8 @@ MemInfoBlock(uint32_t Size, uint64_t AccessCount, uint32_t AllocTs,
   AllocCpuId = AllocCpu;
   DeallocCpuId = DeallocCpu;
   NumMigratedCpu = AllocCpuId != DeallocCpuId;
+  AccessHistogramSize = HistogramSize;
+  AccessHistogram = Histogram;
 }
 
 void Merge(const MemInfoBlock &newMIB) {
@@ -194,6 +203,24 @@ void Merge(const MemInfoBlock &newMIB) {
   NumSameDeallocCpu += DeallocCpuId == newMIB.DeallocCpuId;
   AllocCpuId = newMIB.AllocCpuId;
   DeallocCpuId = newMIB.DeallocCpuId;
+
+  // For merging histograms, we always keep the longer histogram, and add
+  // values of shorter histogram to larger one.
+  uintptr_t ShorterHistogram;
+  uint32_t ShorterHistogramSize;
+  if (newMIB.AccessHistogramSize > AccessHistogramSize) {
+    ShorterHistogram = AccessHistogram;
+    ShorterHistogramSize = AccessHistogramSize;
+    // Swap histogram of current to larger histogram
+    AccessHistogram = newMIB.AccessHistogram;
+    AccessHistogramSize = newMIB.AccessHistogramSize;
+  } else {
+    ShorterHistogram = newMIB.AccessHistogram;
+    ShorterHistogramSize = newMIB.AccessHistogramSize;
+  }
+  for (size_t i = 0; i < ShorterHistogramSize; ++i) {
+    ((uint64_t *)AccessHistogram)[i] += ((uint64_t *)ShorterHistogram)[i];
+  }
 }
 
 #ifdef _MSC_VER

diff  --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index fbba6483abe3e..da2f14b276ffb 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -137,7 +137,7 @@ class RawMemProfReader final : public MemProfReader {
 public:
   RawMemProfReader(const RawMemProfReader &) = delete;
   RawMemProfReader &operator=(const RawMemProfReader &) = delete;
-  ~RawMemProfReader() override = default;
+  virtual ~RawMemProfReader() override;
 
   // Prints the contents of the profile in YAML format.
   void printYAML(raw_ostream &OS);
@@ -205,8 +205,14 @@ class RawMemProfReader final : public MemProfReader {
 
   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
 
+  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
+  readMemInfoBlocks(const char *Ptr);
+
   // The profiled binary.
   object::OwningBinary<object::Binary> Binary;
+  // Version of raw memprof binary currently being read. Defaults to most up
+  // to date version.
+  uint64_t MemprofRawVersion = MEMPROF_RAW_VERSION;
   // The preferred load address of the executable segment.
   uint64_t PreferredTextSegmentAddress = 0;
   // The base address of the text segment in the process during profiling.

diff  --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index de58cb6331860..9112e19982eca 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -41,7 +41,6 @@
 #include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "memprof"
-
 namespace llvm {
 namespace memprof {
 namespace {
@@ -68,7 +67,14 @@ Error checkBuffer(const MemoryBuffer &Buffer) {
   const char *Next = Buffer.getBufferStart();
   while (Next < Buffer.getBufferEnd()) {
     const auto *H = reinterpret_cast<const Header *>(Next);
-    if (H->Version != MEMPROF_RAW_VERSION) {
+
+    // Check if the version in header is among the supported versions.
+    bool IsSupported = false;
+    for (auto SupportedVersion : MEMPROF_RAW_SUPPORTED_VERSIONS) {
+      if (H->Version == SupportedVersion)
+        IsSupported = true;
+    }
+    if (!IsSupported) {
       return make_error<InstrProfError>(instrprof_error::unsupported_version);
     }
 
@@ -96,19 +102,63 @@ llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
 }
 
 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
-readMemInfoBlocks(const char *Ptr) {
+readMemInfoBlocksV3(const char *Ptr) {
   using namespace support;
 
   const uint64_t NumItemsToRead =
-      endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+
   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
   for (uint64_t I = 0; I < NumItemsToRead; I++) {
     const uint64_t Id =
-        endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
-    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
+        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+
+    // We cheat a bit here and remove the const from cast to set the
+    // Histogram Pointer to newly allocated buffer. We also cheat, since V3 and
+    // V4 do not have the same fields. V3 is missing AccessHistogramSize and
+    // AccessHistogram. This means we read "dirty" data in here, but it should
+    // not segfault, since there will be callstack data placed after this in the
+    // binary format.
+    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
+    // Overwrite dirty data.
+    MIB.AccessHistogramSize = 0;
+    MIB.AccessHistogram = 0;
+
     Items.push_back({Id, MIB});
+    // Only increment by the size of MIB in V3.
+    Ptr += MEMPROF_V3_MIB_SIZE;
+  }
+  return Items;
+}
+
+llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
+readMemInfoBlocksV4(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+
+  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    const uint64_t Id =
+        endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+    // We cheat a bit here and remove the const from cast to set the
+    // Histogram Pointer to newly allocated buffer.
+    MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
+
     // Only increment by size of MIB since readNext implicitly increments.
     Ptr += sizeof(MemInfoBlock);
+
+    if (MIB.AccessHistogramSize > 0) {
+      MIB.AccessHistogram =
+          (uintptr_t)malloc(MIB.AccessHistogramSize * sizeof(uint64_t));
+    }
+
+    for (uint64_t J = 0; J < MIB.AccessHistogramSize; J++) {
+      ((uint64_t *)MIB.AccessHistogram)[J] =
+          endian::readNext<uint64_t, llvm::endianness::little, unaligned>(Ptr);
+    }
+    Items.push_back({Id, MIB});
   }
   return Items;
 }
@@ -251,6 +301,16 @@ RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
   return std::move(Reader);
 }
 
+// We need to make sure that all leftover MIB histograms that have not been
+// freed by merge are freed here.
+RawMemProfReader::~RawMemProfReader() {
+  for (auto &[_, MIB] : CallstackProfileData) {
+    if (MemprofRawVersion >= 4ULL && MIB.AccessHistogramSize > 0) {
+      free((void *)MIB.AccessHistogram);
+    }
+  }
+}
+
 bool RawMemProfReader::hasFormat(const StringRef Path) {
   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
   if (!BufferOr)
@@ -281,7 +341,7 @@ void RawMemProfReader::printYAML(raw_ostream &OS) {
 
   OS << "MemprofProfile:\n";
   OS << "  Summary:\n";
-  OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
+  OS << "    Version: " << MemprofRawVersion << "\n";
   OS << "    NumSegments: " << SegmentInfo.size() << "\n";
   OS << "    NumMibInfo: " << NumMibInfo << "\n";
   OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
@@ -610,6 +670,20 @@ RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
   return BuildIds.takeVector();
 }
 
+// FIXME: Add a schema for serializing similiar to IndexedMemprofReader. This
+// will help being able to deserialize different versions raw memprof versions
+// more easily.
+llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
+RawMemProfReader::readMemInfoBlocks(const char *Ptr) {
+  if (MemprofRawVersion == 3ULL)
+    return readMemInfoBlocksV3(Ptr);
+  else if (MemprofRawVersion == 4ULL)
+    return readMemInfoBlocksV4(Ptr);
+  else
+    assert(false &&
+           "Panic: Unsupported version number when reading MemInfoBlocks");
+}
+
 Error RawMemProfReader::readRawProfile(
     std::unique_ptr<MemoryBuffer> DataBuffer) {
   const char *Next = DataBuffer->getBufferStart();
@@ -617,6 +691,10 @@ Error RawMemProfReader::readRawProfile(
   while (Next < DataBuffer->getBufferEnd()) {
     const auto *Header = reinterpret_cast<const memprof::Header *>(Next);
 
+    // Set Reader version to memprof raw version of profile. Checking if version
+    // is supported is checked before creating the reader.
+    MemprofRawVersion = Header->Version;
+
     // Read in the segment information, check whether its the same across all
     // profiles in this binary file.
     const llvm::SmallVector<SegmentEntry> Entries =
@@ -636,7 +714,21 @@ Error RawMemProfReader::readRawProfile(
     // stackdepot ids are the same.
     for (const auto &[Id, MIB] : readMemInfoBlocks(Next + Header->MIBOffset)) {
       if (CallstackProfileData.count(Id)) {
-        CallstackProfileData[Id].Merge(MIB);
+
+        if (MemprofRawVersion >= 4ULL &&
+            (CallstackProfileData[Id].AccessHistogramSize > 0 ||
+             MIB.AccessHistogramSize > 0)) {
+          uintptr_t ShorterHistogram;
+          if (CallstackProfileData[Id].AccessHistogramSize >
+              MIB.AccessHistogramSize)
+            ShorterHistogram = MIB.AccessHistogram;
+          else
+            ShorterHistogram = CallstackProfileData[Id].AccessHistogram;
+          CallstackProfileData[Id].Merge(MIB);
+          free((void *)ShorterHistogram);
+        } else {
+          CallstackProfileData[Id].Merge(MIB);
+        }
       } else {
         CallstackProfileData[Id] = MIB;
       }

diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 8a12fa19a3ded..1880928b0d522 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -77,6 +77,8 @@ constexpr char MemProfShadowMemoryDynamicAddress[] =
 
 constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";
 
+constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";
+
 // Command-line flags.
 
 static cl::opt<bool> ClInsertVersionCheck(
@@ -145,10 +147,14 @@ static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
 // override these hints anyway.
 static cl::opt<bool> ClMemProfMatchHotColdNew(
     "memprof-match-hot-cold-new",
-    cl::desc(
+ cl::desc(
         "Match allocation profiles onto existing hot/cold operator new calls"),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> ClHistogram("memprof-histogram",
+                                 cl::desc("Collect access count histograms"),
+                                 cl::Hidden, cl::init(false));
+
 static cl::opt<bool>
     ClPrintMemProfMatchInfo("memprof-print-match-info",
                             cl::desc("Print matching stats for each allocation "
@@ -279,6 +285,11 @@ ModuleMemProfilerPass::ModuleMemProfilerPass() = default;
 
 PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
                                              AnalysisManager<Module> &AM) {
+
+  assert((!ClHistogram || (ClHistogram && ClUseCalls)) &&
+         "Cannot use -memprof-histogram without Callbacks. Set "
+         "memprof-use-callbacks");
+
   ModuleMemProfiler Profiler(M);
   if (Profiler.instrumentModule(M))
     return PreservedAnalyses::none();
@@ -508,7 +519,24 @@ void createProfileFileNameVar(Module &M) {
   }
 }
 
+// Set MemprofHistogramFlag as a Global veriable in IR. This makes it accessible
+// to the runtime, changing shadow count behavior.
+void createMemprofHistogramFlagVar(Module &M) {
+  const StringRef VarName(MemProfHistogramFlagVar);
+  Type *IntTy1 = Type::getInt1Ty(M.getContext());
+  auto MemprofHistogramFlag = new GlobalVariable(
+      M, IntTy1, true, GlobalValue::WeakAnyLinkage,
+      Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);
+  Triple TT(M.getTargetTriple());
+  if (TT.supportsCOMDAT()) {
+    MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);
+    MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));
+  }
+  appendToCompilerUsed(M, MemprofHistogramFlag);
+}
+
 bool ModuleMemProfiler::instrumentModule(Module &M) {
+
   // Create a module constructor.
   std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);
   std::string VersionCheckName =
@@ -524,6 +552,8 @@ bool ModuleMemProfiler::instrumentModule(Module &M) {
 
   createProfileFileNameVar(M);
 
+  createMemprofHistogramFlagVar(M);
+
   return true;
 }
 
@@ -532,11 +562,12 @@ void MemProfiler::initializeCallbacks(Module &M) {
 
   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
     const std::string TypeStr = AccessIsWrite ? "store" : "load";
+    const std::string HistPrefix = ClHistogram ? "hist_" : "";
 
     SmallVector<Type *, 2> Args1{1, IntptrTy};
-    MemProfMemoryAccessCallback[AccessIsWrite] =
-        M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr,
-                              FunctionType::get(IRB.getVoidTy(), Args1, false));
+    MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(
+        ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,
+        FunctionType::get(IRB.getVoidTy(), Args1, false));
   }
   MemProfMemmove = M.getOrInsertFunction(
       ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);
@@ -1024,4 +1055,4 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   }
 
   return PreservedAnalyses::none();
-}
+}
\ No newline at end of file

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
index b10c2f9c72147..361354d7d0a3a 100755
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
index 790249a44b0d1..1ff4352a07d1f 100644
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe
index 3000e2b8515a2..e9e6897a4428e 100755
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.exe differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw
index c0db6d2e0f56c..1ff4f1d9a5c01 100644
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.nocolinfo.memprofraw differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe
index 6e0eaafdaa4de..c9f81fc911151 100755
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.exe differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw
index 43bd116cec4dd..c496a134bf3ce 100644
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_internal_linkage.memprofraw differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe
index 6bb4279baa7d3..d555a8cea0ad7 100755
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.exe differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw
index e74f9663618c2..923d309a0e560 100644
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_loop_unroll.memprofraw differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
index 212f8f8ecce76..1b4ca7feb5619 100755
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe differ

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
index 3a06639d3a2be..a2cfc3f93d669 100644
Binary files a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw differ

diff  --git a/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll b/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll
index 3d4b93c85ddf7..3c4138fc4ca49 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll
@@ -28,12 +28,12 @@ entry:
   store i32 0, ptr %retval, align 4
   store i32 %argc, ptr %argc.addr, align 4
   store ptr %argv, ptr %argv.addr, align 8
-  call void @_ZL3foov.__uniq.231888424933890731874095357293037629092() #4, !dbg !14
+  call void @_ZL3foov.__uniq.50354172613129440706982166615384819716() #4, !dbg !14
   ret i32 0, !dbg !15
 }
 
 ; Function Attrs: mustprogress noinline optnone uwtable
-define internal void @_ZL3foov.__uniq.231888424933890731874095357293037629092() #1 !dbg !16 {
+define internal void @_ZL3foov.__uniq.50354172613129440706982166615384819716() #1 !dbg !16 {
 entry:
   %a = alloca ptr, align 8
   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 20) #5, !dbg !17
@@ -76,7 +76,7 @@ attributes #5 = { builtin allocsize(0) }
 !13 = !{}
 !14 = !DILocation(line: 8, column: 3, scope: !10)
 !15 = !DILocation(line: 9, column: 3, scope: !10)
-!16 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3foov.__uniq.231888424933890731874095357293037629092", scope: !11, file: !11, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0)
+!16 = distinct !DISubprogram(name: "foo", linkageName: "_ZL3foov.__uniq.50354172613129440706982166615384819716", scope: !11, file: !11, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0)
 !17 = !DILocation(line: 4, column: 12, scope: !16)
 !18 = !DILocation(line: 4, column: 8, scope: !16)
 !19 = !DILocation(line: 5, column: 10, scope: !16)

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe
new file mode 100755
index 0000000000000..502472c6977dd
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw
new file mode 100644
index 0000000000000..850da9e557111
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/basic-histogram.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe
index d03894b6d8e40..32b52c8b5be95 100755
Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw
index 62b7d299d3aa1..dae9b4d9c0795 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofexe
new file mode 100755
index 0000000000000..d03894b6d8e40
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofraw
new file mode 100644
index 0000000000000..62b7d299d3aa1
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/basic_v3.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe
index 5687d065d035d..3de26f6ee4213 100755
Binary files a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw
index 43f9ca99c28bb..56926cb9d32ed 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/buildid.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe
index 766d2c4713136..2953080c3d52d 100755
Binary files a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw
index 26254ae67c7c2..453b059b33199 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe
index d5c3ec65b3ac7..3ba9f8a565ed3 100755
Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw
index a1018aae2fba4..792edb742303e 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe
new file mode 100755
index 0000000000000..fad3fc111a33e
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw
new file mode 100644
index 0000000000000..0d540ca94a5aa
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/padding-histogram.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe
index d854186380e73..b92ed10d24109 100755
Binary files a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe and b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofexe differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw
index 992022fa9129c..1994b49eb15e5 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/pic.memprofraw differ

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh b/llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh
index 5365a0bb6cc3a..10c36a834c06a 100755
--- a/llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh
+++ b/llvm/test/tools/llvm-profdata/Inputs/update_memprof_inputs.sh
@@ -31,7 +31,7 @@ read -r -d '' INLINE << EOF
 
 __attribute__((always_inline))
 void qux(int x) {
-  char *ptr = malloc(x);
+  char *ptr = (char*) malloc(x);
   memset(ptr, 0, x);
   free(ptr);
 }
@@ -64,6 +64,190 @@ int main(int argc, char **argv) {
 }
 EOF
 
+read -r -d '' BASIC_HISTOGRAM << EOF
+struct A {
+  long int a;
+  long int b;
+  long int c;
+  long int d;
+  long int e;
+  long int f;
+  long int g;
+  long int h;
+  A() {};
+};
+
+void foo() {
+  long int acc = 0;
+  A *a = new A();
+  acc += a->a;
+  acc += a->b;
+  acc += a->c;
+  acc += a->d;
+  acc += a->e;
+  acc += a->f;
+  acc += a->g;
+  acc += a->h;
+  delete a;
+}
+void bar() {
+  long int acc = 0;
+  A *a = new A();
+  acc += a->a;
+  acc += a->a;
+  acc += a->a;
+  acc += a->a;
+  acc += a->a;
+  acc += a->a;
+  acc += a->a;
+  acc += a->a;
+  acc += a->b;
+  acc += a->b;
+  acc += a->b;
+  acc += a->b;
+  acc += a->b;
+  acc += a->b;
+  acc += a->b;
+  acc += a->c;
+  acc += a->c;
+  acc += a->c;
+  acc += a->c;
+  acc += a->c;
+  acc += a->c;
+  acc += a->d;
+  acc += a->d;
+  acc += a->d;
+  acc += a->d;
+  acc += a->d;
+  acc += a->e;
+  acc += a->e;
+  acc += a->e;
+  acc += a->e;
+  acc += a->f;
+  acc += a->f;
+  acc += a->f;
+  acc += a->g;
+  acc += a->g;
+  acc += a->h;
+
+  delete a;
+}
+
+int main(int argc, char **argv) {
+  long int acc = 0;
+  A *a = new A();
+  acc += a->a;
+  acc += a->b;
+  acc += a->c;
+  acc += a->d;
+  acc += a->e;
+  acc += a->f;
+  acc += a->g;
+  acc += a->h;
+
+  delete a;
+
+  A *b = new A();
+  acc += b->a;
+  acc += b->a;
+  acc += b->a;
+  acc += b->a;
+  acc += b->a;
+  acc += b->a;
+  acc += b->a;
+  acc += b->a;
+  acc += b->b;
+  acc += b->b;
+  acc += b->b;
+  acc += b->b;
+  acc += b->b;
+  acc += b->b;
+  acc += b->b;
+  acc += b->c;
+  acc += b->c;
+  acc += b->c;
+  acc += b->c;
+  acc += b->c;
+  acc += b->c;
+  acc += b->d;
+  acc += b->d;
+  acc += b->d;
+  acc += b->d;
+  acc += b->d;
+  acc += b->e;
+  acc += b->e;
+  acc += b->e;
+  acc += b->e;
+  acc += b->f;
+  acc += b->f;
+  acc += b->f;
+  acc += b->g;
+  acc += b->g;
+  acc += b->h;
+
+  delete b;
+
+  A *c = new A();
+  acc += c->a;
+
+  for (int i = 0; i < 21; ++i) {
+
+    foo();
+  }
+
+  for (int i = 0; i < 21; ++i) {
+
+    bar();
+  }
+
+  return 0;
+}
+EOF
+
+read -r -d '' PADDING_HISTOGRAM << EOF
+struct A {
+  char a;
+  char b;
+  long int c;
+  char d;
+  int e;
+  A() {};
+};
+
+struct B {
+  double x;
+  double y;
+  B() {};
+};
+
+struct C {
+  A a;
+  char z;
+  B b;
+  C() {};
+};
+
+int main(int argc, char **argv) {
+  long int acc = 0;
+
+  A *a = new A();
+  acc += a->a;
+  acc += a->b;
+  acc += a->c;
+  acc += a->d;
+  acc += a->e;
+
+  C *c = new C();
+  acc += c->a.a;
+  acc += c->a.a;
+  acc += c->b.x;
+  acc += c->b.y;
+
+  return 0;
+}
+EOF
+
+
 DEFAULT_MEMPROF_FLAGS="-fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie"
 
 # Map each test to their source and any additional flags separated by ; 
@@ -81,3 +265,20 @@ for name in "${!INPUTS[@]}"; do
   env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/${name}.memprofexe > ${OUTDIR}/${name}.memprofraw
   rm ${OUTDIR}/${name}.c
 done
+
+
+DEFAULT_HIST_FLAGS="${DEFAULT_MEMPROF_FLAGS} -mllvm -memprof-use-callbacks=true -mllvm -memprof-histogram"
+
+
+# Map each test to their source and any additional flags separated by ; 
+declare -A HISTOGRAM_INPUTS
+HISTOGRAM_INPUTS["basic-histogram"]="BASIC_HISTOGRAM"
+HISTOGRAM_INPUTS["padding-histogram"]="PADDING_HISTOGRAM"
+
+for name in "${!HISTOGRAM_INPUTS[@]}"; do
+  IFS=";" read -r src flags <<< "${HISTOGRAM_INPUTS[$name]}"
+  echo "${!src}" > ${OUTDIR}/${name}.c
+  ${CLANG} ${DEFAULT_HIST_FLAGS} ${flags} ${OUTDIR}/${name}.c -o ${OUTDIR}/${name}.memprofexe
+  env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/${name}.memprofexe > ${OUTDIR}/${name}.memprofraw
+  rm ${OUTDIR}/${name}.c
+done
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test b/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test
new file mode 100644
index 0000000000000..3d30a627bdd79
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/memprof-basic-histogram.test
@@ -0,0 +1,244 @@
+REQUIRES: x86_64-linux
+
+To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang
+RUN: llvm-profdata show --memory %p/Inputs/basic-histogram.memprofraw --profiled-binary %p/Inputs/basic-histogram.memprofexe -o - | FileCheck %s
+
+We expect 5 MIBs, each with different AccessHistogramValues.
+
+CHECK: MemprofProfile:
+CHECK-NEXT:   Summary:
+CHECK-NEXT:     Version: 4
+CHECK-NEXT:     NumSegments: {{[0-9]+}}
+CHECK-NEXT:     NumMibInfo: 5
+CHECK-NEXT:     NumAllocFunctions: 3
+CHECK-NEXT:     NumStackOffsets: 5
+CHECK-NEXT:   Segments:
+CHECK-NEXT:   -
+CHECK-NEXT:     BuildId: {{[[:xdigit:]]+}}
+CHECK-NEXT:     Start: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     End: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     Offset: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:   -
+
+CHECK:   Records:
+CHECK-NEXT:   -
+CHECK-NEXT:     FunctionGUID: {{[0-9]+}}
+CHECK-NEXT:     AllocSites:
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 2
+CHECK-NEXT:         Column: 10
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 8
+CHECK-NEXT:         MinAccessCount: 8
+CHECK-NEXT:         MaxAccessCount: 8
+CHECK-NEXT:         TotalSize: 64
+CHECK-NEXT:         MinSize: 64
+CHECK-NEXT:         MaxSize: 64
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 12
+CHECK-NEXT:         MinAccessDensity: 12
+CHECK-NEXT:         MaxAccessDensity: 12
+CHECK-NEXT:         TotalLifetimeAccessDensity: 12000
+CHECK-NEXT:         MinLifetimeAccessDensity: 12000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 12000
+CHECK-NEXT:         AccessHistogramSize: 8
+CHECK-NEXT:         AccessHistogram: {{[0-9]+}}
+CHECK-NEXT:         AccessHistogramValues: 1 1 1 1 1 1 1 1
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 14
+CHECK-NEXT:         Column: 10
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 36
+CHECK-NEXT:         MinAccessCount: 36
+CHECK-NEXT:         MaxAccessCount: 36
+CHECK-NEXT:         TotalSize: 64
+CHECK-NEXT:         MinSize: 64
+CHECK-NEXT:         MaxSize: 64
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 56
+CHECK-NEXT:         MinAccessDensity: 56
+CHECK-NEXT:         MaxAccessDensity: 56
+CHECK-NEXT:         TotalLifetimeAccessDensity: 56000
+CHECK-NEXT:         MinLifetimeAccessDensity: 56000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 56000
+CHECK-NEXT:         AccessHistogramSize: 8
+CHECK-NEXT:         AccessHistogram: {{[0-9]+}}
+CHECK-NEXT:         AccessHistogramValues: 8 7 6 5 4 3 2 1
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 54
+CHECK-NEXT:         Column: 10
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 1
+CHECK-NEXT:         MinAccessCount: 1
+CHECK-NEXT:         MaxAccessCount: 1
+CHECK-NEXT:         TotalSize: 64
+CHECK-NEXT:         MinSize: 64
+CHECK-NEXT:         MaxSize: 64
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 1
+CHECK-NEXT:         MinAccessDensity: 1
+CHECK-NEXT:         MaxAccessDensity: 1
+CHECK-NEXT:         TotalLifetimeAccessDensity: 1000
+CHECK-NEXT:         MinLifetimeAccessDensity: 1000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 1000
+CHECK-NEXT:         AccessHistogramSize: 8
+CHECK-NEXT:         AccessHistogram: {{[0-9]+}}
+CHECK-NEXT:         AccessHistogramValues: 1 0 0 0 0 0 0 0
+CHECK-NEXT:     CallSites:
+CHECK-NEXT:     -
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 59
+CHECK-NEXT:         Column: 5
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 64
+CHECK-NEXT:         Column: 5
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:   -
+CHECK-NEXT:     FunctionGUID: {{[0-9]+}}
+CHECK-NEXT:     AllocSites:
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: _Z3foov
+CHECK-NEXT:         LineOffset: 2
+CHECK-NEXT:         Column: 10
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 59
+CHECK-NEXT:         Column: 5
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 21
+CHECK-NEXT:         TotalAccessCount: 168
+CHECK-NEXT:         MinAccessCount: 8
+CHECK-NEXT:         MaxAccessCount: 8
+CHECK-NEXT:         TotalSize: 1344
+CHECK-NEXT:         MinSize: 64
+CHECK-NEXT:         MaxSize: 64
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 20
+CHECK-NEXT:         NumSameDeallocCpu: 20
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 252
+CHECK-NEXT:         MinAccessDensity: 12
+CHECK-NEXT:         MaxAccessDensity: 12
+CHECK-NEXT:         TotalLifetimeAccessDensity: 252000
+CHECK-NEXT:         MinLifetimeAccessDensity: 12000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 12000
+CHECK-NEXT:         AccessHistogramSize: 8
+CHECK-NEXT:         AccessHistogram: {{[0-9]+}}
+CHECK-NEXT:         AccessHistogramValues: 21 21 21 21 21 21 21 21
+CHECK-NEXT:   -
+CHECK-NEXT:     FunctionGUID: {{[0-9]+}}
+CHECK-NEXT:     AllocSites:
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: _Z3barv
+CHECK-NEXT:         LineOffset: 2
+CHECK-NEXT:         Column: 10
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 64
+CHECK-NEXT:         Column: 5
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 21
+CHECK-NEXT:         TotalAccessCount: 756
+CHECK-NEXT:         MinAccessCount: 36
+CHECK-NEXT:         MaxAccessCount: 36
+CHECK-NEXT:         TotalSize: 1344
+CHECK-NEXT:         MinSize: 64
+CHECK-NEXT:         MaxSize: 64
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 20
+CHECK-NEXT:         NumSameDeallocCpu: 20
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 1176
+CHECK-NEXT:         MinAccessDensity: 56
+CHECK-NEXT:         MaxAccessDensity: 56
+CHECK-NEXT:         TotalLifetimeAccessDensity: 1176000
+CHECK-NEXT:         MinLifetimeAccessDensity: 56000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 56000
+CHECK-NEXT:         AccessHistogramSize: 8
+CHECK-NEXT:         AccessHistogram: {{[0-9]+}}
+CHECK-NEXT:         AccessHistogramValues: 168 147 126 105 84 63 42 21
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index 8eaa2fa1013f4..e15df50bc1657 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -8,7 +8,7 @@ additional allocations which do not originate from the main binary are pruned.
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:   Summary:
-CHECK-NEXT:     Version: 3
+CHECK-NEXT:     Version: 4
 CHECK-NEXT:     NumSegments: {{[0-9]+}}
 CHECK-NEXT:     NumMibInfo: 2
 CHECK-NEXT:     NumAllocFunctions: 1
@@ -59,6 +59,8 @@ CHECK-NEXT:         MaxAccessDensity: 20
 CHECK-NEXT:         TotalLifetimeAccessDensity: 20000
 CHECK-NEXT:         MinLifetimeAccessDensity: 20000
 CHECK-NEXT:         MaxLifetimeAccessDensity: 20000
+CHECK-NEXT:         AccessHistogramSize: 0
+CHECK-NEXT:         AccessHistogram: 0
 CHECK-NEXT:     -
 CHECK-NEXT:       Callstack:
 CHECK-NEXT:       -
@@ -93,3 +95,5 @@ CHECK-NEXT:         MaxAccessDensity: 20
 CHECK-NEXT:         TotalLifetimeAccessDensity: 20000
 CHECK-NEXT:         MinLifetimeAccessDensity: 20000
 CHECK-NEXT:         MaxLifetimeAccessDensity: 20000
+CHECK-NEXT:         AccessHistogramSize: 0
+CHECK-NEXT:         AccessHistogram: 0
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-profdata/memprof-basic_v3.test b/llvm/test/tools/llvm-profdata/memprof-basic_v3.test
new file mode 100644
index 0000000000000..3c10c574c4032
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/memprof-basic_v3.test
@@ -0,0 +1,102 @@
+REQUIRES: x86_64-linux
+
+This is a copy of memprof-basic.test with slight changes to check that we can still read v3 of memprofraw.
+
+Inputs cannot and should not be updated.
+
+RUN: llvm-profdata show --memory %p/Inputs/basic_v3.memprofraw --profiled-binary %p/Inputs/basic_v3.memprofexe -o - | FileCheck %s
+
+We expect 2 MIB entries, 1 each for the malloc calls in the program. Any
+additional allocations which do not originate from the main binary are pruned.
+
+CHECK:  MemprofProfile:
+CHECK-NEXT:   Summary:
+CHECK-NEXT:     Version: 3
+CHECK-NEXT:     NumSegments: {{[0-9]+}}
+CHECK-NEXT:     NumMibInfo: 2
+CHECK-NEXT:     NumAllocFunctions: 1
+CHECK-NEXT:     NumStackOffsets: 2
+CHECK-NEXT:   Segments:
+CHECK-NEXT:   -
+CHECK-NEXT:     BuildId: {{[[:xdigit:]]+}}
+CHECK-NEXT:     Start: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     End: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     Offset: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:   -
+
+CHECK:   Records:
+CHECK-NEXT:   -
+CHECK-NEXT:     FunctionGUID: {{[0-9]+}}
+CHECK-NEXT:     AllocSites:
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 1
+CHECK-NEXT:         Column: 21
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 2
+CHECK-NEXT:         MinAccessCount: 2
+CHECK-NEXT:         MaxAccessCount: 2
+CHECK-NEXT:         TotalSize: 10
+CHECK-NEXT:         MinSize: 10
+CHECK-NEXT:         MaxSize: 10
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 20
+CHECK-NEXT:         MinAccessDensity: 20
+CHECK-NEXT:         MaxAccessDensity: 20
+CHECK-NEXT:         TotalLifetimeAccessDensity: 20000
+CHECK-NEXT:         MinLifetimeAccessDensity: 20000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 20000
+CHECK-NEXT:         AccessHistogramSize: 0
+CHECK-NEXT:         AccessHistogram: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         SymbolName: main
+CHECK-NEXT:         LineOffset: 4
+CHECK-NEXT:         Column: 15
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 2
+CHECK-NEXT:         MinAccessCount: 2
+CHECK-NEXT:         MaxAccessCount: 2
+CHECK-NEXT:         TotalSize: 10
+CHECK-NEXT:         MinSize: 10
+CHECK-NEXT:         MaxSize: 10
+CHECK-NEXT:         AllocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         DeallocTimestamp: {{[0-9]+}}
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: {{[0-9]+}}
+CHECK-NEXT:         DeallocCpuId: {{[0-9]+}}
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
+CHECK-NEXT:         TotalAccessDensity: 20
+CHECK-NEXT:         MinAccessDensity: 20
+CHECK-NEXT:         MaxAccessDensity: 20
+CHECK-NEXT:         TotalLifetimeAccessDensity: 20000
+CHECK-NEXT:         MinLifetimeAccessDensity: 20000
+CHECK-NEXT:         MaxLifetimeAccessDensity: 20000
+CHECK-NEXT:         AccessHistogramSize: 0
+CHECK-NEXT:         AccessHistogram: 0
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test
index dd842c0542083..79ce2ad838482 100644
--- a/llvm/test/tools/llvm-profdata/memprof-inline.test
+++ b/llvm/test/tools/llvm-profdata/memprof-inline.test
@@ -5,7 +5,7 @@ RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:  Summary:
-CHECK-NEXT:    Version: 3
+CHECK-NEXT:    Version: 4
 CHECK-NEXT:    NumSegments: {{[0-9]+}}
 CHECK-NEXT:    NumMibInfo: 2
 CHECK-NEXT:    NumAllocFunctions: 2
@@ -20,25 +20,25 @@ CHECK-NEXT:  -
 
 CHECK:  Records:
 CHECK-NEXT:  -
-CHECK-NEXT:    FunctionGUID: 15505678318020221912
+CHECK-NEXT:    FunctionGUID: 3873612792189045660
 CHECK-NEXT:    AllocSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      Callstack:
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 15505678318020221912
-CHECK-NEXT:        SymbolName: qux
+CHECK-NEXT:        Function: 3873612792189045660
+CHECK-NEXT:        SymbolName: _Z3quxi
 CHECK-NEXT:        LineOffset: 1
-CHECK-NEXT:        Column: 15
+CHECK-NEXT:        Column: 23
 CHECK-NEXT:        Inline: 1
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 6699318081062747564
-CHECK-NEXT:        SymbolName: foo
+CHECK-NEXT:        Function: 1228452328526475178
+CHECK-NEXT:        SymbolName: _Z3fooi
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 18
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 16434608426314478903
-CHECK-NEXT:        SymbolName: bar
+CHECK-NEXT:        Function: 3727899762981752933
+CHECK-NEXT:        SymbolName: _Z3bari
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 19
 CHECK-NEXT:        Inline: 0
@@ -74,26 +74,30 @@ CHECK-NEXT:        MaxAccessDensity: 100
 CHECK-NEXT:        TotalLifetimeAccessDensity: 100000
 CHECK-NEXT:        MinLifetimeAccessDensity: 100000
 CHECK-NEXT:        MaxLifetimeAccessDensity: 100000
+CHECK-NEXT:        AccessHistogramSize: 0
+CHECK-NEXT:        AccessHistogram: 0
+
+
 CHECK-NEXT:  -
-CHECK-NEXT:    FunctionGUID: 6699318081062747564
+CHECK-NEXT:    FunctionGUID: 1228452328526475178
 CHECK-NEXT:    AllocSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      Callstack:
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 15505678318020221912
-CHECK-NEXT:        SymbolName: qux
+CHECK-NEXT:        Function: 3873612792189045660
+CHECK-NEXT:        SymbolName: _Z3quxi
 CHECK-NEXT:        LineOffset: 1
-CHECK-NEXT:        Column: 15
+CHECK-NEXT:        Column: 23
 CHECK-NEXT:        Inline: 1
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 6699318081062747564
-CHECK-NEXT:        SymbolName: foo
+CHECK-NEXT:        Function: 1228452328526475178
+CHECK-NEXT:        SymbolName: _Z3fooi
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 18
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 16434608426314478903
-CHECK-NEXT:        SymbolName: bar
+CHECK-NEXT:        Function: 3727899762981752933
+CHECK-NEXT:        SymbolName: _Z3bari
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 19
 CHECK-NEXT:        Inline: 0
@@ -129,28 +133,31 @@ CHECK-NEXT:        MaxAccessDensity: 100
 CHECK-NEXT:        TotalLifetimeAccessDensity: 100000
 CHECK-NEXT:        MinLifetimeAccessDensity: 100000
 CHECK-NEXT:        MaxLifetimeAccessDensity: 100000
+CHECK-NEXT:        AccessHistogramSize: 0
+CHECK-NEXT:        AccessHistogram: 0
+
 CHECK-NEXT:    CallSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 15505678318020221912
-CHECK-NEXT:        SymbolName: qux
+CHECK-NEXT:        Function: 3873612792189045660
+CHECK-NEXT:        SymbolName: _Z3quxi
 CHECK-NEXT:        LineOffset: 1
-CHECK-NEXT:        Column: 15
+CHECK-NEXT:        Column: 23
 CHECK-NEXT:        Inline: 1
 CHECK-NEXT:    -
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 6699318081062747564
-CHECK-NEXT:        SymbolName: foo
+CHECK-NEXT:        Function: 1228452328526475178
+CHECK-NEXT:        SymbolName: _Z3fooi
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 18
 CHECK-NEXT:        Inline: 0
 CHECK-NEXT:  -
-CHECK-NEXT:    FunctionGUID: 16434608426314478903
+CHECK-NEXT:    FunctionGUID: 3727899762981752933
 CHECK-NEXT:    CallSites:
 CHECK-NEXT:    -
 CHECK-NEXT:      -
-CHECK-NEXT:        Function: 16434608426314478903
-CHECK-NEXT:        SymbolName: bar
+CHECK-NEXT:        Function: 3727899762981752933
+CHECK-NEXT:        SymbolName: _Z3bari
 CHECK-NEXT:        LineOffset: 0
 CHECK-NEXT:        Column: 19
 CHECK-NEXT:        Inline: 0

diff  --git a/llvm/test/tools/llvm-profdata/memprof-multi.test b/llvm/test/tools/llvm-profdata/memprof-multi.test
index f3cdbd1f5266c..62439823defd0 100644
--- a/llvm/test/tools/llvm-profdata/memprof-multi.test
+++ b/llvm/test/tools/llvm-profdata/memprof-multi.test
@@ -7,7 +7,7 @@ We expect 2 MIB entries, 1 each for the malloc calls in the program.
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:  Summary:
-CHECK-NEXT:    Version: 3
+CHECK-NEXT:    Version: 4
 CHECK-NEXT:    NumSegments: {{[0-9]+}}
 CHECK-NEXT:    NumMibInfo: 2
 CHECK-NEXT:    NumAllocFunctions: 1

diff  --git a/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
new file mode 100644
index 0000000000000..4ba58e3c870d5
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/memprof-padding-histogram.test
@@ -0,0 +1,99 @@
+REQUIRES: x86_64-linux
+
+To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang
+RUN: llvm-profdata show --memory %p/Inputs/padding-histogram.memprofraw --profiled-binary %p/Inputs/padding-histogram.memprofexe -o - | FileCheck %s
+
+We expect 2 different MIBs with histogram values. This test is to make sure we properly deal with padding.
+
+CHECK: MemprofProfile:
+CHECK-NEXT:   Summary:
+CHECK-NEXT:     Version: 4
+CHECK-NEXT:     NumSegments: {{[0-9]+}}
+CHECK-NEXT:     NumMibInfo: 2
+CHECK-NEXT:     NumAllocFunctions: 1
+CHECK-NEXT:     NumStackOffsets: 2
+CHECK-NEXT:   Segments:
+CHECK-NEXT:   -
+CHECK-NEXT:     BuildId: {{[[:xdigit:]]+}}
+CHECK-NEXT:     Start: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     End: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:     Offset: 0x{{[[:xdigit:]]+}}
+CHECK-NEXT:   -
+
+CHECK:   Records:
+CHEC-NEXT    FunctionGUID: {{[0-9]+}}
+CHEC-NEXT    AllocSites:
+CHEC-NEXT    -
+CHEC-NEXT      Callstack:
+CHEC-NEXT      -
+CHEC-NEXT        Function: {{[0-9]+}}
+CHEC-NEXT        SymbolName: main
+CHEC-NEXT        LineOffset: 3
+CHEC-NEXT        Column: 10
+CHEC-NEXT        Inline: 0
+CHEC-NEXT      MemInfoBlock:
+CHEC-NEXT        AllocCount: 1
+CHEC-NEXT        TotalAccessCount: 5
+CHEC-NEXT        MinAccessCount: 5
+CHEC-NEXT        MaxAccessCount: 5
+CHEC-NEXT        TotalSize: 24
+CHEC-NEXT        MinSize: 24
+CHEC-NEXT        MaxSize: 24
+CHEC-NEXT        AllocTimestamp: {{[0-9]+}}
+CHEC-NEXT        DeallocTimestamp: {{[0-9]+}}
+CHEC-NEXT        TotalLifetime: 0
+CHEC-NEXT        MinLifetime: 0
+CHEC-NEXT        MaxLifetime: 0
+CHEC-NEXT        AllocCpuId: 11
+CHEC-NEXT        DeallocCpuId: 11
+CHEC-NEXT        NumMigratedCpu: 0
+CHEC-NEXT        NumLifetimeOverlaps: 0
+CHEC-NEXT        NumSameAllocCpu: 0
+CHEC-NEXT        NumSameDeallocCpu: 0
+CHEC-NEXT        DataTypeId: 0
+CHEC-NEXT        TotalAccessDensity: 20
+CHEC-NEXT        MinAccessDensity: 20
+CHEC-NEXT        MaxAccessDensity: 20
+CHEC-NEXT        TotalLifetimeAccessDensity: 20000
+CHEC-NEXT        MinLifetimeAccessDensity: 20000
+CHEC-NEXT        MaxLifetimeAccessDensity: 20000
+CHEC-NEXT        AccessHistogramSize: 3
+CHEC-NEXT        AccessHistogram: {{[0-9]+}}
+CHEC-NEXT        AccessHistogramValues: -2 -1 -2
+CHEC-NEXT    -
+CHEC-NEXT      Callstack:
+CHEC-NEXT      -
+CHEC-NEXT        Function: {{[0-9]+}}
+CHEC-NEXT        SymbolName: main
+CHEC-NEXT        LineOffset: 10
+CHEC-NEXT        Column: 10
+CHEC-NEXT        Inline: 0
+CHEC-NEXT      MemInfoBlock:
+CHEC-NEXT        AllocCount: 1
+CHEC-NEXT        TotalAccessCount: 4
+CHEC-NEXT        MinAccessCount: 4
+CHEC-NEXT        MaxAccessCount: 4
+CHEC-NEXT        TotalSize: 48
+CHEC-NEXT        MinSize: 48
+CHEC-NEXT        MaxSize: 48
+CHEC-NEXT        AllocTimestamp: {{[0-9]+}}
+CHEC-NEXT        DeallocTimestamp: {{[0-9]+}}
+CHEC-NEXT        TotalLifetime: 0
+CHEC-NEXT        MinLifetime: 0
+CHEC-NEXT        MaxLifetime: 0
+CHEC-NEXT        AllocCpuId: 11
+CHEC-NEXT        DeallocCpuId: 11
+CHEC-NEXT        NumMigratedCpu: 0
+CHEC-NEXT        NumLifetimeOverlaps: 0
+CHEC-NEXT        NumSameAllocCpu: 0
+CHEC-NEXT        NumSameDeallocCpu: 0
+CHEC-NEXT        DataTypeId: 0
+CHEC-NEXT        TotalAccessDensity: 8
+CHEC-NEXT        MinAccessDensity: 8
+CHEC-NEXT        MaxAccessDensity: 8
+CHEC-NEXT        TotalLifetimeAccessDensity: 8000
+CHEC-NEXT        MinLifetimeAccessDensity: 8000
+CHEC-NEXT        MaxLifetimeAccessDensity: 8000
+CHEC-NEXT        AccessHistogramSize: 6
+CHEC-NEXT        AccessHistogram: {{[0-9]+}}
+CHEC-NEXT        AccessHistogramValues: -2 -0 -0 -0 -1 -1
\ No newline at end of file

diff  --git a/llvm/test/tools/llvm-profdata/memprof-pic.test b/llvm/test/tools/llvm-profdata/memprof-pic.test
index 03c98ed9200d9..78d2c5c54feb1 100644
--- a/llvm/test/tools/llvm-profdata/memprof-pic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-pic.test
@@ -11,7 +11,7 @@ RUN: llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/I
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:   Summary:
-CHECK-NEXT:     Version: 3
+CHECK-NEXT:     Version: 4
 CHECK-NEXT:     NumSegments: {{[0-9]+}}
 CHECK-NEXT:     NumMibInfo: 2
 CHECK-NEXT:     NumAllocFunctions: 1
@@ -62,6 +62,9 @@ CHECK-NEXT:         MaxAccessDensity: 20
 CHECK-NEXT:         TotalLifetimeAccessDensity: 20000
 CHECK-NEXT:         MinLifetimeAccessDensity: 20000
 CHECK-NEXT:         MaxLifetimeAccessDensity: 20000
+CHECK-NEXT:         AccessHistogramSize: 0
+CHECK-NEXT:         AccessHistogram: 0
+
 CHECK-NEXT:     -
 CHECK-NEXT:       Callstack:
 CHECK-NEXT:       -
@@ -96,3 +99,5 @@ CHECK-NEXT:         MaxAccessDensity: 20
 CHECK-NEXT:         TotalLifetimeAccessDensity: 20000
 CHECK-NEXT:         MinLifetimeAccessDensity: 20000
 CHECK-NEXT:         MaxLifetimeAccessDensity: 20000
+CHECK-NEXT:         AccessHistogramSize: 0
+CHECK-NEXT:         AccessHistogram: 0
\ No newline at end of file

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 15eb59ee00c94..14623e180f6c8 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -247,7 +247,7 @@ TEST(MemProf, FillsValue) {
 TEST(MemProf, PortableWrapper) {
   MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
                     /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
-                    /*dealloc_cpu=*/4);
+                    /*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0);
 
   const auto Schema = llvm::memprof::getFullSchema();
   PortableMemInfoBlock WriteBlock(Info, Schema);
@@ -276,7 +276,7 @@ TEST(MemProf, RecordSerializationRoundTripVersion0And1) {
 
   MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
                     /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
-                    /*dealloc_cpu=*/4);
+                    /*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0);
 
   llvm::SmallVector<llvm::SmallVector<FrameId>> AllocCallStacks = {
       {0x123, 0x345}, {0x123, 0x567}};
@@ -310,7 +310,7 @@ TEST(MemProf, RecordSerializationRoundTripVerion2) {
 
   MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
                     /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
-                    /*dealloc_cpu=*/4);
+                    /*dealloc_cpu=*/4, /*Histogram=*/0, /*HistogramSize=*/0);
 
   llvm::SmallVector<llvm::memprof::CallStackId> CallStackIds = {0x123, 0x456};
 


        


More information about the llvm-commits mailing list