[llvm] 11314f4 - [memprof] Filter out callstack frames which cannot be symbolized.

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 4 11:10:57 PST 2022


Author: Snehasish Kumar
Date: 2022-03-04T11:10:08-08:00
New Revision: 11314f405903482c5a09e29922db80305e7c20d4

URL: https://github.com/llvm/llvm-project/commit/11314f405903482c5a09e29922db80305e7c20d4
DIFF: https://github.com/llvm/llvm-project/commit/11314f405903482c5a09e29922db80305e7c20d4.diff

LOG: [memprof] Filter out callstack frames which cannot be symbolized.

This patch filters out callstack frames which can't be symbolized or if
the frames belong to the runtime. Symbolization may not be possible if
debug information is unavailable or if the addresses are from a shared
library. For now we only support optimization of the main binary which
is statically linked to the compiler runtime.

Differential Revision: https://reviews.llvm.org/D120860

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/RawMemProfReader.h
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/test/tools/llvm-profdata/memprof-basic.test
    llvm/test/tools/llvm-profdata/memprof-multi.test
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index b0b832a0baef4..944d71386a4b2 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -81,7 +81,7 @@ class RawMemProfReader {
 
     // If there is an error here then the mock symbolizer has not been
     // initialized properly.
-    if (Error E = symbolizeStackFrames())
+    if (Error E = symbolizeAndFilterStackFrames())
       report_fatal_error(std::move(E));
   }
 
@@ -91,7 +91,11 @@ class RawMemProfReader {
       : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {}
   Error initialize();
   Error readRawProfile();
-  Error symbolizeStackFrames();
+  // Symbolize and cache all the virtual addresses we encounter in the
+  // callstacks from the raw profile. Also prune callstack frames which we can't
+  // symbolize or those that belong to the runtime. For profile entries where
+  // the entire callstack is pruned, we drop the entry from the profile.
+  Error symbolizeAndFilterStackFrames();
 
   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
   Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB,

diff  --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 512d6caf7d90d..72c6f2f4a68f0 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -10,10 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <algorithm>
 #include <cstdint>
 #include <type_traits>
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
@@ -26,6 +29,7 @@
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "memprof"
 
@@ -168,6 +172,11 @@ Error report(Error E, const StringRef Context) {
   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                     std::move(E));
 }
+
+bool isRuntimePath(const StringRef Path) {
+  return StringRef(llvm::sys::path::convert_to_slash(Path))
+      .contains("memprof/memprof_");
+}
 } // namespace
 
 Expected<std::unique_ptr<RawMemProfReader>>
@@ -218,6 +227,9 @@ bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
 
 void RawMemProfReader::printYAML(raw_ostream &OS) {
   OS << "MemprofProfile:\n";
+  // TODO: Update printSummaries to print out the data after the profile has
+  // been symbolized and pruned. We can parse some raw profile characteristics
+  // from the data buffer for additional information.
   printSummaries(OS);
   // Print out the merged contents of the profiles.
   OS << "  Records:\n";
@@ -276,19 +288,28 @@ Error RawMemProfReader::initialize() {
   if (Error E = readRawProfile())
     return E;
 
-  return symbolizeStackFrames();
+  return symbolizeAndFilterStackFrames();
 }
 
-Error RawMemProfReader::symbolizeStackFrames() {
+Error RawMemProfReader::symbolizeAndFilterStackFrames() {
   // The specifier to use when symbolization is requested.
   const DILineInfoSpecifier Specifier(
       DILineInfoSpecifier::FileLineInfoKind::RawValue,
       DILineInfoSpecifier::FunctionNameKind::LinkageName);
 
-  for (const auto &Entry : StackMap) {
+  // For entries where all PCs in the callstack are discarded, we erase the
+  // entry from the stack map.
+  llvm::SmallVector<uint64_t> EntriesToErase;
+  // We keep track of all prior discarded entries so that we can avoid invoking
+  // the symbolizer for such entries.
+  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
+  for (auto &Entry : StackMap) {
     for (const uint64_t VAddr : Entry.getSecond()) {
-      // Check if we have already symbolized and cached the result.
-      if (SymbolizedFrame.count(VAddr) > 0)
+      // Check if we have already symbolized and cached the result or if we
+      // don't want to attempt symbolization since we know this address is bad.
+      // In this case the address is also removed from the current callstack.
+      if (SymbolizedFrame.count(VAddr) > 0 ||
+          AllVAddrsToDiscard.contains(VAddr))
         continue;
 
       Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
@@ -297,6 +318,13 @@ Error RawMemProfReader::symbolizeStackFrames() {
         return DIOr.takeError();
       DIInliningInfo DI = DIOr.get();
 
+      // Drop frames which we can't symbolize or if they belong to the runtime.
+      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
+          isRuntimePath(DI.getFrame(0).FileName)) {
+        AllVAddrsToDiscard.insert(VAddr);
+        continue;
+      }
+
       for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
         const auto &Frame = DI.getFrame(I);
         SymbolizedFrame[VAddr].emplace_back(
@@ -311,7 +339,28 @@ Error RawMemProfReader::symbolizeStackFrames() {
             I != 0);
       }
     }
+
+    auto &CallStack = Entry.getSecond();
+    CallStack.erase(std::remove_if(CallStack.begin(), CallStack.end(),
+                                   [&AllVAddrsToDiscard](const uint64_t A) {
+                                     return AllVAddrsToDiscard.contains(A);
+                                   }),
+                    CallStack.end());
+    if (CallStack.empty())
+      EntriesToErase.push_back(Entry.getFirst());
+  }
+
+  // Drop the entries where the callstack is empty.
+  for (const uint64_t Id : EntriesToErase) {
+    StackMap.erase(Id);
+    ProfileData.erase(Id);
   }
+
+  if (StackMap.empty())
+    return make_error<InstrProfError>(
+        instrprof_error::malformed,
+        "no entries in callstack map after symbolization");
+
   return Error::success();
 }
 

diff  --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index ccb5b4a438796..af22c3b6c39b3 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -33,8 +33,8 @@ env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > basic.memprofraw
 
 RUN: llvm-profdata show --memory %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o - | FileCheck %s
 
-We expect 3 MIB entries, 1 each for the malloc calls in the program and one
-additional entry from a realloc in glibc/libio/vasprintf.c.
+We expect 2 MIB entries, 1 each for the malloc calls in the program. Any
+additional allocations which do not originate from the main binary are pruned.
 
 CHECK: MemprofProfile:
 CHECK-NEXT:   -
@@ -49,51 +49,9 @@ CHECK-NEXT:   -
 CHECK-NEXT:     Callstack:
 CHECK-NEXT:     -
 CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 73
-CHECK-NEXT:       Column: 3
-CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 0
-CHECK-NEXT:       Column: 0
-CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     MemInfoBlock:
-CHECK-NEXT:       AllocCount: 1
-CHECK-NEXT:       TotalAccessCount: 0
-CHECK-NEXT:       MinAccessCount: 0
-CHECK-NEXT:       MaxAccessCount: 0
-CHECK-NEXT:       TotalSize: 53
-CHECK-NEXT:       MinSize: 53
-CHECK-NEXT:       MaxSize: 53
-CHECK-NEXT:       AllocTimestamp: 0
-CHECK-NEXT:       DeallocTimestamp: 987
-CHECK-NEXT:       TotalLifetime: 987
-CHECK-NEXT:       MinLifetime: 987
-CHECK-NEXT:       MaxLifetime: 987
-CHECK-NEXT:       AllocCpuId: 4294967295
-CHECK-NEXT:       DeallocCpuId: 56
-CHECK-NEXT:       NumMigratedCpu: 1
-CHECK-NEXT:       NumLifetimeOverlaps: 0
-CHECK-NEXT:       NumSameAllocCpu: 0
-CHECK-NEXT:       NumSameDeallocCpu: 0
-CHECK-NEXT:       DataTypeId: {{[0-9]+}}
-CHECK-NEXT:   -
-CHECK-NEXT:     Callstack:
-CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 57
-CHECK-NEXT:       Column: 3
-CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
 CHECK-NEXT:       LineOffset: 1
 CHECK-NEXT:       Column: 21
 CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 0
-CHECK-NEXT:       Column: 0
-CHECK-NEXT:       Inline: 0
 CHECK-NEXT:     MemInfoBlock:
 CHECK-NEXT:       AllocCount: 1
 CHECK-NEXT:       TotalAccessCount: 2
@@ -118,19 +76,9 @@ CHECK-NEXT:   -
 CHECK-NEXT:     Callstack:
 CHECK-NEXT:     -
 CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 57
-CHECK-NEXT:       Column: 3
-CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
 CHECK-NEXT:       LineOffset: 5
 CHECK-NEXT:       Column: 15
 CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 0
-CHECK-NEXT:       Column: 0
-CHECK-NEXT:       Inline: 0
 CHECK-NEXT:     MemInfoBlock:
 CHECK-NEXT:       AllocCount: 1
 CHECK-NEXT:       TotalAccessCount: 2

diff  --git a/llvm/test/tools/llvm-profdata/memprof-multi.test b/llvm/test/tools/llvm-profdata/memprof-multi.test
index 3643b54039038..17641136dcbed 100644
--- a/llvm/test/tools/llvm-profdata/memprof-multi.test
+++ b/llvm/test/tools/llvm-profdata/memprof-multi.test
@@ -35,8 +35,7 @@ env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > multi.memprofraw
 
 RUN: llvm-profdata show --memory %p/Inputs/multi.memprofraw --profiled-binary %p/Inputs/multi.memprofexe -o - | FileCheck %s
 
-We expect 2 MIB entries, 1 each for the malloc calls in the program. Unlike the
-memprof-basic.test we do not see any allocation from glibc.
+We expect 2 MIB entries, 1 each for the malloc calls in the program.
 
 CHECK: MemprofProfile:
 CHECK-NEXT:   -

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 5fee174404071..fd310428f0692 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -62,6 +62,7 @@ struct MockInfo {
   uint32_t Line;
   uint32_t StartLine;
   uint32_t Column;
+  std::string FileName = "valid/path.cc";
 };
 DIInliningInfo makeInliningInfo(std::initializer_list<MockInfo> MockFrames) {
   DIInliningInfo Result;
@@ -71,6 +72,7 @@ DIInliningInfo makeInliningInfo(std::initializer_list<MockInfo> MockFrames) {
     Frame.Line = Item.Line;
     Frame.StartLine = Item.StartLine;
     Frame.Column = Item.Column;
+    Frame.FileName = Item.FileName;
     Result.addFrame(Frame);
   }
   return Result;
@@ -234,4 +236,58 @@ TEST(MemProf, RecordSerializationRoundTrip) {
   EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0]));
   EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1]));
 }
+
+TEST(MemProf, SymbolizationFilter) {
+  std::unique_ptr<MockSymbolizer> Symbolizer(new MockSymbolizer());
+
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x1000},
+                                                specifier(), false))
+      .Times(1) // once since we don't lookup invalid PCs repeatedly.
+      .WillRepeatedly(Return(makeInliningInfo({
+          {"malloc", 70, 57, 3, "memprof/memprof_malloc_linux.cpp"},
+      })));
+
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000},
+                                                specifier(), false))
+      .Times(1) // once since we don't lookup invalid PCs repeatedly.
+      .WillRepeatedly(Return(makeInliningInfo({
+          {"new", 70, 57, 3, "memprof/memprof_new_delete.cpp"},
+      })));
+
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x3000},
+                                                specifier(), false))
+      .Times(1) // once since we don't lookup invalid PCs repeatedly.
+      .WillRepeatedly(Return(makeInliningInfo({
+          {DILineInfo::BadString, 0, 0, 0},
+      })));
+
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x4000},
+                                                specifier(), false))
+      .Times(1)
+      .WillRepeatedly(Return(makeInliningInfo({
+          {"foo", 10, 5, 30},
+      })));
+
+  CallStackMap CSM;
+  CSM[0x1] = {0x1000, 0x2000, 0x3000, 0x4000};
+  // This entry should be dropped since all PCs are either not
+  // symbolizable or belong to the runtime.
+  CSM[0x2] = {0x1000, 0x2000};
+
+  llvm::MapVector<uint64_t, MemInfoBlock> Prof;
+  Prof[0x1].AllocCount = 1;
+  Prof[0x2].AllocCount = 1;
+
+  auto Seg = makeSegments();
+
+  RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
+
+  std::vector<MemProfRecord> Records;
+  for (const MemProfRecord &R : Reader) {
+    Records.push_back(R);
+  }
+  ASSERT_EQ(Records.size(), 1U);
+  ASSERT_EQ(Records[0].CallStack.size(), 1U);
+  EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false));
+}
 } // namespace


        


More information about the llvm-commits mailing list