[llvm] 87f5e22 - [MemProf] Tolerate missing leaf debug frames (#71233)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 3 21:01:11 PDT 2023


Author: Teresa Johnson
Date: 2023-11-03T21:01:07-07:00
New Revision: 87f5e22987f509b7bd5b67eb2a19697508077f25

URL: https://github.com/llvm/llvm-project/commit/87f5e22987f509b7bd5b67eb2a19697508077f25
DIFF: https://github.com/llvm/llvm-project/commit/87f5e22987f509b7bd5b67eb2a19697508077f25.diff

LOG: [MemProf] Tolerate missing leaf debug frames (#71233)

Loosen up the matching so that a missing leaf debug frame in the profile
does not prevent matching an allocation context if we can match further
up the inlined call context. This relies on the pre-inliner, which was
already the default when performing normal PGO feedback along with the
MemProf feedback, but to ensure matching is not affected by the presence
of PGO, enable the pre-inliner for MemProf feedback as well.

Added: 
    llvm/test/Other/new-pm-memprof.ll
    llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
    llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll

Modified: 
    llvm/include/llvm/Passes/PassBuilder.h
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
    llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 2c7ceda7998eda1..23bc891a8f1e97c 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -628,11 +628,16 @@ class PassBuilder {
   Error parseModulePassPipeline(ModulePassManager &MPM,
                                 ArrayRef<PipelineElement> Pipeline);
 
+  // Adds passes to do pre-inlining and related cleanup passes before
+  // profile instrumentation/matching (to enable better context sensitivity),
+  // and for memprof to enable better matching with missing debug frames.
+  void addPreInlinerPasses(ModulePassManager &MPM, OptimizationLevel Level,
+                           ThinOrFullLTOPhase LTOPhase);
+
   void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level,
                          bool RunProfileGen, bool IsCS,
                          bool AtomicCounterUpdate, std::string ProfileFile,
                          std::string ProfileRemappingFile,
-                         ThinOrFullLTOPhase LTOPhase,
                          IntrusiveRefCntPtr<vfs::FileSystem> FS);
 
   // Extension Point callbacks

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index baea2913338cda7..f4a4802bcb649f3 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -733,47 +733,52 @@ void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
   MPM.addPass(NameAnonGlobalPass());
 }
 
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
-                                    OptimizationLevel Level, bool RunProfileGen,
-                                    bool IsCS, bool AtomicCounterUpdate,
-                                    std::string ProfileFile,
-                                    std::string ProfileRemappingFile,
-                                    ThinOrFullLTOPhase LTOPhase,
-                                    IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
+                                      OptimizationLevel Level,
+                                      ThinOrFullLTOPhase LTOPhase) {
   assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
-  if (!IsCS && !DisablePreInliner) {
-    InlineParams IP;
+  if (DisablePreInliner)
+    return;
+  InlineParams IP;
 
-    IP.DefaultThreshold = PreInlineThreshold;
+  IP.DefaultThreshold = PreInlineThreshold;
 
-    // FIXME: The hint threshold has the same value used by the regular inliner
-    // when not optimzing for size. This should probably be lowered after
-    // performance testing.
-    // FIXME: this comment is cargo culted from the old pass manager, revisit).
-    IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
-    ModuleInlinerWrapperPass MIWP(
-        IP, /* MandatoryFirst */ true,
-        InlineContext{LTOPhase, InlinePass::EarlyInliner});
-    CGSCCPassManager &CGPipeline = MIWP.getPM();
+  // FIXME: The hint threshold has the same value used by the regular inliner
+  // when not optimzing for size. This should probably be lowered after
+  // performance testing.
+  // FIXME: this comment is cargo culted from the old pass manager, revisit).
+  IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
+  ModuleInlinerWrapperPass MIWP(
+      IP, /* MandatoryFirst */ true,
+      InlineContext{LTOPhase, InlinePass::EarlyInliner});
+  CGSCCPassManager &CGPipeline = MIWP.getPM();
 
-    FunctionPassManager FPM;
-    FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
-    FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
-    FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
-        true)));                    // Merge & remove basic blocks.
-    FPM.addPass(InstCombinePass()); // Combine silly sequences.
-    invokePeepholeEPCallbacks(FPM, Level);
+  FunctionPassManager FPM;
+  FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+  FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
+  FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+      true)));                    // Merge & remove basic blocks.
+  FPM.addPass(InstCombinePass()); // Combine silly sequences.
+  invokePeepholeEPCallbacks(FPM, Level);
 
-    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
-        std::move(FPM), PTO.EagerlyInvalidateAnalyses));
+  CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+      std::move(FPM), PTO.EagerlyInvalidateAnalyses));
 
-    MPM.addPass(std::move(MIWP));
+  MPM.addPass(std::move(MIWP));
 
-    // Delete anything that is now dead to make sure that we don't instrument
-    // dead code. Instrumentation can end up keeping dead code around and
-    // dramatically increase code size.
-    MPM.addPass(GlobalDCEPass());
-  }
+  // Delete anything that is now dead to make sure that we don't instrument
+  // dead code. Instrumentation can end up keeping dead code around and
+  // dramatically increase code size.
+  MPM.addPass(GlobalDCEPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
+                                    OptimizationLevel Level, bool RunProfileGen,
+                                    bool IsCS, bool AtomicCounterUpdate,
+                                    std::string ProfileFile,
+                                    std::string ProfileRemappingFile,
+                                    IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+  assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
 
   if (!RunProfileGen) {
     assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
@@ -1104,6 +1109,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
                                                 PTO.EagerlyInvalidateAnalyses));
 
+  // Invoke the pre-inliner passes for instrumentation PGO or MemProf.
+  if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+      (PGOOpt->Action == PGOOptions::IRInstr ||
+       PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
+    addPreInlinerPasses(MPM, Level, Phase);
+
   // Add all the requested passes for instrumentation PGO, if requested.
   if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
       (PGOOpt->Action == PGOOptions::IRInstr ||
@@ -1111,7 +1122,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
     addPGOInstrPasses(MPM, Level,
                       /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
                       /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
-                      PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase,
+                      PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                       PGOOpt->FS);
     MPM.addPass(PGOIndirectCallPromotion(false, false));
   }
@@ -1332,12 +1343,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
-                        LTOPhase, PGOOpt->FS);
+                        PGOOpt->FS);
     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
-                        LTOPhase, PGOOpt->FS);
+                        PGOOpt->FS);
   }
 
   // Re-compute GlobalsAA here prior to function passes. This is particularly
@@ -1831,12 +1842,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                         PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
-                        ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
+                        PGOOpt->FS);
     else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
       addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
                         /*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
                         PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
-                        ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
+                        PGOOpt->FS);
   }
 
   // Break up allocas

diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 2b29ea2a65fdc87..eb9866727deef59 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -808,19 +808,19 @@ static void readMemprof(Module &M, Function &F,
         auto CalleeGUID = Function::getGUID(Name);
         auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
                                       ProfileHasColumns ? DIL->getColumn() : 0);
-        // LeafFound will only be false on the first iteration, since we either
-        // set it true or break out of the loop below.
+        // Check if we have found the profile's leaf frame. If yes, collect
+        // the rest of the call's inlined context starting here. If not, see if
+        // we find a match further up the inlined context (in case the profile
+        // was missing debug frames at the leaf).
         if (!LeafFound) {
           AllocInfoIter = LocHashToAllocInfo.find(StackId);
           CallSitesIter = LocHashToCallSites.find(StackId);
-          // Check if the leaf is in one of the maps. If not, no need to look
-          // further at this call.
-          if (AllocInfoIter == LocHashToAllocInfo.end() &&
-              CallSitesIter == LocHashToCallSites.end())
-            break;
-          LeafFound = true;
+          if (AllocInfoIter != LocHashToAllocInfo.end() ||
+              CallSitesIter != LocHashToCallSites.end())
+            LeafFound = true;
         }
-        InlinedCallStack.push_back(StackId);
+        if (LeafFound)
+          InlinedCallStack.push_back(StackId);
       }
       // If leaf not in either of the maps, skip inst.
       if (!LeafFound)

diff  --git a/llvm/test/Other/new-pm-memprof.ll b/llvm/test/Other/new-pm-memprof.ll
new file mode 100644
index 000000000000000..c98a1fdd35d8d5e
--- /dev/null
+++ b/llvm/test/Other/new-pm-memprof.ll
@@ -0,0 +1,12 @@
+;; Ensure we invoke the preinliner when feeding back a memprof profile.
+
+;; The opt invocation will fail as the profdata file is empty, which is fine
+;; since we are simply testing the pass pipeline below.
+; RUN: not opt -debug-pass-manager -passes='default<O2>' -memory-profile-file=/dev/null %s 2>&1 | FileCheck %s
+
+; CHECK: Running pass: InlinerPass on (foo)
+; CHECK: Running pass: MemProfUsePass
+
+define void @foo() {
+  ret void
+}

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
new file mode 100755
index 000000000000000..212f8f8ecce7668
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe 
diff er

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
new file mode 100644
index 000000000000000..3a06639d3a2bec8
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw 
diff er

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
index cbf9a401a607235..4261c2c5fbe3a3d 100755
--- a/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
+++ b/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
@@ -94,3 +94,36 @@ ${LLVMPROFDATA} merge --text ${OUTDIR}/memprof_pgo.profraw -o ${OUTDIR}/memprof_
 rm ${OUTDIR}/memprof.cc
 rm ${OUTDIR}/pgo.exe
 rm ${OUTDIR}/memprof_pgo.profraw
+
+# Use musttail to simulate a missing leaf debug frame in the profiled binary.
+# Note we don't currently match onto explicit ::operator new calls, which is
+# why the non-musttail case uses implicit new (which doesn't support musttail).
+# Note that changes in the code below which affect relative line number
+# offsets of calls from their parent function can affect callsite matching in
+# the LLVM IR.
+cat > ${OUTDIR}/memprof_missing_leaf.cc << EOF
+#include <new>
+#ifndef USE_MUSTTAIL
+#define USE_MUSTTAIL 0
+#endif
+
+// clang::musttail requires that the argument signature matches that of the caller.
+void *bar(std::size_t s) {
+#if USE_MUSTTAIL
+  [[clang::musttail]] return ::operator new (s);
+#else
+  return new char[s];
+#endif
+}
+
+int main() {
+  char *a = (char *)bar(1);
+  delete a;
+  return 0;
+}
+EOF
+
+${CLANG} ${COMMON_FLAGS} -fmemory-profile -DUSE_MUSTTAIL=1 ${OUTDIR}/memprof_missing_leaf.cc -o ${OUTDIR}/memprof_missing_leaf.exe
+env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_missing_leaf.exe > ${OUTDIR}/memprof_missing_leaf.memprofraw
+
+rm ${OUTDIR}/memprof_missing_leaf.cc

diff  --git a/llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll b/llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll
new file mode 100644
index 000000000000000..e46945b763b1d09
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll
@@ -0,0 +1,88 @@
+;; Tests memprof profile matching when the leaf frame is missing in the
+;; profile. In this case the call to operator new was inlined before
+;; matching and we are able to match the next call frame up the inlined
+;; context.
+
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once that is available for memprof.
+;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
+;; # To generate below LLVM IR for use in matching.
+;; $ clang++ -gmlt -fdebug-info-for-profiling -S memprof_missing_leaf.cc \
+;; 	-O2 -emit-llvm
+
+; RUN: llvm-profdata merge %S/Inputs/memprof_missing_leaf.memprofraw --profiled-binary %S/Inputs/memprof_missing_leaf.exe -o %t.memprofdata
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s
+
+; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
+; CHECK: attributes #[[ATTR]] = {{.*}} "memprof"="notcold"
+
+; ModuleID = '<stdin>'
+source_filename = "memprof_missing_leaf.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) #0
+
+; Function Attrs: mustprogress norecurse uwtable
+define dso_local noundef i32 @main() #1 !dbg !8 {
+entry:
+  %s.addr.i = alloca i64, align 8
+  %retval = alloca i32, align 4
+  %a = alloca ptr, align 8
+  store i32 0, ptr %retval, align 4
+  store i64 1, ptr %s.addr.i, align 8, !tbaa !11
+  %0 = load i64, ptr %s.addr.i, align 8, !dbg !15, !tbaa !11
+  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef %0) #3, !dbg !18
+  store ptr %call.i, ptr %a, align 8, !dbg !19, !tbaa !20
+  %1 = load ptr, ptr %a, align 8, !dbg !22, !tbaa !20
+  %isnull = icmp eq ptr %1, null, !dbg !23
+  br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !23
+
+delete.notnull:                                   ; preds = %entry
+  call void @_ZdlPv(ptr noundef %1) #4, !dbg !23
+  br label %delete.end, !dbg !23
+
+delete.end:                                       ; preds = %delete.notnull, %entry
+  ret i32 0, !dbg !24
+}
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdlPv(ptr noundef) #2
+
+attributes #0 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nobuiltin nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { builtin allocsize(0) }
+attributes #4 = { builtin nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 (git at github.com:llvm/llvm-project.git 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof_missing_leaf.cc", directory: ".", checksumkind: CSK_MD5, checksum: "f1445a8699406a6b826128704d257677")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 15, type: !9, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = !DISubroutineType(types: !10)
+!10 = !{}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"long", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C++ TBAA"}
+!15 = !DILocation(line: 11, column: 19, scope: !16, inlinedAt: !17)
+!16 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barm", scope: !1, file: !1, line: 7, type: !9, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!17 = distinct !DILocation(line: 16, column: 21, scope: !8)
+!18 = !DILocation(line: 11, column: 10, scope: !16, inlinedAt: !17)
+!19 = !DILocation(line: 16, column: 9, scope: !8)
+!20 = !{!21, !21, i64 0}
+!21 = !{!"any pointer", !13, i64 0}
+!22 = !DILocation(line: 17, column: 10, scope: !8)
+!23 = !DILocation(line: 17, column: 3, scope: !8)
+!24 = !DILocation(line: 18, column: 3, scope: !8)


        


More information about the llvm-commits mailing list