[llvm] 87f5e22 - [MemProf] Tolerate missing leaf debug frames (#71233)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 3 21:01:11 PDT 2023
Author: Teresa Johnson
Date: 2023-11-03T21:01:07-07:00
New Revision: 87f5e22987f509b7bd5b67eb2a19697508077f25
URL: https://github.com/llvm/llvm-project/commit/87f5e22987f509b7bd5b67eb2a19697508077f25
DIFF: https://github.com/llvm/llvm-project/commit/87f5e22987f509b7bd5b67eb2a19697508077f25.diff
LOG: [MemProf] Tolerate missing leaf debug frames (#71233)
Loosen up the matching so that a missing leaf debug frame in the profile
does not prevent matching an allocation context if we can match further
up the inlined call context. This relies on the pre-inliner, which was
already the default when performing normal PGO feedback along with the
MemProf feedback, but to ensure matching is not affected by the presence
of PGO, enable the pre-inliner for MemProf feedback as well.
Added:
llvm/test/Other/new-pm-memprof.ll
llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll
Modified:
llvm/include/llvm/Passes/PassBuilder.h
llvm/lib/Passes/PassBuilderPipelines.cpp
llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
Removed:
################################################################################
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 2c7ceda7998eda1..23bc891a8f1e97c 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -628,11 +628,16 @@ class PassBuilder {
Error parseModulePassPipeline(ModulePassManager &MPM,
ArrayRef<PipelineElement> Pipeline);
+ // Adds passes to do pre-inlining and related cleanup passes before
+ // profile instrumentation/matching (to enable better context sensitivity),
+ // and for memprof to enable better matching with missing debug frames.
+ void addPreInlinerPasses(ModulePassManager &MPM, OptimizationLevel Level,
+ ThinOrFullLTOPhase LTOPhase);
+
void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level,
bool RunProfileGen, bool IsCS,
bool AtomicCounterUpdate, std::string ProfileFile,
std::string ProfileRemappingFile,
- ThinOrFullLTOPhase LTOPhase,
IntrusiveRefCntPtr<vfs::FileSystem> FS);
// Extension Point callbacks
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index baea2913338cda7..f4a4802bcb649f3 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -733,47 +733,52 @@ void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
MPM.addPass(NameAnonGlobalPass());
}
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
- OptimizationLevel Level, bool RunProfileGen,
- bool IsCS, bool AtomicCounterUpdate,
- std::string ProfileFile,
- std::string ProfileRemappingFile,
- ThinOrFullLTOPhase LTOPhase,
- IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase LTOPhase) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
- if (!IsCS && !DisablePreInliner) {
- InlineParams IP;
+ if (DisablePreInliner)
+ return;
+ InlineParams IP;
- IP.DefaultThreshold = PreInlineThreshold;
+ IP.DefaultThreshold = PreInlineThreshold;
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimzing for size. This should probably be lowered after
- // performance testing.
- // FIXME: this comment is cargo culted from the old pass manager, revisit).
- IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
- ModuleInlinerWrapperPass MIWP(
- IP, /* MandatoryFirst */ true,
- InlineContext{LTOPhase, InlinePass::EarlyInliner});
- CGSCCPassManager &CGPipeline = MIWP.getPM();
+ // FIXME: The hint threshold has the same value used by the regular inliner
+ // when not optimzing for size. This should probably be lowered after
+ // performance testing.
+ // FIXME: this comment is cargo culted from the old pass manager, revisit).
+ IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
+ ModuleInlinerWrapperPass MIWP(
+ IP, /* MandatoryFirst */ true,
+ InlineContext{LTOPhase, InlinePass::EarlyInliner});
+ CGSCCPassManager &CGPipeline = MIWP.getPM();
- FunctionPassManager FPM;
- FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
- FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
- true))); // Merge & remove basic blocks.
- FPM.addPass(InstCombinePass()); // Combine silly sequences.
- invokePeepholeEPCallbacks(FPM, Level);
+ FunctionPassManager FPM;
+ FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+ FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
+ true))); // Merge & remove basic blocks.
+ FPM.addPass(InstCombinePass()); // Combine silly sequences.
+ invokePeepholeEPCallbacks(FPM, Level);
- CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- std::move(FPM), PTO.EagerlyInvalidateAnalyses));
+ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ std::move(FPM), PTO.EagerlyInvalidateAnalyses));
- MPM.addPass(std::move(MIWP));
+ MPM.addPass(std::move(MIWP));
- // Delete anything that is now dead to make sure that we don't instrument
- // dead code. Instrumentation can end up keeping dead code around and
- // dramatically increase code size.
- MPM.addPass(GlobalDCEPass());
- }
+ // Delete anything that is now dead to make sure that we don't instrument
+ // dead code. Instrumentation can end up keeping dead code around and
+ // dramatically increase code size.
+ MPM.addPass(GlobalDCEPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
+ OptimizationLevel Level, bool RunProfileGen,
+ bool IsCS, bool AtomicCounterUpdate,
+ std::string ProfileFile,
+ std::string ProfileRemappingFile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
@@ -1104,6 +1109,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
PTO.EagerlyInvalidateAnalyses));
+ // Invoke the pre-inliner passes for instrumentation PGO or MemProf.
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
+ addPreInlinerPasses(MPM, Level, Phase);
+
// Add all the requested passes for instrumentation PGO, if requested.
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
(PGOOpt->Action == PGOOptions::IRInstr ||
@@ -1111,7 +1122,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level,
/*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
/*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
- PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase,
+ PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
PGOOpt->FS);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
@@ -1332,12 +1343,12 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
- LTOPhase, PGOOpt->FS);
+ PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
- LTOPhase, PGOOpt->FS);
+ PGOOpt->FS);
}
// Re-compute GlobalsAA here prior to function passes. This is particularly
@@ -1831,12 +1842,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
+ PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
/*IsCS=*/true, PGOOpt->AtomicCounterUpdate,
PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
+ PGOOpt->FS);
}
// Break up allocas
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 2b29ea2a65fdc87..eb9866727deef59 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -808,19 +808,19 @@ static void readMemprof(Module &M, Function &F,
auto CalleeGUID = Function::getGUID(Name);
auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),
ProfileHasColumns ? DIL->getColumn() : 0);
- // LeafFound will only be false on the first iteration, since we either
- // set it true or break out of the loop below.
+ // Check if we have found the profile's leaf frame. If yes, collect
+ // the rest of the call's inlined context starting here. If not, see if
+ // we find a match further up the inlined context (in case the profile
+ // was missing debug frames at the leaf).
if (!LeafFound) {
AllocInfoIter = LocHashToAllocInfo.find(StackId);
CallSitesIter = LocHashToCallSites.find(StackId);
- // Check if the leaf is in one of the maps. If not, no need to look
- // further at this call.
- if (AllocInfoIter == LocHashToAllocInfo.end() &&
- CallSitesIter == LocHashToCallSites.end())
- break;
- LeafFound = true;
+ if (AllocInfoIter != LocHashToAllocInfo.end() ||
+ CallSitesIter != LocHashToCallSites.end())
+ LeafFound = true;
}
- InlinedCallStack.push_back(StackId);
+ if (LeafFound)
+ InlinedCallStack.push_back(StackId);
}
// If leaf not in either of the maps, skip inst.
if (!LeafFound)
diff --git a/llvm/test/Other/new-pm-memprof.ll b/llvm/test/Other/new-pm-memprof.ll
new file mode 100644
index 000000000000000..c98a1fdd35d8d5e
--- /dev/null
+++ b/llvm/test/Other/new-pm-memprof.ll
@@ -0,0 +1,12 @@
+;; Ensure we invoke the preinliner when feeding back a memprof profile.
+
+;; The opt invocation will fail as the profdata file is empty, which is fine
+;; since we are simply testing the pass pipeline below.
+; RUN: not opt -debug-pass-manager -passes='default<O2>' -memory-profile-file=/dev/null %s 2>&1 | FileCheck %s
+
+; CHECK: Running pass: InlinerPass on (foo)
+; CHECK: Running pass: MemProfUsePass
+
+define void @foo() {
+ ret void
+}
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
new file mode 100755
index 000000000000000..212f8f8ecce7668
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.exe
diff er
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
new file mode 100644
index 000000000000000..3a06639d3a2bec8
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_missing_leaf.memprofraw
diff er
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh b/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
index cbf9a401a607235..4261c2c5fbe3a3d 100755
--- a/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
+++ b/llvm/test/Transforms/PGOProfile/Inputs/update_memprof_inputs.sh
@@ -94,3 +94,36 @@ ${LLVMPROFDATA} merge --text ${OUTDIR}/memprof_pgo.profraw -o ${OUTDIR}/memprof_
rm ${OUTDIR}/memprof.cc
rm ${OUTDIR}/pgo.exe
rm ${OUTDIR}/memprof_pgo.profraw
+
+# Use musttail to simulate a missing leaf debug frame in the profiled binary.
+# Note we don't currently match onto explicit ::operator new calls, which is
+# why the non-musttail case uses implicit new (which doesn't support musttail).
+# Note that changes in the code below which affect relative line number
+# offsets of calls from their parent function can affect callsite matching in
+# the LLVM IR.
+cat > ${OUTDIR}/memprof_missing_leaf.cc << EOF
+#include <new>
+#ifndef USE_MUSTTAIL
+#define USE_MUSTTAIL 0
+#endif
+
+// clang::musttail requires that the argument signature matches that of the caller.
+void *bar(std::size_t s) {
+#if USE_MUSTTAIL
+ [[clang::musttail]] return ::operator new (s);
+#else
+ return new char[s];
+#endif
+}
+
+int main() {
+ char *a = (char *)bar(1);
+ delete a;
+ return 0;
+}
+EOF
+
+${CLANG} ${COMMON_FLAGS} -fmemory-profile -DUSE_MUSTTAIL=1 ${OUTDIR}/memprof_missing_leaf.cc -o ${OUTDIR}/memprof_missing_leaf.exe
+env MEMPROF_OPTIONS=log_path=stdout ${OUTDIR}/memprof_missing_leaf.exe > ${OUTDIR}/memprof_missing_leaf.memprofraw
+
+rm ${OUTDIR}/memprof_missing_leaf.cc
diff --git a/llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll b/llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll
new file mode 100644
index 000000000000000..e46945b763b1d09
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof_missing_leaf.ll
@@ -0,0 +1,88 @@
+;; Tests memprof profile matching when the leaf frame is missing in the
+;; profile. In this case the call to operator new was inlined before
+;; matching and we are able to match the next call frame up the inlined
+;; context.
+
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once that is available for memprof.
+;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
+;; # To generate below LLVM IR for use in matching.
+;; $ clang++ -gmlt -fdebug-info-for-profiling -S memprof_missing_leaf.cc \
+;; -O2 -emit-llvm
+
+; RUN: llvm-profdata merge %S/Inputs/memprof_missing_leaf.memprofraw --profiled-binary %S/Inputs/memprof_missing_leaf.exe -o %t.memprofdata
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -S | FileCheck %s
+
+; CHECK: call {{.*}} @_Znam{{.*}} #[[ATTR:[0-9]+]]
+; CHECK: attributes #[[ATTR]] = {{.*}} "memprof"="notcold"
+
+; ModuleID = '<stdin>'
+source_filename = "memprof_missing_leaf.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) #0
+
+; Function Attrs: mustprogress norecurse uwtable
+define dso_local noundef i32 @main() #1 !dbg !8 {
+entry:
+ %s.addr.i = alloca i64, align 8
+ %retval = alloca i32, align 4
+ %a = alloca ptr, align 8
+ store i32 0, ptr %retval, align 4
+ store i64 1, ptr %s.addr.i, align 8, !tbaa !11
+ %0 = load i64, ptr %s.addr.i, align 8, !dbg !15, !tbaa !11
+ %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef %0) #3, !dbg !18
+ store ptr %call.i, ptr %a, align 8, !dbg !19, !tbaa !20
+ %1 = load ptr, ptr %a, align 8, !dbg !22, !tbaa !20
+ %isnull = icmp eq ptr %1, null, !dbg !23
+ br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !23
+
+delete.notnull: ; preds = %entry
+ call void @_ZdlPv(ptr noundef %1) #4, !dbg !23
+ br label %delete.end, !dbg !23
+
+delete.end: ; preds = %delete.notnull, %entry
+ ret i32 0, !dbg !24
+}
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdlPv(ptr noundef) #2
+
+attributes #0 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress norecurse uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { nobuiltin nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { builtin allocsize(0) }
+attributes #4 = { builtin nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 18.0.0 (git at github.com:llvm/llvm-project.git 71bf052ec90e77cb4aa66505d47cbc4b6016ac1d)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof_missing_leaf.cc", directory: ".", checksumkind: CSK_MD5, checksum: "f1445a8699406a6b826128704d257677")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 15, type: !9, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = !DISubroutineType(types: !10)
+!10 = !{}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"long", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C++ TBAA"}
+!15 = !DILocation(line: 11, column: 19, scope: !16, inlinedAt: !17)
+!16 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barm", scope: !1, file: !1, line: 7, type: !9, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!17 = distinct !DILocation(line: 16, column: 21, scope: !8)
+!18 = !DILocation(line: 11, column: 10, scope: !16, inlinedAt: !17)
+!19 = !DILocation(line: 16, column: 9, scope: !8)
+!20 = !{!21, !21, i64 0}
+!21 = !{!"any pointer", !13, i64 0}
+!22 = !DILocation(line: 17, column: 10, scope: !8)
+!23 = !DILocation(line: 17, column: 3, scope: !8)
+!24 = !DILocation(line: 18, column: 3, scope: !8)
More information about the llvm-commits
mailing list