[llvm] [ctxprof] don't inline weak symbols after instrumentation (PR #128811)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 25 20:07:38 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Mircea Trofin (mtrofin)
<details>
<summary>Changes</summary>
Contextual profiling identifies functions by GUID. Functions that may get overridden by the linker with a prevailing copy may have, during instrumentation, different variants in different modules. If these variants get inlined before linking (here I assume thinlto), they will identify themselves to the ctxprof runtime as their GUID, leading to issues - they may have different counter counts, for instance.
If we block their inlining in the pre-thinlink compilation, only the prevailing copy will survive post-thinlink and the confusion is avoided.
The change introduces a small pass just for this purpose, which marks any symbols that could be affected by the above as `noinline` (even if they were `alwaysinline`). We already carried out some inlining (via the preinliner), before instrumenting, so technically the `alwaysinline` directives were honored.
We could later (different patch) choose to mark them back to their original attribute (none or `alwaysinline`) post-thinlink, if we want to - but experimentally that doesn't really change much of the performance of the instrumented binary.
---
Full diff: https://github.com/llvm/llvm-project/pull/128811.diff
6 Files Affected:
- (modified) llvm/include/llvm/IR/GlobalValue.h (+7)
- (modified) llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h (+15)
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7)
- (modified) llvm/lib/Passes/PassRegistry.def (+1)
- (modified) llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp (+20)
- (added) llvm/test/Transforms/PGOProfile/ctx-instrumentation-block-inline.ll (+25)
``````````diff
diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index 2176e2c2cfbfc..c1d3cee636981 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -550,6 +550,13 @@ class GlobalValue : public Constant {
return isDiscardableIfUnused(getLinkage());
}
+ // the symbol in this module may be replaced by a prevailing copy.
+ bool mayBeReplacedByPrevailingCopy() const {
+ return getLinkage() != GlobalValue::ExternalLinkage &&
+ getLinkage() != GlobalValue::InternalLinkage &&
+ getLinkage() != GlobalValue::PrivateLinkage;
+ }
+
bool isWeakForLinker() const { return isWeakForLinker(getLinkage()); }
protected:
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
index f127d16b8de12..8010c1c091e40 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
@@ -24,5 +24,20 @@ class PGOCtxProfLoweringPass : public PassInfoMixin<PGOCtxProfLoweringPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};
+
+// Utility pass blocking inlining for any function that may be overridden during
+// linking by a prevailing copy.
+// This avoids confusingly collecting profiles for the same GUID corresponding
+// to different variants of the function. We could do like PGO and identify
+// functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
+// thinlto to happen before performing any further optimizations, it's
+// unnecessary to collect profiles for non-prevailing copies.
+class NoinlineNonPrevailing : public PassInfoMixin<NoinlineNonPrevailing> {
+public:
+ explicit NoinlineNonPrevailing() = default;
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+
} // namespace llvm
#endif
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6ca2f90aa0668..f0541dbfe3597 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1249,6 +1249,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(AssignGUIDPass());
if (IsCtxProfUse)
return MPM;
+ // Block further inlining in the instrumented ctxprof case. This avoids
+ // confusingly collecting profiles for the same GUID corresponding to
+ // different variants of the function. We could do like PGO and identify
+ // functions by a (GUID, Hash) tuple, but since the ctxprof "use" waits for
+ // thinlto to happen before performing any further optimizations, it's
+ // unnecessary to collect profiles for non-prevailing copies.
+ MPM.addPass(NoinlineNonPrevailing());
addPostPGOLoopRotation(MPM, Level);
MPM.addPass(PGOCtxProfLoweringPass());
} else if (IsColdFuncOnlyInstrGen) {
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index a664d6fd7085f..bfd952df25e98 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -62,6 +62,7 @@ MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("ctx-instr-gen",
PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
+MODULE_PASS("noinline-nonprevailing", NoinlineNonPrevailing())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("debugify", NewPMDebugifyPass())
MODULE_PASS("dfsan", DataFlowSanitizerPass())
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index e7b7c26c493e5..19d899b08150e 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -351,3 +351,23 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
F.getName());
return true;
}
+
+PreservedAnalyses NoinlineNonPrevailing::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ bool Changed = false;
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ if (F.hasFnAttribute(Attribute::NoInline))
+ continue;
+ if (F.mayBeReplacedByPrevailingCopy()) {
+ F.addFnAttr(Attribute::NoInline);
+ if (F.hasFnAttribute(Attribute::AlwaysInline))
+ F.removeFnAttr(Attribute::AlwaysInline);
+ Changed = true;
+ }
+ }
+ if (Changed)
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation-block-inline.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation-block-inline.ll
new file mode 100644
index 0000000000000..b455f327bf5a6
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation-block-inline.ll
@@ -0,0 +1,25 @@
+; RUN: opt -passes=noinline-nonprevailing -S < %s 2>&1 | FileCheck %s
+
+define void @a() {
+ ret void
+}
+
+define void @b() #0 {
+ ret void
+}
+
+define weak_odr void @c() {
+ ret void
+}
+
+define weak_odr void @d() #0{
+ ret void
+}
+
+attributes #0 = { alwaysinline }
+
+; CHECK: void @a() {
+; CHECK: void @b() #0
+; CHECK: void @c() #1
+; CHECK: void @d() #1
+; CHECK: attributes #1 = { noinline }
``````````
</details>
https://github.com/llvm/llvm-project/pull/128811
More information about the llvm-commits
mailing list