[clang] [llvm] [InstrPGO] Instrument sampling profile based cold function (PR #109837)
Lei Wang via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 2 14:34:16 PDT 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/109837
>From 07a2cab3fa5df2965f9f7da9ee2d3603581a47f1 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 22 Sep 2024 20:23:20 -0700
Subject: [PATCH 1/3] [InstrPGO] Instrument sampling profile based cold
function
---
clang/include/clang/Driver/Options.td | 6 +++++
clang/lib/Driver/ToolChain.cpp | 4 +++-
clang/lib/Driver/ToolChains/Clang.cpp | 18 ++++++++++++++
clang/test/CodeGen/Inputs/pgo-cold-func.prof | 2 ++
.../test/CodeGen/pgo-cold-function-coverage.c | 12 ++++++++++
llvm/lib/Passes/PassBuilderPipelines.cpp | 18 ++++++++++++++
.../Instrumentation/PGOInstrumentation.cpp | 16 +++++++++++++
.../PGOProfile/instr-gen-cold-function.ll | 24 +++++++++++++++++++
8 files changed, 99 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CodeGen/Inputs/pgo-cold-func.prof
create mode 100644 clang/test/CodeGen/pgo-cold-function-coverage.c
create mode 100644 llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 002f60350543d9..6bb92427f2d53f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1784,6 +1784,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
PosFlag<SetTrue, [], [ClangOption, CC1Option],
"Emit extra debug info to make sample profile more accurate">,
NegFlag<SetFalse>>;
+def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">,
+ Group<f_Group>, Visibility<[ClangOption, CLOption]>,
+ HelpText<"Generate instrumented code to cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
+def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">,
+ Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<directory>">,
+ HelpText<"Generate instrumented code to cold functions into <directory>/default.profraw (overridden by LLVM_PROFILE_FILE env var)">;
def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
Group<f_Group>, Visibility<[ClangOption, CLOption]>,
HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 16f9b629fc538c..e56db150c6b47e 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -889,7 +889,9 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
Args.hasArg(options::OPT_fprofile_instr_generate) ||
Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
Args.hasArg(options::OPT_fcreate_profile) ||
- Args.hasArg(options::OPT_forder_file_instrumentation);
+ Args.hasArg(options::OPT_forder_file_instrumentation) ||
+ Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage) ||
+ Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage_EQ);
}
bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 0bab48caf1a5e2..00602a08232ba2 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -649,6 +649,24 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
}
}
+ if (auto *ColdFuncCoverageArg = Args.getLastArg(
+ options::OPT_fprofile_generate_cold_function_coverage,
+ options::OPT_fprofile_generate_cold_function_coverage_EQ)) {
+ SmallString<128> Path(
+ ColdFuncCoverageArg->getOption().matches(
+ options::OPT_fprofile_generate_cold_function_coverage_EQ)
+ ? ColdFuncCoverageArg->getValue()
+ : "");
+ llvm::sys::path::append(Path, "default_%m.profraw");
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back(Args.MakeArgString(
+ Twine("--instrument-sample-cold-function-path=") + Path));
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("--instrument-cold-function-coverage");
+ CmdArgs.push_back("-mllvm");
+ CmdArgs.push_back("--pgo-function-entry-coverage");
+ }
+
Arg *PGOGenArg = nullptr;
if (PGOGenerateArg) {
assert(!CSPGOGenerateArg);
diff --git a/clang/test/CodeGen/Inputs/pgo-cold-func.prof b/clang/test/CodeGen/Inputs/pgo-cold-func.prof
new file mode 100644
index 00000000000000..e50be02e0a8545
--- /dev/null
+++ b/clang/test/CodeGen/Inputs/pgo-cold-func.prof
@@ -0,0 +1,2 @@
+foo:1:1
+ 1: 1
diff --git a/clang/test/CodeGen/pgo-cold-function-coverage.c b/clang/test/CodeGen/pgo-cold-function-coverage.c
new file mode 100644
index 00000000000000..0d2767c022b9f1
--- /dev/null
+++ b/clang/test/CodeGen/pgo-cold-function-coverage.c
@@ -0,0 +1,12 @@
+// Test -fprofile-generate-cold-function-coverage
+// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%S/Inputs/pgo-cold-func.prof -S -emit-llvm -o - %s | FileCheck %s
+
+// CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00"
+
+// CHECK: @__profc_bar
+// CHECK-NOT: @__profc_foo
+
+int bar(int x) { return x;}
+int foo(int x) {
+ return x;
+}
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 8f151a99b11709..f75abe0c7c7649 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -296,7 +296,13 @@ static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
+static cl::opt<std::string> InstrumentSampleColdFuncPath(
+ "instrument-sample-cold-function-path", cl::init(""),
+ cl::desc("File path for instrumenting sampling PGO guided cold functions"),
+ cl::Hidden);
+
extern cl::opt<std::string> UseCtxProfile;
+extern cl::opt<bool> InstrumentColdFunctionCoverage;
namespace llvm {
extern cl::opt<bool> EnableMemProfContextDisambiguation;
@@ -1119,6 +1125,18 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// removed.
MPM.addPass(
PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
+
+ if (InstrumentSampleColdFuncPath.getNumOccurrences() &&
+ Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
+ assert(!InstrumentSampleColdFuncPath.empty() &&
+ "File path is requeired for instrumentation generation");
+ InstrumentColdFunctionCoverage = true;
+ addPreInlinerPasses(MPM, Level, Phase);
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+ /* IsCS */ false, /* AtomicCounterUpdate */ false,
+ InstrumentSampleColdFuncPath, "",
+ IntrusiveRefCntPtr<vfs::FileSystem>());
+ }
}
// Try to perform OpenMP specific optimizations on the module. This is a
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 10442fa0bb9003..890ad853d86461 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -319,6 +319,18 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
cl::desc("Do not instrument functions with the number of critical edges "
" greater than this threshold."));
+cl::opt<bool> InstrumentColdFunctionCoverage(
+ "instrument-cold-function-coverage", cl::init(false), cl::Hidden,
+ cl::desc("Enable cold function coverage instrumentation (currently only "
+ "used under sampling "
+ " PGO pipeline))"));
+
+static cl::opt<uint64_t> ColdFuncCoverageMaxEntryCount(
+ "cold-function-coverage-max-entry-count", cl::init(0), cl::Hidden,
+ cl::desc("When enabling cold function coverage instrumentation, skip "
+ "instrumenting the "
+ "function whose entry count is above the given value"));
+
extern cl::opt<unsigned> MaxNumVTableAnnotations;
namespace llvm {
@@ -1891,6 +1903,10 @@ static bool skipPGOGen(const Function &F) {
return true;
if (F.getInstructionCount() < PGOFunctionSizeThreshold)
return true;
+ if (InstrumentColdFunctionCoverage &&
+ (!F.getEntryCount() ||
+ F.getEntryCount()->getCount() > ColdFuncCoverageMaxEntryCount))
+ return true;
return false;
}
diff --git a/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
new file mode 100644
index 00000000000000..ba1a11f7184356
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function-coverage -S | FileCheck --check-prefixes=COLD %s
+; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function-coverage -cold-function-coverage-max-entry-count=1 -S | FileCheck --check-prefixes=ENTRY-COUNT %s
+
+; COLD: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
+; COLD-NOT: __profn_main
+
+; ENTRY-COUNT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
+; ENTRY-COUNT: call void @llvm.instrprof.increment(ptr @__profn_main, i64 [[#]], i32 1, i32 0)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo() !prof !0 {
+entry:
+ ret void
+}
+
+define i32 @main() !prof !1 {
+entry:
+ ret i32 0
+}
+
+!0 = !{!"function_entry_count", i64 0}
+!1 = !{!"function_entry_count", i64 1}
>From b3054f05007d60900cb02301cf59711c68e70120 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 1 Oct 2024 15:51:37 -0700
Subject: [PATCH 2/3] fix lint in cl desc msg
---
.../Instrumentation/PGOInstrumentation.cpp | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 890ad853d86461..aba1d8cc88c224 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -319,17 +319,16 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
cl::desc("Do not instrument functions with the number of critical edges "
" greater than this threshold."));
-cl::opt<bool> InstrumentColdFunctionCoverage(
- "instrument-cold-function-coverage", cl::init(false), cl::Hidden,
- cl::desc("Enable cold function coverage instrumentation (currently only "
- "used under sampling "
- " PGO pipeline))"));
-
static cl::opt<uint64_t> ColdFuncCoverageMaxEntryCount(
"cold-function-coverage-max-entry-count", cl::init(0), cl::Hidden,
cl::desc("When enabling cold function coverage instrumentation, skip "
- "instrumenting the "
- "function whose entry count is above the given value"));
+ "instrumenting the function whose entry count is above the given "
+ "value"));
+
+cl::opt<bool> InstrumentColdFunctionCoverage(
+ "instrument-cold-function-coverage", cl::init(false), cl::Hidden,
+ cl::desc("Enable cold function coverage instrumentation (currently only "
+ "used under sampling PGO pipeline)"));
extern cl::opt<unsigned> MaxNumVTableAnnotations;
>From 5a2848cd196d167570c4067ff171cc038a70c762 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 2 Oct 2024 10:13:52 -0700
Subject: [PATCH 3/3] address comments, ad fix test, add driver test,
refactoring the addPGOInstrPasses
---
clang/test/CodeGen/Inputs/pgo-cold-func.prof | 2 --
.../test/CodeGen/pgo-cold-function-coverage.c | 13 +++++++--
...fprofile-generate-cold-function-coverage.c | 9 ++++++
llvm/lib/Passes/PassBuilderPipelines.cpp | 28 ++++++++++---------
.../PGOProfile/instr-gen-cold-function.ll | 10 +++----
5 files changed, 39 insertions(+), 23 deletions(-)
delete mode 100644 clang/test/CodeGen/Inputs/pgo-cold-func.prof
create mode 100644 clang/test/Driver/fprofile-generate-cold-function-coverage.c
diff --git a/clang/test/CodeGen/Inputs/pgo-cold-func.prof b/clang/test/CodeGen/Inputs/pgo-cold-func.prof
deleted file mode 100644
index e50be02e0a8545..00000000000000
--- a/clang/test/CodeGen/Inputs/pgo-cold-func.prof
+++ /dev/null
@@ -1,2 +0,0 @@
-foo:1:1
- 1: 1
diff --git a/clang/test/CodeGen/pgo-cold-function-coverage.c b/clang/test/CodeGen/pgo-cold-function-coverage.c
index 0d2767c022b9f1..43ddc125a56ee7 100644
--- a/clang/test/CodeGen/pgo-cold-function-coverage.c
+++ b/clang/test/CodeGen/pgo-cold-function-coverage.c
@@ -1,11 +1,18 @@
// Test -fprofile-generate-cold-function-coverage
-// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%S/Inputs/pgo-cold-func.prof -S -emit-llvm -o - %s | FileCheck %s
+
+// RUN: split-file %s %t
+// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s
// CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00"
-// CHECK: @__profc_bar
-// CHECK-NOT: @__profc_foo
+// CHECK: store i8 0, ptr @__profc_bar, align 1
+// CHECK-NOT: @__profc_foo
+
+//--- pgo-cold-func.prof
+foo:1:1
+ 1: 1
+//--- pgo-cold-func.c
int bar(int x) { return x;}
int foo(int x) {
return x;
diff --git a/clang/test/Driver/fprofile-generate-cold-function-coverage.c b/clang/test/Driver/fprofile-generate-cold-function-coverage.c
new file mode 100644
index 00000000000000..76b1ee4353cdbf
--- /dev/null
+++ b/clang/test/Driver/fprofile-generate-cold-function-coverage.c
@@ -0,0 +1,9 @@
+// RUN: %clang -### -c -fprofile-generate-cold-function-coverage %s 2>&1 | FileCheck %s
+// CHECK: "--instrument-sample-cold-function-path=default_%m.profraw"
+// CHECK: "--instrument-cold-function-coverage"
+// CHECK: "--pgo-function-entry-coverage"
+// CHECK-NOT: "-fprofile-instrument"
+// CHECK-NOT: "-fprofile-instrument-path=
+
+// RUN: %clang -### -c -fprofile-generate-cold-function-coverage=dir %s 2>&1 | FileCheck %s --check-prefix=CHECK-PATH
+// CHECK-PATH: "--instrument-sample-cold-function-path=dir{{/|\\\\}}default_%m.profraw"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index f75abe0c7c7649..be543708a8fe29 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1125,18 +1125,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// removed.
MPM.addPass(
PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
-
- if (InstrumentSampleColdFuncPath.getNumOccurrences() &&
- Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
- assert(!InstrumentSampleColdFuncPath.empty() &&
- "File path is requeired for instrumentation generation");
- InstrumentColdFunctionCoverage = true;
- addPreInlinerPasses(MPM, Level, Phase);
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ false, /* AtomicCounterUpdate */ false,
- InstrumentSampleColdFuncPath, "",
- IntrusiveRefCntPtr<vfs::FileSystem>());
- }
}
// Try to perform OpenMP specific optimizations on the module. This is a
@@ -1202,8 +1190,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
const bool IsCtxProfUse =
!UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
+ // Enable sampling-based cold function coverage instrumentation if
+ // InstrumentSampleColdFuncPath is provided.
+ const bool IsSampleColdFuncCovGen = InstrumentColdFunctionCoverage =
+ IsPGOPreLink && LoadSampleProfile &&
+ !InstrumentSampleColdFuncPath.empty();
+ assert((InstrumentSampleColdFuncPath.empty() || LoadSampleProfile) &&
+ "Sampling-based cold functon coverage is not supported without "
+ "providing sampling PGO profile");
+
if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
- IsCtxProfUse)
+ IsCtxProfUse || IsSampleColdFuncCovGen)
addPreInlinerPasses(MPM, Level, Phase);
// Add all the requested passes for instrumentation PGO, if requested.
@@ -1225,6 +1222,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
return MPM;
addPostPGOLoopRotation(MPM, Level);
MPM.addPass(PGOCtxProfLoweringPass());
+ } else if (IsSampleColdFuncCovGen) {
+ addPGOInstrPasses(
+ MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
+ /* AtomicCounterUpdate */ false, InstrumentSampleColdFuncPath,
+ /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
}
if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
diff --git a/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
index ba1a11f7184356..336fceef8d3a6b 100644
--- a/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
+++ b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function-coverage -S | FileCheck --check-prefixes=COLD %s
-; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function-coverage -cold-function-coverage-max-entry-count=1 -S | FileCheck --check-prefixes=ENTRY-COUNT %s
+; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function-coverage -pgo-function-entry-coverage -S | FileCheck --check-prefixes=COLD %s
+; RUN: opt < %s --passes=pgo-instr-gen -instrument-cold-function-coverage -pgo-function-entry-coverage -cold-function-coverage-max-entry-count=1 -S | FileCheck --check-prefixes=ENTRY-COUNT %s
-; COLD: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
+; COLD: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
; COLD-NOT: __profn_main
-; ENTRY-COUNT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
-; ENTRY-COUNT: call void @llvm.instrprof.increment(ptr @__profn_main, i64 [[#]], i32 1, i32 0)
+; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_foo, i64 [[#]], i32 1, i32 0)
+; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_main, i64 [[#]], i32 1, i32 0)
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
More information about the cfe-commits
mailing list