[clang] [flang] [mlir] [LLVM-Flang][Options] Add support for '-fprofile-sample-use' option (PR #188697)
Kaviya Rajendiran via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 26 00:27:00 PDT 2026
https://github.com/kaviya2510 created https://github.com/llvm/llvm-project/pull/188697
This patch adds support for the "-fprofile-sample-use=<sample.prof>" option in llvm-flang.
When the `-fprofile-sample-use=sample.prof` option is passed, the compiler records the profile file path in `SampleProfileFile`. This value is later used by the `SampleProfileLoaderPass`, which loads the sample profile and injects the corresponding profiling metadata into the LLVM IR.
>From 7b04d397d715598d2baa2855dcc5e2af657ade09 Mon Sep 17 00:00:00 2001
From: Kaviya Rajendiran <kaviyara2000 at gmail.com>
Date: Thu, 26 Mar 2026 12:26:57 +0530
Subject: [PATCH] [LLVM-Flang][Options] Add support for '-fprofile-sample-use'
option
---
clang/include/clang/Options/Options.td | 4 +--
clang/lib/Driver/ToolChains/Flang.cpp | 23 +++++++++++++
flang/include/flang/Frontend/CodeGenOptions.h | 3 ++
.../flang/Optimizer/Transforms/Passes.td | 4 +++
flang/include/flang/Tools/CrossToolHelpers.h | 2 ++
flang/lib/Frontend/CompilerInvocation.cpp | 3 ++
flang/lib/Frontend/FrontendActions.cpp | 6 ++++
flang/lib/Optimizer/Passes/Pipelines.cpp | 4 +--
.../lib/Optimizer/Transforms/FunctionAttr.cpp | 4 +++
flang/test/Driver/Inputs/pgo-sample.prof | 2 ++
flang/test/Driver/fprofile-sample-use.f90 | 30 ++++++++++++++++
flang/test/Integration/inputs/pgo-sample.prof | 2 ++
flang/test/Integration/profile-sample-use.f90 | 34 +++++++++++++++++++
mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 3 +-
mlir/lib/Target/LLVMIR/ModuleImport.cpp | 3 ++
mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 3 ++
16 files changed, 125 insertions(+), 5 deletions(-)
create mode 100644 flang/test/Driver/Inputs/pgo-sample.prof
create mode 100644 flang/test/Driver/fprofile-sample-use.f90
create mode 100644 flang/test/Integration/inputs/pgo-sample.prof
create mode 100644 flang/test/Integration/profile-sample-use.f90
diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 215d4e885709c..13fafa1c01bf5 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -1768,10 +1768,10 @@ defm gnu_inline_asm : BoolFOption<"gnu-inline-asm",
PosFlag<SetTrue>>;
def fno_profile_sample_use : Flag<["-"], "fno-profile-sample-use">, Group<f_Group>,
- Visibility<[ClangOption, CLOption]>;
+ Visibility<[ClangOption, CLOption, FlangOption, FC1Option]>;
def fprofile_sample_use_EQ : Joined<["-"], "fprofile-sample-use=">,
Group<f_Group>,
- Visibility<[ClangOption, CLOption, CC1Option]>,
+ Visibility<[ClangOption, CLOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Enable sample-based profile guided optimizations">,
MarshallingInfoString<CodeGenOpts<"SampleProfileFile">>;
def fprofile_sample_accurate : Flag<["-"], "fprofile-sample-accurate">,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index c7716b52009f6..97e9425d13ea2 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -986,6 +986,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
const Driver &D = TC.getDriver();
ArgStringList CmdArgs;
+ bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+ bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
+
// Invoke ourselves in -fc1 mode.
CmdArgs.push_back("-fc1");
@@ -1099,6 +1102,26 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
Args.addAllArgs(
CmdArgs, {options::OPT_fprofile_generate, options::OPT_fprofile_use_EQ});
+ if (!(IsCudaDevice || IsHIPDevice)) {
+ // Recognise options: -fprofile-sample-use= and -fno-profile-sample-use
+ if (Arg *A = getLastProfileSampleUseArg(Args)) {
+
+ auto *PGOArg = Args.getLastArg(options::OPT_fprofile_generate,
+ options::OPT_fprofile_generate_EQ);
+
+ if (PGOArg) {
+ D.Diag(diag::err_drv_argument_not_allowed_with)
+ << PGOArg->getAsString(Args) << A->getAsString(Args);
+ }
+
+ StringRef fname = A->getValue();
+ if (!llvm::sys::fs::exists(fname))
+ D.Diag(diag::err_drv_no_such_file) << fname;
+ else
+ A->render(Args, CmdArgs);
+ }
+ }
+
// Forward flags for OpenMP. We don't do this if the current action is an
// device offloading action other than OpenMP.
if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h b/flang/include/flang/Frontend/CodeGenOptions.h
index 0fc0063128547..ac3392667d79e 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -181,6 +181,9 @@ class CodeGenOptions : public CodeGenOptionsBase {
/// Output filename for the split debug info, not used in the skeleton CU.
std::string SplitDwarfOutput;
+ /// Name of the profile file to use with -fprofile-sample-use.
+ std::string SampleProfileFile;
+
/// Check if Clang profile instrumenation is on.
bool hasProfileClangInstr() const {
return getProfileInstr() == llvm::driver::ProfileClangInstr;
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 82d89c4df42c3..1d636038a72ce 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -463,6 +463,10 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
/*default=*/"",
"Set the prefer-vector-width attribute on functions in the "
"module.">,
+ Option<"UseSampleProfile", "use-sample-profile", "bool",
+ /*default=*/"false",
+ "Set the use-sample-profile attribute on functions in the "
+ "module.">,
Option<"tuneCPU", "tune-cpu", "std::string", /*default=*/"",
"Set the tune-cpu attribute on functions in the module.">,
Option<"setNoCapture", "set-nocapture", "bool", /*default=*/"false",
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 4415a0417be01..9c868c67099f5 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -106,6 +106,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
ApproxFuncFPMath && mathOpts.getFPContractEnabled();
Reciprocals = opts.Reciprocals;
PreferVectorWidth = opts.PreferVectorWidth;
+ UseSampleProfile = !opts.SampleProfileFile.empty();
DebugInfoForProfiling = opts.DebugInfoForProfiling;
if (opts.InstrumentFunctions) {
InstrumentFunctionEntry = "__cyg_profile_func_enter";
@@ -140,6 +141,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
///< functions.
bool NSWOnLoopVarInc = true; ///< Add nsw flag to loop variable increments.
bool EnableOpenMP = false; ///< Enable OpenMP lowering.
+ bool UseSampleProfile = false; /// Enable sample based profiling
bool DebugInfoForProfiling = false; /// Enable extra debugging info
bool EnableOpenMPSimd = false; ///< Enable OpenMP simd-only mode.
bool SkipConvertComplexPow = false; ///< Do not run complex pow conversion.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 8d99fc9d4403e..4190be358b8fa 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -469,6 +469,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
opts.ProfileInstrumentUsePath = A->getValue();
}
+ opts.SampleProfileFile =
+ args.getLastArgValue(clang::options::OPT_fprofile_sample_use_EQ);
+
// -mcmodel option.
if (const llvm::opt::Arg *a =
args.getLastArg(clang::options::OPT_mcmodel_EQ)) {
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index ec9e8f48693fe..4e058786a9a72 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -991,6 +991,12 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
llvm::PGOOptions::NoAction,
llvm::PGOOptions::NoCSAction,
llvm::PGOOptions::ColdFuncOpt::Default, true);
+ } else if (!opts.SampleProfileFile.empty()) {
+ pgoOpt = llvm::PGOOptions(
+ opts.SampleProfileFile, "", opts.ProfileRemappingFile,
+ opts.MemoryProfileUsePath, llvm::PGOOptions::SampleUse,
+ llvm::PGOOptions::NoCSAction, llvm::PGOOptions::ColdFuncOpt::Default,
+ opts.DebugInfoForProfiling, /*PseudoProbeForProfiling=*/false);
}
llvm::StandardInstrumentations si(llvmModule->getContext(),
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 2216fc0c68494..73e647a1c3956 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -420,8 +420,8 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
{framePointerKind, config.InstrumentFunctionEntry,
config.InstrumentFunctionExit, config.NoInfsFPMath, config.NoNaNsFPMath,
config.ApproxFuncFPMath, config.NoSignedZerosFPMath, config.UnsafeFPMath,
- config.Reciprocals, config.PreferVectorWidth, /*tuneCPU=*/"",
- setNoCapture, setNoAlias}));
+ config.Reciprocals, config.PreferVectorWidth, config.UseSampleProfile,
+ /*tuneCPU=*/"", setNoCapture, setNoAlias}));
if (config.EnableOpenMP) {
pm.addNestedPass<mlir::func::FuncOp>(
diff --git a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
index 4655ed6ed0d40..c4e5d49d7e64f 100644
--- a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
+++ b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
@@ -99,6 +99,10 @@ void FunctionAttrPass::runOnOperation() {
func->setAttr(
mlir::LLVM::LLVMFuncOp::getPreferVectorWidthAttrName(llvmFuncOpName),
mlir::StringAttr::get(context, preferVectorWidth));
+ if (UseSampleProfile)
+ func->setAttr(
+ mlir::LLVM::LLVMFuncOp::getUseSampleProfileAttrName(llvmFuncOpName),
+ mlir::BoolAttr::get(context, true));
LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
}
diff --git a/flang/test/Driver/Inputs/pgo-sample.prof b/flang/test/Driver/Inputs/pgo-sample.prof
new file mode 100644
index 0000000000000..1ba66a38d713b
--- /dev/null
+++ b/flang/test/Driver/Inputs/pgo-sample.prof
@@ -0,0 +1,2 @@
+hot_:100:100
+ 2: 100
diff --git a/flang/test/Driver/fprofile-sample-use.f90 b/flang/test/Driver/fprofile-sample-use.f90
new file mode 100644
index 0000000000000..f2271081a019d
--- /dev/null
+++ b/flang/test/Driver/fprofile-sample-use.f90
@@ -0,0 +1,30 @@
+! Test to check the working of option "-fprofile-sample-use".
+
+! RUN: %flang -### -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s 2>&1 | FileCheck %s --check-prefix=PROFILE-SAMPLE-USE
+! RUN: %flang -### %s 2>&1 | FileCheck %s --check-prefix=NO-PROFILE-SAMPLE-USE
+! RUN: %flang -### -fprofile-sample-use=%S/Inputs/pgo-sample.prof -fno-profile-sample-use %s 2>&1 | FileCheck %s --check-prefix=NO-PROFILE-SAMPLE-USE
+! RUN: %flang -### -fno-profile-sample-use %s 2>&1 | FileCheck %s --check-prefix=NO-PROFILE-SAMPLE-USE
+! RUN: not %flang -fsyntax-only -fprofile-sample-use=%t/missing-profile.prof %s 2>&1 | FileCheck %s --check-prefix=PROFILE-SAMPLE-USE-NO-FILE
+! RUN: not %flang -fsyntax-only -fprofile-generate -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s 2>&1 | FileCheck %s --check-prefix=PROFILE-SAMPLE-USE-ERROR
+
+! PROFILE-SAMPLE-USE: "-fprofile-sample-use={{.*}}/Inputs/pgo-sample.prof"
+! NO-PROFILE-SAMPLE-USE-NOT: "-fprofile-sample-use"
+! PROFILE-SAMPLE-USE-NO-FILE: error: no such file or directory: {{.*}}missing-profile.prof{{.*}}
+! PROFILE-SAMPLE-USE-ERROR: error: invalid argument '-fprofile-generate' not allowed with '-fprofile-sample-use={{.*}}'
+
+integer function hot(x)
+ integer, intent(in) :: x
+ hot = x*2
+end function hot
+
+integer function cold(x)
+ integer, intent(in) :: x
+ cold = x - 10
+end function
+
+program test_sample_use
+ integer :: i, r
+ do i = 1, 100
+ r = hot(i)
+ end do
+ end program test_sample_use
diff --git a/flang/test/Integration/inputs/pgo-sample.prof b/flang/test/Integration/inputs/pgo-sample.prof
new file mode 100644
index 0000000000000..1ba66a38d713b
--- /dev/null
+++ b/flang/test/Integration/inputs/pgo-sample.prof
@@ -0,0 +1,2 @@
+hot_:100:100
+ 2: 100
diff --git a/flang/test/Integration/profile-sample-use.f90 b/flang/test/Integration/profile-sample-use.f90
new file mode 100644
index 0000000000000..c92a2f235a0ab
--- /dev/null
+++ b/flang/test/Integration/profile-sample-use.f90
@@ -0,0 +1,34 @@
+! Test to check the working of option "-fprofile-sample-use".
+! RUN: %flang -S -emit-llvm -g -fprofile-sample-use=%S/inputs/pgo-sample.prof -o - %s | FileCheck %s
+
+! CHECK: attributes #[[A:.*]] = { {{.*}}"use-sample-profile"{{.*}} }
+! CHECK: !{i32 {{.*}}, !"ProfileSummary"{{.*}}}
+! CHECK: !{!"ProfileFormat", !"SampleProfile"}
+! CHECK: !{!"TotalCount", i64 100}
+! CHECK: !{!"MaxCount", i64 100}
+! CHECK: !{!"MaxInternalCount", i64 0}
+! CHECK: !{!"MaxFunctionCount", i64 100}
+! CHECK: !{!"NumCounts", i64 1}
+! CHECK: !{!"NumFunctions", i64 1}
+! CHECK: !{!"IsPartialProfile", i64 0}
+! CHECK: !{!"PartialProfileRatio", double 0.000000e+00}
+! CHECK: distinct !DISubprogram(name: "hot", linkageName: "hot_", scope: !1
+! CHECK: !{!"function_entry_count", i64 101}
+
+integer function hot(x)
+ integer, intent(in) :: x
+ hot = x * 2
+end function hot
+
+integer function cold(x)
+ integer, intent(in) :: x
+ cold = x - 10
+end function cold
+
+program test_sample_use
+ implicit none
+ integer :: i, r
+ do i = 1, 100
+ r = hot(i)
+ end do
+end program test_sample_use
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 75c47f087f78e..c3cf417d6f0b9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -2051,7 +2051,8 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
OptionalAttr<DenseI32ArrayAttr>:$work_group_size_hint,
OptionalAttr<DenseI32ArrayAttr>:$reqd_work_group_size,
OptionalAttr<I32Attr>:$intel_reqd_sub_group_size,
- OptionalAttr<UWTableKindAttr>:$uwtable_kind
+ OptionalAttr<UWTableKindAttr>:$uwtable_kind,
+ OptionalAttr<BoolAttr>:$use_sample_profile
);
let regions = (region AnyRegion:$body);
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 25aaccecc56a2..4ea16800d8982 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -2880,6 +2880,9 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func,
.value()));
}
+ if (func->hasFnAttribute("use-sample-profile"))
+ funcOp.setUseSampleProfile(true);
+
if (llvm::Attribute attr = func->getFnAttribute("target-cpu");
attr.isStringAttribute())
funcOp.setTargetCpuAttr(StringAttr::get(context, attr.getValueAsString()));
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index f4a6e5f6fc8f6..cf398f151ed0b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1564,6 +1564,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
if (auto preferVectorWidth = func.getPreferVectorWidth())
llvmFunc->addFnAttr("prefer-vector-width", *preferVectorWidth);
+ if (func.getUseSampleProfile())
+ llvmFunc->addFnAttr("use-sample-profile");
+
if (auto attr = func.getVscaleRange())
llvmFunc->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
getLLVMContext(), attr->getMinRange().getInt(),
More information about the cfe-commits
mailing list