[clang] [flang] [mlir] [LLVM-Flang][Options] Add support for '-fprofile-sample-use' option (PR #188697)

Kaviya Rajendiran via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 26 00:27:00 PDT 2026


https://github.com/kaviya2510 created https://github.com/llvm/llvm-project/pull/188697

This patch adds support for the `-fprofile-sample-use=<sample.prof>` option in llvm-flang.

When the `-fprofile-sample-use=sample.prof` option is passed, the compiler records the profile file path in `SampleProfileFile`. This value is later used by the `SampleProfileLoaderPass`, which loads the sample profile and injects the corresponding profiling metadata into the LLVM IR.

>From 7b04d397d715598d2baa2855dcc5e2af657ade09 Mon Sep 17 00:00:00 2001
From: Kaviya Rajendiran <kaviyara2000 at gmail.com>
Date: Thu, 26 Mar 2026 12:26:57 +0530
Subject: [PATCH] [LLVM-Flang][Options] Add support for '-fprofile-sample-use'
 option

---
 clang/include/clang/Options/Options.td        |  4 +--
 clang/lib/Driver/ToolChains/Flang.cpp         | 23 +++++++++++++
 flang/include/flang/Frontend/CodeGenOptions.h |  3 ++
 .../flang/Optimizer/Transforms/Passes.td      |  4 +++
 flang/include/flang/Tools/CrossToolHelpers.h  |  2 ++
 flang/lib/Frontend/CompilerInvocation.cpp     |  3 ++
 flang/lib/Frontend/FrontendActions.cpp        |  6 ++++
 flang/lib/Optimizer/Passes/Pipelines.cpp      |  4 +--
 .../lib/Optimizer/Transforms/FunctionAttr.cpp |  4 +++
 flang/test/Driver/Inputs/pgo-sample.prof      |  2 ++
 flang/test/Driver/fprofile-sample-use.f90     | 30 ++++++++++++++++
 flang/test/Integration/inputs/pgo-sample.prof |  2 ++
 flang/test/Integration/profile-sample-use.f90 | 34 +++++++++++++++++++
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  3 +-
 mlir/lib/Target/LLVMIR/ModuleImport.cpp       |  3 ++
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp  |  3 ++
 16 files changed, 125 insertions(+), 5 deletions(-)
 create mode 100644 flang/test/Driver/Inputs/pgo-sample.prof
 create mode 100644 flang/test/Driver/fprofile-sample-use.f90
 create mode 100644 flang/test/Integration/inputs/pgo-sample.prof
 create mode 100644 flang/test/Integration/profile-sample-use.f90

diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td
index 215d4e885709c..13fafa1c01bf5 100644
--- a/clang/include/clang/Options/Options.td
+++ b/clang/include/clang/Options/Options.td
@@ -1768,10 +1768,10 @@ defm gnu_inline_asm : BoolFOption<"gnu-inline-asm",
   PosFlag<SetTrue>>;
 
 def fno_profile_sample_use : Flag<["-"], "fno-profile-sample-use">, Group<f_Group>,
-    Visibility<[ClangOption, CLOption]>;
+    Visibility<[ClangOption, CLOption, FlangOption, FC1Option]>;
 def fprofile_sample_use_EQ : Joined<["-"], "fprofile-sample-use=">,
     Group<f_Group>,
-    Visibility<[ClangOption, CLOption, CC1Option]>,
+    Visibility<[ClangOption, CLOption, CC1Option, FlangOption, FC1Option]>,
     HelpText<"Enable sample-based profile guided optimizations">,
     MarshallingInfoString<CodeGenOpts<"SampleProfileFile">>;
 def fprofile_sample_accurate : Flag<["-"], "fprofile-sample-accurate">,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index c7716b52009f6..97e9425d13ea2 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -986,6 +986,9 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
   const Driver &D = TC.getDriver();
   ArgStringList CmdArgs;
 
+  bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
+  bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
+
   // Invoke ourselves in -fc1 mode.
   CmdArgs.push_back("-fc1");
 
@@ -1099,6 +1102,26 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA,
   Args.addAllArgs(
       CmdArgs, {options::OPT_fprofile_generate, options::OPT_fprofile_use_EQ});
 
+  if (!(IsCudaDevice || IsHIPDevice)) {
+    // recognise options: -fprofile-sample-use= and -fno-profile-sample-use
+    if (Arg *A = getLastProfileSampleUseArg(Args)) {
+
+      auto *PGOArg = Args.getLastArg(options::OPT_fprofile_generate,
+                                     options::OPT_fprofile_generate_EQ);
+
+      if (PGOArg) {
+        D.Diag(diag::err_drv_argument_not_allowed_with)
+            << PGOArg->getAsString(Args) << A->getAsString(Args);
+      }
+
+      StringRef fname = A->getValue();
+      if (!llvm::sys::fs::exists(fname))
+        D.Diag(diag::err_drv_no_such_file) << fname;
+      else
+        A->render(Args, CmdArgs);
+    }
+  }
+
   // Forward flags for OpenMP. We don't do this if the current action is an
   // device offloading action other than OpenMP.
   if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h b/flang/include/flang/Frontend/CodeGenOptions.h
index 0fc0063128547..ac3392667d79e 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -181,6 +181,9 @@ class CodeGenOptions : public CodeGenOptionsBase {
   /// Output filename for the split debug info, not used in the skeleton CU.
   std::string SplitDwarfOutput;
 
+  /// Name of the profile file to use with -fprofile-sample-use.
+  std::string SampleProfileFile;
+
   /// Check if Clang profile instrumenation is on.
   bool hasProfileClangInstr() const {
     return getProfileInstr() == llvm::driver::ProfileClangInstr;
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 82d89c4df42c3..1d636038a72ce 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -463,6 +463,10 @@ def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
               /*default=*/"",
               "Set the prefer-vector-width attribute on functions in the "
               "module.">,
+       Option<"UseSampleProfile", "use-sample-profile", "bool",
+              /*default=*/"false",
+              "Set the use-sample-profile attribute on functions in the "
+              "module.">,
        Option<"tuneCPU", "tune-cpu", "std::string", /*default=*/"",
               "Set the tune-cpu attribute on functions in the module.">,
        Option<"setNoCapture", "set-nocapture", "bool", /*default=*/"false",
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 4415a0417be01..9c868c67099f5 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -106,6 +106,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
         ApproxFuncFPMath && mathOpts.getFPContractEnabled();
     Reciprocals = opts.Reciprocals;
     PreferVectorWidth = opts.PreferVectorWidth;
+    UseSampleProfile = !opts.SampleProfileFile.empty();
     DebugInfoForProfiling = opts.DebugInfoForProfiling;
     if (opts.InstrumentFunctions) {
       InstrumentFunctionEntry = "__cyg_profile_func_enter";
@@ -140,6 +141,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
                                       ///< functions.
   bool NSWOnLoopVarInc = true; ///< Add nsw flag to loop variable increments.
   bool EnableOpenMP = false; ///< Enable OpenMP lowering.
+  bool UseSampleProfile = false; /// Enable sample based profiling
   bool DebugInfoForProfiling = false; /// Enable extra debugging info
   bool EnableOpenMPSimd = false; ///< Enable OpenMP simd-only mode.
   bool SkipConvertComplexPow = false; ///< Do not run complex pow conversion.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 8d99fc9d4403e..4190be358b8fa 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -469,6 +469,9 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
     opts.ProfileInstrumentUsePath = A->getValue();
   }
 
+  opts.SampleProfileFile =
+      args.getLastArgValue(clang::options::OPT_fprofile_sample_use_EQ);
+
   // -mcmodel option.
   if (const llvm::opt::Arg *a =
           args.getLastArg(clang::options::OPT_mcmodel_EQ)) {
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index ec9e8f48693fe..4e058786a9a72 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -991,6 +991,12 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
                               llvm::PGOOptions::NoAction,
                               llvm::PGOOptions::NoCSAction,
                               llvm::PGOOptions::ColdFuncOpt::Default, true);
+  } else if (!opts.SampleProfileFile.empty()) {
+    pgoOpt = llvm::PGOOptions(
+        opts.SampleProfileFile, "", opts.ProfileRemappingFile,
+        opts.MemoryProfileUsePath, llvm::PGOOptions::SampleUse,
+        llvm::PGOOptions::NoCSAction, llvm::PGOOptions::ColdFuncOpt::Default,
+        opts.DebugInfoForProfiling, /*PseudoProbeForProfiling=*/false);
   }
 
   llvm::StandardInstrumentations si(llvmModule->getContext(),
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 2216fc0c68494..73e647a1c3956 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -420,8 +420,8 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
       {framePointerKind, config.InstrumentFunctionEntry,
        config.InstrumentFunctionExit, config.NoInfsFPMath, config.NoNaNsFPMath,
        config.ApproxFuncFPMath, config.NoSignedZerosFPMath, config.UnsafeFPMath,
-       config.Reciprocals, config.PreferVectorWidth, /*tuneCPU=*/"",
-       setNoCapture, setNoAlias}));
+       config.Reciprocals, config.PreferVectorWidth, config.UseSampleProfile,
+       /*tuneCPU=*/"", setNoCapture, setNoAlias}));
 
   if (config.EnableOpenMP) {
     pm.addNestedPass<mlir::func::FuncOp>(
diff --git a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
index 4655ed6ed0d40..c4e5d49d7e64f 100644
--- a/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
+++ b/flang/lib/Optimizer/Transforms/FunctionAttr.cpp
@@ -99,6 +99,10 @@ void FunctionAttrPass::runOnOperation() {
     func->setAttr(
         mlir::LLVM::LLVMFuncOp::getPreferVectorWidthAttrName(llvmFuncOpName),
         mlir::StringAttr::get(context, preferVectorWidth));
+  if (UseSampleProfile)
+    func->setAttr(
+        mlir::LLVM::LLVMFuncOp::getUseSampleProfileAttrName(llvmFuncOpName),
+        mlir::BoolAttr::get(context, true));
 
   LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
 }
diff --git a/flang/test/Driver/Inputs/pgo-sample.prof b/flang/test/Driver/Inputs/pgo-sample.prof
new file mode 100644
index 0000000000000..1ba66a38d713b
--- /dev/null
+++ b/flang/test/Driver/Inputs/pgo-sample.prof
@@ -0,0 +1,2 @@
+hot_:100:100
+  2: 100
diff --git a/flang/test/Driver/fprofile-sample-use.f90 b/flang/test/Driver/fprofile-sample-use.f90
new file mode 100644
index 0000000000000..f2271081a019d
--- /dev/null
+++ b/flang/test/Driver/fprofile-sample-use.f90
@@ -0,0 +1,30 @@
+! Test to check the working of option "-fprofile-sample-use".
+
+! RUN: %flang -### -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s 2>&1 | FileCheck %s --check-prefix=PROFILE-SAMPLE-USE
+! RUN: %flang -### %s 2>&1 | FileCheck %s --check-prefix=NO-PROFILE-SAMPLE-USE
+! RUN: %flang -### -fprofile-sample-use=%S/Inputs/pgo-sample.prof -fno-profile-sample-use %s 2>&1 | FileCheck %s --check-prefix=NO-PROFILE-SAMPLE-USE
+! RUN: %flang -### -fno-profile-sample-use %s 2>&1 | FileCheck %s --check-prefix=NO-PROFILE-SAMPLE-USE
+! RUN: not %flang -fsyntax-only -fprofile-sample-use=%t/missing-profile.prof %s 2>&1 | FileCheck %s --check-prefix=PROFILE-SAMPLE-USE-NO-FILE
+! RUN: not %flang -fsyntax-only -fprofile-generate -fprofile-sample-use=%S/Inputs/pgo-sample.prof %s 2>&1 | FileCheck %s --check-prefix=PROFILE-SAMPLE-USE-ERROR
+
+! PROFILE-SAMPLE-USE: "-fprofile-sample-use={{.*}}/Inputs/pgo-sample.prof"
+! NO-PROFILE-SAMPLE-USE-NOT: "-fprofile-sample-use"
+! PROFILE-SAMPLE-USE-NO-FILE: error: no such file or directory: {{.*}}missing-profile.prof{{.*}}
+! PROFILE-SAMPLE-USE-ERROR: error: invalid argument '-fprofile-generate' not allowed with '-fprofile-sample-use={{.*}}'
+
+integer function hot(x)
+   integer, intent(in) :: x
+   hot = x*2
+end function hot
+
+integer function cold(x)
+   integer, intent(in) :: x
+   cold = x - 10
+end function
+
+program test_sample_use
+    integer :: i, r
+    do i = 1, 100
+       r = hot(i)
+    end do
+ end program test_sample_use
diff --git a/flang/test/Integration/inputs/pgo-sample.prof b/flang/test/Integration/inputs/pgo-sample.prof
new file mode 100644
index 0000000000000..1ba66a38d713b
--- /dev/null
+++ b/flang/test/Integration/inputs/pgo-sample.prof
@@ -0,0 +1,2 @@
+hot_:100:100
+  2: 100
diff --git a/flang/test/Integration/profile-sample-use.f90 b/flang/test/Integration/profile-sample-use.f90
new file mode 100644
index 0000000000000..c92a2f235a0ab
--- /dev/null
+++ b/flang/test/Integration/profile-sample-use.f90
@@ -0,0 +1,34 @@
+! Test to check the working of option "-fprofile-sample-use".
+! RUN: %flang -S -emit-llvm -g -fprofile-sample-use=%S/inputs/pgo-sample.prof -o - %s | FileCheck %s
+
+! CHECK: attributes #[[A:.*]] = { {{.*}}"use-sample-profile"{{.*}} }
+! CHECK: !{i32 {{.*}}, !"ProfileSummary"{{.*}}}
+! CHECK: !{!"ProfileFormat", !"SampleProfile"}
+! CHECK: !{!"TotalCount", i64 100}
+! CHECK: !{!"MaxCount", i64 100}
+! CHECK: !{!"MaxInternalCount", i64 0}
+! CHECK: !{!"MaxFunctionCount", i64 100}
+! CHECK: !{!"NumCounts", i64 1}
+! CHECK: !{!"NumFunctions", i64 1}
+! CHECK: !{!"IsPartialProfile", i64 0}
+! CHECK: !{!"PartialProfileRatio", double 0.000000e+00}
+! CHECK: distinct !DISubprogram(name: "hot", linkageName: "hot_", scope: !1
+! CHECK: !{!"function_entry_count", i64 101}
+
+integer function hot(x)
+   integer, intent(in) :: x
+   hot = x * 2
+end function hot
+
+integer function cold(x)
+   integer, intent(in) :: x
+   cold = x - 10
+end function cold
+
+program test_sample_use
+   implicit none
+   integer :: i, r
+   do i = 1, 100
+      r = hot(i)
+   end do
+end program test_sample_use
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 75c47f087f78e..c3cf417d6f0b9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -2051,7 +2051,8 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [
     OptionalAttr<DenseI32ArrayAttr>:$work_group_size_hint,
     OptionalAttr<DenseI32ArrayAttr>:$reqd_work_group_size,
     OptionalAttr<I32Attr>:$intel_reqd_sub_group_size,
-    OptionalAttr<UWTableKindAttr>:$uwtable_kind
+    OptionalAttr<UWTableKindAttr>:$uwtable_kind,
+    OptionalAttr<BoolAttr>:$use_sample_profile
   );
 
   let regions = (region AnyRegion:$body);
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index 25aaccecc56a2..4ea16800d8982 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -2880,6 +2880,9 @@ void ModuleImport::processFunctionAttributes(llvm::Function *func,
                                  .value()));
   }
 
+  if (func->hasFnAttribute("use-sample-profile"))
+    funcOp.setUseSampleProfile(true);
+
   if (llvm::Attribute attr = func->getFnAttribute("target-cpu");
       attr.isStringAttribute())
     funcOp.setTargetCpuAttr(StringAttr::get(context, attr.getValueAsString()));
diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
index f4a6e5f6fc8f6..cf398f151ed0b 100644
--- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp
@@ -1564,6 +1564,9 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) {
   if (auto preferVectorWidth = func.getPreferVectorWidth())
     llvmFunc->addFnAttr("prefer-vector-width", *preferVectorWidth);
 
+  if (func.getUseSampleProfile())
+    llvmFunc->addFnAttr("use-sample-profile");
+
   if (auto attr = func.getVscaleRange())
     llvmFunc->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
         getLLVMContext(), attr->getMinRange().getInt(),



More information about the cfe-commits mailing list