[flang-commits] [flang] [flang][Driver] Enable -Os and -Oz in flang (PR #164707)
Tarun Prabhu via flang-commits
flang-commits at lists.llvm.org
Thu Oct 23 11:14:30 PDT 2025
https://github.com/tarunprabhu updated https://github.com/llvm/llvm-project/pull/164707
>From 2d755d71187888356af0a394a6d1d2d6cfeb88e0 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Wed, 22 Oct 2025 14:44:31 -0600
Subject: [PATCH 1/2] [flang] Enable -Os and -Oz in flang
The implementation closely follows the implementation in clang. The effect
on the pass pipelines has been tested.
---
.../include/flang/Frontend/CodeGenOptions.def | 2 ++
flang/lib/Frontend/CompilerInvocation.cpp | 23 +++++++++++++++++++
flang/lib/Frontend/FrontendActions.cpp | 11 ++++++++-
.../Driver/default-optimization-pipelines.f90 | 14 ++++++++++-
4 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index dc3da7ba5c7f3..5892d3ef24e5a 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -20,6 +20,8 @@ CODEGENOPT(Name, Bits, Default)
#endif
CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
+/// The -Os (==1) or -Oz (==2) option is specified.
+CODEGENOPT(OptimizeSize, 2, 0)
CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
///< pass manager.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 548ca675db5ea..6dfff3b001aea 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -114,6 +114,10 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
assert(a->getOption().matches(clang::driver::options::OPT_O));
+ llvm::StringRef s(a->getValue());
+ if (s == "s" || s == "z")
+ return 2;
+
return getLastArgIntValue(args, clang::driver::options::OPT_O, defaultOpt,
diags);
}
@@ -121,6 +125,24 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
return defaultOpt;
}
+/// Size level of the last -O option in \a args: 's' => 1, 'z' => 2, else 0
+static unsigned getOptimizationLevelSize(llvm::opt::ArgList &args) {
+ if (llvm::opt::Arg *a =
+ args.getLastArg(clang::driver::options::OPT_O_Group)) {
+ if (a->getOption().matches(clang::driver::options::OPT_O)) {
+ switch (a->getValue()[0]) { // only the first character is inspected
+ default:
+ return 0; // any other -O value: not optimizing for size
+ case 's':
+ return 1; // -Os
+ case 'z':
+ return 2; // -Oz
+ }
+ }
+ }
+ return 0; // no O_Group argument, or the last one is not OPT_O
+}
+
bool Fortran::frontend::parseDiagnosticArgs(clang::DiagnosticOptions &opts,
llvm::opt::ArgList &args) {
opts.ShowColors = parseShowColorsArgs(args);
@@ -273,6 +295,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
llvm::opt::ArgList &args,
clang::DiagnosticsEngine &diags) {
opts.OptimizationLevel = getOptimizationLevel(args, diags);
+ opts.OptimizeSize = getOptimizationLevelSize(args);
if (args.hasFlag(clang::driver::options::OPT_fdebug_pass_manager,
clang::driver::options::OPT_fno_debug_pass_manager, false))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 0c630d2ba876d..d6fb98bc930c6 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -602,7 +602,16 @@ mapToLevel(const Fortran::frontend::CodeGenOptions &opts) {
case 1:
return llvm::OptimizationLevel::O1;
case 2:
- return llvm::OptimizationLevel::O2;
+ switch (opts.OptimizeSize) {
+ default:
+ llvm_unreachable("Invalid optimization level for size!");
+ case 0:
+ return llvm::OptimizationLevel::O2;
+ case 1:
+ return llvm::OptimizationLevel::Os;
+ case 2:
+ return llvm::OptimizationLevel::Oz;
+ }
case 3:
return llvm::OptimizationLevel::O3;
}
diff --git a/flang/test/Driver/default-optimization-pipelines.f90 b/flang/test/Driver/default-optimization-pipelines.f90
index 08e407f73da5c..18108cd632220 100644
--- a/flang/test/Driver/default-optimization-pipelines.f90
+++ b/flang/test/Driver/default-optimization-pipelines.f90
@@ -14,10 +14,16 @@
! RUN: %flang_fc1 -S -O2 %s -flto=full -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-LTO
! RUN: %flang_fc1 -S -O2 %s -flto=thin -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-THINLTO
-! Verify that only the left-most `-O{n}` is used
+! Verify that only the right-most `-O{n}` is used
! RUN: %flang -S -O2 -O0 %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
! RUN: %flang_fc1 -S -O2 -O0 %s -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
+! Verify that passing -Os/-Oz has the desired effect on the pass pipelines.
+! RUN: %flang -S -Os %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN: | FileCheck %s --check-prefix=CHECK-OSIZE
+! RUN: %flang -S -Oz %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN: | FileCheck %s --check-prefix=CHECK-OSIZE
+
! CHECK-O0-NOT: Running pass: SimplifyCFGPass on simple_loop_
! CHECK-O0: Running analysis: TargetLibraryAnalysis on simple_loop_
! CHECK-O0-ANYLTO: Running pass: CanonicalizeAliasesPass on [module]
@@ -33,6 +39,12 @@
! CHECK-O2-THINLTO: Running pass: CanonicalizeAliasesPass on [module]
! CHECK-O2-THINLTO: Running pass: NameAnonGlobalPass on [module]
+! -Os/-Oz imply -O2, so check that a pass that runs on O2 is run. Then check
+! that passes like LibCallsShrinkWrap, which should not be run when optimizing
+! for size, are not run (see llvm/lib/Passes/PassBuilderPipelines.cpp).
+! CHECK-OSIZE: Running pass: SimplifyCFGPass on simple_loop_
+! CHECK-OSIZE-NOT: Running pass: LibCallsShrinkWrapPass on simple_loop_
+
subroutine simple_loop
integer :: i
do i=1,5
>From c5fc8ef95f9775890cd022b59a7f6855950814d4 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun at lanl.gov>
Date: Thu, 23 Oct 2025 12:14:18 -0600
Subject: [PATCH 2/2] Do not run some MLIR passes when compiling with -Os/-Oz
since they may result in an increase in code size
---
flang/lib/Optimizer/Passes/Pipelines.cpp | 36 ++++++++++++++----------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 103e736accca0..c1b0bbc231652 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -169,20 +169,22 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) {
/// \param pm - MLIR pass manager that will hold the pipeline definition
void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig &pc) {
+ llvm::OptimizationLevel optLevel = pc.OptLevel;
+
// Early Optimizer EP Callback
- pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel);
+ pc.invokeFIROptEarlyEPCallbacks(pm, optLevel);
// simplify the IR
mlir::GreedyRewriteConfig config;
config.setRegionSimplificationLevel(
mlir::GreedySimplifyRegionLevel::Disabled);
pm.addPass(mlir::createCSEPass());
- fir::addAVC(pm, pc.OptLevel);
+ fir::addAVC(pm, optLevel);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createCharacterConversion);
pm.addPass(mlir::createCanonicalizerPass(config));
pm.addPass(fir::createSimplifyRegionLite());
- if (pc.OptLevel.isOptimizingForSpeed()) {
+ if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize()) {
// These passes may increase code size.
pm.addPass(fir::createSimplifyIntrinsics());
pm.addPass(fir::createAlgebraicSimplificationPass(config));
@@ -190,7 +192,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConstantArgumentGlobalisationOpt());
}
- if (pc.LoopVersioning)
+ if (pc.LoopVersioning && !optLevel.isOptimizingForSize())
pm.addPass(fir::createLoopVersioning());
pm.addPass(mlir::createCSEPass());
@@ -201,7 +203,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
fir::addMemoryAllocationOpt(pm);
// FIR Inliner Callback
- pc.invokeFIRInlinerCallback(pm, pc.OptLevel);
+ pc.invokeFIRInlinerCallback(pm, optLevel);
pm.addPass(fir::createSimplifyRegionLite());
pm.addPass(mlir::createCSEPass());
@@ -212,13 +214,14 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
// Optimize redundant array repacking operations,
// if the source is known to be contiguous.
- if (pc.OptLevel.isOptimizingForSpeed())
+ if (optLevel.isOptimizingForSpeed())
pm.addPass(fir::createOptimizeArrayRepacking());
pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
- pm.addPass(fir::createSimplifyFIROperations(
- {/*preferInlineImplementation=*/pc.OptLevel.isOptimizingForSpeed()}));
+ bool preferInlineImplementation =
+ optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize();
+ pm.addPass(fir::createSimplifyFIROperations({preferInlineImplementation}));
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createStackReclaim);
@@ -232,11 +235,11 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConvertComplexPow());
pm.addPass(mlir::createCSEPass());
- if (pc.OptLevel.isOptimizingForSpeed())
+ if (optLevel.isOptimizingForSpeed())
pm.addPass(fir::createSetRuntimeCallAttributes());
// Last Optimizer EP Callback
- pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel);
+ pc.invokeFIROptLastEPCallbacks(pm, optLevel);
}
/// Create a pass pipeline for lowering from HLFIR to FIR
@@ -256,8 +259,9 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createSimplifyHLFIRIntrinsics);
}
- addNestedPassToAllTopLevelOperations<PassConstructor>(
- pm, hlfir::createInlineElementals);
+ if (!optLevel.isOptimizingForSize())
+ addNestedPassToAllTopLevelOperations<PassConstructor>(
+ pm, hlfir::createInlineElementals);
if (optLevel.isOptimizingForSpeed()) {
addCanonicalizerPassWithoutRegionSimplification(pm);
pm.addPass(mlir::createCSEPass());
@@ -271,8 +275,10 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
pm, hlfir::createPropagateFortranVariableAttributes);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createOptimizedBufferization);
- addNestedPassToAllTopLevelOperations<PassConstructor>(
- pm, hlfir::createInlineHLFIRAssign);
+
+ if (!optLevel.isOptimizingForSize())
+ addNestedPassToAllTopLevelOperations<PassConstructor>(
+ pm, hlfir::createInlineHLFIRAssign);
if (optLevel == llvm::OptimizationLevel::O3) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
@@ -297,7 +303,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
// TODO: we can remove the previous InlineHLFIRAssign, when
// FIR AliasAnalysis is good enough to say that a temporary
// array does not alias with any user object.
- if (optLevel.isOptimizingForSpeed())
+ if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize())
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
pm.addPass(hlfir::createConvertHLFIRtoFIR());
More information about the flang-commits
mailing list