[flang-commits] [flang] [flang][Driver] Enable -Os and -Oz in flang (PR #164707)
Tarun Prabhu via flang-commits
flang-commits at lists.llvm.org
Sun Oct 26 10:57:00 PDT 2025
https://github.com/tarunprabhu updated https://github.com/llvm/llvm-project/pull/164707
>From 2d755d71187888356af0a394a6d1d2d6cfeb88e0 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Wed, 22 Oct 2025 14:44:31 -0600
Subject: [PATCH 1/3] [flang] Enable -Os and -Oz in flang
The implementation adheres closely to the implementation in clang. The effect
on the pass pipelines has been tested.
---
.../include/flang/Frontend/CodeGenOptions.def | 2 ++
flang/lib/Frontend/CompilerInvocation.cpp | 23 +++++++++++++++++++
flang/lib/Frontend/FrontendActions.cpp | 11 ++++++++-
.../Driver/default-optimization-pipelines.f90 | 14 ++++++++++-
4 files changed, 48 insertions(+), 2 deletions(-)
diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index dc3da7ba5c7f3..5892d3ef24e5a 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -20,6 +20,8 @@ CODEGENOPT(Name, Bits, Default)
#endif
CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
+/// The -Os (==1) or -Oz (==2) option is specified.
+CODEGENOPT(OptimizeSize, 2, 0)
CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
///< pass manager.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 548ca675db5ea..6dfff3b001aea 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -114,6 +114,10 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
assert(a->getOption().matches(clang::driver::options::OPT_O));
+ llvm::StringRef s(a->getValue());
+ if (s == "s" || s == "z")
+ return 2;
+
return getLastArgIntValue(args, clang::driver::options::OPT_O, defaultOpt,
diags);
}
@@ -121,6 +125,24 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
return defaultOpt;
}
+/// Extracts the size-optimization level from \a args
+static unsigned getOptimizationLevelSize(llvm::opt::ArgList &args) {
+ if (llvm::opt::Arg *a =
+ args.getLastArg(clang::driver::options::OPT_O_Group)) {
+ if (a->getOption().matches(clang::driver::options::OPT_O)) {
+ switch (a->getValue()[0]) {
+ default:
+ return 0;
+ case 's':
+ return 1;
+ case 'z':
+ return 2;
+ }
+ }
+ }
+ return 0;
+}
+
bool Fortran::frontend::parseDiagnosticArgs(clang::DiagnosticOptions &opts,
llvm::opt::ArgList &args) {
opts.ShowColors = parseShowColorsArgs(args);
@@ -273,6 +295,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
llvm::opt::ArgList &args,
clang::DiagnosticsEngine &diags) {
opts.OptimizationLevel = getOptimizationLevel(args, diags);
+ opts.OptimizeSize = getOptimizationLevelSize(args);
if (args.hasFlag(clang::driver::options::OPT_fdebug_pass_manager,
clang::driver::options::OPT_fno_debug_pass_manager, false))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 0c630d2ba876d..d6fb98bc930c6 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -602,7 +602,16 @@ mapToLevel(const Fortran::frontend::CodeGenOptions &opts) {
case 1:
return llvm::OptimizationLevel::O1;
case 2:
- return llvm::OptimizationLevel::O2;
+ switch (opts.OptimizeSize) {
+ default:
+ llvm_unreachable("Invalid optimization level for size!");
+ case 0:
+ return llvm::OptimizationLevel::O2;
+ case 1:
+ return llvm::OptimizationLevel::Os;
+ case 2:
+ return llvm::OptimizationLevel::Oz;
+ }
case 3:
return llvm::OptimizationLevel::O3;
}
diff --git a/flang/test/Driver/default-optimization-pipelines.f90 b/flang/test/Driver/default-optimization-pipelines.f90
index 08e407f73da5c..18108cd632220 100644
--- a/flang/test/Driver/default-optimization-pipelines.f90
+++ b/flang/test/Driver/default-optimization-pipelines.f90
@@ -14,10 +14,16 @@
! RUN: %flang_fc1 -S -O2 %s -flto=full -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-LTO
! RUN: %flang_fc1 -S -O2 %s -flto=thin -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-THINLTO
-! Verify that only the left-most `-O{n}` is used
+! Verify that only the right-most `-O{n}` is used
! RUN: %flang -S -O2 -O0 %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
! RUN: %flang_fc1 -S -O2 -O0 %s -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
+! Verify that passing -Os/-Oz has the desired effect on the pass pipelines.
+! RUN: %flang -S -Os %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN: | FileCheck %s --check-prefix=CHECK-OSIZE
+! RUN: %flang -S -Oz %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN: | FileCheck %s --check-prefix=CHECK-OSIZE
+
! CHECK-O0-NOT: Running pass: SimplifyCFGPass on simple_loop_
! CHECK-O0: Running analysis: TargetLibraryAnalysis on simple_loop_
! CHECK-O0-ANYLTO: Running pass: CanonicalizeAliasesPass on [module]
@@ -33,6 +39,12 @@
! CHECK-O2-THINLTO: Running pass: CanonicalizeAliasesPass on [module]
! CHECK-O2-THINLTO: Running pass: NameAnonGlobalPass on [module]
+! -Os/-Oz imply -O2, so check that a pass that runs at -O2 is run. Then check
+! that passes like LibCallsShrinkWrap, which should not be run when optimizing
+! for size, are not run (see llvm/lib/Passes/PassBuilderPipelines.cpp).
+! CHECK-OSIZE: Running pass: SimplifyCFGPass on simple_loop_
+! CHECK-OSIZE-NOT: Running pass: LibCallsShrinkWrapPass on simple_loop_
+
subroutine simple_loop
integer :: i
do i=1,5
>From c5fc8ef95f9775890cd022b59a7f6855950814d4 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun at lanl.gov>
Date: Thu, 23 Oct 2025 12:14:18 -0600
Subject: [PATCH 2/3] Do not run some MLIR passes when compiling with -Os/-Oz
since they may result in an increase in code size
---
flang/lib/Optimizer/Passes/Pipelines.cpp | 36 ++++++++++++++----------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 103e736accca0..c1b0bbc231652 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -169,20 +169,22 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) {
/// \param pm - MLIR pass manager that will hold the pipeline definition
void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig &pc) {
+ llvm::OptimizationLevel optLevel = pc.OptLevel;
+
// Early Optimizer EP Callback
- pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel);
+ pc.invokeFIROptEarlyEPCallbacks(pm, optLevel);
// simplify the IR
mlir::GreedyRewriteConfig config;
config.setRegionSimplificationLevel(
mlir::GreedySimplifyRegionLevel::Disabled);
pm.addPass(mlir::createCSEPass());
- fir::addAVC(pm, pc.OptLevel);
+ fir::addAVC(pm, optLevel);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createCharacterConversion);
pm.addPass(mlir::createCanonicalizerPass(config));
pm.addPass(fir::createSimplifyRegionLite());
- if (pc.OptLevel.isOptimizingForSpeed()) {
+ if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize()) {
// These passes may increase code size.
pm.addPass(fir::createSimplifyIntrinsics());
pm.addPass(fir::createAlgebraicSimplificationPass(config));
@@ -190,7 +192,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConstantArgumentGlobalisationOpt());
}
- if (pc.LoopVersioning)
+ if (pc.LoopVersioning && !optLevel.isOptimizingForSize())
pm.addPass(fir::createLoopVersioning());
pm.addPass(mlir::createCSEPass());
@@ -201,7 +203,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
fir::addMemoryAllocationOpt(pm);
// FIR Inliner Callback
- pc.invokeFIRInlinerCallback(pm, pc.OptLevel);
+ pc.invokeFIRInlinerCallback(pm, optLevel);
pm.addPass(fir::createSimplifyRegionLite());
pm.addPass(mlir::createCSEPass());
@@ -212,13 +214,14 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
// Optimize redundant array repacking operations,
// if the source is known to be contiguous.
- if (pc.OptLevel.isOptimizingForSpeed())
+ if (optLevel.isOptimizingForSpeed())
pm.addPass(fir::createOptimizeArrayRepacking());
pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
- pm.addPass(fir::createSimplifyFIROperations(
- {/*preferInlineImplementation=*/pc.OptLevel.isOptimizingForSpeed()}));
+ bool preferInlineImplementation =
+ optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize();
+ pm.addPass(fir::createSimplifyFIROperations({preferInlineImplementation}));
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createStackReclaim);
@@ -232,11 +235,11 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConvertComplexPow());
pm.addPass(mlir::createCSEPass());
- if (pc.OptLevel.isOptimizingForSpeed())
+ if (optLevel.isOptimizingForSpeed())
pm.addPass(fir::createSetRuntimeCallAttributes());
// Last Optimizer EP Callback
- pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel);
+ pc.invokeFIROptLastEPCallbacks(pm, optLevel);
}
/// Create a pass pipeline for lowering from HLFIR to FIR
@@ -256,8 +259,9 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createSimplifyHLFIRIntrinsics);
}
- addNestedPassToAllTopLevelOperations<PassConstructor>(
- pm, hlfir::createInlineElementals);
+ if (!optLevel.isOptimizingForSize())
+ addNestedPassToAllTopLevelOperations<PassConstructor>(
+ pm, hlfir::createInlineElementals);
if (optLevel.isOptimizingForSpeed()) {
addCanonicalizerPassWithoutRegionSimplification(pm);
pm.addPass(mlir::createCSEPass());
@@ -271,8 +275,10 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
pm, hlfir::createPropagateFortranVariableAttributes);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createOptimizedBufferization);
- addNestedPassToAllTopLevelOperations<PassConstructor>(
- pm, hlfir::createInlineHLFIRAssign);
+
+ if (!optLevel.isOptimizingForSize())
+ addNestedPassToAllTopLevelOperations<PassConstructor>(
+ pm, hlfir::createInlineHLFIRAssign);
if (optLevel == llvm::OptimizationLevel::O3) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
@@ -297,7 +303,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
// TODO: we can remove the previous InlineHLFIRAssign, when
// FIR AliasAnalysis is good enough to say that a temporary
// array does not alias with any user object.
- if (optLevel.isOptimizingForSpeed())
+ if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize())
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
pm.addPass(hlfir::createConvertHLFIRtoFIR());
>From 7154eb8449809179f459d3ae72f01f02e1c441ce Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Sun, 26 Oct 2025 11:56:46 -0600
Subject: [PATCH 3/3] Use explicit speedup and size levels since
isOptimizingForSize assumes that speedupLevel == 0, when, in fact, sizeLevel >
0 => speedupLevel == 2.
---
flang/lib/Optimizer/Passes/Pipelines.cpp | 43 ++++++++++++++----------
1 file changed, 25 insertions(+), 18 deletions(-)
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index c1b0bbc231652..7da3e0c0a888a 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -56,7 +56,7 @@ void addCfgConversionPass(mlir::PassManager &pm,
void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) {
ArrayValueCopyOptions options;
- options.optimizeConflicts = optLevel.isOptimizingForSpeed();
+ options.optimizeConflicts = optLevel != llvm::OptimizationLevel::O0;
addNestedPassConditionally<mlir::func::FuncOp>(
pm, disableFirAvc, [&]() { return createArrayValueCopyPass(options); });
}
@@ -170,6 +170,8 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) {
void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig &pc) {
llvm::OptimizationLevel optLevel = pc.OptLevel;
+ unsigned speedupLevel = optLevel.getSpeedupLevel();
+ unsigned sizeLevel = optLevel.getSizeLevel();
// Early Optimizer EP Callback
pc.invokeFIROptEarlyEPCallbacks(pm, optLevel);
@@ -184,7 +186,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm, fir::createCharacterConversion);
pm.addPass(mlir::createCanonicalizerPass(config));
pm.addPass(fir::createSimplifyRegionLite());
- if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize()) {
+ if (speedupLevel && !sizeLevel) {
// These passes may increase code size.
pm.addPass(fir::createSimplifyIntrinsics());
pm.addPass(fir::createAlgebraicSimplificationPass(config));
@@ -192,7 +194,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConstantArgumentGlobalisationOpt());
}
- if (pc.LoopVersioning && !optLevel.isOptimizingForSize())
+ if (pc.LoopVersioning && !sizeLevel)
pm.addPass(fir::createLoopVersioning());
pm.addPass(mlir::createCSEPass());
@@ -214,13 +216,12 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
// Optimize redundant array repacking operations,
// if the source is known to be contiguous.
- if (optLevel.isOptimizingForSpeed())
+ if (speedupLevel)
pm.addPass(fir::createOptimizeArrayRepacking());
pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
- bool preferInlineImplementation =
- optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize();
+ bool preferInlineImplementation = speedupLevel && !sizeLevel;
pm.addPass(fir::createSimplifyFIROperations({preferInlineImplementation}));
addNestedPassToAllTopLevelOperations<PassConstructor>(
@@ -235,7 +236,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createConvertComplexPow());
pm.addPass(mlir::createCSEPass());
- if (optLevel.isOptimizingForSpeed())
+ if (speedupLevel)
pm.addPass(fir::createSetRuntimeCallAttributes());
// Last Optimizer EP Callback
@@ -250,19 +251,24 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
EnableOpenMP enableOpenMP,
llvm::OptimizationLevel optLevel) {
- if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) {
+ // if sizeLevel > 0 (this is the case when either -Os or -Oz is provided on
+ // the command line), the speedupLevel is guaranteed to be 2.
+ unsigned speedupLevel = optLevel.getSpeedupLevel();
+ unsigned sizeLevel = optLevel.getSizeLevel();
+
+ if (speedupLevel) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createExpressionSimplification);
}
- if (optLevel.isOptimizingForSpeed()) {
+ if (speedupLevel) {
addCanonicalizerPassWithoutRegionSimplification(pm);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createSimplifyHLFIRIntrinsics);
}
- if (!optLevel.isOptimizingForSize())
+ if (!sizeLevel)
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineElementals);
- if (optLevel.isOptimizingForSpeed()) {
+ if (speedupLevel) {
addCanonicalizerPassWithoutRegionSimplification(pm);
pm.addPass(mlir::createCSEPass());
// Run SimplifyHLFIRIntrinsics pass late after CSE,
@@ -275,8 +281,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
pm, hlfir::createPropagateFortranVariableAttributes);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createOptimizedBufferization);
-
- if (!optLevel.isOptimizingForSize())
+ if (!sizeLevel)
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
@@ -293,7 +298,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
// from hlfir.elemental lowering, if the result is an empty array.
// This helps to avoid long running loops for elementals with
// shapes like (0, HUGE).
- if (optLevel.isOptimizingForSpeed())
+ if (speedupLevel)
bufferizeOptions.optimizeEmptyElementals = true;
pm.addPass(hlfir::createBufferizeHLFIR(bufferizeOptions));
// Run hlfir.assign inlining again after BufferizeHLFIR,
@@ -303,7 +308,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
// TODO: we can remove the previous InlineHLFIRAssign, when
// FIR AliasAnalysis is good enough to say that a temporary
// array does not alias with any user object.
- if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize())
+ if (speedupLevel && !sizeLevel)
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);
pm.addPass(hlfir::createConvertHLFIRtoFIR());
@@ -360,10 +365,12 @@ void createDebugPasses(mlir::PassManager &pm,
void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
MLIRToLLVMPassPipelineConfig config,
llvm::StringRef inputFilename) {
+ unsigned speedupLevel = config.OptLevel.getSpeedupLevel();
+
pm.addPass(fir::createMIFOpConversion());
fir::addBoxedProcedurePass(pm);
- if (config.OptLevel.isOptimizingForSpeed() && config.AliasAnalysis &&
- !disableFirAliasTags && !useOldAliasTags)
+ if (speedupLevel && config.AliasAnalysis && !disableFirAliasTags &&
+ !useOldAliasTags)
pm.addPass(fir::createAddAliasTags());
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createAbstractResultOpt);
@@ -395,7 +402,7 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
// TODO: re-enable setNoAlias by default (when optimizing for speed) once
// function specialization is fixed.
bool setNoAlias = forceNoAlias;
- bool setNoCapture = config.OptLevel.isOptimizingForSpeed();
+ bool setNoCapture = speedupLevel;
pm.addPass(fir::createFunctionAttr(
{framePointerKind, config.InstrumentFunctionEntry,
More information about the flang-commits
mailing list