[flang-commits] [flang] [flang][Driver] Enable -Os and -Oz in flang (PR #164707)

Tarun Prabhu via flang-commits flang-commits at lists.llvm.org
Thu Oct 23 11:14:30 PDT 2025


https://github.com/tarunprabhu updated https://github.com/llvm/llvm-project/pull/164707

>From 2d755d71187888356af0a394a6d1d2d6cfeb88e0 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Wed, 22 Oct 2025 14:44:31 -0600
Subject: [PATCH 1/2] [flang] Enable -Os and -Oz in flang

The implementation adheres closely to the implementation in clang. The effect
on the pass pipelines has been tested.
---
 .../include/flang/Frontend/CodeGenOptions.def |  2 ++
 flang/lib/Frontend/CompilerInvocation.cpp     | 23 +++++++++++++++++++
 flang/lib/Frontend/FrontendActions.cpp        | 11 ++++++++-
 .../Driver/default-optimization-pipelines.f90 | 14 ++++++++++-
 4 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index dc3da7ba5c7f3..5892d3ef24e5a 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -20,6 +20,8 @@ CODEGENOPT(Name, Bits, Default)
 #endif
 
 CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
+/// The -Os (==1) or -Oz (==2) option is specified.
+CODEGENOPT(OptimizeSize, 2, 0) 
 
 CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
                                    ///< pass manager.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 548ca675db5ea..6dfff3b001aea 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -114,6 +114,10 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
 
     assert(a->getOption().matches(clang::driver::options::OPT_O));
 
+    llvm::StringRef s(a->getValue());
+    if (s == "s" || s == "z")
+      return 2;
+
     return getLastArgIntValue(args, clang::driver::options::OPT_O, defaultOpt,
                               diags);
   }
@@ -121,6 +125,24 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
   return defaultOpt;
 }
 
+/// Returns the size-optimization level in \a args (-Os: 1, -Oz: 2, else 0).
+static unsigned getOptimizationLevelSize(llvm::opt::ArgList &args) {
+  if (llvm::opt::Arg *a =
+          args.getLastArg(clang::driver::options::OPT_O_Group)) {
+    if (a->getOption().matches(clang::driver::options::OPT_O)) {
+      switch (a->getValue()[0]) { // Only the first character is inspected.
+      default:
+        return 0; // Some other -O<value>: not a size level.
+      case 's':
+        return 1; // -Os
+      case 'z':
+        return 2; // -Oz
+      }
+    }
+  }
+  return 0; // No O_Group argument, or the last one does not match OPT_O.
+}
+
 bool Fortran::frontend::parseDiagnosticArgs(clang::DiagnosticOptions &opts,
                                             llvm::opt::ArgList &args) {
   opts.ShowColors = parseShowColorsArgs(args);
@@ -273,6 +295,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
                              llvm::opt::ArgList &args,
                              clang::DiagnosticsEngine &diags) {
   opts.OptimizationLevel = getOptimizationLevel(args, diags);
+  opts.OptimizeSize = getOptimizationLevelSize(args);
 
   if (args.hasFlag(clang::driver::options::OPT_fdebug_pass_manager,
                    clang::driver::options::OPT_fno_debug_pass_manager, false))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 0c630d2ba876d..d6fb98bc930c6 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -602,7 +602,16 @@ mapToLevel(const Fortran::frontend::CodeGenOptions &opts) {
   case 1:
     return llvm::OptimizationLevel::O1;
   case 2:
-    return llvm::OptimizationLevel::O2;
+    switch (opts.OptimizeSize) {
+    default:
+      llvm_unreachable("Invalid optimization level for size!");
+    case 0:
+      return llvm::OptimizationLevel::O2;
+    case 1:
+      return llvm::OptimizationLevel::Os;
+    case 2:
+      return llvm::OptimizationLevel::Oz;
+    }
   case 3:
     return llvm::OptimizationLevel::O3;
   }
diff --git a/flang/test/Driver/default-optimization-pipelines.f90 b/flang/test/Driver/default-optimization-pipelines.f90
index 08e407f73da5c..18108cd632220 100644
--- a/flang/test/Driver/default-optimization-pipelines.f90
+++ b/flang/test/Driver/default-optimization-pipelines.f90
@@ -14,10 +14,16 @@
 ! RUN: %flang_fc1 -S -O2 %s -flto=full -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-LTO
 ! RUN: %flang_fc1 -S -O2 %s -flto=thin -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-THINLTO
 
-! Verify that only the left-most `-O{n}` is used
+! Verify that only the right-most `-O{n}` is used
 ! RUN: %flang -S -O2 -O0 %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
 ! RUN: %flang_fc1 -S -O2 -O0 %s -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
 
+! Verify that passing -Os/-Oz has the desired effect on the pass pipelines.
+! RUN: %flang -S -Os %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN:     | FileCheck %s --check-prefix=CHECK-OSIZE
+! RUN: %flang -S -Oz %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN:     | FileCheck %s --check-prefix=CHECK-OSIZE
+
 ! CHECK-O0-NOT: Running pass: SimplifyCFGPass on simple_loop_
 ! CHECK-O0: Running analysis: TargetLibraryAnalysis on simple_loop_
 ! CHECK-O0-ANYLTO: Running pass: CanonicalizeAliasesPass on [module]
@@ -33,6 +39,12 @@
 ! CHECK-O2-THINLTO: Running pass: CanonicalizeAliasesPass on [module]
 ! CHECK-O2-THINLTO: Running pass: NameAnonGlobalPass on [module]
 
+! -Os/-Oz imply -O2, so check that a pass that runs at -O2 is run. Then check
+! that passes like LibCallsShrinkWrap, which should not be run when optimizing
+! for size, are not run (see llvm/lib/Passes/PassBuilderPipelines.cpp).
+! CHECK-OSIZE: Running pass: SimplifyCFGPass on simple_loop_
+! CHECK-OSIZE-NOT: Running pass: LibCallsShrinkWrapPass on simple_loop_
+
 subroutine simple_loop
   integer :: i
   do i=1,5

>From c5fc8ef95f9775890cd022b59a7f6855950814d4 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun at lanl.gov>
Date: Thu, 23 Oct 2025 12:14:18 -0600
Subject: [PATCH 2/2] Do not run some MLIR passes when compiling with -Os/-Oz
 since they may result in an increase in code size

---
 flang/lib/Optimizer/Passes/Pipelines.cpp | 36 ++++++++++++++----------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 103e736accca0..c1b0bbc231652 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -169,20 +169,22 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) {
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
                                            MLIRToLLVMPassPipelineConfig &pc) {
+  llvm::OptimizationLevel optLevel = pc.OptLevel;
+
   // Early Optimizer EP Callback
-  pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel);
+  pc.invokeFIROptEarlyEPCallbacks(pm, optLevel);
 
   // simplify the IR
   mlir::GreedyRewriteConfig config;
   config.setRegionSimplificationLevel(
       mlir::GreedySimplifyRegionLevel::Disabled);
   pm.addPass(mlir::createCSEPass());
-  fir::addAVC(pm, pc.OptLevel);
+  fir::addAVC(pm, optLevel);
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createCharacterConversion);
   pm.addPass(mlir::createCanonicalizerPass(config));
   pm.addPass(fir::createSimplifyRegionLite());
-  if (pc.OptLevel.isOptimizingForSpeed()) {
+  if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize()) {
     // These passes may increase code size.
     pm.addPass(fir::createSimplifyIntrinsics());
     pm.addPass(fir::createAlgebraicSimplificationPass(config));
@@ -190,7 +192,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
       pm.addPass(fir::createConstantArgumentGlobalisationOpt());
   }
 
-  if (pc.LoopVersioning)
+  if (pc.LoopVersioning && !optLevel.isOptimizingForSize())
     pm.addPass(fir::createLoopVersioning());
 
   pm.addPass(mlir::createCSEPass());
@@ -201,7 +203,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     fir::addMemoryAllocationOpt(pm);
 
   // FIR Inliner Callback
-  pc.invokeFIRInlinerCallback(pm, pc.OptLevel);
+  pc.invokeFIRInlinerCallback(pm, optLevel);
 
   pm.addPass(fir::createSimplifyRegionLite());
   pm.addPass(mlir::createCSEPass());
@@ -212,13 +214,14 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 
   // Optimize redundant array repacking operations,
   // if the source is known to be contiguous.
-  if (pc.OptLevel.isOptimizingForSpeed())
+  if (optLevel.isOptimizingForSpeed())
     pm.addPass(fir::createOptimizeArrayRepacking());
   pm.addPass(fir::createLowerRepackArraysPass());
   // Expand FIR operations that may use SCF dialect for their
   // implementation. This is a mandatory pass.
-  pm.addPass(fir::createSimplifyFIROperations(
-      {/*preferInlineImplementation=*/pc.OptLevel.isOptimizingForSpeed()}));
+  bool preferInlineImplementation =
+      optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize();
+  pm.addPass(fir::createSimplifyFIROperations({preferInlineImplementation}));
 
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createStackReclaim);
@@ -232,11 +235,11 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     pm.addPass(fir::createConvertComplexPow());
   pm.addPass(mlir::createCSEPass());
 
-  if (pc.OptLevel.isOptimizingForSpeed())
+  if (optLevel.isOptimizingForSpeed())
     pm.addPass(fir::createSetRuntimeCallAttributes());
 
   // Last Optimizer EP Callback
-  pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel);
+  pc.invokeFIROptLastEPCallbacks(pm, optLevel);
 }
 
 /// Create a pass pipeline for lowering from HLFIR to FIR
@@ -256,8 +259,9 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createSimplifyHLFIRIntrinsics);
   }
-  addNestedPassToAllTopLevelOperations<PassConstructor>(
-      pm, hlfir::createInlineElementals);
+  if (!optLevel.isOptimizingForSize())
+    addNestedPassToAllTopLevelOperations<PassConstructor>(
+        pm, hlfir::createInlineElementals);
   if (optLevel.isOptimizingForSpeed()) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
     pm.addPass(mlir::createCSEPass());
@@ -271,8 +275,10 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
         pm, hlfir::createPropagateFortranVariableAttributes);
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createOptimizedBufferization);
-    addNestedPassToAllTopLevelOperations<PassConstructor>(
-        pm, hlfir::createInlineHLFIRAssign);
+
+    if (!optLevel.isOptimizingForSize())
+      addNestedPassToAllTopLevelOperations<PassConstructor>(
+          pm, hlfir::createInlineHLFIRAssign);
 
     if (optLevel == llvm::OptimizationLevel::O3) {
       addNestedPassToAllTopLevelOperations<PassConstructor>(
@@ -297,7 +303,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
   // TODO: we can remove the previous InlineHLFIRAssign, when
   // FIR AliasAnalysis is good enough to say that a temporary
   // array does not alias with any user object.
-  if (optLevel.isOptimizingForSpeed())
+  if (optLevel.isOptimizingForSpeed() && !optLevel.isOptimizingForSize())
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createInlineHLFIRAssign);
   pm.addPass(hlfir::createConvertHLFIRtoFIR());



More information about the flang-commits mailing list