[flang-commits] [flang] [DRAFT][flang][Driver] Enable -Os and -Oz in flang (PR #164707)

Fri Nov 7 04:45:05 PST 2025

https://github.com/tarunprabhu updated https://github.com/llvm/llvm-project/pull/164707

>From 4db45f6d17cd2e518c87e9b306add174df0ebf39 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Wed, 22 Oct 2025 14:44:31 -0600
Subject: [PATCH 1/2] [flang] Enable -Os and -Oz in flang

For the LLVM passes, the implementation mirrors that in clang. A number of
FIR and MLIR passes that may result in an increase in code size are not run
when either of these options is provided. Explicit speedup and size levels are
used instead of isOptimizingForSpeed since the latter returns false whenever
the size level is non-zero. However, optimizing for size implies that the
speedup level is 2.
---
 .../include/flang/Frontend/CodeGenOptions.def |  2 +
 flang/lib/Frontend/CompilerInvocation.cpp     | 23 ++++++++
 flang/lib/Frontend/FrontendActions.cpp        | 11 +++-
 flang/lib/Optimizer/Passes/Pipelines.cpp      | 59 +++++++++++--------
 .../Driver/default-optimization-pipelines.f90 | 14 ++++-
 5 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def
index dc3da7ba5c7f3..5892d3ef24e5a 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.def
+++ b/flang/include/flang/Frontend/CodeGenOptions.def
@@ -20,6 +20,8 @@ CODEGENOPT(Name, Bits, Default)
 #endif
 
 CODEGENOPT(OptimizationLevel, 2, 0) ///< The -O[0-3] option specified.
+/// The -Os (==1) or -Oz (==2) option is specified.
+CODEGENOPT(OptimizeSize, 2, 0) 
 
 CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new
                                    ///< pass manager.
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index f05c4cfccf7fc..382fe7c7fa022 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -114,6 +114,10 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
 
     assert(a->getOption().matches(clang::driver::options::OPT_O));
 
+    llvm::StringRef s(a->getValue());
+    if (s == "s" || s == "z")
+      return 2;
+
     return getLastArgIntValue(args, clang::driver::options::OPT_O, defaultOpt,
                               diags);
   }
@@ -121,6 +125,24 @@ static unsigned getOptimizationLevel(llvm::opt::ArgList &args,
   return defaultOpt;
 }
 
+/// Extracts the size-optimization level from \a args (1: -Os, 2: -Oz, else 0).
+static unsigned getOptimizationLevelSize(llvm::opt::ArgList &args) {
+  if (llvm::opt::Arg *a =
+          args.getLastArg(clang::driver::options::OPT_O_Group)) {
+    if (a->getOption().matches(clang::driver::options::OPT_O)) {
+      switch (a->getValue()[0]) {
+      default:
+        return 0;
+      case 's':
+        return 1;
+      case 'z':
+        return 2;
+      }
+    }
+  }
+  return 0;
+}
+
 bool Fortran::frontend::parseDiagnosticArgs(clang::DiagnosticOptions &opts,
                                             llvm::opt::ArgList &args) {
   opts.ShowColors = parseShowColorsArgs(args);
@@ -273,6 +295,7 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
                              llvm::opt::ArgList &args,
                              clang::DiagnosticsEngine &diags) {
   opts.OptimizationLevel = getOptimizationLevel(args, diags);
+  opts.OptimizeSize = getOptimizationLevelSize(args);
 
   if (args.hasFlag(clang::driver::options::OPT_fdebug_pass_manager,
                    clang::driver::options::OPT_fno_debug_pass_manager, false))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 159d08a2797b3..d8fb466797671 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -602,7 +602,16 @@ mapToLevel(const Fortran::frontend::CodeGenOptions &opts) {
   case 1:
     return llvm::OptimizationLevel::O1;
   case 2:
-    return llvm::OptimizationLevel::O2;
+    switch (opts.OptimizeSize) {
+    default:
+      llvm_unreachable("Invalid optimization level for size!");
+    case 0:
+      return llvm::OptimizationLevel::O2;
+    case 1:
+      return llvm::OptimizationLevel::Os;
+    case 2:
+      return llvm::OptimizationLevel::Oz;
+    }
   case 3:
     return llvm::OptimizationLevel::O3;
   }
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 103e736accca0..7da3e0c0a888a 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -56,7 +56,7 @@ void addCfgConversionPass(mlir::PassManager &pm,
 
 void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) {
   ArrayValueCopyOptions options;
-  options.optimizeConflicts = optLevel.isOptimizingForSpeed();
+  options.optimizeConflicts = optLevel != llvm::OptimizationLevel::O0;
   addNestedPassConditionally<mlir::func::FuncOp>(
       pm, disableFirAvc, [&]() { return createArrayValueCopyPass(options); });
 }
@@ -169,20 +169,24 @@ void registerDefaultInlinerPass(MLIRToLLVMPassPipelineConfig &config) {
 /// \param pm - MLIR pass manager that will hold the pipeline definition
 void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
                                            MLIRToLLVMPassPipelineConfig &pc) {
+  llvm::OptimizationLevel optLevel = pc.OptLevel;
+  unsigned speedupLevel = optLevel.getSpeedupLevel();
+  unsigned sizeLevel = optLevel.getSizeLevel();
+
   // Early Optimizer EP Callback
-  pc.invokeFIROptEarlyEPCallbacks(pm, pc.OptLevel);
+  pc.invokeFIROptEarlyEPCallbacks(pm, optLevel);
 
   // simplify the IR
   mlir::GreedyRewriteConfig config;
   config.setRegionSimplificationLevel(
       mlir::GreedySimplifyRegionLevel::Disabled);
   pm.addPass(mlir::createCSEPass());
-  fir::addAVC(pm, pc.OptLevel);
+  fir::addAVC(pm, optLevel);
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createCharacterConversion);
   pm.addPass(mlir::createCanonicalizerPass(config));
   pm.addPass(fir::createSimplifyRegionLite());
-  if (pc.OptLevel.isOptimizingForSpeed()) {
+  if (speedupLevel && !sizeLevel) {
     // These passes may increase code size.
     pm.addPass(fir::createSimplifyIntrinsics());
     pm.addPass(fir::createAlgebraicSimplificationPass(config));
@@ -190,7 +194,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
       pm.addPass(fir::createConstantArgumentGlobalisationOpt());
   }
 
-  if (pc.LoopVersioning)
+  if (pc.LoopVersioning && !sizeLevel)
     pm.addPass(fir::createLoopVersioning());
 
   pm.addPass(mlir::createCSEPass());
@@ -201,7 +205,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     fir::addMemoryAllocationOpt(pm);
 
   // FIR Inliner Callback
-  pc.invokeFIRInlinerCallback(pm, pc.OptLevel);
+  pc.invokeFIRInlinerCallback(pm, optLevel);
 
   pm.addPass(fir::createSimplifyRegionLite());
   pm.addPass(mlir::createCSEPass());
@@ -212,13 +216,13 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 
   // Optimize redundant array repacking operations,
   // if the source is known to be contiguous.
-  if (pc.OptLevel.isOptimizingForSpeed())
+  if (speedupLevel)
     pm.addPass(fir::createOptimizeArrayRepacking());
   pm.addPass(fir::createLowerRepackArraysPass());
   // Expand FIR operations that may use SCF dialect for their
   // implementation. This is a mandatory pass.
-  pm.addPass(fir::createSimplifyFIROperations(
-      {/*preferInlineImplementation=*/pc.OptLevel.isOptimizingForSpeed()}));
+  bool preferInlineImplementation = speedupLevel && !sizeLevel;
+  pm.addPass(fir::createSimplifyFIROperations({preferInlineImplementation}));
 
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createStackReclaim);
@@ -232,11 +236,11 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
     pm.addPass(fir::createConvertComplexPow());
   pm.addPass(mlir::createCSEPass());
 
-  if (pc.OptLevel.isOptimizingForSpeed())
+  if (speedupLevel)
     pm.addPass(fir::createSetRuntimeCallAttributes());
 
   // Last Optimizer EP Callback
-  pc.invokeFIROptLastEPCallbacks(pm, pc.OptLevel);
+  pc.invokeFIROptLastEPCallbacks(pm, optLevel);
 }
 
 /// Create a pass pipeline for lowering from HLFIR to FIR
@@ -247,18 +251,24 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
                                   EnableOpenMP enableOpenMP,
                                   llvm::OptimizationLevel optLevel) {
-  if (optLevel.getSizeLevel() > 0 || optLevel.getSpeedupLevel() > 0) {
+  // If sizeLevel > 0 (this is the case when either -Os or -Oz is provided on
+  // the command line), the speedupLevel is guaranteed to be 2.
+  unsigned speedupLevel = optLevel.getSpeedupLevel();
+  unsigned sizeLevel = optLevel.getSizeLevel();
+
+  if (speedupLevel) {
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createExpressionSimplification);
   }
-  if (optLevel.isOptimizingForSpeed()) {
+  if (speedupLevel) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createSimplifyHLFIRIntrinsics);
   }
-  addNestedPassToAllTopLevelOperations<PassConstructor>(
-      pm, hlfir::createInlineElementals);
-  if (optLevel.isOptimizingForSpeed()) {
+  if (!sizeLevel)
+    addNestedPassToAllTopLevelOperations<PassConstructor>(
+        pm, hlfir::createInlineElementals);
+  if (speedupLevel) {
     addCanonicalizerPassWithoutRegionSimplification(pm);
     pm.addPass(mlir::createCSEPass());
     // Run SimplifyHLFIRIntrinsics pass late after CSE,
@@ -271,8 +281,9 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
         pm, hlfir::createPropagateFortranVariableAttributes);
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createOptimizedBufferization);
-    addNestedPassToAllTopLevelOperations<PassConstructor>(
-        pm, hlfir::createInlineHLFIRAssign);
+    if (!sizeLevel)
+      addNestedPassToAllTopLevelOperations<PassConstructor>(
+          pm, hlfir::createInlineHLFIRAssign);
 
     if (optLevel == llvm::OptimizationLevel::O3) {
       addNestedPassToAllTopLevelOperations<PassConstructor>(
@@ -287,7 +298,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
   // from hlfir.elemental lowering, if the result is an empty array.
   // This helps to avoid long running loops for elementals with
   // shapes like (0, HUGE).
-  if (optLevel.isOptimizingForSpeed())
+  if (speedupLevel)
     bufferizeOptions.optimizeEmptyElementals = true;
   pm.addPass(hlfir::createBufferizeHLFIR(bufferizeOptions));
   // Run hlfir.assign inlining again after BufferizeHLFIR,
@@ -297,7 +308,7 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm,
   // TODO: we can remove the previous InlineHLFIRAssign, when
   // FIR AliasAnalysis is good enough to say that a temporary
   // array does not alias with any user object.
-  if (optLevel.isOptimizingForSpeed())
+  if (speedupLevel && !sizeLevel)
     addNestedPassToAllTopLevelOperations<PassConstructor>(
         pm, hlfir::createInlineHLFIRAssign);
   pm.addPass(hlfir::createConvertHLFIRtoFIR());
@@ -354,10 +365,12 @@ void createDebugPasses(mlir::PassManager &pm,
 void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
                                          MLIRToLLVMPassPipelineConfig config,
                                          llvm::StringRef inputFilename) {
+  unsigned speedupLevel = config.OptLevel.getSpeedupLevel();
+
   pm.addPass(fir::createMIFOpConversion());
   fir::addBoxedProcedurePass(pm);
-  if (config.OptLevel.isOptimizingForSpeed() && config.AliasAnalysis &&
-      !disableFirAliasTags && !useOldAliasTags)
+  if (speedupLevel && config.AliasAnalysis && !disableFirAliasTags &&
+      !useOldAliasTags)
     pm.addPass(fir::createAddAliasTags());
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createAbstractResultOpt);
@@ -389,7 +402,7 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
   // TODO: re-enable setNoAlias by default (when optimizing for speed) once
   // function specialization is fixed.
   bool setNoAlias = forceNoAlias;
-  bool setNoCapture = config.OptLevel.isOptimizingForSpeed();
+  bool setNoCapture = speedupLevel;
 
   pm.addPass(fir::createFunctionAttr(
       {framePointerKind, config.InstrumentFunctionEntry,
diff --git a/flang/test/Driver/default-optimization-pipelines.f90 b/flang/test/Driver/default-optimization-pipelines.f90
index 08e407f73da5c..18108cd632220 100644
--- a/flang/test/Driver/default-optimization-pipelines.f90
+++ b/flang/test/Driver/default-optimization-pipelines.f90
@@ -14,10 +14,16 @@
 ! RUN: %flang_fc1 -S -O2 %s -flto=full -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-LTO
 ! RUN: %flang_fc1 -S -O2 %s -flto=thin -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O2-THINLTO
 
-! Verify that only the left-most `-O{n}` is used
+! Verify that only the right-most `-O{n}` is used
 ! RUN: %flang -S -O2 -O0 %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
 ! RUN: %flang_fc1 -S -O2 -O0 %s -fdebug-pass-manager -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-O0
 
+! Verify that passing -Os/-Oz has the desired effect on the pass pipelines.
+! RUN: %flang -S -Os %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN:     | FileCheck %s --check-prefix=CHECK-OSIZE
+! RUN: %flang -S -Oz %s -Xflang -fdebug-pass-manager -o /dev/null 2>&1 \
+! RUN:     | FileCheck %s --check-prefix=CHECK-OSIZE
+
 ! CHECK-O0-NOT: Running pass: SimplifyCFGPass on simple_loop_
 ! CHECK-O0: Running analysis: TargetLibraryAnalysis on simple_loop_
 ! CHECK-O0-ANYLTO: Running pass: CanonicalizeAliasesPass on [module]
@@ -33,6 +39,12 @@
 ! CHECK-O2-THINLTO: Running pass: CanonicalizeAliasesPass on [module]
 ! CHECK-O2-THINLTO: Running pass: NameAnonGlobalPass on [module]
 
+! -Os/-Oz imply -O2, so check that a pass that runs at -O2 is run. Then check
+! that passes like LibCallsShrinkWrap, which should not be run when optimizing
+! for size, are not run (see llvm/lib/Passes/PassBuilderPipelines.cpp).
+! CHECK-OSIZE: Running pass: SimplifyCFGPass on simple_loop_
+! CHECK-OSIZE-NOT: Running pass: LibCallsShrinkWrapPass on simple_loop_
+
 subroutine simple_loop
   integer :: i
   do i=1,5

>From 5d29984c3fae8e4d9cab779d8f5416081f0a6757 Mon Sep 17 00:00:00 2001
From: Tarun Prabhu <tarun.prabhu at gmail.com>
Date: Fri, 7 Nov 2025 04:23:03 -0700
Subject: [PATCH 2/2] Test MLIR pipeline

---
 flang/test/Driver/mlir-pass-pipeline.f90 | 373 ++++++++++++-----------
 1 file changed, 203 insertions(+), 170 deletions(-)

diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 3b6a9d7cda7ed..24462642e983d 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -1,173 +1,206 @@
-! Test the MLIR pass pipeline
-
-! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline -o /dev/null %s 2>&1 | FileCheck --check-prefixes=ALL %s
-! -O0 is the default:
-! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline %s -O0 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL %s
-! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline %s -O2 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL,O2 %s
-
 ! REQUIRES: asserts
-
-end program
-
-! ALL: Pass statistics report
-! ALL: Fortran::lower::VerifierPass
-
-! ALL: Pass statistics report
-
-! ALL: Fortran::lower::VerifierPass
-! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
-! O2-NEXT: 'fir.global' Pipeline
-! O2-NEXT:   ExpressionSimplification
-! O2-NEXT: 'func.func' Pipeline
-! O2-NEXT:   ExpressionSimplification
-! O2-NEXT: 'omp.declare_reduction' Pipeline
-! O2-NEXT:   ExpressionSimplification
-! O2-NEXT: 'omp.private' Pipeline
-! O2-NEXT:   ExpressionSimplification
-! O2-NEXT: Canonicalizer
-! ALL:     Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
-! ALL-NEXT:'fir.global' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! ALL:       InlineElementals
-! ALL-NEXT:'func.func' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! ALL:       InlineElementals
-! ALL-NEXT:'omp.declare_reduction' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! ALL:       InlineElementals
-! ALL-NEXT:'omp.private' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! ALL:       InlineElementals
-! O2-NEXT: Canonicalizer
-! O2-NEXT: CSE
-! O2-NEXT: (S) {{.*}} num-cse'd
-! O2-NEXT: (S) {{.*}} num-dce'd
-! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
-! O2-NEXT: 'fir.global' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! O2-NEXT:   PropagateFortranVariableAttributes
-! O2-NEXT:   OptimizedBufferization
-! O2-NEXT:   InlineHLFIRAssign
-! O2-NEXT: 'func.func' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! O2-NEXT:   PropagateFortranVariableAttributes
-! O2-NEXT:   OptimizedBufferization
-! O2-NEXT:   InlineHLFIRAssign
-! O2-NEXT: 'omp.declare_reduction' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! O2-NEXT:   PropagateFortranVariableAttributes
-! O2-NEXT:   OptimizedBufferization
-! O2-NEXT:   InlineHLFIRAssign
-! O2-NEXT: 'omp.private' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! O2-NEXT:   PropagateFortranVariableAttributes
-! O2-NEXT:   OptimizedBufferization
-! O2-NEXT:   InlineHLFIRAssign
-! ALL: LowerHLFIROrderedAssignments
-! ALL-NEXT: LowerHLFIRIntrinsics
-! ALL-NEXT: BufferizeHLFIR
-! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
-! O2-NEXT:   'fir.global' Pipeline
-! O2-NEXT:     InlineHLFIRAssign
-! O2-NEXT:   'func.func' Pipeline
-! O2-NEXT:     InlineHLFIRAssign
-! O2-NEXT:   'omp.declare_reduction' Pipeline
-! O2-NEXT:     InlineHLFIRAssign
-! O2-NEXT:   'omp.private' Pipeline
-! O2-NEXT:     InlineHLFIRAssign
-! ALL-NEXT: ConvertHLFIRtoFIR
-! ALL-NEXT: CSE
-! Ideally, we need an output with only the pass names, but
-! there is currently no way to get that, so in order to
-! guarantee that the passes are in the expected order
-! (i.e. use -NEXT) we have to check the statistics output as well.
-! ALL-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
-! ALL-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
-
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
-! ALL-NEXT: 'fir.global' Pipeline
-! ALL-NEXT:   CharacterConversion
-! ALL-NEXT: 'func.func' Pipeline
-! ALL-NEXT:   ArrayValueCopy
-! ALL-NEXT:   CharacterConversion
-! ALL-NEXT: 'omp.declare_reduction' Pipeline
-! ALL-NEXT:   CharacterConversion
-! ALL-NEXT: 'omp.private' Pipeline
-! ALL-NEXT:   CharacterConversion
-
-! ALL-NEXT: Canonicalizer
-! ALL-NEXT: SimplifyRegionLite
-!  O2-NEXT: SimplifyIntrinsics
-!  O2-NEXT: AlgebraicSimplification
-! ALL-NEXT: CSE
-! ALL-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
-! ALL-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
-
-! ALL-NEXT: 'func.func' Pipeline
-! ALL-NEXT:   MemoryAllocationOpt
-
-! ALL-NEXT: Inliner
-! ALL-NEXT: SimplifyRegionLite
-! ALL-NEXT: CSE
-! ALL-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
-! ALL-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
-
-! ALL-NEXT: PolymorphicOpConversion
-! ALL-NEXT: AssumedRankOpConversion
-! O2-NEXT:  'func.func' Pipeline
-! O2-NEXT:    OptimizeArrayRepacking
-! ALL-NEXT: LowerRepackArraysPass
-! ALL-NEXT: SimplifyFIROperations
-
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
-! ALL-NEXT:    'fir.global' Pipeline
-! ALL-NEXT:      StackReclaim
-! ALL-NEXT:      CFGConversion
+!
+! Test the MLIR pass pipeline
+!
+! The table below summarizes the relationship between the optimization levels
+! -O<N> and the integer speedup and size levels.
+!
+!          .------------------------------.
+!          |  Level  |  Speedup  |  Size  |
+!          |------------------------------|
+!          |   -O0   |     0     |   0    |
+!          |   -O1   |     1     |   0    |
+!          |   -O2   |     2     |   0    |
+!          |   -O3   |     3     |   0    |
+!          |   -Os   |     2     |   1    |
+!          |   -Oz   |     2     |   2    |
+!          '------------------------------'
+!
+! Since the speedup level for -Os and -Oz is the same as that of -O2, most of
+! the passes that are run at -O2 are run at -Os and -Oz as well, except for any
+! passes that might increase the size of the code. The names of the FileCheck
+! prefixes in the RUN lines below indicate at which optimization levels the
+! corresponding output is expected. For instance:
+!
+!     ALL    O0, O1, O2, O3, Os, Oz (note that -O1 and -O3 are not explicitly
+!            tested here)
+!     O2     O2, Os, and Oz
+!     O02    O0 and O2, but not Os or Oz
+!     O2SZ   O2 only, but not Os or Oz
+!
+! NOTE: At the time of writing, the same set of MLIR passes is run for both
+! optimization levels -Os and -Oz.
+!
+! RUN: %flang_fc1 -S -o /dev/null %s -mmlir --mlir-pass-statistics \
+! RUN:     -mmlir --mlir-pass-statistics-display=pipeline 2>&1 \
+! RUN:     | FileCheck --check-prefixes=ALL,O02 %s
+!
+! RUN: %flang_fc1 -O0 -S -o /dev/null %s -mmlir --mlir-pass-statistics \
+! RUN:     -mmlir --mlir-pass-statistics-display=pipeline 2>&1 \
+! RUN:     | FileCheck --check-prefixes=ALL,O02 %s
+!
+! RUN: %flang_fc1 -O2 -S -o /dev/null %s -mmlir --mlir-pass-statistics \
+! RUN:     -mmlir --mlir-pass-statistics-display=pipeline 2>&1 \
+! RUN:     | FileCheck --check-prefixes=ALL,O02,O2,O2SZ %s
+!
+! RUN: %flang_fc1 -Os -S -o /dev/null %s -mmlir --mlir-pass-statistics \
+! RUN:     -mmlir --mlir-pass-statistics-display=pipeline 2>&1 \
+! RUN:     | FileCheck --check-prefixes=ALL,O2 %s
+!
+! RUN: %flang_fc1 -Oz -S -o /dev/null %s -mmlir --mlir-pass-statistics \
+! RUN:     -mmlir --mlir-pass-statistics-display=pipeline 2>&1 \
+! RUN:     | FileCheck --check-prefixes=ALL,O2 %s
+!
+! Ideally, we need an output with only the pass names, but there is currently no
+! way to get that, so in order to guarantee that the passes are in the expected
+! order (i.e. use -NEXT) we have to check the statistics output as well.
+!
+! ALL:         Pass statistics report
+! ALL:         Fortran::lower::VerifierPass
+! ALL:         Pass statistics report
+! ALL:         Fortran::lower::VerifierPass
+! O2-NEXT:     Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! O2-NEXT:       'fir.global' Pipeline
+! O2-NEXT:         ExpressionSimplification
+! O2-NEXT:       'func.func' Pipeline
+! O2-NEXT:         ExpressionSimplification
+! O2-NEXT:       'omp.declare_reduction' Pipeline
+! O2-NEXT:         ExpressionSimplification
+! O2-NEXT:       'omp.private' Pipeline
+! O2-NEXT:         ExpressionSimplification
+! O2-NEXT:     Canonicalizer
+! ALL-NEXT:    Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT:      'fir.global' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O02-NEXT:        InlineElementals
+! ALL-NEXT:      'func.func' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O02-NEXT:        InlineElementals
+! ALL-NEXT:      'omp.declare_reduction' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O02-NEXT:        InlineElementals
+! ALL-NEXT:      'omp.private' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O02-NEXT:        InlineElementals
+! O2-NEXT:     Canonicalizer
+! O2-NEXT:     CSE
+! O2-NEXT:       (S) {{.*}} num-cse'd
+! O2-NEXT:       (S) {{.*}} num-dce'd
+! O2-NEXT:     Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! O2-NEXT:       'fir.global' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O2-NEXT:         PropagateFortranVariableAttributes
+! O2-NEXT:         OptimizedBufferization
+! O2SZ-NEXT:       InlineHLFIRAssign
+! O2-NEXT:       'func.func' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O2-NEXT:         PropagateFortranVariableAttributes
+! O2-NEXT:         OptimizedBufferization
+! O2SZ-NEXT:       InlineHLFIRAssign
+! O2-NEXT:       'omp.declare_reduction' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O2-NEXT:         PropagateFortranVariableAttributes
+! O2-NEXT:         OptimizedBufferization
+! O2SZ-NEXT:       InlineHLFIRAssign
+! O2-NEXT:       'omp.private' Pipeline
+! O2-NEXT:         SimplifyHLFIRIntrinsics
+! O2-NEXT:         PropagateFortranVariableAttributes
+! O2-NEXT:         OptimizedBufferization
+! O2SZ-NEXT:       InlineHLFIRAssign
+! ALL:         LowerHLFIROrderedAssignments
+! ALL-NEXT:    LowerHLFIRIntrinsics
+! ALL-NEXT:    BufferizeHLFIR
+! O2SZ-NEXT:   Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! O2SZ-NEXT:     'fir.global' Pipeline
+! O2SZ-NEXT:       InlineHLFIRAssign
+! O2SZ-NEXT:     'func.func' Pipeline
+! O2SZ-NEXT:       InlineHLFIRAssign
+! O2SZ-NEXT:     'omp.declare_reduction' Pipeline
+! O2SZ-NEXT:       InlineHLFIRAssign
+! O2SZ-NEXT:     'omp.private' Pipeline
+! O2SZ-NEXT:       InlineHLFIRAssign
+! ALL-NEXT:    ConvertHLFIRtoFIR
+! ALL-NEXT:    CSE
+! ALL-NEXT:      (S) 0 num-cse'd - Number of operations CSE'd
+! ALL-NEXT:      (S) 0 num-dce'd - Number of operations DCE'd
+! ALL-NEXT:    Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT:      'fir.global' Pipeline
+! ALL-NEXT:        CharacterConversion
+! ALL-NEXT:      'func.func' Pipeline
+! ALL-NEXT:        ArrayValueCopy
+! ALL-NEXT:        CharacterConversion
+! ALL-NEXT:      'omp.declare_reduction' Pipeline
+! ALL-NEXT:        CharacterConversion
+! ALL-NEXT:      'omp.private' Pipeline
+! ALL-NEXT:        CharacterConversion
+! ALL-NEXT:    Canonicalizer
+! ALL-NEXT:    SimplifyRegionLite
+! O2SZ-NEXT:   SimplifyIntrinsics
+! O2SZ-NEXT:   AlgebraicSimplification
+! ALL-NEXT:    CSE
+! ALL-NEXT:      (S) 0 num-cse'd - Number of operations CSE'd
+! ALL-NEXT:      (S) 0 num-dce'd - Number of operations DCE'd
 ! ALL-NEXT:    'func.func' Pipeline
-! ALL-NEXT:      StackReclaim
-! ALL-NEXT:      CFGConversion
-! ALL-NEXT:   'omp.declare_reduction' Pipeline
-! ALL-NEXT:      StackReclaim
-! ALL-NEXT:      CFGConversion
-! ALL-NEXT:   'omp.private' Pipeline
-! ALL-NEXT:      StackReclaim
-! ALL-NEXT:      CFGConversion
-
-! ALL-NEXT: SCFToControlFlow
-! ALL-NEXT: Canonicalizer
-! ALL-NEXT: SimplifyRegionLite
-! ALL-NEXT: ConvertComplexPow
-! ALL-NEXT: CSE
-! ALL-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
-! ALL-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
-! O2-NEXT:  'func.func' Pipeline
-! O2-NEXT:    SetRuntimeCallAttributes
-! ALL-NEXT: MIFOpConversion 
-! ALL-NEXT: BoxedProcedurePass
-! O2-NEXT:  AddAliasTags
-
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
-! ALL-NEXT:   'fir.global' Pipeline
-! ALL-NEXT:    AbstractResultOpt
-! ALL-NEXT:  'func.func' Pipeline
-! ALL-NEXT:    AbstractResultOpt
-! ALL-NEXT:  'gpu.module' Pipeline
-! ALL-NEXT:   Pipeline Collection : ['func.func', 'gpu.func'] 
-! ALL-NEXT:   'func.func' Pipeline 
-! ALL-NEXT:   AbstractResultOpt
-! ALL-NEXT:   'gpu.func' Pipeline 
-! ALL-NEXT:   AbstractResultOpt
-! ALL-NEXT:  'omp.declare_reduction' Pipeline
-! ALL-NEXT:    AbstractResultOpt
-! ALL-NEXT:  'omp.private' Pipeline
-! ALL-NEXT:    AbstractResultOpt
+! ALL-NEXT:      MemoryAllocationOpt
+! ALL-NEXT:    Inliner
+! ALL-NEXT:    SimplifyRegionLite
+! ALL-NEXT:    CSE
+! ALL-NEXT:      (S) 0 num-cse'd - Number of operations CSE'd
+! ALL-NEXT:      (S) 0 num-dce'd - Number of operations DCE'd
+! ALL-NEXT:    PolymorphicOpConversion
+! ALL-NEXT:    AssumedRankOpConversion
+! O2-NEXT:     'func.func' Pipeline
+! O2-NEXT:       OptimizeArrayRepacking
+! ALL-NEXT:    LowerRepackArraysPass
+! ALL-NEXT:    SimplifyFIROperations
+! ALL-NEXT:    Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT:      'fir.global' Pipeline
+! ALL-NEXT:        StackReclaim
+! ALL-NEXT:        CFGConversion
+! ALL-NEXT:      'func.func' Pipeline
+! ALL-NEXT:        StackReclaim
+! ALL-NEXT:        CFGConversion
+! ALL-NEXT:      'omp.declare_reduction' Pipeline
+! ALL-NEXT:        StackReclaim
+! ALL-NEXT:        CFGConversion
+! ALL-NEXT:      'omp.private' Pipeline
+! ALL-NEXT:        StackReclaim
+! ALL-NEXT:        CFGConversion
+! ALL-NEXT:    SCFToControlFlow
+! ALL-NEXT:    Canonicalizer
+! ALL-NEXT:    SimplifyRegionLite
+! ALL-NEXT:    ConvertComplexPow
+! ALL-NEXT:    CSE
+! ALL-NEXT:      (S) 0 num-cse'd - Number of operations CSE'd
+! ALL-NEXT:      (S) 0 num-dce'd - Number of operations DCE'd
+! O2-NEXT:     'func.func' Pipeline
+! O2-NEXT:       SetRuntimeCallAttributes
+! ALL-NEXT:    MIFOpConversion
+! ALL-NEXT:    BoxedProcedurePass
+! O2-NEXT:     AddAliasTags
+! ALL-NEXT:   Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT:     'fir.global' Pipeline
+! ALL-NEXT:        AbstractResultOpt
+! ALL-NEXT:     'func.func' Pipeline
+! ALL-NEXT:        AbstractResultOpt
+! ALL-NEXT:     'gpu.module' Pipeline
+! ALL-NEXT:       Pipeline Collection : ['func.func', 'gpu.func']
+! ALL-NEXT:         'func.func' Pipeline
+! ALL-NEXT:           AbstractResultOpt
+! ALL-NEXT:         'gpu.func' Pipeline
+! ALL-NEXT:           AbstractResultOpt
+! ALL-NEXT:     'omp.declare_reduction' Pipeline
+! ALL-NEXT:       AbstractResultOpt
+! ALL-NEXT:     'omp.private' Pipeline
+! ALL-NEXT:       AbstractResultOpt
+! ALL-NEXT:    CodeGenRewrite
+! ALL-NEXT:      (S) 0 num-dce'd - Number of operations eliminated
+! ALL-NEXT:    ExternalNameConversion
+! ALL-NEXT:    TargetRewrite
+! ALL-NEXT:    CompilerGeneratedNamesConversion
+! ALL-NEXT:    'func.func' Pipeline
+! ALL-NEXT:      FunctionAttr
+! ALL-NEXT:    FIRToLLVMLowering
+! ALL-NOT:     LLVMIRLoweringPass
 
-! ALL-NEXT: CodeGenRewrite
-! ALL-NEXT:   (S) 0 num-dce'd - Number of operations eliminated
-! ALL-NEXT: ExternalNameConversion
-! ALL-NEXT: TargetRewrite
-! ALL-NEXT: CompilerGeneratedNamesConversion
-! ALL-NEXT:  'func.func' Pipeline
-! ALL-NEXT:   FunctionAttr
-! ALL-NEXT: FIRToLLVMLowering
-! ALL-NOT: LLVMIRLoweringPass
+end program