[flang-commits] [flang] [flang][fir] Add affine optimization pass pipeline. (PR #138627)

Mon May 5 22:46:58 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-flang-openmp

Author: MingYan (NexMing)

<details>
<summary>Changes</summary>

Currently, the FIR dialect is lowered directly to the LLVM dialect. Instead, we can first convert the FIR dialect to the Affine dialect, perform optimizations on top of it, and then lower the result back to the FIR dialect. The optimization passes are currently experimental, so it's important to actively identify and address issues.

---
Full diff: https://github.com/llvm/llvm-project/pull/138627.diff


6 Files Affected:

- (modified) flang/include/flang/Optimizer/Passes/CommandLineOpts.h (+1) 
- (modified) flang/include/flang/Optimizer/Passes/Pipelines.h (+2-2) 
- (modified) flang/lib/Optimizer/Passes/CMakeLists.txt (+1) 
- (modified) flang/lib/Optimizer/Passes/CommandLineOpts.cpp (+1) 
- (modified) flang/lib/Optimizer/Passes/Pipelines.cpp (+17) 
- (added) flang/test/Lower/OpenMP/auto-omp.f90 (+52) 


``````````diff

diff --git a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
index 1cfaf285e75e6..320c561953213 100644
--- a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
+++ b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
@@ -42,6 +42,7 @@ extern llvm::cl::opt<bool> disableCfgConversion;
 extern llvm::cl::opt<bool> disableFirAvc;
 extern llvm::cl::opt<bool> disableFirMao;
 
+extern llvm::cl::opt<bool> enableAffineOpt;
 extern llvm::cl::opt<bool> disableFirAliasTags;
 extern llvm::cl::opt<bool> useOldAliasTags;
 
diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h
index a3f59ee8dd013..5c87b1ce609ef 100644
--- a/flang/include/flang/Optimizer/Passes/Pipelines.h
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@@ -18,8 +18,8 @@
 #include "flang/Optimizer/Passes/CommandLineOpts.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "flang/Tools/CrossToolHelpers.h"
-#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
-#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/Passes.h"
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Pass/PassManager.h"
diff --git a/flang/lib/Optimizer/Passes/CMakeLists.txt b/flang/lib/Optimizer/Passes/CMakeLists.txt
index 1c19a5765aff1..ad6c714c28bec 100644
--- a/flang/lib/Optimizer/Passes/CMakeLists.txt
+++ b/flang/lib/Optimizer/Passes/CMakeLists.txt
@@ -21,6 +21,7 @@ add_flang_library(flangPasses
   MLIRPass
   MLIRReconcileUnrealizedCasts
   MLIRSCFToControlFlow
+  MLIRSCFToOpenMP
   MLIRSupport
   MLIRTransforms
 )
diff --git a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
index f95a280883cba..b8ae6ede423e3 100644
--- a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
+++ b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
@@ -55,6 +55,7 @@ cl::opt<bool> useOldAliasTags(
     cl::desc("Use a single TBAA tree for all functions and do not use "
              "the FIR alias tags pass"),
     cl::init(false), cl::Hidden);
+EnableOption(AffineOpt, "affine-opt", "affine optimization");
 
 /// CodeGen Passes
 DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index a3ef473ea39b7..e1653cdb1e874 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -211,6 +211,23 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
 
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createStackReclaim);
+
+  if (enableAffineOpt && pc.OptLevel.isOptimizingForSpeed()) {
+    pm.addPass(fir::createPromoteToAffinePass());
+    pm.addPass(mlir::createCSEPass());
+    pm.addPass(mlir::affine::createAffineLoopInvariantCodeMotionPass());
+    pm.addPass(mlir::affine::createAffineLoopNormalizePass());
+    pm.addPass(mlir::affine::createSimplifyAffineStructuresPass());
+    pm.addPass(mlir::affine::createAffineParallelize(
+        mlir::affine::AffineParallelizeOptions{1, false}));
+    pm.addPass(fir::createAffineDemotionPass());
+    pm.addPass(mlir::createLowerAffinePass());
+    if (pc.EnableOpenMP) {
+      pm.addPass(mlir::createConvertSCFToOpenMPPass());
+      pm.addPass(mlir::createCanonicalizerPass());
+    }
+  }
+
   // convert control flow to CFG form
   fir::addCfgConversionPass(pm, pc);
   pm.addPass(mlir::createSCFToControlFlowPass());
diff --git a/flang/test/Lower/OpenMP/auto-omp.f90 b/flang/test/Lower/OpenMP/auto-omp.f90
new file mode 100644
index 0000000000000..d66e6c3f3a3a0
--- /dev/null
+++ b/flang/test/Lower/OpenMP/auto-omp.f90
@@ -0,0 +1,52 @@
+! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
+! RUN: | FileCheck %s
+
+subroutine foo(a)
+  integer, dimension(100, 100), intent(out) :: a
+  a = 1
+end subroutine foo
+
+!CHECK-LABEL: entry:
+!CHECK:         %[[VAL_0:.*]] = alloca { ptr }, align 8
+!CHECK:         %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK:         store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8
+!CHECK:         call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]])
+!CHECK:         ret void
+!CHECK:       omp.par.entry:
+!CHECK:         %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3
+!CHECK:         %[[VAL_5:.*]] = alloca i32, align 4
+!CHECK:         %[[VAL_6:.*]] = alloca i64, align 8
+!CHECK:         %[[VAL_7:.*]] = alloca i64, align 8
+!CHECK:         %[[VAL_8:.*]] = alloca i64, align 8
+!CHECK:         store i64 0, ptr %[[VAL_6]], align 8
+!CHECK:         store i64 99, ptr %[[VAL_7]], align 8
+!CHECK:         store i64 1, ptr %[[VAL_8]], align 8
+!CHECK:         %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK:         call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0)
+!CHECK:         %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8
+!CHECK:         %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8
+!CHECK:         %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]]
+!CHECK:         %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1
+!CHECK:         br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]]
+!CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]]
+!CHECK:         call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]])
+!CHECK:         %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK:         call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]])
+!CHECK:         ret void
+!CHECK:       omp_loop.body:                                    ; preds = %[[VAL_17]], %[[VAL_16]]
+!CHECK:         %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ]
+!CHECK:         %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]]
+!CHECK:         %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400
+!CHECK:         %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]]
+!CHECK:         br label %[[VAL_24:.*]]
+!CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_24]]
+!CHECK:         %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1
+!CHECK:         %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]]
+!CHECK:         br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]]
+!CHECK:       omp.loop_nest.region6:                            ; preds = %[[VAL_15]], %[[VAL_24]]
+!CHECK:         %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ]
+!CHECK:         %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]]
+!CHECK:         store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4
+!CHECK:         %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1
+!CHECK:         %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100
+!CHECK:         br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/138627