[flang-commits] [flang] [flang][fir] Add affine optimization pass pipeline. (PR #138627)

Thu May 8 01:19:23 PDT 2025

https://github.com/NexMing updated https://github.com/llvm/llvm-project/pull/138627

>From ea6a6e5721d301647770ef05548555b05f1092f7 Mon Sep 17 00:00:00 2001
From: yanming <ming.yan at terapines.com>
Date: Wed, 30 Apr 2025 16:32:14 +0800
Subject: [PATCH 1/2] [flang][fir] Add affine optimization pass pipeline.

---
 .../flang/Optimizer/Passes/CommandLineOpts.h  |  1 +
 .../flang/Optimizer/Passes/Pipelines.h        |  3 ++
 flang/lib/Optimizer/Passes/CMakeLists.txt     |  1 +
 .../lib/Optimizer/Passes/CommandLineOpts.cpp  |  1 +
 flang/lib/Optimizer/Passes/Pipelines.cpp      | 17 ++++++
 flang/test/Driver/mlir-pass-pipeline.f90      | 14 +++++
 flang/test/Integration/OpenMP/auto-omp.f90    | 52 +++++++++++++++++++
 7 files changed, 89 insertions(+)
 create mode 100644 flang/test/Integration/OpenMP/auto-omp.f90

diff --git a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
index 1cfaf285e75e6..320c561953213 100644
--- a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
+++ b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
@@ -42,6 +42,7 @@ extern llvm::cl::opt<bool> disableCfgConversion;
 extern llvm::cl::opt<bool> disableFirAvc;
 extern llvm::cl::opt<bool> disableFirMao;
 
+extern llvm::cl::opt<bool> enableAffineOpt;
 extern llvm::cl::opt<bool> disableFirAliasTags;
 extern llvm::cl::opt<bool> useOldAliasTags;
 
diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h
index a3f59ee8dd013..7680987367256 100644
--- a/flang/include/flang/Optimizer/Passes/Pipelines.h
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@@ -18,8 +18,11 @@
 #include "flang/Optimizer/Passes/CommandLineOpts.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "flang/Tools/CrossToolHelpers.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
 #include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
 #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/SCFToOpenMP/SCFToOpenMP.h"
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Pass/PassManager.h"
diff --git a/flang/lib/Optimizer/Passes/CMakeLists.txt b/flang/lib/Optimizer/Passes/CMakeLists.txt
index 1c19a5765aff1..ad6c714c28bec 100644
--- a/flang/lib/Optimizer/Passes/CMakeLists.txt
+++ b/flang/lib/Optimizer/Passes/CMakeLists.txt
@@ -21,6 +21,7 @@ add_flang_library(flangPasses
   MLIRPass
   MLIRReconcileUnrealizedCasts
   MLIRSCFToControlFlow
+  MLIRSCFToOpenMP
   MLIRSupport
   MLIRTransforms
 )
diff --git a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
index f95a280883cba..b8ae6ede423e3 100644
--- a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
+++ b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
@@ -55,6 +55,7 @@ cl::opt<bool> useOldAliasTags(
     cl::desc("Use a single TBAA tree for all functions and do not use "
              "the FIR alias tags pass"),
     cl::init(false), cl::Hidden);
+EnableOption(AffineOpt, "affine-opt", "affine optimization");
 
 /// CodeGen Passes
 DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index a3ef473ea39b7..f85de45f6029d 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -209,8 +209,25 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
   if (pc.AliasAnalysis && !disableFirAliasTags && !useOldAliasTags)
     pm.addPass(fir::createAddAliasTags());
 
+  if (enableAffineOpt && pc.OptLevel.isOptimizingForSpeed()) {
+    pm.addPass(fir::createPromoteToAffinePass());
+    pm.addPass(mlir::createCSEPass());
+    pm.addPass(mlir::affine::createAffineLoopInvariantCodeMotionPass());
+    pm.addPass(mlir::affine::createAffineLoopNormalizePass());
+    pm.addPass(mlir::affine::createSimplifyAffineStructuresPass());
+    pm.addPass(mlir::affine::createAffineParallelize(
+        mlir::affine::AffineParallelizeOptions{1, false}));
+    pm.addPass(fir::createAffineDemotionPass());
+    pm.addPass(mlir::createLowerAffinePass());
+    if (pc.EnableOpenMP) {
+      pm.addPass(mlir::createConvertSCFToOpenMPPass());
+      pm.addPass(mlir::createCanonicalizerPass());
+    }
+  }
+
   addNestedPassToAllTopLevelOperations<PassConstructor>(
       pm, fir::createStackReclaim);
+
   // convert control flow to CFG form
   fir::addCfgConversionPass(pm, pc);
   pm.addPass(mlir::createSCFToControlFlowPass());
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 45370895db397..188a42d231500 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -4,6 +4,7 @@
 ! -O0 is the default:
 ! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline %s -O0 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL %s
 ! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline %s -O2 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL,O2 %s
+! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline -mllvm --enable-affine-opt %s -O2 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL,O2,AFFINE %s
 
 ! REQUIRES: asserts
 
@@ -105,6 +106,19 @@
 ! ALL-NEXT: SimplifyFIROperations
 ! O2-NEXT:  AddAliasTags
 
+! AFFINE-NEXT: 'func.func' Pipeline
+! AFFINE-NEXT:   AffineDialectPromotion
+! AFFINE-NEXT: CSE
+! AFFINE-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
+! AFFINE-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
+! AFFINE-NEXT: 'func.func' Pipeline
+! AFFINE-NEXT:   AffineLoopInvariantCodeMotion
+! AFFINE-NEXT:   AffineLoopNormalize
+! AFFINE-NEXT:   SimplifyAffineStructures
+! AFFINE-NEXT:   AffineParallelize
+! AFFINE-NEXT:   AffineDialectDemotion
+! AFFINE-NEXT: LowerAffinePass
+
 ! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT:    'fir.global' Pipeline
 ! ALL-NEXT:      StackReclaim
diff --git a/flang/test/Integration/OpenMP/auto-omp.f90 b/flang/test/Integration/OpenMP/auto-omp.f90
new file mode 100644
index 0000000000000..7e348bfb41c17
--- /dev/null
+++ b/flang/test/Integration/OpenMP/auto-omp.f90
@@ -0,0 +1,52 @@
+! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
+! RUN: | FileCheck %s
+
+!CHECK-LABEL: entry:
+!CHECK:         %[[VAL_0:.*]] = alloca { ptr }, align 8
+!CHECK:         %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK:         store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8
+!CHECK:         call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]])
+!CHECK:         ret void
+!CHECK:       omp.par.entry:
+!CHECK:         %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3
+!CHECK:         %[[VAL_5:.*]] = alloca i32, align 4
+!CHECK:         %[[VAL_6:.*]] = alloca i64, align 8
+!CHECK:         %[[VAL_7:.*]] = alloca i64, align 8
+!CHECK:         %[[VAL_8:.*]] = alloca i64, align 8
+!CHECK:         store i64 0, ptr %[[VAL_6]], align 8
+!CHECK:         store i64 99, ptr %[[VAL_7]], align 8
+!CHECK:         store i64 1, ptr %[[VAL_8]], align 8
+!CHECK:         %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK:         call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0)
+!CHECK:         %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8
+!CHECK:         %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8
+!CHECK:         %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]]
+!CHECK:         %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1
+!CHECK:         br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]]
+!CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]]
+!CHECK:         call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]])
+!CHECK:         %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK:         call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]])
+!CHECK:         ret void
+!CHECK:       omp_loop.body:                                    ; preds = %[[VAL_17]], %[[VAL_16]]
+!CHECK:         %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ]
+!CHECK:         %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]]
+!CHECK:         %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400
+!CHECK:         %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]]
+!CHECK:         br label %[[VAL_24:.*]]
+!CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_24]]
+!CHECK:         %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1
+!CHECK:         %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]]
+!CHECK:         br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]]
+!CHECK:       omp.loop_nest.region6:                            ; preds = %[[VAL_15]], %[[VAL_24]]
+!CHECK:         %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ]
+!CHECK:         %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]]
+!CHECK:         store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4
+!CHECK:         %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1
+!CHECK:         %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100
+!CHECK:         br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]
+
+subroutine foo(a)
+  integer, dimension(100, 100), intent(out) :: a
+  a = 1
+end subroutine foo

>From 99ecb0b36284e5a6eb42797f6330cf69c0d37b5b Mon Sep 17 00:00:00 2001
From: yanming <ming.yan at terapines.com>
Date: Thu, 8 May 2025 16:17:48 +0800
Subject: [PATCH 2/2] Fix the failed test.

---
 flang/test/Integration/OpenMP/auto-omp.f90 | 46 +---------------------
 1 file changed, 2 insertions(+), 44 deletions(-)

diff --git a/flang/test/Integration/OpenMP/auto-omp.f90 b/flang/test/Integration/OpenMP/auto-omp.f90
index 7e348bfb41c17..bf7da292552d8 100644
--- a/flang/test/Integration/OpenMP/auto-omp.f90
+++ b/flang/test/Integration/OpenMP/auto-omp.f90
@@ -1,50 +1,8 @@
 ! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
 ! RUN: | FileCheck %s
 
-!CHECK-LABEL: entry:
-!CHECK:         %[[VAL_0:.*]] = alloca { ptr }, align 8
-!CHECK:         %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
-!CHECK:         store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8
-!CHECK:         call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]])
-!CHECK:         ret void
-!CHECK:       omp.par.entry:
-!CHECK:         %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3
-!CHECK:         %[[VAL_5:.*]] = alloca i32, align 4
-!CHECK:         %[[VAL_6:.*]] = alloca i64, align 8
-!CHECK:         %[[VAL_7:.*]] = alloca i64, align 8
-!CHECK:         %[[VAL_8:.*]] = alloca i64, align 8
-!CHECK:         store i64 0, ptr %[[VAL_6]], align 8
-!CHECK:         store i64 99, ptr %[[VAL_7]], align 8
-!CHECK:         store i64 1, ptr %[[VAL_8]], align 8
-!CHECK:         %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
-!CHECK:         call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0)
-!CHECK:         %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8
-!CHECK:         %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8
-!CHECK:         %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]]
-!CHECK:         %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1
-!CHECK:         br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]]
-!CHECK:       omp_loop.exit:                                    ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]]
-!CHECK:         call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]])
-!CHECK:         %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
-!CHECK:         call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]])
-!CHECK:         ret void
-!CHECK:       omp_loop.body:                                    ; preds = %[[VAL_17]], %[[VAL_16]]
-!CHECK:         %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ]
-!CHECK:         %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]]
-!CHECK:         %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400
-!CHECK:         %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]]
-!CHECK:         br label %[[VAL_24:.*]]
-!CHECK:       omp_loop.inc:                                     ; preds = %[[VAL_24]]
-!CHECK:         %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1
-!CHECK:         %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]]
-!CHECK:         br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]]
-!CHECK:       omp.loop_nest.region6:                            ; preds = %[[VAL_15]], %[[VAL_24]]
-!CHECK:         %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ]
-!CHECK:         %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]]
-!CHECK:         store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4
-!CHECK:         %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1
-!CHECK:         %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100
-!CHECK:         br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]
+!CHECK-LABEL: define void @foo_(ptr captures(none) %0) {{.*}} {
+!CHECK: call void{{.*}}@__kmpc_fork_call{{.*}}@[[OMP_OUTLINED_FN_1:.*]])
 
 subroutine foo(a)
   integer, dimension(100, 100), intent(out) :: a