[flang-commits] [flang] [flang][fir] Add affine optimization pass pipeline. (PR #138627)
via flang-commits
flang-commits at lists.llvm.org
Thu May 8 01:19:23 PDT 2025
https://github.com/NexMing updated https://github.com/llvm/llvm-project/pull/138627
From ea6a6e5721d301647770ef05548555b05f1092f7 Mon Sep 17 00:00:00 2001
From: yanming <ming.yan at terapines.com>
Date: Wed, 30 Apr 2025 16:32:14 +0800
Subject: [PATCH 1/2] [flang][fir] Add affine optimization pass pipeline.
---
.../flang/Optimizer/Passes/CommandLineOpts.h | 1 +
.../flang/Optimizer/Passes/Pipelines.h | 3 ++
flang/lib/Optimizer/Passes/CMakeLists.txt | 1 +
.../lib/Optimizer/Passes/CommandLineOpts.cpp | 1 +
flang/lib/Optimizer/Passes/Pipelines.cpp | 17 ++++++
flang/test/Driver/mlir-pass-pipeline.f90 | 14 +++++
flang/test/Integration/OpenMP/auto-omp.f90 | 52 +++++++++++++++++++
7 files changed, 89 insertions(+)
create mode 100644 flang/test/Integration/OpenMP/auto-omp.f90
diff --git a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
index 1cfaf285e75e6..320c561953213 100644
--- a/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
+++ b/flang/include/flang/Optimizer/Passes/CommandLineOpts.h
@@ -42,6 +42,7 @@ extern llvm::cl::opt<bool> disableCfgConversion;
extern llvm::cl::opt<bool> disableFirAvc;
extern llvm::cl::opt<bool> disableFirMao;
+extern llvm::cl::opt<bool> enableAffineOpt;
extern llvm::cl::opt<bool> disableFirAliasTags;
extern llvm::cl::opt<bool> useOldAliasTags;
diff --git a/flang/include/flang/Optimizer/Passes/Pipelines.h b/flang/include/flang/Optimizer/Passes/Pipelines.h
index a3f59ee8dd013..7680987367256 100644
--- a/flang/include/flang/Optimizer/Passes/Pipelines.h
+++ b/flang/include/flang/Optimizer/Passes/Pipelines.h
@@ -18,8 +18,11 @@
#include "flang/Optimizer/Passes/CommandLineOpts.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "flang/Tools/CrossToolHelpers.h"
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/SCFToOpenMP/SCFToOpenMP.h"
+#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Pass/PassManager.h"
diff --git a/flang/lib/Optimizer/Passes/CMakeLists.txt b/flang/lib/Optimizer/Passes/CMakeLists.txt
index 1c19a5765aff1..ad6c714c28bec 100644
--- a/flang/lib/Optimizer/Passes/CMakeLists.txt
+++ b/flang/lib/Optimizer/Passes/CMakeLists.txt
@@ -21,6 +21,7 @@ add_flang_library(flangPasses
MLIRPass
MLIRReconcileUnrealizedCasts
MLIRSCFToControlFlow
+ MLIRSCFToOpenMP
MLIRSupport
MLIRTransforms
)
diff --git a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
index f95a280883cba..b8ae6ede423e3 100644
--- a/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
+++ b/flang/lib/Optimizer/Passes/CommandLineOpts.cpp
@@ -55,6 +55,7 @@ cl::opt<bool> useOldAliasTags(
cl::desc("Use a single TBAA tree for all functions and do not use "
"the FIR alias tags pass"),
cl::init(false), cl::Hidden);
+EnableOption(AffineOpt, "affine-opt", "affine optimization");
/// CodeGen Passes
DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index a3ef473ea39b7..f85de45f6029d 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -209,8 +209,25 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
if (pc.AliasAnalysis && !disableFirAliasTags && !useOldAliasTags)
pm.addPass(fir::createAddAliasTags());
+ if (enableAffineOpt && pc.OptLevel.isOptimizingForSpeed()) {
+ pm.addPass(fir::createPromoteToAffinePass());
+ pm.addPass(mlir::createCSEPass());
+ pm.addPass(mlir::affine::createAffineLoopInvariantCodeMotionPass());
+ pm.addPass(mlir::affine::createAffineLoopNormalizePass());
+ pm.addPass(mlir::affine::createSimplifyAffineStructuresPass());
+ pm.addPass(mlir::affine::createAffineParallelize(
+ mlir::affine::AffineParallelizeOptions{1, false}));
+ pm.addPass(fir::createAffineDemotionPass());
+ pm.addPass(mlir::createLowerAffinePass());
+ if (pc.EnableOpenMP) {
+ pm.addPass(mlir::createConvertSCFToOpenMPPass());
+ pm.addPass(mlir::createCanonicalizerPass());
+ }
+ }
+
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, fir::createStackReclaim);
+
// convert control flow to CFG form
fir::addCfgConversionPass(pm, pc);
pm.addPass(mlir::createSCFToControlFlowPass());
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 45370895db397..188a42d231500 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -4,6 +4,7 @@
! -O0 is the default:
! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline %s -O0 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL %s
! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline %s -O2 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL,O2 %s
+! RUN: %flang_fc1 -S -mmlir --mlir-pass-statistics -mmlir --mlir-pass-statistics-display=pipeline -mllvm --enable-affine-opt %s -O2 -o /dev/null 2>&1 | FileCheck --check-prefixes=ALL,O2,AFFINE %s
! REQUIRES: asserts
@@ -105,6 +106,19 @@
! ALL-NEXT: SimplifyFIROperations
! O2-NEXT: AddAliasTags
+! AFFINE-NEXT: 'func.func' Pipeline
+! AFFINE-NEXT: AffineDialectPromotion
+! AFFINE-NEXT: CSE
+! AFFINE-NEXT: (S) 0 num-cse'd - Number of operations CSE'd
+! AFFINE-NEXT: (S) 0 num-dce'd - Number of operations DCE'd
+! AFFINE-NEXT: 'func.func' Pipeline
+! AFFINE-NEXT: AffineLoopInvariantCodeMotion
+! AFFINE-NEXT: AffineLoopNormalize
+! AFFINE-NEXT: SimplifyAffineStructures
+! AFFINE-NEXT: AffineParallelize
+! AFFINE-NEXT: AffineDialectDemotion
+! AFFINE-NEXT: LowerAffinePass
+
! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
! ALL-NEXT: 'fir.global' Pipeline
! ALL-NEXT: StackReclaim
diff --git a/flang/test/Integration/OpenMP/auto-omp.f90 b/flang/test/Integration/OpenMP/auto-omp.f90
new file mode 100644
index 0000000000000..7e348bfb41c17
--- /dev/null
+++ b/flang/test/Integration/OpenMP/auto-omp.f90
@@ -0,0 +1,52 @@
+! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
+! RUN: | FileCheck %s
+
+!CHECK-LABEL: entry:
+!CHECK: %[[VAL_0:.*]] = alloca { ptr }, align 8
+!CHECK: %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK: store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8
+!CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]])
+!CHECK: ret void
+!CHECK: omp.par.entry:
+!CHECK: %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3
+!CHECK: %[[VAL_5:.*]] = alloca i32, align 4
+!CHECK: %[[VAL_6:.*]] = alloca i64, align 8
+!CHECK: %[[VAL_7:.*]] = alloca i64, align 8
+!CHECK: %[[VAL_8:.*]] = alloca i64, align 8
+!CHECK: store i64 0, ptr %[[VAL_6]], align 8
+!CHECK: store i64 99, ptr %[[VAL_7]], align 8
+!CHECK: store i64 1, ptr %[[VAL_8]], align 8
+!CHECK: %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK: call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0)
+!CHECK: %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8
+!CHECK: %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8
+!CHECK: %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]]
+!CHECK: %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1
+!CHECK: br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]]
+!CHECK: omp_loop.exit: ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]]
+!CHECK: call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]])
+!CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
+!CHECK: call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]])
+!CHECK: ret void
+!CHECK: omp_loop.body: ; preds = %[[VAL_17]], %[[VAL_16]]
+!CHECK: %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ]
+!CHECK: %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]]
+!CHECK: %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400
+!CHECK: %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]]
+!CHECK: br label %[[VAL_24:.*]]
+!CHECK: omp_loop.inc: ; preds = %[[VAL_24]]
+!CHECK: %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1
+!CHECK: %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]]
+!CHECK: br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]]
+!CHECK: omp.loop_nest.region6: ; preds = %[[VAL_15]], %[[VAL_24]]
+!CHECK: %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ]
+!CHECK: %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]]
+!CHECK: store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4
+!CHECK: %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1
+!CHECK: %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100
+!CHECK: br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]
+
+subroutine foo(a)
+ integer, dimension(100, 100), intent(out) :: a
+ a = 1
+end subroutine foo
From 99ecb0b36284e5a6eb42797f6330cf69c0d37b5b Mon Sep 17 00:00:00 2001
From: yanming <ming.yan at terapines.com>
Date: Thu, 8 May 2025 16:17:48 +0800
Subject: [PATCH 2/2] Fix the failed test.
---
flang/test/Integration/OpenMP/auto-omp.f90 | 46 +---------------------
1 file changed, 2 insertions(+), 44 deletions(-)
diff --git a/flang/test/Integration/OpenMP/auto-omp.f90 b/flang/test/Integration/OpenMP/auto-omp.f90
index 7e348bfb41c17..bf7da292552d8 100644
--- a/flang/test/Integration/OpenMP/auto-omp.f90
+++ b/flang/test/Integration/OpenMP/auto-omp.f90
@@ -1,50 +1,8 @@
! RUN: %flang_fc1 -O1 -mllvm --enable-affine-opt -emit-llvm -fopenmp -o - %s \
! RUN: | FileCheck %s
-!CHECK-LABEL: entry:
-!CHECK: %[[VAL_0:.*]] = alloca { ptr }, align 8
-!CHECK: %[[VAL_1:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
-!CHECK: store ptr %[[VAL_2:.*]], ptr %[[VAL_0]], align 8
-!CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr nonnull @1, i32 1, ptr nonnull @foo_..omp_par, ptr nonnull %[[VAL_0]])
-!CHECK: ret void
-!CHECK: omp.par.entry:
-!CHECK: %[[VAL_3:.*]] = load ptr, ptr %[[VAL_4:.*]], align 8, !align !3
-!CHECK: %[[VAL_5:.*]] = alloca i32, align 4
-!CHECK: %[[VAL_6:.*]] = alloca i64, align 8
-!CHECK: %[[VAL_7:.*]] = alloca i64, align 8
-!CHECK: %[[VAL_8:.*]] = alloca i64, align 8
-!CHECK: store i64 0, ptr %[[VAL_6]], align 8
-!CHECK: store i64 99, ptr %[[VAL_7]], align 8
-!CHECK: store i64 1, ptr %[[VAL_8]], align 8
-!CHECK: %[[VAL_9:.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @1)
-!CHECK: call void @__kmpc_for_static_init_8u(ptr nonnull @1, i32 %[[VAL_9]], i32 34, ptr nonnull %[[VAL_5]], ptr nonnull %[[VAL_6]], ptr nonnull %[[VAL_7]], ptr nonnull %[[VAL_8]], i64 1, i64 0)
-!CHECK: %[[VAL_10:.*]] = load i64, ptr %[[VAL_6]], align 8
-!CHECK: %[[VAL_11:.*]] = load i64, ptr %[[VAL_7]], align 8
-!CHECK: %[[VAL_12:.*]] = sub i64 %[[VAL_11]], %[[VAL_10]]
-!CHECK: %[[VAL_13:.*]] = icmp eq i64 %[[VAL_12]], -1
-!CHECK: br i1 %[[VAL_13]], label %[[VAL_14:.*]], label %[[VAL_15:.*]]
-!CHECK: omp_loop.exit: ; preds = %[[VAL_16:.*]], %[[VAL_17:.*]]
-!CHECK: call void @__kmpc_for_static_fini(ptr nonnull @1, i32 %[[VAL_9]])
-!CHECK: %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @1)
-!CHECK: call void @__kmpc_barrier(ptr nonnull @2, i32 %[[VAL_18]])
-!CHECK: ret void
-!CHECK: omp_loop.body: ; preds = %[[VAL_17]], %[[VAL_16]]
-!CHECK: %[[VAL_19:.*]] = phi i64 [ %[[VAL_20:.*]], %[[VAL_16]] ], [ 0, %[[VAL_17]] ]
-!CHECK: %[[VAL_21:.*]] = add i64 %[[VAL_19]], %[[VAL_10]]
-!CHECK: %[[VAL_22:.*]] = mul i64 %[[VAL_21]], 400
-!CHECK: %[[VAL_23:.*]] = getelementptr i8, ptr %[[VAL_3]], i64 %[[VAL_22]]
-!CHECK: br label %[[VAL_24:.*]]
-!CHECK: omp_loop.inc: ; preds = %[[VAL_24]]
-!CHECK: %[[VAL_20]] = add nuw i64 %[[VAL_19]], 1
-!CHECK: %[[VAL_25:.*]] = icmp eq i64 %[[VAL_19]], %[[VAL_12]]
-!CHECK: br i1 %[[VAL_25]], label %[[VAL_14]], label %[[VAL_15]]
-!CHECK: omp.loop_nest.region6: ; preds = %[[VAL_15]], %[[VAL_24]]
-!CHECK: %[[VAL_26:.*]] = phi i64 [ 0, %[[VAL_15]] ], [ %[[VAL_27:.*]], %[[VAL_24]] ]
-!CHECK: %[[VAL_28:.*]] = getelementptr i32, ptr %[[VAL_23]], i64 %[[VAL_26]]
-!CHECK: store i32 1, ptr %[[VAL_28]], align 4, !tbaa !4
-!CHECK: %[[VAL_27]] = add nuw nsw i64 %[[VAL_26]], 1
-!CHECK: %[[VAL_29:.*]] = icmp eq i64 %[[VAL_27]], 100
-!CHECK: br i1 %[[VAL_29]], label %[[VAL_16]], label %[[VAL_24]]
+!CHECK-LABEL: define void @foo_(ptr captures(none) %0) {{.*}} {
+!CHECK: call void{{.*}}@__kmpc_fork_call{{.*}}@[[OMP_OUTLINED_FN_1:.*]])
subroutine foo(a)
integer, dimension(100, 100), intent(out) :: a
More information about the flang-commits mailing list