[flang-commits] [flang] [flang][cuda] Change how abstract result pass is scheduled on func.func and gpu.func (PR #119034)

Fri Dec 6 13:33:27 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-flang-driver

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

Use `pm.nest` to schedule the pass on nested `func.func` and `gpu.func` in the `gpu.module`.

The AbstractResult pass is not meant to run on the whole `gpu.module` at once.

---
Full diff: https://github.com/llvm/llvm-project/pull/119034.diff


6 Files Affected:

- (modified) flang/lib/Optimizer/Passes/Pipelines.cpp (+9-2) 
- (modified) flang/lib/Optimizer/Transforms/AbstractResult.cpp (+4-11) 
- (modified) flang/test/Driver/bbc-mlir-pass-pipeline.f90 (+2-7) 
- (modified) flang/test/Driver/mlir-debug-pass-pipeline.f90 (+8-11) 
- (modified) flang/test/Driver/mlir-pass-pipeline.f90 (+9-15) 
- (modified) flang/test/Fir/basic-program.fir (+9-15) 


``````````diff

diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 0743fb60aa847a..ff79c811541c44 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -16,8 +16,14 @@ namespace fir {
 void addNestedPassToAllTopLevelOperations(mlir::PassManager &pm,
                                           PassConstructor ctor) {
   addNestedPassToOps<mlir::func::FuncOp, mlir::omp::DeclareReductionOp,
-                     mlir::omp::PrivateClauseOp, fir::GlobalOp,
-                     mlir::gpu::GPUModuleOp>(pm, ctor);
+                     mlir::omp::PrivateClauseOp, fir::GlobalOp>(pm, ctor);
+}
+
+template <typename NestOpTy>
+void addNestedPassToNest(mlir::PassManager &pm, PassConstructor ctor) {
+  mlir::OpPassManager &nestPM = pm.nest<NestOpTy>();
+  nestPM.addNestedPass<mlir::func::FuncOp>(ctor());
+  nestPM.addNestedPass<mlir::gpu::GPUFuncOp>(ctor());
 }
 
 void addNestedPassToAllTopLevelOperationsConditionally(
@@ -266,6 +272,7 @@ void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
                                          llvm::StringRef inputFilename) {
   fir::addBoxedProcedurePass(pm);
   addNestedPassToAllTopLevelOperations(pm, fir::createAbstractResultOpt);
+  addNestedPassToNest<mlir::gpu::GPUModuleOp>(pm, fir::createAbstractResultOpt);
   fir::addCodeGenRewritePass(
       pm, (config.DebugInfo != llvm::codegenoptions::NoDebugInfo));
   fir::addExternalNameConversionPass(pm, config.Underscoring);
diff --git a/flang/lib/Optimizer/Transforms/AbstractResult.cpp b/flang/lib/Optimizer/Transforms/AbstractResult.cpp
index 2eca349110f3af..2ed66cc83eefb5 100644
--- a/flang/lib/Optimizer/Transforms/AbstractResult.cpp
+++ b/flang/lib/Optimizer/Transforms/AbstractResult.cpp
@@ -460,17 +460,10 @@ class AbstractResultOpt
     const bool shouldBoxResult = this->passResultAsBox.getValue();
 
     mlir::TypeSwitch<mlir::Operation *, void>(op)
-        .Case<mlir::func::FuncOp, fir::GlobalOp>([&](auto op) {
-          runOnSpecificOperation(op, shouldBoxResult, patterns, target);
-        })
-        .Case<mlir::gpu::GPUModuleOp>([&](auto op) {
-          auto gpuMod = mlir::dyn_cast<mlir::gpu::GPUModuleOp>(*op);
-          for (auto funcOp : gpuMod.template getOps<mlir::func::FuncOp>())
-            runOnSpecificOperation(funcOp, shouldBoxResult, patterns, target);
-          for (auto gpuFuncOp : gpuMod.template getOps<mlir::gpu::GPUFuncOp>())
-            runOnSpecificOperation(gpuFuncOp, shouldBoxResult, patterns,
-                                   target);
-        });
+        .Case<mlir::func::FuncOp, fir::GlobalOp, mlir::gpu::GPUFuncOp>(
+            [&](auto op) {
+              runOnSpecificOperation(op, shouldBoxResult, patterns, target);
+            });
 
     // Convert the calls and, if needed,  the ReturnOp in the function body.
     target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
index 1f09e7ad4c2f5a..5520d750e2ce1c 100644
--- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90
+++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
@@ -17,14 +17,12 @@
 ! CHECK-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
 ! CHECK-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
 
-! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! CHECK-NEXT: 'fir.global' Pipeline
 ! CHECK-NEXT:   CharacterConversion
 ! CHECK-NEXT: 'func.func' Pipeline
 ! CHECK-NEXT:   ArrayValueCopy
 ! CHECK-NEXT:   CharacterConversion
-! CHECK-NEXT: 'gpu.module' Pipeline
-! CHECK-NEXT:   CharacterConversion
 ! CHECK-NEXT: 'omp.declare_reduction' Pipeline
 ! CHECK-NEXT:   CharacterConversion
 ! CHECK-NEXT: 'omp.private' Pipeline
@@ -50,16 +48,13 @@
 ! CHECK-NEXT: PolymorphicOpConversion
 ! CHECK-NEXT: AssumedRankOpConversion
 
-! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! CHECK-NEXT: 'fir.global' Pipeline
 ! CHECK-NEXT:   StackReclaim
 ! CHECK-NEXT:   CFGConversion
 ! CHECK-NEXT: 'func.func' Pipeline
 ! CHECK-NEXT:   StackReclaim
 ! CHECK-NEXT:   CFGConversion
-! CHECK-NEXT: 'gpu.module' Pipeline
-! CHECK-NEXT:   StackReclaim
-! CHECK-NEXT:   CFGConversion
 ! CHECK-NEXT: 'omp.declare_reduction' Pipeline
 ! CHECK-NEXT:   StackReclaim
 ! CHECK-NEXT:   CFGConversion
diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90
index 4326953421e4bd..edc6f59b0ad7c9 100644
--- a/flang/test/Driver/mlir-debug-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90
@@ -28,13 +28,11 @@
 ! ALL: Pass statistics report
 
 ! ALL: Fortran::lower::VerifierPass
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT: 'fir.global' Pipeline
 ! ALL-NEXT:   InlineElementals
 ! ALL-NEXT: 'func.func' Pipeline
 ! ALL-NEXT:   InlineElementals
-! ALL-NEXT: 'gpu.module' Pipeline
-! ALL-NEXT:   InlineElementals
 ! ALL-NEXT: 'omp.declare_reduction' Pipeline
 ! ALL-NEXT:   InlineElementals
 ! ALL-NEXT: 'omp.private' Pipeline
@@ -51,14 +49,12 @@
 ! ALL-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
 ! ALL-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
 
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT: 'fir.global' Pipeline
 ! ALL-NEXT:   CharacterConversion
 ! ALL-NEXT: 'func.func' Pipeline
 ! ALL-NEXT:   ArrayValueCopy
 ! ALL-NEXT:   CharacterConversion
-! ALL-NEXT: 'gpu.module' Pipeline
-! ALL-NEXT:   CharacterConversion
 ! ALL-NEXT: 'omp.declare_reduction' Pipeline
 ! ALL-NEXT:   CharacterConversion
 ! ALL-NEXT: 'omp.private' Pipeline
@@ -82,16 +78,13 @@
 ! ALL-NEXT: PolymorphicOpConversion
 ! ALL-NEXT: AssumedRankOpConversion
 
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT:   'fir.global' Pipeline
 ! ALL-NEXT:     StackReclaim
 ! ALL-NEXT:     CFGConversion
 ! ALL-NEXT:   'func.func' Pipeline
 ! ALL-NEXT:     StackReclaim
 ! ALL-NEXT:     CFGConversion
-! ALL-NEXT:   'gpu.module' Pipeline
-! ALL-NEXT:     StackReclaim
-! ALL-NEXT:     CFGConversion
 ! ALL-NEXT:   'omp.declare_reduction' Pipeline
 ! ALL-NEXT:     StackReclaim
 ! ALL-NEXT:     CFGConversion
@@ -112,7 +105,11 @@
 ! ALL-NEXT:   'func.func' Pipeline
 ! ALL-NEXT:     AbstractResultOpt
 ! ALL-NEXT:   'gpu.module' Pipeline
-! ALL-NEXT:     AbstractResultOpt
+! ALL-NEXT:   Pipeline Collection : ['func.func', 'gpu.func'] 
+! ALL-NEXT:   'func.func' Pipeline 
+! ALL-NEXT:   AbstractResultOpt
+! ALL-NEXT:   'gpu.func' Pipeline 
+! ALL-NEXT:   AbstractResultOpt
 ! ALL-NEXT:   'omp.declare_reduction' Pipeline
 ! ALL-NEXT:     AbstractResultOpt
 ! ALL-NEXT:   'omp.private' Pipeline
diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90
index 6ffdbb0234e856..b30affe691b840 100644
--- a/flang/test/Driver/mlir-pass-pipeline.f90
+++ b/flang/test/Driver/mlir-pass-pipeline.f90
@@ -16,16 +16,13 @@
 
 ! ALL: Fortran::lower::VerifierPass
 ! O2-NEXT: Canonicalizer
-! ALL:     Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL:     Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT:'fir.global' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
 ! ALL-NEXT:'func.func' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
-! ALL-NEXT:'gpu.module' Pipeline
-! O2-NEXT:   SimplifyHLFIRIntrinsics
-! ALL:       InlineElementals
 ! ALL-NEXT:'omp.declare_reduction' Pipeline
 ! O2-NEXT:   SimplifyHLFIRIntrinsics
 ! ALL:       InlineElementals
@@ -36,13 +33,11 @@
 ! O2-NEXT: CSE
 ! O2-NEXT: (S) {{.*}} num-cse'd
 ! O2-NEXT: (S) {{.*}} num-dce'd
-! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! O2-NEXT: 'fir.global' Pipeline
 ! O2-NEXT:   OptimizedBufferization
 ! O2-NEXT: 'func.func' Pipeline
 ! O2-NEXT:   OptimizedBufferization
-! O2-NEXT: 'gpu.module' Pipeline
-! O2-NEXT:   OptimizedBufferization
 ! O2-NEXT: 'omp.declare_reduction' Pipeline
 ! O2-NEXT:   OptimizedBufferization
 ! O2-NEXT: 'omp.private' Pipeline
@@ -59,14 +54,12 @@
 ! ALL-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
 ! ALL-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
 
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT: 'fir.global' Pipeline
 ! ALL-NEXT:   CharacterConversion
 ! ALL-NEXT: 'func.func' Pipeline
 ! ALL-NEXT:   ArrayValueCopy
 ! ALL-NEXT:   CharacterConversion
-! ALL-NEXT: 'gpu.module' Pipeline
-! ALL-NEXT:   CharacterConversion
 ! ALL-NEXT: 'omp.declare_reduction' Pipeline
 ! ALL-NEXT:   CharacterConversion
 ! ALL-NEXT: 'omp.private' Pipeline
@@ -93,16 +86,13 @@
 ! ALL-NEXT: AssumedRankOpConversion
 ! O2-NEXT:  AddAliasTags
 
-! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 ! ALL-NEXT:    'fir.global' Pipeline
 ! ALL-NEXT:      StackReclaim
 ! ALL-NEXT:      CFGConversion
 ! ALL-NEXT:    'func.func' Pipeline
 ! ALL-NEXT:      StackReclaim
 ! ALL-NEXT:      CFGConversion
-! ALL-NEXT:   'gpu.module' Pipeline
-! ALL-NEXT:      StackReclaim
-! ALL-NEXT:      CFGConversion
 ! ALL-NEXT:   'omp.declare_reduction' Pipeline
 ! ALL-NEXT:      StackReclaim
 ! ALL-NEXT:      CFGConversion
@@ -124,7 +114,11 @@
 ! ALL-NEXT:  'func.func' Pipeline
 ! ALL-NEXT:    AbstractResultOpt
 ! ALL-NEXT:  'gpu.module' Pipeline
-! ALL-NEXT:    AbstractResultOpt
+! ALL-NEXT:   Pipeline Collection : ['func.func', 'gpu.func'] 
+! ALL-NEXT:   'func.func' Pipeline 
+! ALL-NEXT:   AbstractResultOpt
+! ALL-NEXT:   'gpu.func' Pipeline 
+! ALL-NEXT:   AbstractResultOpt
 ! ALL-NEXT:  'omp.declare_reduction' Pipeline
 ! ALL-NEXT:    AbstractResultOpt
 ! ALL-NEXT:  'omp.private' Pipeline
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index 50b91ce340b3a6..d2788008c3893e 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -17,16 +17,13 @@ func.func @_QQmain() {
 // PASSES: Pass statistics report
 
 // PASSES:        Canonicalizer
-// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
-// PASSES-NEXT: 'gpu.module' Pipeline
-// PASSES-NEXT:   SimplifyHLFIRIntrinsics
-// PASSES-NEXT:   InlineElementals
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:   SimplifyHLFIRIntrinsics
 // PASSES-NEXT:   InlineElementals
@@ -37,13 +34,11 @@ func.func @_QQmain() {
 // PASSES-NEXT:   CSE
 // PASSES-NEXT:    (S) 0 num-cse'd - Number of operations CSE'd
 // PASSES-NEXT:    (S) 0 num-dce'd - Number of operations DCE'd
-// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
-// PASSES-NEXT: 'gpu.module' Pipeline
-// PASSES-NEXT:    OptimizedBufferization
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:    OptimizedBufferization
 // PASSES-NEXT: 'omp.private' Pipeline
@@ -57,14 +52,12 @@ func.func @_QQmain() {
 // PASSES-NEXT:   (S) 0 num-cse'd - Number of operations CSE'd
 // PASSES-NEXT:   (S) 0 num-dce'd - Number of operations DCE'd
 
-// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:   CharacterConversion
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:   ArrayValueCopy
 // PASSES-NEXT:   CharacterConversion
-// PASSES-NEXT: 'gpu.module' Pipeline
-// PASSES-NEXT:   CharacterConversion
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:   CharacterConversion
 // PASSES-NEXT: 'omp.private' Pipeline
@@ -91,16 +84,13 @@ func.func @_QQmain() {
 // PASSES-NEXT: AssumedRankOpConversion
 // PASSES-NEXT: AddAliasTags
 
-// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'gpu.module', 'omp.declare_reduction', 'omp.private']
+// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
 // PASSES-NEXT: 'fir.global' Pipeline
 // PASSES-NEXT:   StackReclaim
 // PASSES-NEXT:   CFGConversion
 // PASSES-NEXT: 'func.func' Pipeline
 // PASSES-NEXT:   StackReclaim
 // PASSES-NEXT:   CFGConversion
-// PASSES-NEXT: 'gpu.module' Pipeline
-// PASSES-NEXT:   StackReclaim
-// PASSES-NEXT:   CFGConversion
 // PASSES-NEXT: 'omp.declare_reduction' Pipeline
 // PASSES-NEXT:   StackReclaim
 // PASSES-NEXT:   CFGConversion
@@ -122,7 +112,11 @@ func.func @_QQmain() {
 // PASSES-NEXT:  'func.func' Pipeline
 // PASSES-NEXT:    AbstractResultOpt
 // PASSES-NEXT:  'gpu.module' Pipeline
-// PASSES-NEXT:    AbstractResultOpt
+// PASSES-NEXT:   Pipeline Collection : ['func.func', 'gpu.func'] 
+// PASSES-NEXT:   'func.func' Pipeline 
+// PASSES-NEXT:   AbstractResultOpt
+// PASSES-NEXT:   'gpu.func' Pipeline 
+// PASSES-NEXT:   AbstractResultOpt
 // PASSES-NEXT:  'omp.declare_reduction' Pipeline
 // PASSES-NEXT:    AbstractResultOpt
 // PASSES-NEXT:  'omp.private' Pipeline

``````````

</details>


https://github.com/llvm/llvm-project/pull/119034