[flang-commits] [flang] [flang][cuda] Support memory cleanup at a return statement (PR #116304)

via flang-commits flang-commits at lists.llvm.org
Thu Nov 14 23:37:49 PST 2024


https://github.com/khaki3 updated https://github.com/llvm/llvm-project/pull/116304

>From 798f1bc5ad4e0b8e2e591ac5eadd6b09b8d4700f Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 14 Nov 2024 16:17:57 -0800
Subject: [PATCH 1/3] [flang][cuf] Support memory finalization at early exit
 points

---
 flang/include/flang/Lower/StatementContext.h | 11 +++---
 flang/lib/Lower/Bridge.cpp                   | 37 +++++++++-----------
 flang/test/Lower/CUDA/cuda-return.cuf        | 14 ++++++++
 3 files changed, 38 insertions(+), 24 deletions(-)
 create mode 100644 flang/test/Lower/CUDA/cuda-return.cuf

diff --git a/flang/include/flang/Lower/StatementContext.h b/flang/include/flang/Lower/StatementContext.h
index 7776edc93ed737..4a07eeaefece41 100644
--- a/flang/include/flang/Lower/StatementContext.h
+++ b/flang/include/flang/Lower/StatementContext.h
@@ -79,23 +79,26 @@ class StatementContext {
     }
   }
 
-  /// Make cleanup calls. Retain the stack top list for a repeat call.
+  /// Make a cleanup call. Retain the stack top list for a repeat call.
   void finalizeAndKeep() {
     assert(!cufs.empty() && "invalid finalize statement context");
     if (cufs.back())
       (*cufs.back())();
   }
 
-  /// Make cleanup calls. Clear the stack top list.
+  /// Make a cleanup call. Clear the stack top list.
   void finalizeAndReset() {
     finalizeAndKeep();
     cufs.back().reset();
   }
 
-  /// Make cleanup calls. Pop the stack top list.
+  /// Pop the stack top list.
+  void pop() { cufs.pop_back(); }
+
+  /// Make a cleanup call. Pop the stack top list.
   void finalizeAndPop() {
     finalizeAndKeep();
-    cufs.pop_back();
+    pop();
   }
 
   bool hasCode() const {
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index da53edf7e734b0..7f41742bf5e8b2 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1621,13 +1621,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   // Termination of symbolically referenced execution units
   //===--------------------------------------------------------------------===//
 
-  /// END of program
+  /// Exit of a routine
   ///
-  /// Generate the cleanup block before the program exits
-  void genExitRoutine() {
-
-    if (blockIsUnterminated())
-      builder->create<mlir::func::ReturnOp>(toLocation());
+  /// Generate the cleanup block before the routine exits
+  void genExitRoutine(bool earlyReturn, mlir::ValueRange retval = {}) {
+    if (blockIsUnterminated()) {
+      bridge.openAccCtx().finalizeAndKeep();
+      bridge.fctCtx().finalizeAndKeep();
+      builder->create<mlir::func::ReturnOp>(toLocation(), retval);
+    }
+    if (!earlyReturn) {
+      bridge.openAccCtx().pop();
+      bridge.fctCtx().pop();
+    }
   }
 
   /// END of procedure-like constructs
@@ -1684,9 +1690,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
             resultRef = builder->createConvert(loc, resultRefType, resultRef);
           return builder->create<fir::LoadOp>(loc, resultRef);
         });
-    bridge.openAccCtx().finalizeAndPop();
-    bridge.fctCtx().finalizeAndPop();
-    builder->create<mlir::func::ReturnOp>(loc, resultVal);
+    genExitRoutine(false, resultVal);
   }
 
   /// Get the return value of a call to \p symbol, which is a subroutine entry
@@ -1712,13 +1716,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     } else if (Fortran::semantics::HasAlternateReturns(symbol)) {
       mlir::Value retval = builder->create<fir::LoadOp>(
           toLocation(), getAltReturnResult(symbol));
-      bridge.openAccCtx().finalizeAndPop();
-      bridge.fctCtx().finalizeAndPop();
-      builder->create<mlir::func::ReturnOp>(toLocation(), retval);
+      genExitRoutine(false, retval);
     } else {
-      bridge.openAccCtx().finalizeAndPop();
-      bridge.fctCtx().finalizeAndPop();
-      genExitRoutine();
+      genExitRoutine(false);
     }
   }
 
@@ -5018,8 +5018,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
       it->stmtCtx.finalizeAndKeep();
     }
     if (funit->isMainProgram()) {
-      bridge.fctCtx().finalizeAndKeep();
-      genExitRoutine();
+      genExitRoutine(true);
       return;
     }
     mlir::Location loc = toLocation();
@@ -5478,9 +5477,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
   void endNewFunction(Fortran::lower::pft::FunctionLikeUnit &funit) {
     setCurrentPosition(Fortran::lower::pft::stmtSourceLoc(funit.endStmt));
     if (funit.isMainProgram()) {
-      bridge.openAccCtx().finalizeAndPop();
-      bridge.fctCtx().finalizeAndPop();
-      genExitRoutine();
+      genExitRoutine(false);
     } else {
       genFIRProcedureExit(funit, funit.getSubprogramSymbol());
     }
diff --git a/flang/test/Lower/CUDA/cuda-return.cuf b/flang/test/Lower/CUDA/cuda-return.cuf
new file mode 100644
index 00000000000000..b40d63aa8b5fe0
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-return.cuf
@@ -0,0 +1,14 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Check if the the finalizer works
+
+program main
+  integer, device :: a(10)
+  return
+end
+
+! CHECK: func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare
+! CHECK-NEXT: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK-NEXT: return
+! CHECK-NEXT: }

>From 172149c1468d5ff22c4030b6bd286fb0101a6c41 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 14 Nov 2024 17:11:32 -0800
Subject: [PATCH 2/3] [test] Fix a comment

---
 flang/test/Lower/CUDA/cuda-return.cuf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/test/Lower/CUDA/cuda-return.cuf b/flang/test/Lower/CUDA/cuda-return.cuf
index b40d63aa8b5fe0..c9f9a8b57ef041 100644
--- a/flang/test/Lower/CUDA/cuda-return.cuf
+++ b/flang/test/Lower/CUDA/cuda-return.cuf
@@ -1,6 +1,6 @@
 ! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
 
-! Check if the the finalizer works
+! Check if finalization works with a return statement
 
 program main
   integer, device :: a(10)

>From 30bf0083692d65bfd3bfa31b791d527148a00e38 Mon Sep 17 00:00:00 2001
From: Kazuaki Matsumura <kmatsumura at nvidia.com>
Date: Thu, 14 Nov 2024 23:36:50 -0800
Subject: [PATCH 3/3] [flang][test] Revert some of the comments; Add tests
 with multiple returns

---
 flang/include/flang/Lower/StatementContext.h  |  6 +--
 .../{cuda-return.cuf => cuda-return01.cuf}    |  0
 flang/test/Lower/CUDA/cuda-return02.cuf       | 48 +++++++++++++++++++
 3 files changed, 51 insertions(+), 3 deletions(-)
 rename flang/test/Lower/CUDA/{cuda-return.cuf => cuda-return01.cuf} (100%)
 create mode 100644 flang/test/Lower/CUDA/cuda-return02.cuf

diff --git a/flang/include/flang/Lower/StatementContext.h b/flang/include/flang/Lower/StatementContext.h
index 4a07eeaefece41..eef21d4bae5aab 100644
--- a/flang/include/flang/Lower/StatementContext.h
+++ b/flang/include/flang/Lower/StatementContext.h
@@ -79,14 +79,14 @@ class StatementContext {
     }
   }
 
-  /// Make a cleanup call. Retain the stack top list for a repeat call.
+  /// Make cleanup calls. Retain the stack top list for a repeat call.
   void finalizeAndKeep() {
     assert(!cufs.empty() && "invalid finalize statement context");
     if (cufs.back())
       (*cufs.back())();
   }
 
-  /// Make a cleanup call. Clear the stack top list.
+  /// Make cleanup calls. Clear the stack top list.
   void finalizeAndReset() {
     finalizeAndKeep();
     cufs.back().reset();
@@ -95,7 +95,7 @@ class StatementContext {
   /// Pop the stack top list.
   void pop() { cufs.pop_back(); }
 
-  /// Make a cleanup call. Pop the stack top list.
+  /// Make cleanup calls. Pop the stack top list.
   void finalizeAndPop() {
     finalizeAndKeep();
     pop();
diff --git a/flang/test/Lower/CUDA/cuda-return.cuf b/flang/test/Lower/CUDA/cuda-return01.cuf
similarity index 100%
rename from flang/test/Lower/CUDA/cuda-return.cuf
rename to flang/test/Lower/CUDA/cuda-return01.cuf
diff --git a/flang/test/Lower/CUDA/cuda-return02.cuf b/flang/test/Lower/CUDA/cuda-return02.cuf
new file mode 100644
index 00000000000000..5d01f0a24b420b
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-return02.cuf
@@ -0,0 +1,48 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Check if finalization works with multiple return statements
+
+program test
+  integer, device :: a(10)
+  logical :: l
+
+  if (l) then
+    return
+  end if
+
+  return
+end
+
+! CHECK: func.func @_QQmain() attributes {fir.bindc_name = "test"} {
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare
+! CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
+! CHECK-NEXT: ^bb1:
+! CHECK-NEXT: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK-NEXT: return
+! CHECK-NEXT: ^bb2:
+! CHECK-NEXT: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK-NEXT: return
+! CHECK-NEXT: }
+
+subroutine sub(l)
+  integer, device :: a(10)
+  logical :: l
+
+  if (l) then
+    l = .false.
+    return
+  end if
+
+  return
+end
+
+! CHECK: func.func @_QPsub(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"}) {
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare
+! CHECK: cf.cond_br %6, ^bb1, ^bb2
+! CHECK: ^bb1:
+! CHECK: cf.br ^bb3
+! CHECK: ^bb2:
+! CHECK: cf.br ^bb3
+! CHECK: ^bb3:
+! CHECK: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK: }



More information about the flang-commits mailing list