[flang-commits] [flang] ff7fca7 - [flang][cuda] Support memory cleanup at a return statement (#116304)
via flang-commits
flang-commits at lists.llvm.org
Fri Nov 15 08:44:46 PST 2024
Author: khaki3
Date: 2024-11-15T08:44:42-08:00
New Revision: ff7fca7fa8646d73f884ab8a351e4178499c4d05
URL: https://github.com/llvm/llvm-project/commit/ff7fca7fa8646d73f884ab8a351e4178499c4d05
DIFF: https://github.com/llvm/llvm-project/commit/ff7fca7fa8646d73f884ab8a351e4178499c4d05.diff
LOG: [flang][cuda] Support memory cleanup at a return statement (#116304)
We currently generate `cuf.free` and `func.return` twice if a return statement
appears at the end of a program.
```f90
program test
integer, device :: a(10)
return
end
```
```
% flang -x cuda test.cuf -mmlir --mlir-print-ir-after-all
error: loc("/path/to/test.cuf":3:3): 'func.return' op must be the last operation in the parent block
// -----// IR Dump After Fortran::lower::VerifierPass Failed () //----- //
```
Dumped IR:
```mlir
"func.func"() <{function_type = () -> (), sym_name = "_QQmain"}> ({
...
"cuf.free"(%5#1) <{data_attr = #cuf.cuda<device>}> : (!fir.ref<!fir.array<10xi32>>) -> ()
"func.return"() : () -> ()
"cuf.free"(%5#1) <{data_attr = #cuf.cuda<device>}> : (!fir.ref<!fir.array<10xi32>>) -> ()
"func.return"() : () -> ()
}
...
```
The routine `genExitRoutine` in `Bridge.cpp` is guarded by
`blockIsUnterminated()` to make sure that `func.return` is generated
only at the end of a block. However, we redundantly run
`bridge.fctCtx().finalizeAndKeep()` before `genExitRoutine` in this
case, resulting in two pairs of `cuf.free` and `func.return`. This PR
fixes `Bridge.cpp` by using `blockIsUnterminated()` to guard
`finalizeAndKeep` as well.
Added:
flang/test/Lower/CUDA/cuda-return01.cuf
flang/test/Lower/CUDA/cuda-return02.cuf
Modified:
flang/include/flang/Lower/StatementContext.h
flang/lib/Lower/Bridge.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Lower/StatementContext.h b/flang/include/flang/Lower/StatementContext.h
index 7776edc93ed737..eef21d4bae5aab 100644
--- a/flang/include/flang/Lower/StatementContext.h
+++ b/flang/include/flang/Lower/StatementContext.h
@@ -92,10 +92,13 @@ class StatementContext {
cufs.back().reset();
}
+ /// Pop the stack top list.
+ void pop() { cufs.pop_back(); }
+
/// Make cleanup calls. Pop the stack top list.
void finalizeAndPop() {
finalizeAndKeep();
- cufs.pop_back();
+ pop();
}
bool hasCode() const {
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index da53edf7e734b0..7f41742bf5e8b2 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1621,13 +1621,19 @@ class FirConverter : public Fortran::lower::AbstractConverter {
// Termination of symbolically referenced execution units
//===--------------------------------------------------------------------===//
- /// END of program
+ /// Exit of a routine
///
- /// Generate the cleanup block before the program exits
- void genExitRoutine() {
-
- if (blockIsUnterminated())
- builder->create<mlir::func::ReturnOp>(toLocation());
+ /// Generate the cleanup block before the routine exits
+ void genExitRoutine(bool earlyReturn, mlir::ValueRange retval = {}) {
+ if (blockIsUnterminated()) {
+ bridge.openAccCtx().finalizeAndKeep();
+ bridge.fctCtx().finalizeAndKeep();
+ builder->create<mlir::func::ReturnOp>(toLocation(), retval);
+ }
+ if (!earlyReturn) {
+ bridge.openAccCtx().pop();
+ bridge.fctCtx().pop();
+ }
}
/// END of procedure-like constructs
@@ -1684,9 +1690,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
resultRef = builder->createConvert(loc, resultRefType, resultRef);
return builder->create<fir::LoadOp>(loc, resultRef);
});
- bridge.openAccCtx().finalizeAndPop();
- bridge.fctCtx().finalizeAndPop();
- builder->create<mlir::func::ReturnOp>(loc, resultVal);
+ genExitRoutine(false, resultVal);
}
/// Get the return value of a call to \p symbol, which is a subroutine entry
@@ -1712,13 +1716,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
} else if (Fortran::semantics::HasAlternateReturns(symbol)) {
mlir::Value retval = builder->create<fir::LoadOp>(
toLocation(), getAltReturnResult(symbol));
- bridge.openAccCtx().finalizeAndPop();
- bridge.fctCtx().finalizeAndPop();
- builder->create<mlir::func::ReturnOp>(toLocation(), retval);
+ genExitRoutine(false, retval);
} else {
- bridge.openAccCtx().finalizeAndPop();
- bridge.fctCtx().finalizeAndPop();
- genExitRoutine();
+ genExitRoutine(false);
}
}
@@ -5018,8 +5018,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
it->stmtCtx.finalizeAndKeep();
}
if (funit->isMainProgram()) {
- bridge.fctCtx().finalizeAndKeep();
- genExitRoutine();
+ genExitRoutine(true);
return;
}
mlir::Location loc = toLocation();
@@ -5478,9 +5477,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
void endNewFunction(Fortran::lower::pft::FunctionLikeUnit &funit) {
setCurrentPosition(Fortran::lower::pft::stmtSourceLoc(funit.endStmt));
if (funit.isMainProgram()) {
- bridge.openAccCtx().finalizeAndPop();
- bridge.fctCtx().finalizeAndPop();
- genExitRoutine();
+ genExitRoutine(false);
} else {
genFIRProcedureExit(funit, funit.getSubprogramSymbol());
}
diff --git a/flang/test/Lower/CUDA/cuda-return01.cuf b/flang/test/Lower/CUDA/cuda-return01.cuf
new file mode 100644
index 00000000000000..c9f9a8b57ef041
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-return01.cuf
@@ -0,0 +1,14 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Check if finalization works with a return statement
+
+program main
+ integer, device :: a(10)
+ return
+end
+
+! CHECK: func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare
+! CHECK-NEXT: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK-NEXT: return
+! CHECK-NEXT: }
diff --git a/flang/test/Lower/CUDA/cuda-return02.cuf b/flang/test/Lower/CUDA/cuda-return02.cuf
new file mode 100644
index 00000000000000..5d01f0a24b420b
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-return02.cuf
@@ -0,0 +1,48 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Check if finalization works with multiple return statements
+
+program test
+ integer, device :: a(10)
+ logical :: l
+
+ if (l) then
+ return
+ end if
+
+ return
+end
+
+! CHECK: func.func @_QQmain() attributes {fir.bindc_name = "test"} {
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare
+! CHECK: cf.cond_br %{{.*}}, ^bb1, ^bb2
+! CHECK-NEXT: ^bb1:
+! CHECK-NEXT: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK-NEXT: return
+! CHECK-NEXT: ^bb2:
+! CHECK-NEXT: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK-NEXT: return
+! CHECK-NEXT: }
+
+subroutine sub(l)
+ integer, device :: a(10)
+ logical :: l
+
+ if (l) then
+ l = .false.
+ return
+ end if
+
+ return
+end
+
+! CHECK: func.func @_QPsub(%arg0: !fir.ref<!fir.logical<4>> {fir.bindc_name = "l"}) {
+! CHECK: %[[DECL:.*]]:2 = hlfir.declare
+! CHECK: cf.cond_br %6, ^bb1, ^bb2
+! CHECK: ^bb1:
+! CHECK: cf.br ^bb3
+! CHECK: ^bb2:
+! CHECK: cf.br ^bb3
+! CHECK: ^bb3:
+! CHECK: cuf.free %[[DECL]]#1 : !fir.ref<!fir.array<10xi32>>
+! CHECK: }
More information about the flang-commits
mailing list