[flang-commits] [flang] 464d321 - [flang][stack-arrays] Extend pass to work on declare ops and within omp regions (#98810)
via flang-commits
flang-commits at lists.llvm.org
Wed Jul 17 21:00:40 PDT 2024
Author: Kareem Ergawy
Date: 2024-07-18T06:00:36+02:00
New Revision: 464d321ee8dde1eaf14b5537eaf030e6df513849
URL: https://github.com/llvm/llvm-project/commit/464d321ee8dde1eaf14b5537eaf030e6df513849
DIFF: https://github.com/llvm/llvm-project/commit/464d321ee8dde1eaf14b5537eaf030e6df513849.diff
LOG: [flang][stack-arrays] Extend pass to work on declare ops and within omp regions (#98810)
Extends the stack-arrays pass to support `fir.declare` ops. Before that,
we did not recognize malloc-free pairs for which `fir.declare` is used
to declare the allocated entity. This is because the `free` op was
invoked on the result of the `fir.declare` op and did not directly use
the allocated memory SSA value.
This also extends the pass to collect the analysis results within OpenMP
regions.
Added:
flang/test/Transforms/stack-arrays-hlfir.f90
Modified:
flang/lib/Optimizer/Transforms/StackArrays.cpp
flang/test/Transforms/stack-arrays.fir
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index e8fa70ebc39d8..bdc2d9cd9c6c4 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -287,7 +287,7 @@ mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) {
void LatticePoint::print(llvm::raw_ostream &os) const {
for (const auto &[value, state] : stateMap) {
- os << value << ": ";
+ os << "\n * " << value << ": ";
::print(os, state);
}
}
@@ -361,6 +361,13 @@ void AllocationAnalysis::visitOperation(mlir::Operation *op,
} else if (mlir::isa<fir::FreeMemOp>(op)) {
assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
mlir::Value operand = op->getOperand(0);
+
+ // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
+ // to fir. Therefore, we only need to handle `fir::DeclareOp`s.
+ if (auto declareOp =
+ llvm::dyn_cast_if_present<fir::DeclareOp>(operand.getDefiningOp()))
+ operand = declareOp.getMemref();
+
std::optional<AllocationState> operandState = before.get(operand);
if (operandState && *operandState == AllocationState::Allocated) {
// don't tag things not allocated in this function as freed, so that we
@@ -452,6 +459,9 @@ StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
};
func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
+ func->walk(
+ [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
+
llvm::DenseSet<mlir::Value> freedValues;
point.appendFreedValues(freedValues);
@@ -518,9 +528,18 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
// remove freemem operations
llvm::SmallVector<mlir::Operation *> erases;
- for (mlir::Operation *user : allocmem.getOperation()->getUsers())
+ for (mlir::Operation *user : allocmem.getOperation()->getUsers()) {
+ if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+ for (mlir::Operation *user : declareOp->getUsers()) {
+ if (mlir::isa<fir::FreeMemOp>(user))
+ erases.push_back(user);
+ }
+ }
+
if (mlir::isa<fir::FreeMemOp>(user))
erases.push_back(user);
+ }
+
// now we are done iterating the users, it is safe to mutate them
for (mlir::Operation *erase : erases)
rewriter.eraseOp(erase);
@@ -633,9 +652,19 @@ AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) {
// find freemem ops
llvm::SmallVector<mlir::Operation *, 1> freeOps;
- for (mlir::Operation *user : oldAllocOp->getUsers())
+
+ for (mlir::Operation *user : oldAllocOp->getUsers()) {
+ if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+ for (mlir::Operation *user : declareOp->getUsers()) {
+ if (mlir::isa<fir::FreeMemOp>(user))
+ freeOps.push_back(user);
+ }
+ }
+
if (mlir::isa<fir::FreeMemOp>(user))
freeOps.push_back(user);
+ }
+
assert(freeOps.size() && "DFA should only return freed memory");
// Don't attempt to reason about a stacksave/stackrestore between
diff erent
@@ -717,12 +746,23 @@ void AllocMemConversion::insertStackSaveRestore(
mlir::SymbolRefAttr stackRestoreSym =
builder.getSymbolRefAttr(stackRestoreFn.getName());
+ auto createStackRestoreCall = [&](mlir::Operation *user) {
+ builder.setInsertionPoint(user);
+ builder.create<fir::CallOp>(user->getLoc(),
+ stackRestoreFn.getFunctionType().getResults(),
+ stackRestoreSym, mlir::ValueRange{sp});
+ };
+
for (mlir::Operation *user : oldAlloc->getUsers()) {
+ if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+ for (mlir::Operation *user : declareOp->getUsers()) {
+ if (mlir::isa<fir::FreeMemOp>(user))
+ createStackRestoreCall(user);
+ }
+ }
+
if (mlir::isa<fir::FreeMemOp>(user)) {
- builder.setInsertionPoint(user);
- builder.create<fir::CallOp>(user->getLoc(),
- stackRestoreFn.getFunctionType().getResults(),
- stackRestoreSym, mlir::ValueRange{sp});
+ createStackRestoreCall(user);
}
}
diff --git a/flang/test/Transforms/stack-arrays-hlfir.f90 b/flang/test/Transforms/stack-arrays-hlfir.f90
new file mode 100644
index 0000000000000..50261b3078466
--- /dev/null
+++ b/flang/test/Transforms/stack-arrays-hlfir.f90
@@ -0,0 +1,55 @@
+! Similar to stack-arrays.f90; i.e. both test the stack-arrays pass for
diff erent
+! kinds of supported inputs. This one
diff ers in that it takes the hlfir lowering
+! path in flag rather than the fir one. For example, temp arrays are lowered
+!
diff erently in hlfir vs. fir and the IR that reaches the stack arrays pass looks
+! quite
diff erent.
+
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - \
+! RUN: | fir-opt --lower-hlfir-ordered-assignments \
+! RUN: --bufferize-hlfir \
+! RUN: --convert-hlfir-to-fir \
+! RUN: --array-value-copy \
+! RUN: --stack-arrays \
+! RUN: | FileCheck %s
+
+subroutine temp_array
+ implicit none
+ integer (8) :: lV
+ integer (8), dimension (2) :: iaVS
+
+ lV = 202
+
+ iaVS = [lV, lV]
+end subroutine temp_array
+! CHECK-LABEL: func.func @_QPtemp_array{{.*}} {
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: fir.alloca !fir.array<2xi64>
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: return
+! CHECK-NEXT: }
+
+subroutine omp_temp_array
+ implicit none
+ integer (8) :: lV
+ integer (8), dimension (2) :: iaVS
+
+ lV = 202
+
+ !$omp target
+ iaVS = [lV, lV]
+ !$omp end target
+end subroutine omp_temp_array
+! CHECK-LABEL: func.func @_QPomp_temp_array{{.*}} {
+! CHECK: omp.target {{.*}} {
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: fir.alloca !fir.array<2xi64>
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: omp.terminator
+! CHECK-NEXT: }
+! CHECK: return
+! CHECK-NEXT: }
diff --git a/flang/test/Transforms/stack-arrays.fir b/flang/test/Transforms/stack-arrays.fir
index a2ffe555091eb..45c22c15f7995 100644
--- a/flang/test/Transforms/stack-arrays.fir
+++ b/flang/test/Transforms/stack-arrays.fir
@@ -339,13 +339,10 @@ func.func @omp_placement1() {
return
}
// CHECK: func.func @omp_placement1() {
+// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<42xi32>
+// CHECK-NEXT: %[[MEM_CONV:.*]] = fir.convert %[[MEM]] : (!fir.ref<!fir.array<42xi32>>) -> !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: omp.sections {
// CHECK-NEXT: omp.section {
-// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<42xi32>
-// TODO: this allocation should be moved to the stack. Unfortunately, the data
-// flow analysis fails to propogate the lattice out of the omp region to the
-// return satement.
-// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: omp.terminator
// CHECK-NEXT: }
// CHECK-NEXT: omp.terminator
@@ -369,3 +366,37 @@ func.func @stop_terminator() {
// CHECK-NEXT: %[[NONE:.*]] = fir.call @_FortranAStopStatement(%[[ZERO]], %[[FALSE]], %[[FALSE]]) : (i32, i1, i1) -> none
// CHECK-NEXT: fir.unreachable
// CHECK-NEXT: }
+
+
+// check that stack allocations that use fir.declare which must be placed in loops
+// use stacksave
+func.func @placement_loop_declare() {
+ %c1 = arith.constant 1 : index
+ %c1_i32 = fir.convert %c1 : (index) -> i32
+ %c2 = arith.constant 2 : index
+ %c10 = arith.constant 10 : index
+ %0:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) {
+ %3 = arith.addi %c1, %c2 : index
+ // operand is now available
+ %4 = fir.allocmem !fir.array<?xi32>, %3
+ %5 = fir.declare %4 {uniq_name = "temp"} : (!fir.heap<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+ // ...
+ fir.freemem %5 : !fir.heap<!fir.array<?xi32>>
+ fir.result %3, %c1_i32 : index, i32
+ }
+ return
+}
+// CHECK: func.func @placement_loop_declare() {
+// CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT: %[[C1_I32:.*]] = fir.convert %[[C1]] : (index) -> i32
+// CHECK-NEXT: %[[C2:.*]] = arith.constant 2 : index
+// CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
+// CHECK-NEXT: fir.do_loop
+// CHECK-NEXT: %[[SUM:.*]] = arith.addi %[[C1]], %[[C2]] : index
+// CHECK-NEXT: %[[SP:.*]] = fir.call @llvm.stacksave.p0() : () -> !fir.ref<i8>
+// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<?xi32>, %[[SUM]]
+// CHECK: fir.call @llvm.stackrestore.p0(%[[SP]])
+// CHECK-NEXT: fir.result
+// CHECK-NEXT: }
+// CHECK-NEXT: return
+// CHECK-NEXT: }
More information about the flang-commits
mailing list