[flang-commits] [flang] [flang][stack-arrays] Extend pass to work on declare ops and within omp regions (PR #98810)
Kareem Ergawy via flang-commits
flang-commits at lists.llvm.org
Tue Jul 16 05:15:57 PDT 2024
https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/98810
>From 64ee7b34abbd6c745f1f65878fa149c3f48f3cb1 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 12 Jul 2024 03:41:23 -0500
Subject: [PATCH 1/2] [flang][stack-arrays] Extend pass to work on declare ops
and within omp regions
Extends the stack-arrays pass to support `fir.declare` ops. Before that,
we did not recognize malloc-free pairs for which `fir.declare` is used
to declare the allocated entity. This is because the `free` op was
invoked on the result of the `fir.declare` op and did not directly use
the allocated memory SSA value.
This also extends the pass to collect the analysis results within OpenMP
regions.
---
.../lib/Optimizer/Transforms/StackArrays.cpp | 23 +++++++-
flang/test/Transforms/stack-arrays-hlfir.f90 | 55 +++++++++++++++++++
flang/test/Transforms/stack-arrays.fir | 7 +--
3 files changed, 78 insertions(+), 7 deletions(-)
create mode 100644 flang/test/Transforms/stack-arrays-hlfir.f90
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index e8fa70ebc39d8..f695a7fa2f8fc 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -287,7 +287,7 @@ mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) {
void LatticePoint::print(llvm::raw_ostream &os) const {
for (const auto &[value, state] : stateMap) {
- os << value << ": ";
+ os << "\n * " << value << ": ";
::print(os, state);
}
}
@@ -361,6 +361,13 @@ void AllocationAnalysis::visitOperation(mlir::Operation *op,
} else if (mlir::isa<fir::FreeMemOp>(op)) {
assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
mlir::Value operand = op->getOperand(0);
+
+ // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
+ // to fir. Therefore, we only need to handle `fir::DeclareOp`s.
+ if (auto declareOp =
+ llvm::dyn_cast_if_present<fir::DeclareOp>(operand.getDefiningOp()))
+ operand = declareOp.getMemref();
+
std::optional<AllocationState> operandState = before.get(operand);
if (operandState && *operandState == AllocationState::Allocated) {
// don't tag things not allocated in this function as freed, so that we
@@ -452,6 +459,9 @@ StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
};
func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
+ func->walk(
+ [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
+
llvm::DenseSet<mlir::Value> freedValues;
point.appendFreedValues(freedValues);
@@ -518,9 +528,18 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
// remove freemem operations
llvm::SmallVector<mlir::Operation *> erases;
- for (mlir::Operation *user : allocmem.getOperation()->getUsers())
+ for (mlir::Operation *user : allocmem.getOperation()->getUsers()) {
+ if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+ for (mlir::Operation *user : declareOp->getUsers()) {
+ if (mlir::isa<fir::FreeMemOp>(user))
+ erases.push_back(user);
+ }
+ }
+
if (mlir::isa<fir::FreeMemOp>(user))
erases.push_back(user);
+ }
+
// now we are done iterating the users, it is safe to mutate them
for (mlir::Operation *erase : erases)
rewriter.eraseOp(erase);
diff --git a/flang/test/Transforms/stack-arrays-hlfir.f90 b/flang/test/Transforms/stack-arrays-hlfir.f90
new file mode 100644
index 0000000000000..50261b3078466
--- /dev/null
+++ b/flang/test/Transforms/stack-arrays-hlfir.f90
@@ -0,0 +1,55 @@
+! Similar to stack-arrays.f90; i.e. both test the stack-arrays pass for different
+! kinds of supported inputs. This one differs in that it takes the hlfir lowering
+! path in flag rather than the fir one. For example, temp arrays are lowered
+! differently in hlfir vs. fir and the IR that reaches the stack arrays pass looks
+! quite different.
+
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - \
+! RUN: | fir-opt --lower-hlfir-ordered-assignments \
+! RUN: --bufferize-hlfir \
+! RUN: --convert-hlfir-to-fir \
+! RUN: --array-value-copy \
+! RUN: --stack-arrays \
+! RUN: | FileCheck %s
+
+subroutine temp_array
+ implicit none
+ integer (8) :: lV
+ integer (8), dimension (2) :: iaVS
+
+ lV = 202
+
+ iaVS = [lV, lV]
+end subroutine temp_array
+! CHECK-LABEL: func.func @_QPtemp_array{{.*}} {
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: fir.alloca !fir.array<2xi64>
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: return
+! CHECK-NEXT: }
+
+subroutine omp_temp_array
+ implicit none
+ integer (8) :: lV
+ integer (8), dimension (2) :: iaVS
+
+ lV = 202
+
+ !$omp target
+ iaVS = [lV, lV]
+ !$omp end target
+end subroutine omp_temp_array
+! CHECK-LABEL: func.func @_QPomp_temp_array{{.*}} {
+! CHECK: omp.target {{.*}} {
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: fir.alloca !fir.array<2xi64>
+! CHECK-NOT: fir.allocmem
+! CHECK-NOT: fir.freemem
+! CHECK: omp.terminator
+! CHECK-NEXT: }
+! CHECK: return
+! CHECK-NEXT: }
diff --git a/flang/test/Transforms/stack-arrays.fir b/flang/test/Transforms/stack-arrays.fir
index a2ffe555091eb..841fea56c35d1 100644
--- a/flang/test/Transforms/stack-arrays.fir
+++ b/flang/test/Transforms/stack-arrays.fir
@@ -339,13 +339,10 @@ func.func @omp_placement1() {
return
}
// CHECK: func.func @omp_placement1() {
+// CHECK-NEXT: %[[MEM:.*]] = fir.alloca !fir.array<42xi32>
+// CHECK-NEXT: %[[MEM_CONV:.*]] = fir.convert %[[MEM]] : (!fir.ref<!fir.array<42xi32>>) -> !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: omp.sections {
// CHECK-NEXT: omp.section {
-// CHECK-NEXT: %[[MEM:.*]] = fir.allocmem !fir.array<42xi32>
-// TODO: this allocation should be moved to the stack. Unfortunately, the data
-// flow analysis fails to propogate the lattice out of the omp region to the
-// return satement.
-// CHECK-NEXT: fir.freemem %[[MEM]] : !fir.heap<!fir.array<42xi32>>
// CHECK-NEXT: omp.terminator
// CHECK-NEXT: }
// CHECK-NEXT: omp.terminator
>From 9d642d8476ac9a3f257408c5e79b37fda02a681a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 16 Jul 2024 07:14:54 -0500
Subject: [PATCH 2/2] further detections of declare ops
---
flang/lib/Optimizer/Transforms/StackArrays.cpp | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index f695a7fa2f8fc..5556be85b49a5 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -652,9 +652,19 @@ AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) {
// find freemem ops
llvm::SmallVector<mlir::Operation *, 1> freeOps;
- for (mlir::Operation *user : oldAllocOp->getUsers())
- if (mlir::isa<fir::FreeMemOp>(user))
- freeOps.push_back(user);
+
+ for (mlir::Operation *user : oldAllocOp->getUsers()) {
+ if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+ for (mlir::Operation *user : declareOp->getUsers()) {
+ if (mlir::isa<fir::FreeMemOp>(user))
+ freeOps.push_back(user);
+ }
+ }
+
+ if (mlir::isa<fir::FreeMemOp>(user))
+ freeOps.push_back(user);
+ }
+
assert(freeOps.size() && "DFA should only return freed memory");
// Don't attempt to reason about a stacksave/stackrestore between different
More information about the flang-commits
mailing list