[flang-commits] [flang] [flang][stack-arrays] Extend pass to work on declare ops and within omp regions (PR #98810)

Tue Jul 16 05:15:57 PDT 2024

https://github.com/ergawy updated https://github.com/llvm/llvm-project/pull/98810

>From 64ee7b34abbd6c745f1f65878fa149c3f48f3cb1 Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Fri, 12 Jul 2024 03:41:23 -0500
Subject: [PATCH 1/2] [flang][stack-arrays] Extend pass to work on declare ops
 and within omp regions

Extends the stack-arrays pass to support `fir.declare` ops. Before that,
we did not recognize malloc-free pairs for which `fir.declare` is used
to declare the allocated entity. This is because the `free` op was
invoked on the result of the `fir.declare` op and did not directly use
the allocated memory SSA value.

This also extends the pass to collect the analysis results within OpenMP
regions.
---
 .../lib/Optimizer/Transforms/StackArrays.cpp  | 23 +++++++-
 flang/test/Transforms/stack-arrays-hlfir.f90  | 55 +++++++++++++++++++
 flang/test/Transforms/stack-arrays.fir        |  7 +--
 3 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 flang/test/Transforms/stack-arrays-hlfir.f90

diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index e8fa70ebc39d8..f695a7fa2f8fc 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -287,7 +287,7 @@ mlir::ChangeResult LatticePoint::join(const AbstractDenseLattice &lattice) {
 
 void LatticePoint::print(llvm::raw_ostream &os) const {
   for (const auto &[value, state] : stateMap) {
-    os << value << ": ";
+    os << "\n * " << value << ": ";
     ::print(os, state);
   }
 }
@@ -361,6 +361,13 @@ void AllocationAnalysis::visitOperation(mlir::Operation *op,
   } else if (mlir::isa<fir::FreeMemOp>(op)) {
     assert(op->getNumOperands() == 1 && "fir.freemem has one operand");
     mlir::Value operand = op->getOperand(0);
+
+    // Note: StackArrays is scheduled in the pass pipeline after lowering hlfir
+    // to fir. Therefore, we only need to handle `fir::DeclareOp`s.
+    if (auto declareOp =
+            llvm::dyn_cast_if_present<fir::DeclareOp>(operand.getDefiningOp()))
+      operand = declareOp.getMemref();
+
     std::optional<AllocationState> operandState = before.get(operand);
     if (operandState && *operandState == AllocationState::Allocated) {
       // don't tag things not allocated in this function as freed, so that we
@@ -452,6 +459,9 @@ StackArraysAnalysisWrapper::analyseFunction(mlir::Operation *func) {
   };
   func->walk([&](mlir::func::ReturnOp child) { joinOperationLattice(child); });
   func->walk([&](fir::UnreachableOp child) { joinOperationLattice(child); });
+  func->walk(
+      [&](mlir::omp::TerminatorOp child) { joinOperationLattice(child); });
+
   llvm::DenseSet<mlir::Value> freedValues;
   point.appendFreedValues(freedValues);
 
@@ -518,9 +528,18 @@ AllocMemConversion::matchAndRewrite(fir::AllocMemOp allocmem,
 
   // remove freemem operations
   llvm::SmallVector<mlir::Operation *> erases;
-  for (mlir::Operation *user : allocmem.getOperation()->getUsers())
+  for (mlir::Operation *user : allocmem.getOperation()->getUsers()) {
+    if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+      for (mlir::Operation *user : declareOp->getUsers()) {
+        if (mlir::isa<fir::FreeMemOp>(user))
+          erases.push_back(user);
+      }
+    }
+
     if (mlir::isa<fir::FreeMemOp>(user))
       erases.push_back(user);
+  }
+
   // now we are done iterating the users, it is safe to mutate them
   for (mlir::Operation *erase : erases)
     rewriter.eraseOp(erase);
diff --git a/flang/test/Transforms/stack-arrays-hlfir.f90 b/flang/test/Transforms/stack-arrays-hlfir.f90
new file mode 100644
index 0000000000000..50261b3078466
--- /dev/null
+++ b/flang/test/Transforms/stack-arrays-hlfir.f90
@@ -0,0 +1,55 @@
+! Similar to stack-arrays.f90; i.e. both test the stack-arrays pass for different
+! kinds of supported inputs. This one differs in that it takes the hlfir lowering
+! path in flag rather than the fir one. For example, temp arrays are lowered
+! differently in hlfir vs. fir and the IR that reaches the stack arrays pass looks
+! quite different.
+
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - \
+! RUN: | fir-opt --lower-hlfir-ordered-assignments \
+! RUN:           --bufferize-hlfir \
+! RUN:           --convert-hlfir-to-fir \
+! RUN:           --array-value-copy \
+! RUN:           --stack-arrays \
+! RUN: | FileCheck %s
+
+subroutine temp_array
+  implicit none
+  integer (8) :: lV
+  integer (8), dimension (2) :: iaVS
+
+  lV = 202
+
+  iaVS = [lV, lV]
+end subroutine temp_array
+! CHECK-LABEL: func.func @_QPtemp_array{{.*}} {
+! CHECK-NOT:     fir.allocmem
+! CHECK-NOT:     fir.freemem
+! CHECK:         fir.alloca !fir.array<2xi64>
+! CHECK-NOT:     fir.allocmem
+! CHECK-NOT:     fir.freemem
+! CHECK:         return
+! CHECK-NEXT:  }
+
+subroutine omp_temp_array
+  implicit none
+  integer (8) :: lV
+  integer (8), dimension (2) :: iaVS
+
+  lV = 202
+
+  !$omp target
+    iaVS = [lV, lV]
+  !$omp end target
+end subroutine omp_temp_array
+! CHECK-LABEL: func.func @_QPomp_temp_array{{.*}} {
+! CHECK:         omp.target {{.*}} {
+! CHECK-NOT:       fir.allocmem
+! CHECK-NOT:       fir.freemem
+! CHECK:           fir.alloca !fir.array<2xi64>
+! CHECK-NOT:       fir.allocmem
+! CHECK-NOT:       fir.freemem
+! CHECK:           omp.terminator
+! CHECK-NEXT:    }
+! CHECK:         return
+! CHECK-NEXT:  }
diff --git a/flang/test/Transforms/stack-arrays.fir b/flang/test/Transforms/stack-arrays.fir
index a2ffe555091eb..841fea56c35d1 100644
--- a/flang/test/Transforms/stack-arrays.fir
+++ b/flang/test/Transforms/stack-arrays.fir
@@ -339,13 +339,10 @@ func.func @omp_placement1() {
   return
 }
 // CHECK:      func.func @omp_placement1() {
+// CHECK-NEXT:   %[[MEM:.*]] = fir.alloca !fir.array<42xi32>
+// CHECK-NEXT:   %[[MEM_CONV:.*]] = fir.convert %[[MEM]] : (!fir.ref<!fir.array<42xi32>>) -> !fir.heap<!fir.array<42xi32>>
 // CHECK-NEXT:   omp.sections {
 // CHECK-NEXT:     omp.section {
-// CHECK-NEXT:       %[[MEM:.*]] = fir.allocmem !fir.array<42xi32>
-// TODO: this allocation should be moved to the stack. Unfortunately, the data
-// flow analysis fails to propogate the lattice out of the omp region to the
-// return satement.
-// CHECK-NEXT:       fir.freemem %[[MEM]] : !fir.heap<!fir.array<42xi32>>
 // CHECK-NEXT:       omp.terminator
 // CHECK-NEXT:     }
 // CHECK-NEXT:     omp.terminator

>From 9d642d8476ac9a3f257408c5e79b37fda02a681a Mon Sep 17 00:00:00 2001
From: ergawy <kareem.ergawy at amd.com>
Date: Tue, 16 Jul 2024 07:14:54 -0500
Subject: [PATCH 2/2] further detections of declare ops

---
 flang/lib/Optimizer/Transforms/StackArrays.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/StackArrays.cpp b/flang/lib/Optimizer/Transforms/StackArrays.cpp
index f695a7fa2f8fc..5556be85b49a5 100644
--- a/flang/lib/Optimizer/Transforms/StackArrays.cpp
+++ b/flang/lib/Optimizer/Transforms/StackArrays.cpp
@@ -652,9 +652,19 @@ AllocMemConversion::findAllocaLoopInsertionPoint(fir::AllocMemOp &oldAlloc) {
 
   // find freemem ops
   llvm::SmallVector<mlir::Operation *, 1> freeOps;
-  for (mlir::Operation *user : oldAllocOp->getUsers())
-    if (mlir::isa<fir::FreeMemOp>(user))
-      freeOps.push_back(user);
+
+  for (mlir::Operation *user : oldAllocOp->getUsers()) {
+    if (auto declareOp = mlir::dyn_cast_if_present<fir::DeclareOp>(user)) {
+      for (mlir::Operation *user : declareOp->getUsers()) {
+        if (mlir::isa<fir::FreeMemOp>(user))
+          freeOps.push_back(user);
+      }
+    }
+
+     if (mlir::isa<fir::FreeMemOp>(user))
+       freeOps.push_back(user);
+  }
+
   assert(freeOps.size() && "DFA should only return freed memory");
 
   // Don't attempt to reason about a stacksave/stackrestore between different