[flang-commits] [flang] [flang][MemoryAllocation] do not assume all blocks have terminators (PR #203902)

via flang-commits flang-commits at lists.llvm.org
Mon Jun 15 06:21:30 PDT 2026


llvmorg-github-actions[bot] wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-fir-hlfir

Author: jeanPerier

<details>
<summary>Changes</summary>

Update MemoryAllocation to cope with blocks without terminators.
`getTerminator` cannot be called when a block has no terminator and must be guarded by `mightHaveTerminator`.
This case was hit for instance for alloca inside fir.do_concurrent.loop.

Add handling for single block regions with no terminators (which is the case of `fir.do_concurrent.loop` and most regions without terminators). The deallocation point can simply be placed at the end of the block in such cases. For regions with several blocks and no terminators, the pass will leave the alloca (no known operation used in flang with such behavior).

Also add `AutomaticAllocationScope` to the `fir.do_concurrent.loop` since each iteration is independent and owns its allocas (otherwise the pass would create allocmem outside of the loops).



---
Full diff: https://github.com/llvm/llvm-project/pull/203902.diff


3 Files Affected:

- (modified) flang/include/flang/Optimizer/Dialect/FIROps.td (+2-1) 
- (modified) flang/lib/Optimizer/Transforms/MemoryUtils.cpp (+71-46) 
- (added) flang/test/Fir/memory-allocation-opt-do-concurrent.fir (+111) 


``````````diff
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index b6eef7dd6233f..35a87c29d0cb6 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -4289,7 +4289,8 @@ def fir_ReduceSpecifier {
 def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
     [AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
                                                          ["getLoopInductionVars"]>,
-     Terminator, NoTerminator, SingleBlock, ParentOneOf<["DoConcurrentOp"]>]> {
+     Terminator, NoTerminator, SingleBlock, AutomaticAllocationScope,
+     ParentOneOf<["DoConcurrentOp"]>]> {
   let summary = "do concurrent loop";
 
   let description = [{
diff --git a/flang/lib/Optimizer/Transforms/MemoryUtils.cpp b/flang/lib/Optimizer/Transforms/MemoryUtils.cpp
index 789111cd35f67..684007221f0c8 100644
--- a/flang/lib/Optimizer/Transforms/MemoryUtils.cpp
+++ b/flang/lib/Optimizer/Transforms/MemoryUtils.cpp
@@ -10,10 +10,26 @@
 #include "flang/Optimizer/Builder/FIRBuilder.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Builders.h"
 #include "mlir/IR/Dominance.h"
 #include "llvm/ADT/STLExtras.h"
 
 namespace {
+using DeallocationInsertionPoint = mlir::OpBuilder::InsertPoint;
+
+static mlir::Location getDeallocationLocation(DeallocationInsertionPoint ip) {
+  if (ip.getPoint() != ip.getBlock()->end())
+    return ip.getPoint()->getLoc();
+  if (!ip.getBlock()->empty())
+    return ip.getBlock()->back().getLoc();
+  return ip.getBlock()->getParentOp()->getLoc();
+}
+
+static void setDeallocationInsertionPoint(mlir::RewriterBase &rewriter,
+                                          DeallocationInsertionPoint ip) {
+  rewriter.setInsertionPoint(ip.getBlock(), ip.getPoint());
+}
+
 /// Helper class to detect if an alloca is inside an mlir::Block that can be
 /// reached again before its deallocation points via block successors. This
 /// analysis is only valid if the deallocation points are inside (or nested
@@ -24,8 +40,9 @@ namespace {
 /// restrictive).
 class BlockCycleDetector {
 public:
-  bool allocaIsInCycle(fir::AllocaOp alloca,
-                       llvm::ArrayRef<mlir::Operation *> deallocationPoints);
+  bool allocaIsInCycle(
+      fir::AllocaOp alloca,
+      llvm::ArrayRef<DeallocationInsertionPoint> deallocationPoints);
 
 private:
   // Cache for blocks owning alloca that have been analyzed. In many Fortran
@@ -46,19 +63,21 @@ class AllocaReplaceImpl {
 private:
   mlir::Region *findDeallocationPointsAndOwner(
       fir::AllocaOp alloca,
-      llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints);
-  bool
-  allocDominatesDealloc(fir::AllocaOp alloca,
-                        llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
-    return llvm::all_of(deallocationPoints, [&](mlir::Operation *deallocPoint) {
-      return this->dominanceInfo.properlyDominates(alloca.getOperation(),
-                                                   deallocPoint);
+      llvm::SmallVectorImpl<DeallocationInsertionPoint> &deallocationPoints);
+  bool allocDominatesDealloc(
+      fir::AllocaOp alloca,
+      llvm::ArrayRef<DeallocationInsertionPoint> deallocationPoints) {
+    return llvm::all_of(deallocationPoints, [&](DeallocationInsertionPoint ip) {
+      if (ip.getPoint() != ip.getBlock()->end())
+        return dominanceInfo.properlyDominates(alloca.getOperation(),
+                                               &*ip.getPoint());
+      return dominanceInfo.dominates(alloca->getBlock(), ip.getBlock());
     });
   }
-  void
-  genIndirectDeallocation(mlir::RewriterBase &, fir::AllocaOp,
-                          llvm::ArrayRef<mlir::Operation *> deallocationPoints,
-                          mlir::Value replacement, mlir::Region &owningRegion);
+  void genIndirectDeallocation(
+      mlir::RewriterBase &, fir::AllocaOp,
+      llvm::ArrayRef<DeallocationInsertionPoint> deallocationPoints,
+      mlir::Value replacement, mlir::Region &owningRegion);
 
 private:
   fir::AllocaRewriterCallBack allocaRewriter;
@@ -68,14 +87,14 @@ class AllocaReplaceImpl {
 };
 } // namespace
 
-static bool
-allocaIsInCycleImpl(mlir::Block *allocaBlock,
-                    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
+static bool allocaIsInCycleImpl(
+    mlir::Block *allocaBlock,
+    llvm::ArrayRef<DeallocationInsertionPoint> deallocationPoints) {
   llvm::DenseSet<mlir::Block *> seen;
   // Insert the deallocation point blocks as "seen" so that the block
   // traversal will stop at them.
-  for (mlir::Operation *deallocPoint : deallocationPoints)
-    seen.insert(deallocPoint->getBlock());
+  for (DeallocationInsertionPoint ip : deallocationPoints)
+    seen.insert(ip.getBlock());
   if (seen.contains(allocaBlock))
     return false;
   // Traverse the block successor graph starting by the alloca block.
@@ -92,7 +111,7 @@ allocaIsInCycleImpl(mlir::Block *allocaBlock,
 }
 bool BlockCycleDetector::allocaIsInCycle(
     fir::AllocaOp alloca,
-    llvm::ArrayRef<mlir::Operation *> deallocationPoints) {
+    llvm::ArrayRef<DeallocationInsertionPoint> deallocationPoints) {
   mlir::Block *allocaBlock = alloca->getBlock();
   auto analyzedPair = analyzed.try_emplace(allocaBlock, /*isInCycle=*/false);
   bool alreadyAnalyzed = !analyzedPair.second;
@@ -136,7 +155,7 @@ static bool isRegionTerminator(mlir::Operation &terminator) {
 
 mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
     fir::AllocaOp alloca,
-    llvm::SmallVectorImpl<mlir::Operation *> &deallocationPoints) {
+    llvm::SmallVectorImpl<DeallocationInsertionPoint> &deallocationPoints) {
   // Step 1: Identify the operation and region owning the alloca.
   mlir::Region *owningRegion = alloca.getOwnerRegion();
   if (!owningRegion)
@@ -148,24 +167,29 @@ mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
   // deallocation points.
   bool isOpenACCMPRecipe = mlir::isa<mlir::accomp::RecipeInterface>(owningOp);
   for (mlir::Block &block : owningRegion->getBlocks())
-    if (mlir::Operation *terminator = block.getTerminator();
-        isRegionTerminator(*terminator)) {
-      // FIXME: OpenACC and OpenMP privatization recipe are stand alone
-      // operation meant to be later "inlined", the value they return may
-      // be the address of a local alloca. It would be incorrect to insert
-      // deallocation before the terminator (this would introduce use after
-      // free once the recipe is inlined.
-      // This probably require redesign or special handling on the OpenACC/MP
-      // side.
-      if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
-        return nullptr;
-      deallocationPoints.push_back(terminator);
+    if (block.mightHaveTerminator()) {
+      mlir::Operation *terminator = block.getTerminator();
+      if (terminator && isRegionTerminator(*terminator)) {
+        // FIXME: OpenACC and OpenMP privatization recipe are stand alone
+        // operation meant to be later "inlined", the value they return may
+        // be the address of a local alloca. It would be incorrect to insert
+        // deallocation before the terminator (this would introduce use after
+        // free once the recipe is inlined.
+        // This probably require redesign or special handling on the OpenACC/MP
+        // side.
+        if (isOpenACCMPRecipe && terminatorYieldsMemory(*terminator))
+          return nullptr;
+        deallocationPoints.emplace_back(&block, terminator->getIterator());
+      }
     }
-  // If no block terminators without successors have been found, this is
-  // an odd region we cannot reason about (never seen yet in FIR and
-  // mainstream dialects, but MLIR does not really prevent it).
-  if (deallocationPoints.empty())
-    return nullptr;
+  // Regions like fir.do_concurrent.loop have a single block without a
+  // terminator. Deallocate at the end of that block.
+  if (deallocationPoints.empty()) {
+    if (!owningRegion->hasOneBlock())
+      return nullptr;
+    mlir::Block &block = owningRegion->front();
+    deallocationPoints.emplace_back(&block, block.end());
+  }
 
   // Step 3: detect block based loops between the allocation and deallocation
   // points, and add a deallocation point on the back edge to avoid memory
@@ -186,13 +210,13 @@ mlir::Region *AllocaReplaceImpl::findDeallocationPointsAndOwner(
   // false positives.
   if (!allocDominatesDealloc(alloca, deallocationPoints) ||
       blockCycleDetector.allocaIsInCycle(alloca, deallocationPoints))
-    deallocationPoints.push_back(alloca.getOperation());
+    deallocationPoints.emplace_back(alloca->getBlock(), alloca->getIterator());
   return owningRegion;
 }
 
 void AllocaReplaceImpl::genIndirectDeallocation(
     mlir::RewriterBase &rewriter, fir::AllocaOp alloca,
-    llvm::ArrayRef<mlir::Operation *> deallocationPoints,
+    llvm::ArrayRef<DeallocationInsertionPoint> deallocationPoints,
     mlir::Value replacement, mlir::Region &owningRegion) {
   mlir::Location loc = alloca.getLoc();
   auto replacementInsertPoint = rewriter.saveInsertionPoint();
@@ -233,15 +257,15 @@ void AllocaReplaceImpl::genIndirectDeallocation(
     // alloca.
     rewriter.setInsertionPointAfter(ifOp);
   };
-  for (mlir::Operation *deallocPoint : deallocationPoints) {
-    rewriter.setInsertionPoint(deallocPoint);
-    genConditionalDealloc(deallocPoint->getLoc());
+  for (DeallocationInsertionPoint deallocPoint : deallocationPoints) {
+    setDeallocationInsertionPoint(rewriter, deallocPoint);
+    genConditionalDealloc(getDeallocationLocation(deallocPoint));
   }
 }
 
 bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
                                 fir::AllocaOp alloca) {
-  llvm::SmallVector<mlir::Operation *> deallocationPoints;
+  llvm::SmallVector<DeallocationInsertionPoint> deallocationPoints;
   mlir::Region *owningRegion =
       findDeallocationPointsAndOwner(alloca, deallocationPoints);
   if (!owningRegion)
@@ -254,9 +278,10 @@ bool AllocaReplaceImpl::replace(mlir::RewriterBase &rewriter,
     mlir::Value castReplacement = fir::factory::createConvert(
         rewriter, alloca.getLoc(), alloca.getType(), replacement);
     if (deallocPointsDominateAlloc)
-      for (mlir::Operation *deallocPoint : deallocationPoints) {
-        rewriter.setInsertionPoint(deallocPoint);
-        deallocGenerator(deallocPoint->getLoc(), rewriter, replacement);
+      for (DeallocationInsertionPoint deallocPoint : deallocationPoints) {
+        setDeallocationInsertionPoint(rewriter, deallocPoint);
+        deallocGenerator(getDeallocationLocation(deallocPoint), rewriter,
+                         replacement);
       }
     else
       genIndirectDeallocation(rewriter, alloca, deallocationPoints, replacement,
diff --git a/flang/test/Fir/memory-allocation-opt-do-concurrent.fir b/flang/test/Fir/memory-allocation-opt-do-concurrent.fir
new file mode 100644
index 0000000000000..586eb8df28684
--- /dev/null
+++ b/flang/test/Fir/memory-allocation-opt-do-concurrent.fir
@@ -0,0 +1,111 @@
+// RUN: fir-opt --memory-allocation-opt="dynamic-array-on-heap=true" %s | FileCheck %s
+
+func.func @test_do_concurrent() {
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  fir.do_concurrent {
+    fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) {
+      %0 = fir.alloca !fir.array<?xf32>, %arg0
+      fir.call @bar(%0) : (!fir.ref<!fir.array<?xf32>>) -> ()
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @test_do_concurrent() {
+// CHECK: fir.do_concurrent {
+// CHECK:   fir.do_concurrent.loop
+// CHECK:     %[[MEM:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {bindc_name = "", uniq_name = ""}
+// CHECK:     %[[REF:.*]] = fir.convert %[[MEM]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+// CHECK:     fir.call @bar(%[[REF]])
+// CHECK:     fir.freemem %[[MEM]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:   }
+// CHECK: }
+
+func.func @test_do_concurrent_if(%cond: i1) {
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  fir.do_concurrent {
+    fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) {
+      fir.if %cond {
+        %0 = fir.alloca !fir.array<?xf32>, %arg0
+        fir.call @bar(%0) : (!fir.ref<!fir.array<?xf32>>) -> ()
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @test_do_concurrent_if(
+// CHECK: fir.do_concurrent {
+// CHECK:   fir.do_concurrent.loop
+// CHECK:     %[[PTR:.*]] = fir.alloca !fir.heap<!fir.array<?xf32>>
+// CHECK:     %[[NULL:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+// CHECK:     fir.store %[[NULL]] to %[[PTR]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:     fir.if
+// CHECK:       %[[PREV:.*]] = fir.load %[[PTR]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:       fir.if {{.*}} {
+// CHECK:         fir.freemem %[[PREV]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:       }
+// CHECK:       %[[MEM:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {bindc_name = "", uniq_name = ""}
+// CHECK:       %[[REF:.*]] = fir.convert %[[MEM]] : (!fir.heap<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+// CHECK:       fir.store %[[MEM]] to %[[PTR]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:       fir.call @bar(%[[REF]])
+// CHECK:     %[[LAST:.*]] = fir.load %[[PTR]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:     fir.if {{.*}} {
+// CHECK:       fir.freemem %[[LAST]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:     }
+// CHECK:   }
+// CHECK: }
+
+func.func @test_do_concurrent_if_else(%cond: i1) {
+  %c1 = arith.constant 1 : index
+  %c10 = arith.constant 10 : index
+  fir.do_concurrent {
+    fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) {
+      fir.if %cond {
+        %0 = fir.alloca !fir.array<?xf32>, %arg0
+        fir.call @bar(%0) : (!fir.ref<!fir.array<?xf32>>) -> ()
+      } else {
+        %1 = fir.alloca !fir.array<?xf32>, %arg0
+        fir.call @bar(%1) : (!fir.ref<!fir.array<?xf32>>) -> ()
+      }
+    }
+  }
+  return
+}
+// CHECK-LABEL: func.func @test_do_concurrent_if_else(
+// CHECK: fir.do_concurrent {
+// CHECK:   fir.do_concurrent.loop
+// CHECK:     %[[PTR_ELSE:.*]] = fir.alloca !fir.heap<!fir.array<?xf32>>
+// CHECK:     %[[NULL_ELSE:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+// CHECK:     fir.store %[[NULL_ELSE]] to %[[PTR_ELSE]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:     %[[PTR_THEN:.*]] = fir.alloca !fir.heap<!fir.array<?xf32>>
+// CHECK:     %[[NULL_THEN:.*]] = fir.zero_bits !fir.heap<!fir.array<?xf32>>
+// CHECK:     fir.store %[[NULL_THEN]] to %[[PTR_THEN]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:     fir.if
+// CHECK:       %[[PREV_THEN:.*]] = fir.load %[[PTR_THEN]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:       fir.if {{.*}} {
+// CHECK:         fir.freemem %[[PREV_THEN]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:       }
+// CHECK:       %[[MEM_THEN:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {bindc_name = "", uniq_name = ""}
+// CHECK:       fir.store %[[MEM_THEN]] to %[[PTR_THEN]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:       fir.call @bar(
+// CHECK:     } else {
+// CHECK:       %[[PREV_ELSE:.*]] = fir.load %[[PTR_ELSE]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:       fir.if {{.*}} {
+// CHECK:         fir.freemem %[[PREV_ELSE]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:       }
+// CHECK:       %[[MEM_ELSE:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {bindc_name = "", uniq_name = ""}
+// CHECK:       fir.store %[[MEM_ELSE]] to %[[PTR_ELSE]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:       fir.call @bar(
+// CHECK:     %[[LAST_THEN:.*]] = fir.load %[[PTR_THEN]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:     fir.if {{.*}} {
+// CHECK:       fir.freemem %[[LAST_THEN]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:     }
+// CHECK:     %[[LAST_ELSE:.*]] = fir.load %[[PTR_ELSE]] : !fir.ref<!fir.heap<!fir.array<?xf32>>>
+// CHECK:     fir.if {{.*}} {
+// CHECK:       fir.freemem %[[LAST_ELSE]] : !fir.heap<!fir.array<?xf32>>
+// CHECK:     }
+// CHECK:   }
+// CHECK: }
+
+func.func private @bar(!fir.ref<!fir.array<?xf32>>)

``````````

</details>


https://github.com/llvm/llvm-project/pull/203902


More information about the flang-commits mailing list