[flang-commits] [flang] [flang] Recognize generic allocations in Flang LICM. (PR #191923)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Mon Apr 13 18:51:19 PDT 2026


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/191923

Instead of matching particular operations like `fir.alloca`
we can use `MemoryEffectOpInterface` to figure out if a location
is a new allocation.


>From de1c06e69427ead3cb8c2bc4250b2f0d6d29efb5 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Mon, 13 Apr 2026 18:35:19 -0700
Subject: [PATCH] [flang] Recognize generic allocations in Flang LICM.

Instead of matching particular operations like `fir.alloca`
we can use `MemoryEffectOpInterface` to figure out if a location
is a new allocation.
---
 flang/include/flang/Optimizer/Support/Utils.h |  7 +++
 .../lib/Optimizer/Analysis/AliasAnalysis.cpp  | 38 +++++-------
 flang/lib/Optimizer/Support/Utils.cpp         | 17 ++++++
 .../Transforms/LoopInvariantCodeMotion.cpp    | 21 ++++---
 .../Transforms/licm-allocmem-cufaloc.mlir     | 59 +++++++++++++++++++
 5 files changed, 110 insertions(+), 32 deletions(-)
 create mode 100644 flang/test/Transforms/licm-allocmem-cufaloc.mlir

diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 63d9dbdecb18b..59f38b17e90d2 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -237,6 +237,13 @@ mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
                         mlir::Location loc,
                         mlir::ConversionPatternRewriter &rewriter,
                         mlir::Type ty, mlir::Value val, bool fold = false);
+
+/// Check if the given operation result is a new allocation
+/// as specified by the MemoryEffects of the operation.
+/// Returns true if the operation's effects allocate this result,
+/// false if they do not, and std::nullopt if the result is null or
+/// its owner does not implement MemoryEffectOpInterface.
+std::optional<bool> isNewAllocationResult(mlir::OpResult result);
 } // namespace fir
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index e832bec484786..ea97f2228651a 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -14,6 +14,7 @@
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Analysis/AliasAnalysis.h"
 #include "mlir/Dialect/OpenACC/OpenACC.h"
 #include "mlir/Dialect/OpenACC/OpenACCUtils.h"
@@ -38,23 +39,14 @@ llvm::cl::opt<bool> supportCrayPointers(
     llvm::cl::init(false));
 
 // Inspect for value-scoped Allocate effects and determine whether
-// 'candidate' is a new allocation. Returns SourceKind::Allocate if a
+// 'result' is a new allocation. Returns SourceKind::Allocate if a
 // MemAlloc effect is attached
 static fir::AliasAnalysis::SourceKind
-classifyAllocateFromEffects(mlir::Operation *op, mlir::Value candidate) {
-  if (!op)
-    return fir::AliasAnalysis::SourceKind::Unknown;
-  auto interface = llvm::dyn_cast<mlir::MemoryEffectOpInterface>(op);
-  if (!interface)
-    return fir::AliasAnalysis::SourceKind::Unknown;
-  llvm::SmallVector<mlir::MemoryEffects::EffectInstance, 4> effects;
-  interface.getEffects(effects);
-  for (mlir::MemoryEffects::EffectInstance &e : effects) {
-    if (mlir::isa<mlir::MemoryEffects::Allocate>(e.getEffect()) &&
-        e.getValue() && e.getValue() == candidate)
-      return fir::AliasAnalysis::SourceKind::Allocate;
-  }
-  return fir::AliasAnalysis::SourceKind::Unknown;
+classifyAllocateFromEffects(OpResult result) {
+  std::optional<bool> isNewAllocation = fir::isNewAllocationResult(result);
+  return isNewAllocation.value_or(false)
+             ? fir::AliasAnalysis::SourceKind::Allocate
+             : fir::AliasAnalysis::SourceKind::Unknown;
 }
 
 //===----------------------------------------------------------------------===//
@@ -828,15 +820,15 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
   Source::AccessPath accessPath;
   bool accessPathFinalized{false};
   while (defOp && !breakFromLoop) {
-    // Value-scoped allocation detection via effects.
-    if (classifyAllocateFromEffects(defOp, v) == SourceKind::Allocate) {
-      type = SourceKind::Allocate;
-      break;
-    }
     // Operations may have multiple results, so we need to analyze
     // the result for which the source is queried.
     auto opResult = mlir::cast<OpResult>(v);
     assert(opResult.getOwner() == defOp && "v must be a result of defOp");
+    // Value-scoped allocation detection via effects.
+    if (classifyAllocateFromEffects(opResult) == SourceKind::Allocate) {
+      type = SourceKind::Allocate;
+      break;
+    }
     ty = opResult.getType();
     std::optional<AliasAnalysis::Source> accSourceReturn;
     llvm::TypeSwitch<Operation *>(defOp)
@@ -932,11 +924,11 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
             } else {
               auto def = llvm::cast<mlir::Value>(boxSrc.origin.u);
               bool classified = false;
-              if (auto defDefOp = def.getDefiningOp()) {
-                if (classifyAllocateFromEffects(defDefOp, def) ==
+              if (auto defAsOpResult = mlir::dyn_cast<OpResult>(def)) {
+                if (classifyAllocateFromEffects(defAsOpResult) ==
                     SourceKind::Allocate) {
                   v = def;
-                  defOp = defDefOp;
+                  defOp = defAsOpResult.getOwner();
                   type = SourceKind::Allocate;
                   classified = true;
                 }
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index 2f33d89564796..507f802f42c22 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -131,3 +131,20 @@ mlir::Value fir::integerCast(const fir::LLVMTypeConverter &converter,
   }
   return val;
 }
+
+std::optional<bool> fir::isNewAllocationResult(mlir::OpResult result) {
+  if (!result)
+    return std::nullopt;
+  auto interface =
+      llvm::dyn_cast<mlir::MemoryEffectOpInterface>(result.getOwner());
+  if (!interface)
+    return std::nullopt;
+  llvm::SmallVector<mlir::MemoryEffects::EffectInstance, 4> effects;
+  interface.getEffects(effects);
+  for (mlir::MemoryEffects::EffectInstance &e : effects) {
+    if (mlir::isa<mlir::MemoryEffects::Allocate>(e.getEffect()) &&
+        e.getValue() && e.getValue() == result)
+      return true;
+  }
+  return false;
+}
diff --git a/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp b/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
index d1c4046f38b19..e52bdf6e94690 100644
--- a/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
+++ b/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
@@ -13,10 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
 #include "flang/Optimizer/Dialect/FIROperationMoveOpInterface.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Dialect/FortranVariableInterface.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Pass/Pass.h"
@@ -104,9 +106,10 @@ static bool isNonOptionalScalar(Value location) {
       return false;
     }
 
-    // Scalars "defined" by fir.alloca and fir.address_of
-    // are present.
-    if (isa<fir::AllocaOp, fir::AddrOfOp>(defOp)) {
+    // Scalars "defined" by fir.address_of, or produced by a new
+    // allocation (e.g. fir.alloca, cuf.alloc), are present.
+    if (isa<fir::AddrOfOp>(defOp) ||
+        fir::isNewAllocationResult(cast<OpResult>(location)).value_or(false)) {
       LDBG() << "Success: is non optional scalar";
       return true;
     }
@@ -141,13 +144,13 @@ static bool isNonOptionalScalar(Value location) {
 
       // TODO: we can probably use FIR AliasAnalysis' getSource()
       // method to identify the storage in more cases.
-      Value memref = llvm::TypeSwitch<Operation *, Value>(defOp)
-                         .Case<fir::DeclareOp, hlfir::DeclareOp>(
-                             [](auto op) { return op.getMemref(); })
-                         .Default([](auto) { return nullptr; });
+      location = llvm::TypeSwitch<Operation *, Value>(defOp)
+                     .Case<fir::DeclareOp, hlfir::DeclareOp>(
+                         [](auto op) { return op.getMemref(); })
+                     .Default([](auto) { return nullptr; });
 
-      if (memref)
-        return isNonOptionalScalar(memref);
+      if (location)
+        continue;
 
       LDBG() << "Failure: cannot reason about variable storage";
       return false;
diff --git a/flang/test/Transforms/licm-allocmem-cufaloc.mlir b/flang/test/Transforms/licm-allocmem-cufaloc.mlir
new file mode 100644
index 0000000000000..87ed92b98b639
--- /dev/null
+++ b/flang/test/Transforms/licm-allocmem-cufaloc.mlir
@@ -0,0 +1,59 @@
+// RUN: fir-opt -flang-licm --split-input-file %s | FileCheck %s
+
+// Test that a load of a scalar allocated by fir.allocmem
+// is hoisted out of the loop (the allocation proves
+// the variable is always present).
+// CHECK-LABEL:   func.func @test_allocmem(
+// CHECK:           %[[ALLOCMEM:.*]] = fir.allocmem f32
+// CHECK:           %[[DECLARE:.*]] = fir.declare %[[ALLOCMEM]] {uniq_name = "_QFtestEy"}
+// CHECK:           %[[LOAD:.*]] = fir.load %[[DECLARE]]
+// CHECK:           fir.do_loop
+// CHECK-NOT:         fir.load
+func.func @test_allocmem(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
+  %c1 = arith.constant 1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = fir.allocmem f32 {uniq_name = "_QFtestEy.alloc"}
+  %2 = fir.declare %1 {uniq_name = "_QFtestEy"} : (!fir.heap<f32>) -> !fir.heap<f32>
+  %3 = fir.declare %arg1 dummy_scope %0 arg 2 {uniq_name = "_QFtestEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %4 = fir.load %3 : !fir.ref<i32>
+  %n = fir.convert %4 : (i32) -> index
+  %5 = fir.shape %n : (index) -> !fir.shape<1>
+  %6 = fir.declare %arg0(%5) dummy_scope %0 arg 1 {uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  fir.do_loop %arg2 = %c1 to %n step %c1 {
+    %7 = fir.load %2 : !fir.heap<f32>
+    %8 = fir.array_coor %6(%5) %arg2 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    fir.store %7 to %8 : !fir.ref<f32>
+  }
+  fir.freemem %2 : !fir.heap<f32>
+  return
+}
+
+// -----
+
+// Test that a load of a scalar allocated by cuf.alloc
+// is hoisted out of the loop (the allocation proves
+// the variable is always present).
+// CHECK-LABEL:   func.func @test_cuf_alloc(
+// CHECK:           %[[ALLOC:.*]] = cuf.alloc f32
+// CHECK:           %[[DECLARE:.*]] = fir.declare %[[ALLOC]]
+// CHECK:           %[[LOAD:.*]] = fir.load %[[DECLARE]]
+// CHECK:           fir.do_loop
+// CHECK-NOT:         fir.load
+func.func @test_cuf_alloc(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
+  %c1 = arith.constant 1 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = cuf.alloc f32 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestEy"} -> !fir.ref<f32>
+  %2 = fir.declare %1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestEy"} : (!fir.ref<f32>) -> !fir.ref<f32>
+  %3 = fir.declare %arg1 dummy_scope %0 arg 2 {uniq_name = "_QFtestEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+  %4 = fir.load %3 : !fir.ref<i32>
+  %n = fir.convert %4 : (i32) -> index
+  %5 = fir.shape %n : (index) -> !fir.shape<1>
+  %6 = fir.declare %arg0(%5) dummy_scope %0 arg 1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+  fir.do_loop %arg2 = %c1 to %n step %c1 {
+    %7 = fir.load %2 : !fir.ref<f32>
+    %8 = fir.array_coor %6(%5) %arg2 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    fir.store %7 to %8 : !fir.ref<f32>
+  }
+  cuf.free %1 : !fir.ref<f32> {data_attr = #cuf.cuda<device>}
+  return
+}



More information about the flang-commits mailing list