[flang-commits] [flang] [flang] Recognize generic allocations in Flang LICM. (PR #191923)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Mon Apr 13 18:51:19 PDT 2026
https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/191923
Instead of matching particular operations like `fir.alloca`
we can use `MemoryEffectOpInterface` to figure out if a location
is a new allocation.
>From de1c06e69427ead3cb8c2bc4250b2f0d6d29efb5 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Mon, 13 Apr 2026 18:35:19 -0700
Subject: [PATCH] [flang] Recognize generic allocations in Flang LICM.
Instead of matching particular operations like `fir.alloca`
we can use `MemoryEffectOpInterface` to figure out if a location
is a new allocation.
---
flang/include/flang/Optimizer/Support/Utils.h | 7 +++
.../lib/Optimizer/Analysis/AliasAnalysis.cpp | 38 +++++-------
flang/lib/Optimizer/Support/Utils.cpp | 17 ++++++
.../Transforms/LoopInvariantCodeMotion.cpp | 21 ++++---
.../Transforms/licm-allocmem-cufaloc.mlir | 59 +++++++++++++++++++
5 files changed, 110 insertions(+), 32 deletions(-)
create mode 100644 flang/test/Transforms/licm-allocmem-cufaloc.mlir
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 63d9dbdecb18b..59f38b17e90d2 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -237,6 +237,13 @@ mlir::Value integerCast(const fir::LLVMTypeConverter &converter,
mlir::Location loc,
mlir::ConversionPatternRewriter &rewriter,
mlir::Type ty, mlir::Value val, bool fold = false);
+
+/// Check if the given operation result is a new allocation
+/// as specified by the MemoryEffects of the operation.
+/// The function returns true if the effects mark it as a new allocation,
+/// it returns false if the effects do not mark it as a new allocation,
+/// otherwise (the effects are not available) it returns std::nullopt.
+std::optional<bool> isNewAllocationResult(mlir::OpResult result);
} // namespace fir
#endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index e832bec484786..ea97f2228651a 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -14,6 +14,7 @@
#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Support/InternalNames.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "mlir/Analysis/AliasAnalysis.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/Dialect/OpenACC/OpenACCUtils.h"
@@ -38,23 +39,14 @@ llvm::cl::opt<bool> supportCrayPointers(
llvm::cl::init(false));
// Inspect for value-scoped Allocate effects and determine whether
-// 'candidate' is a new allocation. Returns SourceKind::Allocate if a
+// 'result' is a new allocation. Returns SourceKind::Allocate if a
// MemAlloc effect is attached
static fir::AliasAnalysis::SourceKind
-classifyAllocateFromEffects(mlir::Operation *op, mlir::Value candidate) {
- if (!op)
- return fir::AliasAnalysis::SourceKind::Unknown;
- auto interface = llvm::dyn_cast<mlir::MemoryEffectOpInterface>(op);
- if (!interface)
- return fir::AliasAnalysis::SourceKind::Unknown;
- llvm::SmallVector<mlir::MemoryEffects::EffectInstance, 4> effects;
- interface.getEffects(effects);
- for (mlir::MemoryEffects::EffectInstance &e : effects) {
- if (mlir::isa<mlir::MemoryEffects::Allocate>(e.getEffect()) &&
- e.getValue() && e.getValue() == candidate)
- return fir::AliasAnalysis::SourceKind::Allocate;
- }
- return fir::AliasAnalysis::SourceKind::Unknown;
+classifyAllocateFromEffects(OpResult result) {
+ std::optional<bool> isNewAllocation = fir::isNewAllocationResult(result);
+ return isNewAllocation.value_or(false)
+ ? fir::AliasAnalysis::SourceKind::Allocate
+ : fir::AliasAnalysis::SourceKind::Unknown;
}
//===----------------------------------------------------------------------===//
@@ -828,15 +820,15 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
Source::AccessPath accessPath;
bool accessPathFinalized{false};
while (defOp && !breakFromLoop) {
- // Value-scoped allocation detection via effects.
- if (classifyAllocateFromEffects(defOp, v) == SourceKind::Allocate) {
- type = SourceKind::Allocate;
- break;
- }
// Operations may have multiple results, so we need to analyze
// the result for which the source is queried.
auto opResult = mlir::cast<OpResult>(v);
assert(opResult.getOwner() == defOp && "v must be a result of defOp");
+ // Value-scoped allocation detection via effects.
+ if (classifyAllocateFromEffects(opResult) == SourceKind::Allocate) {
+ type = SourceKind::Allocate;
+ break;
+ }
ty = opResult.getType();
std::optional<AliasAnalysis::Source> accSourceReturn;
llvm::TypeSwitch<Operation *>(defOp)
@@ -932,11 +924,11 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
} else {
auto def = llvm::cast<mlir::Value>(boxSrc.origin.u);
bool classified = false;
- if (auto defDefOp = def.getDefiningOp()) {
- if (classifyAllocateFromEffects(defDefOp, def) ==
+ if (auto defAsOpResult = mlir::dyn_cast<OpResult>(def)) {
+ if (classifyAllocateFromEffects(defAsOpResult) ==
SourceKind::Allocate) {
v = def;
- defOp = defDefOp;
+ defOp = defAsOpResult.getOwner();
type = SourceKind::Allocate;
classified = true;
}
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index 2f33d89564796..507f802f42c22 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -131,3 +131,20 @@ mlir::Value fir::integerCast(const fir::LLVMTypeConverter &converter,
}
return val;
}
+
+std::optional<bool> fir::isNewAllocationResult(mlir::OpResult result) {
+ if (!result)
+ return std::nullopt;
+ auto interface =
+ llvm::dyn_cast<mlir::MemoryEffectOpInterface>(result.getOwner());
+ if (!interface)
+ return std::nullopt;
+ llvm::SmallVector<mlir::MemoryEffects::EffectInstance, 4> effects;
+ interface.getEffects(effects);
+ for (mlir::MemoryEffects::EffectInstance &e : effects) {
+ if (mlir::isa<mlir::MemoryEffects::Allocate>(e.getEffect()) &&
+ e.getValue() && e.getValue() == result)
+ return true;
+ }
+ return false;
+}
diff --git a/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp b/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
index d1c4046f38b19..e52bdf6e94690 100644
--- a/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
+++ b/flang/lib/Optimizer/Transforms/LoopInvariantCodeMotion.cpp
@@ -13,10 +13,12 @@
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROperationMoveOpInterface.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Dialect/FortranVariableInterface.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Interfaces/LoopLikeInterface.h"
#include "mlir/Pass/Pass.h"
@@ -104,9 +106,10 @@ static bool isNonOptionalScalar(Value location) {
return false;
}
- // Scalars "defined" by fir.alloca and fir.address_of
- // are present.
- if (isa<fir::AllocaOp, fir::AddrOfOp>(defOp)) {
+ // Scalars "defined" by fir.address_of or that are new
+ // allocations (e.g. fir.alloca, cuf.alloc) are present.
+ if (isa<fir::AddrOfOp>(defOp) ||
+ fir::isNewAllocationResult(cast<OpResult>(location)).value_or(false)) {
LDBG() << "Success: is non optional scalar";
return true;
}
@@ -141,13 +144,13 @@ static bool isNonOptionalScalar(Value location) {
// TODO: we can probably use FIR AliasAnalysis' getSource()
// method to identify the storage in more cases.
- Value memref = llvm::TypeSwitch<Operation *, Value>(defOp)
- .Case<fir::DeclareOp, hlfir::DeclareOp>(
- [](auto op) { return op.getMemref(); })
- .Default([](auto) { return nullptr; });
+ location = llvm::TypeSwitch<Operation *, Value>(defOp)
+ .Case<fir::DeclareOp, hlfir::DeclareOp>(
+ [](auto op) { return op.getMemref(); })
+ .Default([](auto) { return nullptr; });
- if (memref)
- return isNonOptionalScalar(memref);
+ if (location)
+ continue;
LDBG() << "Failure: cannot reason about variable storage";
return false;
diff --git a/flang/test/Transforms/licm-allocmem-cufaloc.mlir b/flang/test/Transforms/licm-allocmem-cufaloc.mlir
new file mode 100644
index 0000000000000..87ed92b98b639
--- /dev/null
+++ b/flang/test/Transforms/licm-allocmem-cufaloc.mlir
@@ -0,0 +1,59 @@
+// RUN: fir-opt -flang-licm --split-input-file %s | FileCheck %s
+
+// Test that a load of a scalar allocated by fir.allocmem
+// is hoisted out of the loop (the allocation proves
+// the variable is always present).
+// CHECK-LABEL: func.func @test_allocmem(
+// CHECK: %[[ALLOCMEM:.*]] = fir.allocmem f32
+// CHECK: %[[DECLARE:.*]] = fir.declare %[[ALLOCMEM]] {uniq_name = "_QFtestEy"}
+// CHECK: %[[LOAD:.*]] = fir.load %[[DECLARE]]
+// CHECK: fir.do_loop
+// CHECK-NOT: fir.load
+func.func @test_allocmem(%arg0: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
+ %c1 = arith.constant 1 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.allocmem f32 {uniq_name = "_QFtestEy.alloc"}
+ %2 = fir.declare %1 {uniq_name = "_QFtestEy"} : (!fir.heap<f32>) -> !fir.heap<f32>
+ %3 = fir.declare %arg1 dummy_scope %0 arg 2 {uniq_name = "_QFtestEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ %4 = fir.load %3 : !fir.ref<i32>
+ %n = fir.convert %4 : (i32) -> index
+ %5 = fir.shape %n : (index) -> !fir.shape<1>
+ %6 = fir.declare %arg0(%5) dummy_scope %0 arg 1 {uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+ fir.do_loop %arg2 = %c1 to %n step %c1 {
+ %7 = fir.load %2 : !fir.heap<f32>
+ %8 = fir.array_coor %6(%5) %arg2 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+ fir.store %7 to %8 : !fir.ref<f32>
+ }
+ fir.freemem %2 : !fir.heap<f32>
+ return
+}
+
+// -----
+
+// Test that a load of a scalar allocated by cuf.alloc
+// is hoisted out of the loop (the allocation proves
+// the variable is always present).
+// CHECK-LABEL: func.func @test_cuf_alloc(
+// CHECK: %[[ALLOC:.*]] = cuf.alloc f32
+// CHECK: %[[DECLARE:.*]] = fir.declare %[[ALLOC]]
+// CHECK: %[[LOAD:.*]] = fir.load %[[DECLARE]]
+// CHECK: fir.do_loop
+// CHECK-NOT: fir.load
+func.func @test_cuf_alloc(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "x"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n"}) {
+ %c1 = arith.constant 1 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = cuf.alloc f32 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestEy"} -> !fir.ref<f32>
+ %2 = fir.declare %1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestEy"} : (!fir.ref<f32>) -> !fir.ref<f32>
+ %3 = fir.declare %arg1 dummy_scope %0 arg 2 {uniq_name = "_QFtestEn"} : (!fir.ref<i32>, !fir.dscope) -> !fir.ref<i32>
+ %4 = fir.load %3 : !fir.ref<i32>
+ %n = fir.convert %4 : (i32) -> index
+ %5 = fir.shape %n : (index) -> !fir.shape<1>
+ %6 = fir.declare %arg0(%5) dummy_scope %0 arg 1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> !fir.ref<!fir.array<?xf32>>
+ fir.do_loop %arg2 = %c1 to %n step %c1 {
+ %7 = fir.load %2 : !fir.ref<f32>
+ %8 = fir.array_coor %6(%5) %arg2 : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+ fir.store %7 to %8 : !fir.ref<f32>
+ }
+ cuf.free %1 : !fir.ref<f32> {data_attr = #cuf.cuda<device>}
+ return
+}
More information about the flang-commits
mailing list