[flang-commits] [flang] [flang][NFC] Reduce FIR AA overhead for functions with one scope. (PR #204009)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Mon Jun 22 14:05:15 PDT 2026
https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/204009
>From ce1bd7fcd6cf3fb6a0908c8b3e3290b6843d6bc3 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Mon, 15 Jun 2026 15:22:50 -0700
Subject: [PATCH 1/2] [flang][NFC] Reduce FIR AA overhead for functions with
one scope.
Avoid the overhead of collectScopedOrigins and getDeclarationScope/DominanceInfo
when the values passed to FIR AA belong to a function that contains only a
single fir.dummy_scope op.
---
.../flang/Optimizer/Analysis/AliasAnalysis.h | 15 +++++++
.../lib/Optimizer/Analysis/AliasAnalysis.cpp | 40 +++++++++++++++++--
2 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
index fa4a673683df4..832634a708dba 100644
--- a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
+++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
@@ -359,6 +359,16 @@ struct AliasAnalysis {
/// POINTER object or a raw fir::PointerType.
static bool isPointerReference(mlir::Type ty);
+ /// Return true if the function containing \p v has more than one
+ /// fir.dummy_scope op (e.g. a callee has been inlined into it), or if
+ /// no enclosing func.func can be found (conservative answer).
+ /// Scope-aware disambiguation in alias(lhs, rhs) is only meaningful in
+ /// the multi-scope case; skipping it for single-scope functions avoids
+ /// the getDeclarationScope/DominanceInfo overhead in getSource.
+ /// The result is cached in multiScopeCache, so the function walk is
+ /// paid at most once per funcOp per AliasAnalysis instance.
+ bool functionHasMultipleScopes(mlir::Value v);
+
private:
/// Build an intermediate Source rooted at the declare captured by the
/// snapshot. Reuses getSource(declValue) for the SourceKind / origin
@@ -431,6 +441,11 @@ struct AliasAnalysis {
domInfoCache;
llvm::DenseMap<mlir::Operation *, llvm::SmallVector<mlir::Operation *, 16>>
sortedScopeCache;
+ /// Per-function cache: true iff the function contains more than one
+ /// fir.dummy_scope op (typically because a callee was inlined into it).
+ /// Populated by functionHasMultipleScopes(); both true and false are
+ /// cached so repeated queries are O(1) without re-walking the function.
+ llvm::DenseMap<mlir::Operation *, bool> multiScopeCache;
};
inline bool operator==(const AliasAnalysis::Source::SourceOrigin &lhs,
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index 838fcffc5fa66..080f6272ca3df 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -599,9 +599,17 @@ static mlir::Value getZeroOffsetViewRoot(mlir::Value val) {
AliasResult AliasAnalysis::alias(mlir::Value lhs, mlir::Value rhs) {
// A wrapper around alias(Source lhsSrc, Source rhsSrc, mlir::Value lhs,
// mlir::Value rhs) This allows a user to provide Source that may be obtained
- // through other dialects
- auto lhsSrc = getSource(lhs);
- auto rhsSrc = getSource(rhs);
+ // through other dialects.
+ //
+ // Scope-aware refinement is only meaningful after inlining, when the
+ // function contains more than one fir.dummy_scope op. Skip
+ // collectScopedOrigins and the scope-pair loop for non-inlined functions
+ // to avoid the per-query getDeclarationScope/DominanceInfo overhead.
+ bool multiScopes = functionHasMultipleScopes(lhs);
+ auto lhsSrc =
+ getSource(lhs, /*getLastInstantiationPoint=*/false, multiScopes);
+ auto rhsSrc =
+ getSource(rhs, /*getLastInstantiationPoint=*/false, multiScopes);
AliasResult result = alias(lhsSrc, rhsSrc, lhs, rhs);
// Scope-aware refinement after inlining: if both walks crossed declares
@@ -615,7 +623,8 @@ AliasResult AliasAnalysis::alias(mlir::Value lhs, mlir::Value rhs) {
// and pointer-dereferenced paths remain correctly reported as MayAlias.
// Short-circuit on NoAlias since any pair that disambiguates is
// decisive.
- if (result == AliasResult::NoAlias || result == AliasResult::MustAlias)
+ if (!multiScopes || result == AliasResult::NoAlias ||
+ result == AliasResult::MustAlias)
return result;
for (const auto &lhsScopedOrigin : lhsSrc.scopedOrigins) {
if (!lhsScopedOrigin.scope)
@@ -1758,4 +1767,27 @@ fir::AliasAnalysis::Source fir::AliasAnalysis::buildSourceAtDeclare(
return source;
}
+bool fir::AliasAnalysis::functionHasMultipleScopes(mlir::Value v) {
+ mlir::func::FuncOp funcOp;
+ if (mlir::Operation *defOp = v.getDefiningOp())
+ funcOp = defOp->getParentOfType<mlir::func::FuncOp>();
+ else if (auto bArg = mlir::dyn_cast<mlir::BlockArgument>(v))
+ if (mlir::Region *region = bArg.getOwner()->getParent())
+ funcOp = region->getParentOfType<mlir::func::FuncOp>();
+ if (!funcOp)
+ return true; // conservative: no enclosing func.func, assume multiple scopes
+ mlir::Operation *funcOpPtr = funcOp.getOperation();
+ auto it = multiScopeCache.find(funcOpPtr);
+ if (it != multiScopeCache.end())
+ return it->second;
+ // Count fir::DummyScopeOp ops in the function; interrupt at the second one.
+ unsigned count = 0;
+ funcOp.walk([&](fir::DummyScopeOp) -> mlir::WalkResult {
+ return ++count >= 2 ? mlir::WalkResult::interrupt()
+ : mlir::WalkResult::advance();
+ });
+ // Cache either outcome per function op so later queries skip the walk.
+ return multiScopeCache.try_emplace(funcOpPtr, count >= 2).first->second;
+}
+
} // namespace fir
>From 683513abc13a513d9d5676a94f945aa1a71aa19e Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Mon, 22 Jun 2026 13:58:39 -0700
Subject: [PATCH 2/2] Recognize host-assoc for box references + tests fixes.
---
.../lib/Optimizer/Analysis/AliasAnalysis.cpp | 12 ++++++
.../alias-analysis-host-assoc.fir | 9 +++--
.../alias-analysis-scoped-origins.fir | 37 ++++++++++++++-----
3 files changed, 45 insertions(+), 13 deletions(-)
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index 080f6272ca3df..a30d54841dd2e 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -1304,6 +1304,18 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
type = SourceKind::Allocate;
v = def;
defOp = nullptr;
+ } else if (boxSrc.kind == SourceKind::HostAssoc) {
+ // Box loaded from a host-associated descriptor: classify
+ // the dereferenced target as HostAssoc (not Indirect) so
+ // alias() can apply the host-assoc/pointer rules instead
+ // of coarsening to MayAlias. The access path (PointerDeref/
+ // AllocDeref step) and Pointer attribute were already set
+ // above, so the resulting Source matches the one that
+ // buildSourceAtDeclare() rebuilds during scope-aware
+ // refinement.
+ type = SourceKind::HostAssoc;
+ v = def;
+ defOp = nullptr;
} else if (isDummyArgument(def)) {
defOp = nullptr;
v = def;
diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir
index 7f90384ac99c5..d570d039432bd 100644
--- a/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir
+++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir
@@ -184,9 +184,12 @@ func.func @_QFtest5Pinner(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name =
// end subroutine inner
// end subroutine test6
-// F18 15.5.2.13 (4):
-// FIXME: 'x' is classified as Indirect access leading to a conservative reply:
-// CHECK: test6_y(1)#0 <-> test6_x(1)#0: MayAlias
+// F18 15.5.2.13 (4): 'x' is a host-associated POINTER and 'y' is a non-TARGET
+// dummy, so 'x' cannot be associated with 'y' and they do not alias. The
+// host-associated pointer descriptor load is now classified as HostAssoc
+// rather than Indirect, so this no longer falls back to a conservative
+// MayAlias.
+// CHECK: test6_y(1)#0 <-> test6_x(1)#0: NoAlias
func.func @_QFtest6Pinner(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "y"}, %arg1: !fir.ref<tuple<!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>>> {fir.host_assoc}) attributes {fir.internal_proc} {
%c0_i32 = arith.constant 0 : i32
%0 = fir.coordinate_of %arg1, %c0_i32 : (!fir.ref<tuple<!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>>>, i32) -> !fir.llvm_ptr<!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>>
diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir
index 6291ba86caccc..228c720d0450a 100644
--- a/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir
+++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir
@@ -131,11 +131,17 @@ func.func @_QPtest_two_dummies_fir(
// SourceKind::Unknown and the underlying alias() reports MayAlias
// ("indirect access"). The ONLY way to disambiguate is via the
// ScopedOrigin snapshots taken at the inner-frame declares (which share
-// one fir.dummy_scope). This is a regression for buildSourceAtDeclare:
-// it must classify each rebuilt Source AT the captured declare's own
-// scope (getLastInstantiationPoint=true, yielding SourceKind::Argument)
-// rather than walking past the declare back into the fir.if and
-// collapsing to Unknown.
+// one fir.dummy_scope).
+//
+// This models REAL two-level inlining: the caller is itself a procedure
+// with its own dummy arguments, so it has its own fir.dummy_scope
+// (%outer) in addition to the inlined callee's (%inner). With two
+// dummy_scope ops, functionHasMultipleScopes() is true and the
+// scope-aware refinement is enabled. buildSourceAtDeclare must classify
+// each rebuilt Source AT the captured declare's own scope
+// (getLastInstantiationPoint=true, yielding SourceKind::Argument) rather
+// than walking past the declare back into the fir.if and collapsing to
+// Unknown.
//
// CHECK-LABEL: Testing : "_QPtest_nested_inline_region_branch"
// CHECK-DAG: field#0 <-> value#0: NoAlias
@@ -143,17 +149,28 @@ func.func @_QPtest_nested_inline_region_branch(
%arg0: !fir.ref<f32> {fir.bindc_name = "field"},
%arg1: !fir.ref<f32> {fir.bindc_name = "value", fir.optional},
%cond: i1) {
- // field: contiguity copy-in select -> original actual or a local temp.
+ // Outer (caller) frame: the caller has its own dummy arguments, hence
+ // its own fir.dummy_scope.
+ %outer = fir.dummy_scope : !fir.dscope
+ %f_outer = fir.declare %arg0 dummy_scope %outer
+ {fortran_attrs = #fir.var_attrs<intent_inout>,
+ uniq_name = "_QFcallerEfield"}
+ : (!fir.ref<f32>, !fir.dscope) -> !fir.ref<f32>
+ %v_outer = fir.declare %arg1 dummy_scope %outer
+ {fortran_attrs = #fir.var_attrs<intent_in, optional>,
+ uniq_name = "_QFcallerEvalue"}
+ : (!fir.ref<f32>, !fir.dscope) -> !fir.ref<f32>
+ // field: contiguity copy-in select -> caller actual or a local temp.
%ftmp = fir.alloca f32
%f_sel = fir.if %cond -> (!fir.ref<f32>) {
- fir.result %arg0 : !fir.ref<f32>
+ fir.result %f_outer : !fir.ref<f32>
} else {
fir.result %ftmp : !fir.ref<f32>
}
- // value: OPTIONAL presence select -> actual or absent.
- %present = fir.is_present %arg1 : (!fir.ref<f32>) -> i1
+ // value: OPTIONAL presence select -> caller actual or absent.
+ %present = fir.is_present %v_outer : (!fir.ref<f32>) -> i1
%v_sel = fir.if %present -> (!fir.ref<f32>) {
- fir.result %arg1 : !fir.ref<f32>
+ fir.result %v_outer : !fir.ref<f32>
} else {
%absent = fir.absent !fir.ref<f32>
fir.result %absent : !fir.ref<f32>
More information about the flang-commits
mailing list