[flang-commits] [flang] [flang] Support cuf.device_address in FIR AliasAnalysis. (PR #177518)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Fri Jan 23 08:44:08 PST 2026
https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/177518
>From d0f682e6c4271f88df6edec3fec9c2beaa1d8003 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Thu, 22 Jan 2026 19:16:08 -0800
Subject: [PATCH 1/2] [flang] Support cuf.device_address in FIR AliasAnalysis.
Support `cuf.device_address` the same way as `fir.address_of`.
This implementation implies that the host address and the device
address `MustAlias` (as shown in the new test). This should be
conservatively correct as long as `MustAlias` does not allow one
to assume that the actual addresses are the same (that is what
the LLVM documentation implies, I believe).
It is probably worth adding an operation interface to handle
`fir::AddrOfOp` and `cuf::DeviceAddressOp` in FIR AliasAnalysis,
but for the initial implementation I hardcoded the checks.
I also removed the call to `fir::valueHasFirAttribute`, which performs
on-demand SymbolTable lookups that may be costly, and added
SymbolTable caching in the FIR AliasAnalysis object. In any case,
`fir::valueHasFirAttribute` does not work for `cuf::DeviceAddressOp`.
---
.../flang/Optimizer/Analysis/AliasAnalysis.h | 25 +++++++++
.../lib/Optimizer/Analysis/AliasAnalysis.cpp | 52 +++++++++++++++----
flang/lib/Optimizer/Analysis/CMakeLists.txt | 2 +
.../AliasAnalysis/load-ptr-alloca.fir | 8 +++
flang/test/Fir/CUDA/cuda-alias-analysis.fir | 50 ++++++++++++++++++
.../Transforms/tbaa-with-dummy-scope2.fir | 2 +
6 files changed, 128 insertions(+), 11 deletions(-)
create mode 100644 flang/test/Fir/CUDA/cuda-alias-analysis.fir
diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
index 455100ff3c003..41f451faea401 100644
--- a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
+++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
@@ -12,7 +12,9 @@
#include "flang/Common/enum-class.h"
#include "flang/Common/enum-set.h"
#include "mlir/Analysis/AliasAnalysis.h"
+#include "mlir/IR/SymbolTable.h"
#include "mlir/IR/Value.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerUnion.h"
namespace fir {
@@ -234,6 +236,29 @@ struct AliasAnalysis {
/// Return true, if `ty` is a reference type to an object of derived type
/// that contains a component with POINTER attribute.
static bool isRecordWithPointerComponent(mlir::Type ty);
+
+ /// Return the symbol table nearest to the given operation.
+ /// If a SymbolTable has not been cached in symTabMap,
+ /// it will be created, which may be expensive.
+ const mlir::SymbolTable *getNearestSymbolTable(mlir::Operation *from);
+
+ /// Return true if the given symbol may correspond to a Fortran variable
+ /// with a TARGET attribute. 'from' is used to find the nearest
+ /// SymbolTable (by calling getNearestSymbolTable()).
+ bool symbolMayHaveTargetAttr(mlir::SymbolRefAttr symbol,
+ mlir::Operation *from);
+
+ /// A map between operations with OpTrait::SymbolTable
+ /// and the SymbolTable objects associated with them.
+ /// TODO: it might be better to initialize just a single SymbolTable
+ /// during fir::AliasAnalysis construction, e.g. by giving
+ /// the constructor the operation from which the nearest SymbolTable
+ /// should be looked up. This implies that the users will have to
+ /// specify proper operation (e.g. 'module') so that the discovered
+ /// SymbolTable contains all the symbols that may appear during
+ /// the aliasing queries through the constructed AliasAnalysis
+ /// entity.
+ llvm::DenseMap<mlir::Operation *, mlir::SymbolTable> symTabMap;
};
inline bool operator==(const AliasAnalysis::Source::SourceOrigin &lhs,
diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
index cb6a0828196fb..388fb60fcda35 100644
--- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
+++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Dialect/FIRType.h"
@@ -74,11 +75,20 @@ getAttrsFromVariable(fir::FortranVariableOpInterface var) {
return attrs;
}
-static bool hasGlobalOpTargetAttr(mlir::Value v, fir::AddrOfOp op) {
- auto globalOpName =
- mlir::OperationName(fir::GlobalOp::getOperationName(), op->getContext());
- return fir::valueHasFirAttribute(
- v, fir::GlobalOp::getTargetAttrName(globalOpName));
+bool fir::AliasAnalysis::symbolMayHaveTargetAttr(mlir::SymbolRefAttr symbol,
+ mlir::Operation *from) {
+ assert(from);
+
+ // If we cannot find the nearest SymbolTable assume the worst.
+ const mlir::SymbolTable *symTab = getNearestSymbolTable(from);
+ if (!symTab)
+ return true;
+
+ if (auto globalOp = symTab->lookup<fir::GlobalOp>(symbol.getLeafReference()))
+ return globalOp.getTarget().value_or(false);
+
+ // If the symbol is not defined by fir.global assume the worst.
+ return true;
}
static bool isEvaluateInMemoryBlockArg(mlir::Value v) {
@@ -715,19 +725,27 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
type = SourceKind::Indirect;
breakFromLoop = true;
})
- .Case<fir::AddrOfOp>([&](auto op) {
+ .Case<fir::AddrOfOp, cuf::DeviceAddressOp>([&](auto op) {
// Address of a global scope object.
ty = v.getType();
type = SourceKind::Global;
-
- if (hasGlobalOpTargetAttr(v, op))
- attributes.set(Attribute::Target);
-
// TODO: Take followBoxData into account when setting the pointer
// attribute
if (isPointerReference(ty))
attributes.set(Attribute::Pointer);
- global = llvm::cast<fir::AddrOfOp>(op).getSymbol();
+
+ if constexpr (std::is_same_v<std::decay_t<decltype(op)>,
+ fir::AddrOfOp>)
+ global = op.getSymbol();
+ else if constexpr (std::is_same_v<std::decay_t<decltype(op)>,
+ cuf::DeviceAddressOp>)
+ global = op.getHostSymbol();
+ else
+ llvm_unreachable("unexpected operation");
+
+ if (symbolMayHaveTargetAttr(global, op))
+ attributes.set(Attribute::Target);
+
breakFromLoop = true;
})
.Case<hlfir::DeclareOp, fir::DeclareOp>([&](auto op) {
@@ -898,4 +916,16 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v,
isCapturedInInternalProcedure};
}
+const mlir::SymbolTable *
+fir::AliasAnalysis::getNearestSymbolTable(mlir::Operation *from) {
+ assert(from);
+ Operation *symTabOp = mlir::SymbolTable::getNearestSymbolTable(from);
+ if (!symTabOp)
+ return nullptr;
+ auto it = symTabMap.find(symTabOp);
+ if (it != symTabMap.end())
+ return &it->second;
+ return &symTabMap.try_emplace(symTabOp, symTabOp).first->second;
+}
+
} // namespace fir
diff --git a/flang/lib/Optimizer/Analysis/CMakeLists.txt b/flang/lib/Optimizer/Analysis/CMakeLists.txt
index 4d4ad882c27d3..c890b969bae34 100644
--- a/flang/lib/Optimizer/Analysis/CMakeLists.txt
+++ b/flang/lib/Optimizer/Analysis/CMakeLists.txt
@@ -3,11 +3,13 @@ add_flang_library(FIRAnalysis
TBAAForest.cpp
DEPENDS
+ CUFDialect
FIRDialect
FIRSupport
HLFIRDialect
LINK_LIBS
+ CUFDialect
FIRBuilder
FIRDialect
FIRSupport
diff --git a/flang/test/Analysis/AliasAnalysis/load-ptr-alloca.fir b/flang/test/Analysis/AliasAnalysis/load-ptr-alloca.fir
index 56c5313d397a5..9739566110942 100644
--- a/flang/test/Analysis/AliasAnalysis/load-ptr-alloca.fir
+++ b/flang/test/Analysis/AliasAnalysis/load-ptr-alloca.fir
@@ -410,3 +410,11 @@ func.func @_QMmPtest.fir() {
%27 = fir.box_addr %26 {test.ptr = "t_alloc.tgt.fir"} : (!fir.box<!fir.heap<f32>>) -> !fir.heap<f32>
return
}
+
+fir.global @_QMmEp1 : !fir.box<!fir.ptr<f32>>
+fir.global @_QMmEarr : !fir.array<2xf32>
+fir.global @_QMmEt_arr target : !fir.array<2xf32>
+fir.global @_QMmEalloc : !fir.box<!fir.heap<f32>>
+fir.global @_QMmEt_alloc target : !fir.box<!fir.heap<f32>>
+fir.global @_QMmEt target : f32
+fir.global @_QMmEv : f32
diff --git a/flang/test/Fir/CUDA/cuda-alias-analysis.fir b/flang/test/Fir/CUDA/cuda-alias-analysis.fir
new file mode 100644
index 0000000000000..16fb722e92f8e
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-alias-analysis.fir
@@ -0,0 +1,50 @@
+// RUN: fir-opt %s --split-input-file -o /dev/null --mlir-disable-threading \
+// RUN: -pass-pipeline='builtin.module(func.func(test-fir-alias-analysis))' \
+// RUN: 2>&1 | FileCheck -match-full-lines %s
+
+fir.global @device_global1 {data_attr = #cuf.cuda<device>} : i32
+fir.global @device_global2 {data_attr = #cuf.cuda<device>} : i32
+fir.global @device_target {data_attr = #cuf.cuda<device>, target} : i32
+fir.global @device_pointer {data_attr = #cuf.cuda<device>} : !fir.box<!fir.ptr<i32>>
+
+func.func @test_device_address() {
+ %0 = fir.address_of(@device_global1) {test.ptr = "host_ptr1"} : !fir.ref<i32>
+ %1 = cuf.device_address @device_global1 {test.ptr = "device_ptr1"} -> !fir.ref<i32>
+ %2 = fir.address_of(@device_global2) {test.ptr = "host_ptr2"} : !fir.ref<i32>
+ %3 = cuf.device_address @device_global2 {test.ptr = "device_ptr2"} -> !fir.ref<i32>
+ %4 = fir.address_of(@device_target) {test.ptr = "host_target"} : !fir.ref<i32>
+ %5 = cuf.device_address @device_target {test.ptr = "device_target"} -> !fir.ref<i32>
+ %6 = fir.address_of(@device_pointer) {test.ptr = "host_pointer"} : !fir.ref<!fir.box<!fir.ptr<i32>>>
+ %7 = cuf.device_address @device_pointer {test.ptr = "device_pointer"} -> !fir.ref<!fir.box<!fir.ptr<i32>>>
+ return
+}
+
+// CHECK-LABEL: Testing : "test_device_address"
+// CHECK: host_ptr1#0 <-> device_ptr1#0: MustAlias
+// CHECK: host_ptr1#0 <-> host_ptr2#0: NoAlias
+// CHECK: device_ptr1#0 <-> host_ptr2#0: NoAlias
+// CHECK: host_ptr1#0 <-> device_ptr2#0: NoAlias
+// CHECK: device_ptr1#0 <-> device_ptr2#0: NoAlias
+// CHECK: host_ptr2#0 <-> device_ptr2#0: MustAlias
+// CHECK: host_ptr1#0 <-> host_target#0: NoAlias
+// CHECK: device_ptr1#0 <-> host_target#0: NoAlias
+// CHECK: host_ptr2#0 <-> host_target#0: NoAlias
+// CHECK: device_ptr2#0 <-> host_target#0: NoAlias
+// CHECK: host_ptr1#0 <-> device_target#0: NoAlias
+// CHECK: device_ptr1#0 <-> device_target#0: NoAlias
+// CHECK: host_ptr2#0 <-> device_target#0: NoAlias
+// CHECK: device_ptr2#0 <-> device_target#0: NoAlias
+// CHECK: host_target#0 <-> device_target#0: MustAlias
+// CHECK: host_ptr1#0 <-> host_pointer#0: NoAlias
+// CHECK: device_ptr1#0 <-> host_pointer#0: NoAlias
+// CHECK: host_ptr2#0 <-> host_pointer#0: NoAlias
+// CHECK: device_ptr2#0 <-> host_pointer#0: NoAlias
+// CHECK: host_target#0 <-> host_pointer#0: NoAlias
+// CHECK: device_target#0 <-> host_pointer#0: NoAlias
+// CHECK: host_ptr1#0 <-> device_pointer#0: NoAlias
+// CHECK: device_ptr1#0 <-> device_pointer#0: NoAlias
+// CHECK: host_ptr2#0 <-> device_pointer#0: NoAlias
+// CHECK: device_ptr2#0 <-> device_pointer#0: NoAlias
+// CHECK: host_target#0 <-> device_pointer#0: NoAlias
+// CHECK: device_target#0 <-> device_pointer#0: NoAlias
+// CHECK: host_pointer#0 <-> device_pointer#0: MustAlias
diff --git a/flang/test/Transforms/tbaa-with-dummy-scope2.fir b/flang/test/Transforms/tbaa-with-dummy-scope2.fir
index 6f5ed69fbc9c6..a0718142a7931 100644
--- a/flang/test/Transforms/tbaa-with-dummy-scope2.fir
+++ b/flang/test/Transforms/tbaa-with-dummy-scope2.fir
@@ -41,6 +41,7 @@ func.func @_QPtest1() attributes {noinline} {
fir.store %c2_i32 to %2 : !fir.ref<i32>
return
}
+fir.global @_QMmEglob : i32
}
// CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtest1">
// CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_type_desc<id = "any access", members = {<#[[$ATTR_0]], 0>}>
@@ -86,6 +87,7 @@ func.func @_QPtest2() attributes {noinline} {
fir.store %c2_i32 to %2 : !fir.ref<i32>
return
}
+fir.global @_QMmEglob : i32
}
// CHECK: #[[$ATTR_0:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtest2">
// CHECK: #[[$ATTR_1:.+]] = #llvm.tbaa_root<id = "Flang function root _QPtest2 - Scope 1">
>From c7e2a628ed9a3e90f4620c06732ff75377de7f91 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 23 Jan 2026 08:43:28 -0800
Subject: [PATCH 2/2] Updated TODO comment.
---
flang/include/flang/Optimizer/Analysis/AliasAnalysis.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
index 41f451faea401..b896532976332 100644
--- a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
+++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h
@@ -257,7 +257,10 @@ struct AliasAnalysis {
/// specify proper operation (e.g. 'module') so that the discovered
/// SymbolTable contains all the symbols that may appear during
/// the aliasing queries through the constructed AliasAnalysis
- /// entity.
+ /// entity. On the other hand, this approach may be too expensive
+ /// for the clients that create AliasAnalysis on the fly for just
+ /// a few values that are likely not globals.
+ /// We can have both modes for different clients.
llvm::DenseMap<mlir::Operation *, mlir::SymbolTable> symTabMap;
};
More information about the flang-commits
mailing list