[flang-commits] [flang] [flang][cuda] Fix invalid address space in addressof op conversion (PR #192111)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Apr 14 11:47:42 PDT 2026
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/192111
The change in lowering order introduced in https://github.com/llvm/llvm-project/pull/183268 exposed an issue when converting an addressof op that points to a global in a different address space. When the referenced global has not yet been converted to an LLVM global, look at the fir::GlobalOp instead to determine the address space.
From 305aa43ca46a5751aa55ea71f3ce9f6024814b2b Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 14 Apr 2026 11:46:00 -0700
Subject: [PATCH] [flang][cuda] Fix invalid address space in addressof op
conversion
---
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 22 ++++++++++++++---
flang/test/Fir/CUDA/cuda-code-gen.mlir | 32 +++++++++++++++++++++++++
2 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 7c3f686fc58bb..fc4dc85ff8748 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -196,6 +196,22 @@ mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
return mlir::LLVM::AddressOfOp::create(rewriter, loc, type, symName);
}
+static std::uint64_t getAddressSpace(fir::AddrOfOp addr,
+ mlir::ConversionPatternRewriter &rewriter,
+ std::uint64_t defaultAS) {
+ auto global = addr->getParentOfType<mlir::ModuleOp>()
+ .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ if (global)
+ return global.getAddrSpace();
+ auto firGlobal =
+ addr->getParentOfType<mlir::ModuleOp>().lookupSymbol<fir::GlobalOp>(
+ addr.getSymbol());
+ if (firGlobal && firGlobal.getDataAttr() &&
+ *firGlobal.getDataAttr() == cuf::DataAttribute::Constant)
+ return static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant);
+ return defaultAS;
+}
+
/// Lower `fir.address_of` operation to `llvm.address_of` operation.
struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
using FIROpConversion::FIROpConversion;
@@ -216,12 +232,12 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
return mlir::success();
}
+ std::uint64_t globalAS =
+ getAddressSpace(addr, rewriter, getGlobalAddressSpace(rewriter));
auto global = addr->getParentOfType<mlir::ModuleOp>()
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
replaceWithAddrOfOrASCast(
- rewriter, addr->getLoc(),
- global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
- getProgramAddressSpace(rewriter),
+ rewriter, addr->getLoc(), globalAS, getProgramAddressSpace(rewriter),
global ? global.getSymName()
: addr.getSymbol().getRootReference().getValue(),
convertType(addr.getType()), addr);
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index fc962f8de5039..68fef43bef3bb 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -328,3 +328,35 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
}
// CHECK: llvm.mlir.global external @_QMtestEmanx() {addr_space = 1 : i32, nvvm.managed} : !llvm.array<100 x i32>
+
+// -----
+
+func.func @sub16_(%arg0: !fir.ref<f32> {fir.bindc_name = "h16"}) attributes {fir.internal_name = "_QPsub16", llvm.reciprocal_estimates = "none", llvm.target_cpu = "x86-64", llvm.target_features = #llvm.target_features<["+cmov", "+mmx", "+sse", "+sse2", "+cx8", "+x87", "+fxsr"]>} {
+ %c8_i32 = arith.constant 8 : i32
+ %c4_i64 = arith.constant 4 : i64
+ %c0_i32 = arith.constant 0 : i32
+ %0 = fir.address_of(@_QMdevice_dataEd16) : !fir.ref<f32>
+ %1 = fir.convert %0 : (!fir.ref<f32>) -> !fir.llvm_ptr<i8>
+ %2 = fir.address_of(@_QQclXc8657e47c19bb9e89730387c9d99c2da) : !fir.ref<!fir.char<1,38>>
+ %c38 = arith.constant 38 : index
+ %c2_i32 = arith.constant 2 : i32
+ %3 = fir.convert %2 : (!fir.ref<!fir.char<1,38>>) -> !fir.ref<i8>
+ %4 = fir.call @_FortranACUFGetDeviceAddress(%1, %3, %c2_i32) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
+ %5 = fir.convert %4 : (!fir.llvm_ptr<i8>) -> !fir.ref<f32>
+ %6 = fir.convert %arg0 : (!fir.ref<f32>) -> memref<f32>
+ %8 = fir.address_of(@_QQclXc8657e47c19bb9e89730387c9d99c2da) : !fir.ref<!fir.char<1,38>>
+ %9 = fir.convert %5 : (!fir.ref<f32>) -> !fir.llvm_ptr<i8>
+ return
+}
+fir.global linkonce @_QQclXc8657e47c19bb9e89730387c9d99c2da constant : !fir.char<1,38> {
+ %0 = fir.string_lit "/local/home/vclement/lorado/dummy.cuf\00"(38) : !fir.char<1,38>
+ fir.has_value %0 : !fir.char<1,38>
+}
+fir.global @_QMdevice_dataEd16 {data_attr = #cuf.cuda<constant>} : f32 {
+ %0 = fir.zero_bits f32
+ fir.has_value %0 : f32
+}
+func.func private @_FortranACUFGetDeviceAddress(!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> attributes {fir.runtime}
+
+// CHECK-LABEL: llvm.func @sub16_
+// CHECK: llvm.mlir.addressof @_QMdevice_dataEd16 : !llvm.ptr<4>
More information about the flang-commits mailing list