[flang-commits] [flang] [flang][cuda] Fix invalid address space in addressof op conversion (PR #192111)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Tue Apr 14 11:47:42 PDT 2026


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/192111

The change in lowering order introduced in https://github.com/llvm/llvm-project/pull/183268 exposed an issue when converting an addressof op that points to a global in a different address space. When the referenced global has not yet been converted to an LLVM global, look at the fir::GlobalOp instead to determine its address space.

>From 305aa43ca46a5751aa55ea71f3ce9f6024814b2b Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 14 Apr 2026 11:46:00 -0700
Subject: [PATCH] [flang][cuda] Fix invalid address space in addressof op
 conversion

---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp | 22 ++++++++++++++---
 flang/test/Fir/CUDA/cuda-code-gen.mlir  | 32 +++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 7c3f686fc58bb..fc4dc85ff8748 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -196,6 +196,22 @@ mlir::Value replaceWithAddrOfOrASCast(mlir::ConversionPatternRewriter &rewriter,
   return mlir::LLVM::AddressOfOp::create(rewriter, loc, type, symName);
 }
 
+static std::uint64_t getAddressSpace(fir::AddrOfOp addr,
+                                     mlir::ConversionPatternRewriter &rewriter,
+                                     std::uint64_t defaultAS) {
+  auto global = addr->getParentOfType<mlir::ModuleOp>()
+                    .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+  if (global)
+    return global.getAddrSpace();
+  auto firGlobal =
+      addr->getParentOfType<mlir::ModuleOp>().lookupSymbol<fir::GlobalOp>(
+          addr.getSymbol());
+  if (firGlobal && firGlobal.getDataAttr() &&
+      *firGlobal.getDataAttr() == cuf::DataAttribute::Constant)
+    return static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant);
+  return defaultAS;
+}
+
 /// Lower `fir.address_of` operation to `llvm.address_of` operation.
 struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
   using FIROpConversion::FIROpConversion;
@@ -216,12 +232,12 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
       return mlir::success();
     }
 
+    std::uint64_t globalAS =
+        getAddressSpace(addr, rewriter, getGlobalAddressSpace(rewriter));
     auto global = addr->getParentOfType<mlir::ModuleOp>()
                       .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
     replaceWithAddrOfOrASCast(
-        rewriter, addr->getLoc(),
-        global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
-        getProgramAddressSpace(rewriter),
+        rewriter, addr->getLoc(), globalAS, getProgramAddressSpace(rewriter),
         global ? global.getSymName()
                : addr.getSymbol().getRootReference().getValue(),
         convertType(addr.getType()), addr);
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index fc962f8de5039..68fef43bef3bb 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -328,3 +328,35 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
 }
 
 // CHECK: llvm.mlir.global external @_QMtestEmanx() {addr_space = 1 : i32, nvvm.managed} : !llvm.array<100 x i32>
+
+// -----
+
+func.func @sub16_(%arg0: !fir.ref<f32> {fir.bindc_name = "h16"}) attributes {fir.internal_name = "_QPsub16", llvm.reciprocal_estimates = "none", llvm.target_cpu = "x86-64", llvm.target_features = #llvm.target_features<["+cmov", "+mmx", "+sse", "+sse2", "+cx8", "+x87", "+fxsr"]>} {
+  %c8_i32 = arith.constant 8 : i32
+  %c4_i64 = arith.constant 4 : i64
+  %c0_i32 = arith.constant 0 : i32
+  %0 = fir.address_of(@_QMdevice_dataEd16) : !fir.ref<f32>
+  %1 = fir.convert %0 : (!fir.ref<f32>) -> !fir.llvm_ptr<i8>
+  %2 = fir.address_of(@_QQclXc8657e47c19bb9e89730387c9d99c2da) : !fir.ref<!fir.char<1,38>>
+  %c38 = arith.constant 38 : index
+  %c2_i32 = arith.constant 2 : i32
+  %3 = fir.convert %2 : (!fir.ref<!fir.char<1,38>>) -> !fir.ref<i8>
+  %4 = fir.call @_FortranACUFGetDeviceAddress(%1, %3, %c2_i32) : (!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
+  %5 = fir.convert %4 : (!fir.llvm_ptr<i8>) -> !fir.ref<f32>
+  %6 = fir.convert %arg0 : (!fir.ref<f32>) -> memref<f32>
+  %8 = fir.address_of(@_QQclXc8657e47c19bb9e89730387c9d99c2da) : !fir.ref<!fir.char<1,38>>
+  %9 = fir.convert %5 : (!fir.ref<f32>) -> !fir.llvm_ptr<i8>
+  return
+}
+fir.global linkonce @_QQclXc8657e47c19bb9e89730387c9d99c2da constant : !fir.char<1,38> {
+  %0 = fir.string_lit "/local/home/vclement/lorado/dummy.cuf\00"(38) : !fir.char<1,38>
+  fir.has_value %0 : !fir.char<1,38>
+}
+fir.global @_QMdevice_dataEd16 {data_attr = #cuf.cuda<constant>} : f32 {
+  %0 = fir.zero_bits f32
+  fir.has_value %0 : f32
+}
+func.func private @_FortranACUFGetDeviceAddress(!fir.llvm_ptr<i8>, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> attributes {fir.runtime}
+
+// CHECK-LABEL: llvm.func @sub16_
+// CHECK: llvm.mlir.addressof @_QMdevice_dataEd16 : !llvm.ptr<4>



More information about the flang-commits mailing list