[flang-commits] [flang] [flang][cuda] Import type descriptor in the gpu module when needed (PR #149157)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Wed Jul 16 11:18:51 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/149157
None
From 557fee5038c1620c73601a57a41ccb92d5810b80 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 16 Jul 2025 10:19:45 -0700
Subject: [PATCH] [flang][cuda] Import type descriptor in the gpu module when needed
---
.../Optimizer/Transforms/CUFDeviceGlobal.cpp | 42 +++++++++++++------
flang/test/Fir/CUDA/cuda-device-global.f90 | 22 ++++++++++
2 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index bfb0daeacb8c3..35badb6eadb1c 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -53,21 +53,26 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp,
}
}
+static void processTypeDescriptor(fir::RecordType recTy,
+ mlir::SymbolTable &symbolTable,
+ llvm::DenseSet<fir::GlobalOp> &candidates) {
+ if (auto globalOp = symbolTable.lookup<fir::GlobalOp>(
+ fir::NameUniquer::getTypeDescriptorName(recTy.getName()))) {
+ if (!candidates.contains(globalOp)) {
+ globalOp.walk([&](fir::AddrOfOp op) {
+ processAddrOfOp(op, symbolTable, candidates,
+ /*recurseInGlobal=*/true);
+ });
+ candidates.insert(globalOp);
+ }
+ }
+}
+
static void processEmboxOp(fir::EmboxOp emboxOp, mlir::SymbolTable &symbolTable,
llvm::DenseSet<fir::GlobalOp> &candidates) {
if (auto recTy = mlir::dyn_cast<fir::RecordType>(
- fir::unwrapRefType(emboxOp.getMemref().getType()))) {
- if (auto globalOp = symbolTable.lookup<fir::GlobalOp>(
- fir::NameUniquer::getTypeDescriptorName(recTy.getName()))) {
- if (!candidates.contains(globalOp)) {
- globalOp.walk([&](fir::AddrOfOp op) {
- processAddrOfOp(op, symbolTable, candidates,
- /*recurseInGlobal=*/true);
- });
- candidates.insert(globalOp);
- }
- }
- }
+ fir::unwrapRefType(emboxOp.getMemref().getType())))
+ processTypeDescriptor(recTy, symbolTable, candidates);
}
static void
@@ -85,6 +90,17 @@ prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp,
}
}
+static void
+processPotentialTypeDescriptor(mlir::Type candidateType,
+ mlir::SymbolTable &symbolTable,
+ llvm::DenseSet<fir::GlobalOp> &candidates) {
+ if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(candidateType))
+ candidateType = boxTy.getEleTy();
+ candidateType = fir::unwrapSequenceType(fir::unwrapRefType(candidateType));
+ if (auto recTy = mlir::dyn_cast<fir::RecordType>(candidateType))
+ processTypeDescriptor(recTy, symbolTable, candidates);
+}
+
class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
public:
void runOnOperation() override {
@@ -115,6 +131,8 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
for (auto globalOp : mod.getOps<fir::GlobalOp>()) {
if (cuf::isRegisteredDeviceGlobal(globalOp)) {
candidates.insert(globalOp);
+ processPotentialTypeDescriptor(globalOp.getType(), parentSymTable,
+ candidates);
} else if (globalOp.getConstant() &&
mlir::isa<fir::SequenceType>(
fir::unwrapRefType(globalOp.resultType()))) {
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
index 4c634513745fd..a2875a2dd241d 100644
--- a/flang/test/Fir/CUDA/cuda-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -24,3 +24,25 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.conta
// CHECK: gpu.module @cuda_device_mod
// CHECK-DAG: fir.global @_QMm2ECc
// CHECK-DAG: fir.global @_QMm1ECb
+
+// -----
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+ fir.global @_QMmEddarrays {data_attr = #cuf.cuda<managed>} : !fir.box<!fir.heap<!fir.array<?x!fir.type<_QMmTdevicearrays{phi_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_i:!fir.box<!fir.heap<!fir.array<?xf64>>>}>>>> {
+ %c0 = arith.constant 0 : index
+ %0 = fir.zero_bits !fir.heap<!fir.array<?x!fir.type<_QMmTdevicearrays{phi_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_i:!fir.box<!fir.heap<!fir.array<?xf64>>>}>>>
+ %1 = fir.shape %c0 : (index) -> !fir.shape<1>
+ %2 = fir.embox %0(%1) {allocator_idx = 3 : i32} : (!fir.heap<!fir.array<?x!fir.type<_QMmTdevicearrays{phi_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_i:!fir.box<!fir.heap<!fir.array<?xf64>>>}>>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?x!fir.type<_QMmTdevicearrays{phi_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_i:!fir.box<!fir.heap<!fir.array<?xf64>>>}>>>>
+ fir.has_value %2 : !fir.box<!fir.heap<!fir.array<?x!fir.type<_QMmTdevicearrays{phi_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,phi0_i:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_r:!fir.box<!fir.heap<!fir.array<?xf64>>>,buf_i:!fir.box<!fir.heap<!fir.array<?xf64>>>}>>>>
+ }
+ fir.global linkonce_odr @_QMmE.dt.devicearrays constant target : !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}> {
+ %0 = fir.undefined !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>
+ fir.has_value %0 : !fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{genre:i8,__padding0:!fir.array<7xi8>,value:i64}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>
+ }
+}
+
+// CHECK: fir.global @_QMmEddarrays
+// CHECK: fir.global linkonce_odr @_QMmE.dt.devicearrays
+// CHECK: gpu.module @cuda_device_mod
+// CHECK: fir.global @_QMmEddarrays
+// CHECK: fir.global linkonce_odr @_QMmE.dt.devicearrays
More information about the flang-commits
mailing list