[flang-commits] [flang] [flang][cuda] Add option to preserve global with no use for debug info (PR #192731)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Fri Apr 17 13:55:23 PDT 2026
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/192731
From 6da2e58f379515b494db5682829ca9e4941894ba Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 Apr 2026 13:31:32 -0700
Subject: [PATCH 1/2] [flang][cuda] Add option to preserve global with no use
for debug info
---
.../flang/Optimizer/Transforms/Passes.td | 5 ++++
.../Transforms/CUDA/CUFDeviceGlobal.cpp | 28 ++++++++++-------
flang/test/Fir/CUDA/cuda-device-global.f90 | 30 +++++++++++++++++++
3 files changed, 53 insertions(+), 10 deletions(-)
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 71c9f7b62d2be..9ace5756e417a 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -512,6 +512,11 @@ def CUFDeviceGlobal :
let dependentDialects = [
"cuf::CUFDialect", "mlir::gpu::GPUDialect", "mlir::NVVM::NVVMDialect"
];
+ let options = [
+ Option<"skipDeadDeclares", "skip-dead-declares", "bool",
+ /*default=*/"true",
+ "Skip globals whose only use is a dead fir.declare">
+ ];
}
def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> {
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp
index 10682314567b9..a3c65ad8543df 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp
@@ -12,6 +12,7 @@
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/Support/InternalNames.h"
+#include "flang/Optimizer/Transforms/Passes.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/allocatable.h"
#include "flang/Support/Fortran.h"
@@ -31,10 +32,14 @@ namespace {
static void processAddrOfOp(fir::AddrOfOp addrOfOp,
mlir::SymbolTable &symbolTable,
llvm::DenseSet<fir::GlobalOp> &candidates,
- bool recurseInGlobal) {
+ bool recurseInGlobal,
+ bool skipDeadDeclares = true) {
- // Check if there is a real use of the global.
- if (addrOfOp.getOperation()->hasOneUse()) {
+ // Skip globals whose only reference is a dead fir.declare (no real uses).
+ // This is disabled when fir.declare ops are preserved for debug info,
+ // because later passes will copy the entire function body (including dead
+ // references) into GPU kernels.
+ if (skipDeadDeclares && addrOfOp.getOperation()->hasOneUse()) {
mlir::OpOperand &addrUse = *addrOfOp.getOperation()->getUses().begin();
if (mlir::isa<fir::DeclareOp>(addrUse.getOwner()) &&
addrUse.getOwner()->use_empty())
@@ -82,15 +87,15 @@ static void processEmboxOp(fir::EmboxOp emboxOp, mlir::SymbolTable &symbolTable,
processTypeDescriptor(recTy, symbolTable, candidates);
}
-static void
-prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp,
- mlir::SymbolTable &symbolTable,
- llvm::DenseSet<fir::GlobalOp> &candidates) {
+static void prepareImplicitDeviceGlobals(
+ mlir::func::FuncOp funcOp, mlir::SymbolTable &symbolTable,
+ llvm::DenseSet<fir::GlobalOp> &candidates, bool skipDeadDeclares) {
auto cudaProcAttr{
funcOp->getAttrOfType<cuf::ProcAttributeAttr>(cuf::getProcAttrName())};
if (cudaProcAttr && cudaProcAttr.getValue() != cuf::ProcAttribute::Host) {
funcOp.walk([&](fir::AddrOfOp op) {
- processAddrOfOp(op, symbolTable, candidates, /*recurseInGlobal=*/false);
+ processAddrOfOp(op, symbolTable, candidates, /*recurseInGlobal=*/false,
+ skipDeadDeclares);
});
funcOp.walk(
[&](fir::EmboxOp op) { processEmboxOp(op, symbolTable, candidates); });
@@ -113,6 +118,8 @@ processPotentialTypeDescriptor(mlir::Type candidateType,
class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
public:
+ using CUFDeviceGlobalBase::CUFDeviceGlobalBase;
+
void runOnOperation() override {
mlir::Operation *op = getOperation();
mlir::ModuleOp mod = mlir::dyn_cast<mlir::ModuleOp>(op);
@@ -122,13 +129,14 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
llvm::DenseSet<fir::GlobalOp> candidates;
mlir::SymbolTable symTable(mod);
mod.walk([&](mlir::func::FuncOp funcOp) {
- prepareImplicitDeviceGlobals(funcOp, symTable, candidates);
+ prepareImplicitDeviceGlobals(funcOp, symTable, candidates,
+ skipDeadDeclares);
return mlir::WalkResult::advance();
});
mod.walk([&](cuf::KernelOp kernelOp) {
kernelOp.walk([&](fir::AddrOfOp addrOfOp) {
processAddrOfOp(addrOfOp, symTable, candidates,
- /*recurseInGlobal=*/false);
+ /*recurseInGlobal=*/false, skipDeadDeclares);
});
});
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
index 7edcf1a4b13c5..64ef3d36ddea7 100644
--- a/flang/test/Fir/CUDA/cuda-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -65,3 +65,33 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.conta
// CHECK-LABEL: gpu.module @cuda_device_mod
// CHECK: fir.global linkonce_odr @_QMvector_typesE.dt.v2real2
+// -----
+
+// Test that dead declares in device functions are skipped by default.
+
+// RUN: fir-opt --split-input-file --cuf-device-global="skip-dead-declares=false" %s | FileCheck --check-prefix=PRESERVE %s
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+ fir.global @_QMiso_c_bindingECc_alert constant : !fir.char<1> {
+ %0 = fir.string_lit "\07"(1) : !fir.char<1>
+ fir.has_value %0 : !fir.char<1>
+ }
+ func.func @_QMrhsPkernel(%arg0: !fir.ref<f64>) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+ %c1 = arith.constant 1 : index
+ %0 = fir.address_of(@_QMiso_c_bindingECc_alert) : !fir.ref<!fir.char<1>>
+ %1 = fir.declare %0 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QMiso_c_bindingECc_alert"} : (!fir.ref<!fir.char<1>>, index) -> !fir.ref<!fir.char<1>>
+ return
+ }
+ gpu.module @cuda_device_mod {
+ }
+}
+
+// With default skip-dead-declares=true, the global should NOT be in gpu.module.
+// CHECK-LABEL: gpu.module @cuda_device_mod
+// CHECK-NOT: fir.global @_QMiso_c_bindingECc_alert
+
+// With skip-dead-declares=false (preserveDeclare mode), the global should be copied.
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
+// PRESERVE-LABEL: gpu.module @cuda_device_mod
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
+
From 8f7e8eada86296ae57df826e45a9877e547ef4bd Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 Apr 2026 13:55:08 -0700
Subject: [PATCH 2/2] Move test
---
.../Fir/CUDA/cuda-device-global-preserve.f90 | 26 +++++++++++++++++++
1 file changed, 26 insertions(+)
create mode 100644 flang/test/Fir/CUDA/cuda-device-global-preserve.f90
diff --git a/flang/test/Fir/CUDA/cuda-device-global-preserve.f90 b/flang/test/Fir/CUDA/cuda-device-global-preserve.f90
new file mode 100644
index 0000000000000..e2591cbd8b232
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-device-global-preserve.f90
@@ -0,0 +1,26 @@
+// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s
+// RUN: fir-opt --split-input-file --cuf-device-global="skip-dead-declares=false" %s | FileCheck --check-prefix=PRESERVE %s
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+ fir.global @_QMiso_c_bindingECc_alert constant : !fir.char<1> {
+ %0 = fir.string_lit "\07"(1) : !fir.char<1>
+ fir.has_value %0 : !fir.char<1>
+ }
+ func.func @_QMrhsPkernel(%arg0: !fir.ref<f64>) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+ %c1 = arith.constant 1 : index
+ %0 = fir.address_of(@_QMiso_c_bindingECc_alert) : !fir.ref<!fir.char<1>>
+ %1 = fir.declare %0 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QMiso_c_bindingECc_alert"} : (!fir.ref<!fir.char<1>>, index) -> !fir.ref<!fir.char<1>>
+ return
+ }
+ gpu.module @cuda_device_mod {
+ }
+}
+
+// With default skip-dead-declares=true, the global should NOT be in gpu.module.
+// CHECK-LABEL: gpu.module @cuda_device_mod
+// CHECK-NOT: fir.global @_QMiso_c_bindingECc_alert
+
+// With skip-dead-declares=false (preserveDeclare mode), the global should be copied.
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
+// PRESERVE-LABEL: gpu.module @cuda_device_mod
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
More information about the flang-commits mailing list