[flang-commits] [flang] [flang][cuda] Add option to preserve global with no use for debug info (PR #192731)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Fri Apr 17 13:55:23 PDT 2026


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/192731

From 6da2e58f379515b494db5682829ca9e4941894ba Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 Apr 2026 13:31:32 -0700
Subject: [PATCH 1/2] [flang][cuda] Add option to preserve global with no use
 for debug info

---
 .../flang/Optimizer/Transforms/Passes.td      |  5 ++++
 .../Transforms/CUDA/CUFDeviceGlobal.cpp       | 28 ++++++++++-------
 flang/test/Fir/CUDA/cuda-device-global.f90    | 30 +++++++++++++++++++
 3 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 71c9f7b62d2be..9ace5756e417a 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -512,6 +512,11 @@ def CUFDeviceGlobal :
   let dependentDialects = [
     "cuf::CUFDialect", "mlir::gpu::GPUDialect", "mlir::NVVM::NVVMDialect"
   ];
+  let options = [
+    Option<"skipDeadDeclares", "skip-dead-declares", "bool",
+           /*default=*/"true",
+           "Skip globals whose only use is a dead fir.declare">
+  ];
 }
 
 def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> {
diff --git a/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp
index 10682314567b9..a3c65ad8543df 100644
--- a/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUDA/CUFDeviceGlobal.cpp
@@ -12,6 +12,7 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/InternalNames.h"
+#include "flang/Optimizer/Transforms/Passes.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/allocatable.h"
 #include "flang/Support/Fortran.h"
@@ -31,10 +32,14 @@ namespace {
 static void processAddrOfOp(fir::AddrOfOp addrOfOp,
                             mlir::SymbolTable &symbolTable,
                             llvm::DenseSet<fir::GlobalOp> &candidates,
-                            bool recurseInGlobal) {
+                            bool recurseInGlobal,
+                            bool skipDeadDeclares = true) {
 
-  // Check if there is a real use of the global.
-  if (addrOfOp.getOperation()->hasOneUse()) {
+  // Skip globals whose only reference is a dead fir.declare (no real uses).
+  // This is disabled when fir.declare ops are preserved for debug info,
+  // because later passes will copy the entire function body (including dead
+  // references) into GPU kernels.
+  if (skipDeadDeclares && addrOfOp.getOperation()->hasOneUse()) {
     mlir::OpOperand &addrUse = *addrOfOp.getOperation()->getUses().begin();
     if (mlir::isa<fir::DeclareOp>(addrUse.getOwner()) &&
         addrUse.getOwner()->use_empty())
@@ -82,15 +87,15 @@ static void processEmboxOp(fir::EmboxOp emboxOp, mlir::SymbolTable &symbolTable,
     processTypeDescriptor(recTy, symbolTable, candidates);
 }
 
-static void
-prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp,
-                             mlir::SymbolTable &symbolTable,
-                             llvm::DenseSet<fir::GlobalOp> &candidates) {
+static void prepareImplicitDeviceGlobals(
+    mlir::func::FuncOp funcOp, mlir::SymbolTable &symbolTable,
+    llvm::DenseSet<fir::GlobalOp> &candidates, bool skipDeadDeclares) {
   auto cudaProcAttr{
       funcOp->getAttrOfType<cuf::ProcAttributeAttr>(cuf::getProcAttrName())};
   if (cudaProcAttr && cudaProcAttr.getValue() != cuf::ProcAttribute::Host) {
     funcOp.walk([&](fir::AddrOfOp op) {
-      processAddrOfOp(op, symbolTable, candidates, /*recurseInGlobal=*/false);
+      processAddrOfOp(op, symbolTable, candidates, /*recurseInGlobal=*/false,
+                      skipDeadDeclares);
     });
     funcOp.walk(
         [&](fir::EmboxOp op) { processEmboxOp(op, symbolTable, candidates); });
@@ -113,6 +118,8 @@ processPotentialTypeDescriptor(mlir::Type candidateType,
 
 class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
 public:
+  using CUFDeviceGlobalBase::CUFDeviceGlobalBase;
+
   void runOnOperation() override {
     mlir::Operation *op = getOperation();
     mlir::ModuleOp mod = mlir::dyn_cast<mlir::ModuleOp>(op);
@@ -122,13 +129,14 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
     llvm::DenseSet<fir::GlobalOp> candidates;
     mlir::SymbolTable symTable(mod);
     mod.walk([&](mlir::func::FuncOp funcOp) {
-      prepareImplicitDeviceGlobals(funcOp, symTable, candidates);
+      prepareImplicitDeviceGlobals(funcOp, symTable, candidates,
+                                   skipDeadDeclares);
       return mlir::WalkResult::advance();
     });
     mod.walk([&](cuf::KernelOp kernelOp) {
       kernelOp.walk([&](fir::AddrOfOp addrOfOp) {
         processAddrOfOp(addrOfOp, symTable, candidates,
-                        /*recurseInGlobal=*/false);
+                        /*recurseInGlobal=*/false, skipDeadDeclares);
       });
     });
 
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
index 7edcf1a4b13c5..64ef3d36ddea7 100644
--- a/flang/test/Fir/CUDA/cuda-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -65,3 +65,33 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.conta
 // CHECK-LABEL: gpu.module @cuda_device_mod
 // CHECK: fir.global linkonce_odr @_QMvector_typesE.dt.v2real2
 
+// -----
+
+// Test that a global whose only use is a dead fir.declare in a device function is skipped by default and copied when skip-dead-declares=false.
+
+// RUN: fir-opt --split-input-file --cuf-device-global="skip-dead-declares=false" %s | FileCheck --check-prefix=PRESERVE %s
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+  fir.global @_QMiso_c_bindingECc_alert constant : !fir.char<1> {
+    %0 = fir.string_lit "\07"(1) : !fir.char<1>
+    fir.has_value %0 : !fir.char<1>
+  }
+  func.func @_QMrhsPkernel(%arg0: !fir.ref<f64>) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+    %c1 = arith.constant 1 : index
+    %0 = fir.address_of(@_QMiso_c_bindingECc_alert) : !fir.ref<!fir.char<1>>
+    %1 = fir.declare %0 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QMiso_c_bindingECc_alert"} : (!fir.ref<!fir.char<1>>, index) -> !fir.ref<!fir.char<1>>
+    return
+  }
+  gpu.module @cuda_device_mod {
+  }
+}
+
+// With default skip-dead-declares=true, the global should NOT be in gpu.module.
+// CHECK-LABEL: gpu.module @cuda_device_mod
+// CHECK-NOT: fir.global @_QMiso_c_bindingECc_alert
+
+// With skip-dead-declares=false (preserveDeclare mode), the global should be copied.
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
+// PRESERVE-LABEL: gpu.module @cuda_device_mod
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
+

From 8f7e8eada86296ae57df826e45a9877e547ef4bd Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 17 Apr 2026 13:55:08 -0700
Subject: [PATCH 2/2] Move test

---
 .../Fir/CUDA/cuda-device-global-preserve.f90  | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 flang/test/Fir/CUDA/cuda-device-global-preserve.f90

diff --git a/flang/test/Fir/CUDA/cuda-device-global-preserve.f90 b/flang/test/Fir/CUDA/cuda-device-global-preserve.f90
new file mode 100644
index 0000000000000..e2591cbd8b232
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-device-global-preserve.f90
@@ -0,0 +1,26 @@
+// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s
+// RUN: fir-opt --split-input-file --cuf-device-global="skip-dead-declares=false" %s | FileCheck --check-prefix=PRESERVE %s
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+  fir.global @_QMiso_c_bindingECc_alert constant : !fir.char<1> {
+    %0 = fir.string_lit "\07"(1) : !fir.char<1>
+    fir.has_value %0 : !fir.char<1>
+  }
+  func.func @_QMrhsPkernel(%arg0: !fir.ref<f64>) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+    %c1 = arith.constant 1 : index
+    %0 = fir.address_of(@_QMiso_c_bindingECc_alert) : !fir.ref<!fir.char<1>>
+    %1 = fir.declare %0 typeparams %c1 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QMiso_c_bindingECc_alert"} : (!fir.ref<!fir.char<1>>, index) -> !fir.ref<!fir.char<1>>
+    return
+  }
+  gpu.module @cuda_device_mod {
+  }
+}
+
+// With default skip-dead-declares=true, the global should NOT be in gpu.module.
+// CHECK-LABEL: gpu.module @cuda_device_mod
+// CHECK-NOT: fir.global @_QMiso_c_bindingECc_alert
+
+// With skip-dead-declares=false (preserveDeclare mode), the global should be copied.
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert
+// PRESERVE-LABEL: gpu.module @cuda_device_mod
+// PRESERVE: fir.global @_QMiso_c_bindingECc_alert



More information about the flang-commits mailing list