[flang-commits] [flang] [flang][cuda] Walk through cuf kernel for implicit globals (PR #119455)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Dec 10 12:54:52 PST 2024
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/119455
Globals used in cuf kernel need to be flagged as well.
>From df0773544e7e89e0dcda141cd173a4995f41b83e Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 10 Dec 2024 12:53:54 -0800
Subject: [PATCH] [flang][cuda] Walk through cuf kernel for implicit globals
---
.../Optimizer/Transforms/CUFDeviceGlobal.cpp | 5 +++
.../Fir/CUDA/cuda-implicit-device-global.f90 | 45 +++++++++++++++++++
2 files changed, 50 insertions(+)
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index 18150c4e595d40..72b894d1cec757 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -68,6 +68,11 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
prepareImplicitDeviceGlobals(funcOp, symTable, candidates);
return mlir::WalkResult::advance();
});
+ mod.walk([&](cuf::KernelOp kernelOp) {
+ kernelOp.walk([&](fir::AddrOfOp addrOfOp) {
+ processAddrOfOp(addrOfOp, symTable, candidates);
+ });
+ });
// Copying the device global variable into the gpu module
mlir::SymbolTable parentSymTable(mod);
diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
index 5a4cc8590f4168..ec5ed06824e227 100644
--- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
@@ -146,3 +146,48 @@ // Test that global used in device function are flagged with the correct
// CHECK-LABEL: gpu.module @cuda_device_mod
// CHECK: fir.global linkonce @_QQclX5465737420504153534544 constant
+
+// -----
+
+func.func @_QQmain() attributes {fir.bindc_name = "cufkernel_global"} {
+ %c10 = arith.constant 10 : index
+ %c5_i32 = arith.constant 5 : i32
+ %c6_i32 = arith.constant 6 : i32
+ %c1 = arith.constant 1 : index
+ %c1_i32 = arith.constant 1 : i32
+ %c10_i32 = arith.constant 10 : i32
+ %0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+ %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+ cuf.kernel<<<%c10_i32, %c1_i32>>> (%arg0 : index) = (%c1 : index) to (%c10 : index) step (%c1 : index) {
+ %2 = fir.convert %arg0 : (index) -> i32
+ fir.store %2 to %1#1 : !fir.ref<i32>
+ %3 = fir.load %1#0 : !fir.ref<i32>
+ %4 = arith.cmpi eq, %3, %c1_i32 : i32
+ cf.cond_br %4, ^bb1, ^bb2
+ ^bb1: // pred: ^bb0
+ %5 = fir.address_of(@_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5) : !fir.ref<!fir.char<1,50>>
+ %6 = fir.convert %5 : (!fir.ref<!fir.char<1,50>>) -> !fir.ref<i8>
+ %7 = fir.call @_FortranAioBeginExternalListOutput(%c6_i32, %6, %c5_i32) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
+ %8 = fir.load %1#0 : !fir.ref<i32>
+ %9 = fir.call @_FortranAioOutputInteger32(%7, %8) fastmath<contract> : (!fir.ref<i8>, i32) -> i1
+ %10 = fir.call @_FortranAioEndIoStatement(%7) fastmath<contract> : (!fir.ref<i8>) -> i32
+ cf.br ^bb2
+ ^bb2: // 2 preds: ^bb0, ^bb1
+ "fir.end"() : () -> ()
+ }
+ return
+}
+func.func private @_FortranAioBeginExternalListOutput(i32, !fir.ref<i8>, i32) -> !fir.ref<i8> attributes {fir.io, fir.runtime}
+fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50> {
+ %0 = fir.string_lit "/local/home/vclement/llvm-project/build/dummy.cuf\00"(50) : !fir.char<1,50>
+ fir.has_value %0 : !fir.char<1,50>
+}
+func.func private @_FortranAioOutputInteger32(!fir.ref<i8>, i32) -> i1 attributes {fir.io, fir.runtime}
+func.func private @_FortranAioEndIoStatement(!fir.ref<i8>) -> i32 attributes {fir.io, fir.runtime}
+func.func private @_FortranAProgramStart(i32, !llvm.ptr, !llvm.ptr, !llvm.ptr)
+func.func private @_FortranAProgramEndStatement()
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50>
+// CHECK: gpu.module @cuda_device_mod
+// CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50>
More information about the flang-commits
mailing list