[flang-commits] [flang] [flang][cuda] Copying device globals in the gpu module (PR #113955)
via flang-commits
flang-commits at lists.llvm.org
Mon Oct 28 12:38:06 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-flang-fir-hlfir
Author: Renaud Kauffmann (Renaud-K)
Changes
---
Full diff: https://github.com/llvm/llvm-project/pull/113955.diff
2 Files Affected:
- (modified) flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp (+27)
- (added) flang/test/Fir/CUDA/cuda-device-global.f90 (+14)
``````````diff
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index a4761f24f16d7b..dc39be8574f844 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -11,6 +11,7 @@
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/allocatable.h"
 #include "mlir/IR/SymbolTable.h"
@@ -58,6 +59,32 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
       prepareImplicitDeviceGlobals(funcOp, symTable);
       return mlir::WalkResult::advance();
     });
+
+    // Copying the device global variable into the gpu module
+    mlir::SymbolTable parentSymTable(mod);
+    auto gpuMod =
+        parentSymTable.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
+    if (gpuMod) {
+      mlir::SymbolTable gpuSymTable(gpuMod);
+      for (auto globalOp : mod.getOps<fir::GlobalOp>()) {
+        auto attr = globalOp.getDataAttrAttr();
+        if (!attr)
+          continue;
+        switch (attr.getValue()) {
+        case cuf::DataAttribute::Device:
+        case cuf::DataAttribute::Constant:
+        case cuf::DataAttribute::Managed: {
+          auto globalName{globalOp.getSymbol().getValue()};
+          if (gpuSymTable.lookup<fir::GlobalOp>(globalName)) {
+            break;
+          }
+          gpuSymTable.insert(globalOp->clone());
+        } break;
+        default:
+          break;
+        }
+      }
+    }
   }
 };
 } // namespace
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
new file mode 100644
index 00000000000000..8a986437007a4b
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -0,0 +1,14 @@
+
+// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s
+
+
+// -----// IR Dump After CUFLaunchToGPU (cuf-fir-launch-to-gpu) //----- //
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+  fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
+
+  gpu.module @cuda_device_mod [#nvvm.target] {
+  }
+}
+
+// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
\ No newline at end of file
``````````
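For readers skimming the patch, the net effect is easiest to see on the test input: any `fir.global` whose `data_attr` is `#cuf.cuda<device>`, `<constant>`, or `<managed>` is cloned into `gpu.module @cuda_device_mod`, skipping symbols that already exist there by name, while the original stays in the parent module (the new code calls `globalOp->clone()` rather than moving the op). The sketch below is illustrative only, not additional output of the patch, and trims the module attributes from the test for brevity.

```mlir
// Illustrative sketch (not part of the patch): expected effect of the
// --cuf-device-global pass on the test input, module attributes trimmed.

// Before: a device global at module scope and an empty gpu.module.
module attributes {gpu.container_module} {
  fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
  gpu.module @cuda_device_mod [#nvvm.target] {
  }
}

// After: the device global is cloned into the gpu.module; the original copy
// remains in the parent module because the pass clones rather than moves.
module attributes {gpu.container_module} {
  fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
  gpu.module @cuda_device_mod [#nvvm.target] {
    fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
  }
}
```

The RUN line in the new test shows how to exercise this standalone: `fir-opt --split-input-file --cuf-device-global flang/test/Fir/CUDA/cuda-device-global.f90`.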
https://github.com/llvm/llvm-project/pull/113955