[flang-commits] [flang] [flang][cuda] Copying device globals in the gpu module (PR #113955)
Renaud Kauffmann via flang-commits
flang-commits at lists.llvm.org
Mon Oct 28 13:49:18 PDT 2024
https://github.com/Renaud-K updated https://github.com/llvm/llvm-project/pull/113955
>From 07e26234eb59287acf7b2ae96053c76e00b76eac Mon Sep 17 00:00:00 2001
From: Renaud-K <rkauffmann at nvidia.com>
Date: Mon, 28 Oct 2024 12:29:45 -0700
Subject: [PATCH 1/3] [flang][cuda] Copying device globals in the gpu module
---
.../Optimizer/Transforms/CUFDeviceGlobal.cpp | 26 +++++++++++++++++++
flang/test/Fir/CUDA/cuda-device-global.f90 | 14 ++++++++++
2 files changed, 40 insertions(+)
create mode 100644 flang/test/Fir/CUDA/cuda-device-global.f90
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index a4761f24f16d7b..536322f10dcac5 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -11,6 +11,7 @@
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Transforms/CUFCommon.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/allocatable.h"
#include "mlir/IR/SymbolTable.h"
@@ -58,6 +59,31 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
prepareImplicitDeviceGlobals(funcOp, symTable);
return mlir::WalkResult::advance();
});
+
+ mlir::SymbolTable parentSymTable(mod);
+ auto gpuMod =
+ parentSymTable.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
+ if (gpuMod) {
+ mlir::SymbolTable gpuSymTable(gpuMod);
+ for (auto globalOp : mod.getOps<fir::GlobalOp>()) {
+ auto attr = globalOp.getDataAttrAttr();
+ if (!attr)
+ continue;
+ switch (attr.getValue()) {
+ case cuf::DataAttribute::Device:
+ case cuf::DataAttribute::Constant:
+ case cuf::DataAttribute::Managed: {
+ auto globalName{globalOp.getSymbol().getValue()};
+ if (gpuSymTable.lookup<fir::GlobalOp>(globalName)) {
+ break;
+ }
+ gpuSymTable.insert(globalOp->clone());
+ } break;
+ default:
+ break;
+ }
+ }
+ }
}
};
} // namespace
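For readers skimming the hunk above, the copy step it adds amounts to the standalone helper below. This is a sketch only, not part of the patch: the function name copyDeviceGlobalsToGPUModule is illustrative, the extra header paths are assumed (the real pass already pulls in the CUF attribute and GPU dialect headers elsewhere), and cudaDeviceModuleName is the constant provided by the newly included CUFCommon.h.

// Sketch only (not part of the patch): clone each device, constant, or
// managed fir.global from the host module into the gpu.module, skipping
// symbols that are already present there.
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h" // assumed path for cuf::DataAttribute
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Transforms/CUFCommon.h" // cudaDeviceModuleName
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/SymbolTable.h"

static void copyDeviceGlobalsToGPUModule(mlir::ModuleOp mod) {
  mlir::SymbolTable parentSymTable(mod);
  auto gpuMod =
      parentSymTable.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
  if (!gpuMod)
    return;
  mlir::SymbolTable gpuSymTable(gpuMod);
  for (auto globalOp : mod.getOps<fir::GlobalOp>()) {
    auto attr = globalOp.getDataAttrAttr();
    if (!attr)
      continue;
    switch (attr.getValue()) {
    case cuf::DataAttribute::Device:
    case cuf::DataAttribute::Constant:
    case cuf::DataAttribute::Managed:
      // Clone only once so re-running the pass does not duplicate symbols.
      if (!gpuSymTable.lookup<fir::GlobalOp>(globalOp.getSymbol().getValue()))
        gpuSymTable.insert(globalOp->clone());
      break;
    default:
      break;
    }
  }
}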
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
new file mode 100644
index 00000000000000..8a986437007a4b
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -0,0 +1,14 @@
+
+// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s
+
+
+// -----// IR Dump After CUFLaunchToGPU (cuf-fir-launch-to-gpu) //----- //
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+ fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
+
+ gpu.module @cuda_device_mod [#nvvm.target] {
+ }
+}
+
+// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
+// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
\ No newline at end of file
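As the CHECK lines show, the global lands inside gpu.module @cuda_device_mod with its #cuf.cuda<device> attribute preserved, so device-side passes can resolve it by symbol name. A minimal sketch of that consumer-side lookup, under the same header assumptions as above (the function name is illustrative):

// Sketch only: resolve a copied device global from within the gpu.module.
static fir::GlobalOp lookupCopiedDeviceGlobal(mlir::ModuleOp mod,
                                              llvm::StringRef name) {
  mlir::SymbolTable parentSymTable(mod);
  auto gpuMod =
      parentSymTable.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
  if (!gpuMod)
    return {};
  // For this test, name would be "_QMmtestsEn".
  return mlir::SymbolTable(gpuMod).lookup<fir::GlobalOp>(name);
}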
>From a4d47747fa7bec314d4d95f0a9252dd6a77c4084 Mon Sep 17 00:00:00 2001
From: Renaud-K <rkauffmann at nvidia.com>
Date: Mon, 28 Oct 2024 12:32:53 -0700
Subject: [PATCH 2/3] Added comment
---
flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index 536322f10dcac5..dc39be8574f844 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -60,6 +60,7 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
return mlir::WalkResult::advance();
});
+ // Copy the device global variables into the gpu module.
mlir::SymbolTable parentSymTable(mod);
auto gpuMod =
parentSymTable.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
>From e740aaf59cff85a56d7ec95aa75e46be554317aa Mon Sep 17 00:00:00 2001
From: Renaud-K <rkauffmann at nvidia.com>
Date: Mon, 28 Oct 2024 13:48:43 -0700
Subject: [PATCH 3/3] Applying review feedback
---
flang/test/Fir/CUDA/cuda-device-global.f90 | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
index 8a986437007a4b..c83a938d5af214 100644
--- a/flang/test/Fir/CUDA/cuda-device-global.f90
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -2,7 +2,6 @@
// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s
-// -----// IR Dump After CUFLaunchToGPU (cuf-fir-launch-to-gpu) //----- //
module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
@@ -11,4 +10,4 @@ module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.conta
}
// CHECK: gpu.module @cuda_device_mod [#nvvm.target]
-// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
\ No newline at end of file
+// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>