[flang-commits] [flang] Adding CUFCommon.{h,cpp} for CUF utilities (PR #113740)

Fri Oct 25 14:50:05 PDT 2024

https://github.com/Renaud-K created https://github.com/llvm/llvm-project/pull/113740

None

>From 765fb614b20ad5e52675b9c9657bcec20fa9b0fb Mon Sep 17 00:00:00 2001
From: Renaud-K <rkauffmann at nvidia.com>
Date: Fri, 25 Oct 2024 14:39:00 -0700
Subject: [PATCH] Adding CUFCommon.{h,cpp} for CUF utilities

---
 .../flang/Optimizer/Transforms/CUFCommon.h    | 25 +++++++++++++++
 flang/lib/Optimizer/Transforms/CMakeLists.txt |  1 +
 .../Transforms/CUFAddConstructor.cpp          |  7 ++---
 flang/lib/Optimizer/Transforms/CUFCommon.cpp  | 31 +++++++++++++++++++
 4 files changed, 60 insertions(+), 4 deletions(-)
 create mode 100644 flang/include/flang/Optimizer/Transforms/CUFCommon.h
 create mode 100644 flang/lib/Optimizer/Transforms/CUFCommon.cpp

diff --git a/flang/include/flang/Optimizer/Transforms/CUFCommon.h b/flang/include/flang/Optimizer/Transforms/CUFCommon.h
new file mode 100644
index 00000000000000..eac7dcdf15b3ce
--- /dev/null
+++ b/flang/include/flang/Optimizer/Transforms/CUFCommon.h
@@ -0,0 +1,25 @@
+//===-- CUFCommon.h -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_
+#define FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+
+static constexpr llvm::StringRef cudaDeviceModuleName = "cuda_device_mod";
+
+namespace cuf {
+
+/// Retrieve or create the CUDA Fortran GPU module in the given \p mod.
+mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod,
+                                            mlir::SymbolTable &symTab);
+
+} // namespace cuf
+
+#endif // FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index d20d3bc4108ce9..9eafa4ec234bdd 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
   CompilerGeneratedNames.cpp
   ConstantArgumentGlobalisation.cpp
   ControlFlowConverter.cpp
+  CUFCommon.cpp
   CUFAddConstructor.cpp
   CUFDeviceGlobal.cpp
   CUFOpConversion.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index f260437e710417..4da06be8ef7dd9 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -11,6 +11,7 @@
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -24,8 +25,6 @@ namespace fir {
 
 namespace {
 
-static constexpr llvm::StringRef cudaModName{"cuda_device_mod"};
-
 static constexpr llvm::StringRef cudaFortranCtorName{
     "__cudaFortranConstructor"};
 
@@ -60,7 +59,7 @@ struct CUFAddConstructor
     builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
 
     // Register kernels
-    auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaModName);
+    auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
     if (gpuMod) {
       auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx);
       auto registeredMod = builder.create<cuf::RegisterModuleOp>(
@@ -68,7 +67,7 @@ struct CUFAddConstructor
       for (auto func : gpuMod.getOps<mlir::gpu::GPUFuncOp>()) {
         if (func.isKernel()) {
           auto kernelName = mlir::SymbolRefAttr::get(
-              builder.getStringAttr(cudaModName),
+              builder.getStringAttr(cudaDeviceModuleName),
               {mlir::SymbolRefAttr::get(builder.getContext(), func.getName())});
           builder.create<cuf::RegisterKernelOp>(loc, kernelName, registeredMod);
         }
diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
new file mode 100644
index 00000000000000..5eca86529f9e17
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
@@ -0,0 +1,31 @@
+//===-- CUFCommon.cpp - Shared functions between passes ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Transforms/CUFCommon.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+
+/// Retrieve or create the CUDA Fortran GPU module in the given \p mod.
+mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod,
+                                                 mlir::SymbolTable &symTab) {
+  if (auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName))
+    return gpuMod;
+
+  auto *ctx = mod.getContext();
+  mod->setAttr(mlir::gpu::GPUDialect::getContainerModuleAttrName(),
+               mlir::UnitAttr::get(ctx));
+
+  mlir::OpBuilder builder(ctx);
+  auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(),
+                                                       cudaDeviceModuleName);
+  llvm::SmallVector<mlir::Attribute> targets;
+  targets.push_back(mlir::NVVM::NVVMTargetAttr::get(ctx));
+  gpuMod.setTargetsAttr(builder.getArrayAttr(targets));
+  mlir::Block::iterator insertPt(mod.getBodyRegion().front().end());
+  symTab.insert(gpuMod, insertPt);
+  return gpuMod;
+}