[flang-commits] [flang] [flang][cuda] Add global constructor for allocators registration (PR #109854)

via flang-commits flang-commits at lists.llvm.org
Tue Sep 24 13:01:46 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

This pass creates the constructor function that calls the allocator registration and adds it to the global_ctors.

---
Full diff: https://github.com/llvm/llvm-project/pull/109854.diff


5 Files Affected:

- (modified) flang/include/flang/Optimizer/Transforms/Passes.h (+1) 
- (modified) flang/include/flang/Optimizer/Transforms/Passes.td (+7) 
- (modified) flang/lib/Optimizer/Transforms/CMakeLists.txt (+1) 
- (added) flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp (+89) 
- (added) flang/test/Fir/CUDA/cuda-constructor.f90 (+12) 


``````````diff
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index fcfb8677951a2d..3b2af3a3398108 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -39,6 +39,7 @@ namespace fir {
 #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
 #define GEN_PASS_DECL_CHARACTERCONVERSION
 #define GEN_PASS_DECL_CFGCONVERSION
+#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
 #define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL
 #define GEN_PASS_DECL_CUFOPCONVERSION
 #define GEN_PASS_DECL_EXTERNALNAMECONVERSION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index ab98591c911cdf..bf75123e853779 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -436,4 +436,11 @@ def CufImplicitDeviceGlobal :
   ];
 }
 
+def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> {
+  let summary = "Add constructor to register CUDA Fortran allocators";
+  // The pass creates LLVM dialect operations, so that dialect must be loaded.
+  let dependentDialects = ["mlir::func::FuncDialect",
+                           "mlir::LLVM::LLVMDialect"];
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index b68e3d68b9b83e..5e1a0293e63c97 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
   CompilerGeneratedNames.cpp
   ConstantArgumentGlobalisation.cpp
   ControlFlowConverter.cpp
+  CUFAddConstructor.cpp
   CufImplicitDeviceGlobal.cpp
   CufOpConversion.cpp
   ArrayValueCopy.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
new file mode 100644
index 00000000000000..7ab06845ddec2f
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -0,0 +1,89 @@
+//===- CUFAddConstructor.cpp ------------------------------------*- C++ -*-===//
+//
+// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+// property and proprietary rights in and to this material, related
+// documentation and any modifications thereto. Any use, reproduction,
+// disclosure or distribution of this material and related documentation
+// without an express license agreement from NVIDIA CORPORATION or
+// its affiliates is strictly prohibited.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+static constexpr llvm::StringRef cudaFortranCtorName{
+    "__cudaFortranConstructor"};
+
+/// Module pass that emits an internal LLVM function calling the CUDA Fortran
+/// allocator registration entry point (CUFRegisterAllocator) and lists that
+/// function in llvm.mlir.global_ctors.
+struct CUFAddConstructor
+    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {
+
+  /// Appends three operations to the end of the module body: the private
+  /// declaration of the registration function, the constructor that calls it,
+  /// and the global_ctors operation that refers to the constructor.
+  void runOnOperation() override {
+    mlir::ModuleOp mod = getOperation();
+    mlir::OpBuilder builder{mod.getBodyRegion()};
+    builder.setInsertionPointToEnd(mod.getBody());
+    mlir::Location loc = mod.getLoc();
+    auto *ctx = mod.getContext();
+    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto funcTy =
+        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+
+    // Declare CUFRegisterAllocator (no body) and build a symbol reference to
+    // it. The builder stays positioned at the end of the module body, so no
+    // repositioning is needed before the next module-level operation.
+    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
+        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
+    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+    auto cufRegisterAllocatorRef =
+        mlir::SymbolRefAttr::get(ctx, RTNAME_STRING(CUFRegisterAllocator));
+
+    // Create the constructor function that calls CUFRegisterAllocator. It is
+    // given internal linkage and a body made of the call and a return.
+    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
+                                                       funcTy);
+    func.setLinkage(mlir::LLVM::Linkage::Internal);
+    builder.setInsertionPointToStart(func.addEntryBlock(builder));
+    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
+    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
+
+    // Create the llvm.mlir.global_ctors operation listing the constructor.
+    // The builder is still inside the constructor body at this point, so move
+    // it back to the end of the module first.
+    // TODO: We might want to have a utility that retrieves it if already
+    // created and adds new functions.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::SmallVector<mlir::Attribute> funcs;
+    funcs.push_back(
+        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
+    // Priority 0 is used for the single constructor entry.
+    llvm::SmallVector<int> priorities;
+    priorities.push_back(0);
+    builder.create<mlir::LLVM::GlobalCtorsOp>(
+        mod.getLoc(), builder.getArrayAttr(funcs),
+        builder.getI32ArrayAttr(priorities));
+  }
+};
+
+} // end anonymous namespace
\ No newline at end of file
diff --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90
new file mode 100644
index 00000000000000..3a27b9d54252ca
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-constructor.f90
@@ -0,0 +1,12 @@
+! RUN: bbc -fcuda -emit-hlfir %s -o - | fir-opt --cuf-add-constructor | FileCheck %s
+
+program main
+  real, device :: ahost(10)
+end
+
+! CHECK: llvm.func @_FortranACUFRegisterAllocator() attributes {sym_visibility = "private"}
+! CHECK-LABEL: llvm.func internal @__cudaFortranConstructor() {
+! CHECK:   llvm.call @_FortranACUFRegisterAllocator() : () -> ()
+! CHECK:   llvm.return
+! CHECK: }
+! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/109854


More information about the flang-commits mailing list