[flang-commits] [flang] b15bd3f - [flang][cuda] Add global constructor for allocators registration (#109854)

via flang-commits flang-commits at lists.llvm.org
Tue Sep 24 17:04:58 PDT 2024


Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-09-24T17:04:54-07:00
New Revision: b15bd3fc653f061e3a69e1c42a3e5f5256aa1b50

URL: https://github.com/llvm/llvm-project/commit/b15bd3fc653f061e3a69e1c42a3e5f5256aa1b50
DIFF: https://github.com/llvm/llvm-project/commit/b15bd3fc653f061e3a69e1c42a3e5f5256aa1b50.diff

LOG: [flang][cuda] Add global constructor for allocators registration (#109854)

This pass creates the constructor function to call the allocator
registration and adds it to the global_ctors.

Added: 
    flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
    flang/test/Fir/CUDA/cuda-constructor.f90

Modified: 
    flang/include/flang/Optimizer/Transforms/Passes.h
    flang/include/flang/Optimizer/Transforms/Passes.td
    flang/lib/Optimizer/Transforms/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index fcfb8677951a2d..3b2af3a3398108 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -39,6 +39,7 @@ namespace fir {
 #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
 #define GEN_PASS_DECL_CHARACTERCONVERSION
 #define GEN_PASS_DECL_CFGCONVERSION
+#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
 #define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL
 #define GEN_PASS_DECL_CUFOPCONVERSION
 #define GEN_PASS_DECL_EXTERNALNAMECONVERSION

diff  --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index ab98591c911cdf..bf75123e853779 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -436,4 +436,11 @@ def CufImplicitDeviceGlobal :
   ];
 }
 
+def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> {
+  let summary = "Add constructor to register CUDA Fortran allocators";
+  let dependentDialects = [
+    "mlir::func::FuncDialect"
+  ];
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES

diff  --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index b68e3d68b9b83e..5e1a0293e63c97 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
   CompilerGeneratedNames.cpp
   ConstantArgumentGlobalisation.cpp
   ControlFlowConverter.cpp
+  CUFAddConstructor.cpp
   CufImplicitDeviceGlobal.cpp
   CufOpConversion.cpp
   ArrayValueCopy.cpp

diff  --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
new file mode 100644
index 00000000000000..48620fbc585861
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -0,0 +1,75 @@
+//===-- CUFAddConstructor.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+static constexpr llvm::StringRef cudaFortranCtorName{
+    "__cudaFortranConstructor"};
+
+struct CUFAddConstructor
+    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {
+
+  void runOnOperation() override {
+    mlir::ModuleOp mod = getOperation();
+    mlir::OpBuilder builder{mod.getBodyRegion()};
+    builder.setInsertionPointToEnd(mod.getBody());
+    mlir::Location loc = mod.getLoc();
+    auto *ctx = mod.getContext();
+    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto funcTy =
+        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+
+    // Declare the private CUFRegisterAllocator symbol and a reference to it.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
+        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
+    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
+        mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
+    builder.setInsertionPointToEnd(mod.getBody());
+
+    // Create the constructor function that calls CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
+                                                       funcTy);
+    func.setLinkage(mlir::LLVM::Linkage::Internal);
+    builder.setInsertionPointToStart(func.addEntryBlock(builder));
+    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
+    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
+
+    // Create the llvm.mlir.global_ctors op with the constructor function.
+    // TODO: We might want to have a utility that retrieves it if already
+    // created and adds new functions.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::SmallVector<mlir::Attribute> funcs;
+    funcs.push_back(
+        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
+    llvm::SmallVector<int> priorities;
+    priorities.push_back(0);
+    builder.create<mlir::LLVM::GlobalCtorsOp>(
+        mod.getLoc(), builder.getArrayAttr(funcs),
+        builder.getI32ArrayAttr(priorities));
+  }
+};
+
+} // end anonymous namespace

diff  --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90
new file mode 100644
index 00000000000000..d02350b4f4198c
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-constructor.f90
@@ -0,0 +1,12 @@
+! RUN: bbc -fcuda -emit-hlfir %s -o - | fir-opt --cuf-add-constructor | FileCheck %s
+
+program main
+  real, device :: ahost(10)
+end
+
+! CHECK: llvm.func @_FortranACUFRegisterAllocator() attributes {sym_visibility = "private"}
+! CHECK-LABEL: llvm.func internal @__cudaFortranConstructor() {
+! CHECK:   llvm.call @_FortranACUFRegisterAllocator() : () -> ()
+! CHECK:   llvm.return
+! CHECK: }
+! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}


        


More information about the flang-commits mailing list