[flang-commits] [flang] [flang][cuda] Add global constructor for allocators registration (PR #109854)

Valentin Clement バレンタイン クレメン via flang-commits flang-commits at lists.llvm.org
Tue Sep 24 13:34:42 PDT 2024


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109854

>From ef99fa8703d433cceae884865d72e09cffdf3c0b Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 12:59:38 -0700
Subject: [PATCH 1/4] [flang][cuda] Add global constructor for allocators
 registration

---
 .../flang/Optimizer/Transforms/Passes.h       |  1 +
 .../flang/Optimizer/Transforms/Passes.td      |  7 ++
 flang/lib/Optimizer/Transforms/CMakeLists.txt |  1 +
 .../Transforms/CUFAddConstructor.cpp          | 89 +++++++++++++++++++
 flang/test/Fir/CUDA/cuda-constructor.f90      | 12 +++
 5 files changed, 110 insertions(+)
 create mode 100644 flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
 create mode 100644 flang/test/Fir/CUDA/cuda-constructor.f90

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index fcfb8677951a2d..3b2af3a3398108 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -39,6 +39,7 @@ namespace fir {
 #define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
 #define GEN_PASS_DECL_CHARACTERCONVERSION
 #define GEN_PASS_DECL_CFGCONVERSION
+#define GEN_PASS_DECL_CUFADDCONSTRUCTOR
 #define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL
 #define GEN_PASS_DECL_CUFOPCONVERSION
 #define GEN_PASS_DECL_EXTERNALNAMECONVERSION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index ab98591c911cdf..bf75123e853779 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -436,4 +436,11 @@ def CufImplicitDeviceGlobal :
   ];
 }
 
+def CUFAddConstructor : Pass<"cuf-add-constructor", "mlir::ModuleOp"> {
+  let summary = "Add constructor to register CUDA Fortran allocators";
+  let dependentDialects = [
+    "mlir::func::FuncDialect"
+  ];
+}
+
 #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index b68e3d68b9b83e..5e1a0293e63c97 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
   CompilerGeneratedNames.cpp
   ConstantArgumentGlobalisation.cpp
   ControlFlowConverter.cpp
+  CUFAddConstructor.cpp
   CufImplicitDeviceGlobal.cpp
   CufOpConversion.cpp
   ArrayValueCopy.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
new file mode 100644
index 00000000000000..7ab06845ddec2f
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -0,0 +1,89 @@
+//===- CUFAddConstructor.cpp ------------------------------------*- C++ -*-===//
+//
+// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
+// property and proprietary rights in and to this material, related
+// documentation and any modifications thereto. Any use, reproduction,
+// disclosure or distribution of this material and related documentation
+// without an express license agreement from NVIDIA CORPORATION or
+// its affiliates is strictly prohibited.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Runtime/entry-names.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Pass/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFADDCONSTRUCTOR
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+static constexpr llvm::StringRef cudaFortranCtorName{
+    "__cudaFortranConstructor"};
+
+struct CUFAddConstructor
+    : public fir::impl::CUFAddConstructorBase<CUFAddConstructor> {
+
+  void runOnOperation() override {
+    mlir::ModuleOp mod = getOperation();
+    mlir::OpBuilder builder{mod.getBodyRegion()};
+    builder.setInsertionPointToEnd(mod.getBody());
+    mlir::Location loc = mod.getLoc();
+    auto *ctx = mod.getContext();
+    auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto funcTy =
+        mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+
+    llvm::errs() << "DBG1\n";
+    // Symbol reference to CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::errs() << "DBG1.1\n";
+    auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
+        loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
+    llvm::errs() << "DBG1.2\n";
+    registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
+    auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
+        mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
+    llvm::errs() << "DBG1.3\n";
+    builder.setInsertionPointToEnd(mod.getBody());
+    // mod.push_back(registerFuncOp);
+    llvm::errs() << "DBG1.4\n";
+
+    llvm::errs() << "DBG2\n";
+    // Create the constructor function that calls CUFRegisterAllocator.
+    builder.setInsertionPointToEnd(mod.getBody());
+    auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
+                                                       funcTy);
+    func.setLinkage(mlir::LLVM::Linkage::Internal);
+    builder.setInsertionPointToStart(func.addEntryBlock(builder));
+    builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
+    builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
+    // mod.push_back(func);
+
+    llvm::errs() << "DBG3\n";
+    // Create the llvm.global_ctor with the function.
+    // TODO: We might want to have a utility that retrieve it if already created
+    // and adds new functions.
+    builder.setInsertionPointToEnd(mod.getBody());
+    llvm::SmallVector<mlir::Attribute> funcs;
+    funcs.push_back(
+        mlir::FlatSymbolRefAttr::get(mod.getContext(), func.getSymName()));
+    llvm::SmallVector<int> priorities;
+    priorities.push_back(0);
+    builder.create<mlir::LLVM::GlobalCtorsOp>(
+        mod.getLoc(), builder.getArrayAttr(funcs),
+        builder.getI32ArrayAttr(priorities));
+  }
+};
+
+} // end anonymous namespace
\ No newline at end of file
diff --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90
new file mode 100644
index 00000000000000..3a27b9d54252ca
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-constructor.f90
@@ -0,0 +1,12 @@
+! RUN: bbc -fcuda -emit-hlfir %s -o - | fir-opt --cuf-add-constructor | FileCheck %s
+
+program main
+  real, device :: ahost(10)
+end
+
+! CHECK: llvm.func @_FortranACUFRegisterAllocator() attributes {sym_visibility = "private"}
+! CHECK-LABEL: llvm.func internal @__cudaFortranConstructor() {
+! CHECK:   llvm.call @_FortranACUFRegisterAllocator() : () -> ()
+! CHECK:   llvm.return
+! CHECK: }
+! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}
\ No newline at end of file

>From 843481bd81793cef6b31c9cb12bdaa8fd500ace0 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 13:32:28 -0700
Subject: [PATCH 2/4] Update license

---
 .../lib/Optimizer/Transforms/CUFAddConstructor.cpp  | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index 7ab06845ddec2f..50a92866119796 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -1,13 +1,8 @@
-//===- CUFAddConstructor.cpp ------------------------------------*- C++ -*-===//
+//===-- CUFAddConstructor.cpp ---------------------------------------------===//
 //
-// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-//
-// NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
-// property and proprietary rights in and to this material, related
-// documentation and any modifications thereto. Any use, reproduction,
-// disclosure or distribution of this material and related documentation
-// without an express license agreement from NVIDIA CORPORATION or
-// its affiliates is strictly prohibited.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 

>From 1733d934d4f800b5d28d9a8efd8033a87d477314 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 13:33:51 -0700
Subject: [PATCH 3/4] new lines

---
 flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp | 2 +-
 flang/test/Fir/CUDA/cuda-constructor.f90             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index 50a92866119796..bdc8b32f9c1769 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -81,4 +81,4 @@ struct CUFAddConstructor
   }
 };
 
-} // end anonymous namespace
\ No newline at end of file
+} // end anonymous namespace
diff --git a/flang/test/Fir/CUDA/cuda-constructor.f90 b/flang/test/Fir/CUDA/cuda-constructor.f90
index 3a27b9d54252ca..d02350b4f4198c 100644
--- a/flang/test/Fir/CUDA/cuda-constructor.f90
+++ b/flang/test/Fir/CUDA/cuda-constructor.f90
@@ -9,4 +9,4 @@ program main
 ! CHECK:   llvm.call @_FortranACUFRegisterAllocator() : () -> ()
 ! CHECK:   llvm.return
 ! CHECK: }
-! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}
\ No newline at end of file
+! CHECK: llvm.mlir.global_ctors {ctors = [@__cudaFortranConstructor], priorities = [0 : i32]}

>From b696849094c4bf952f9d2289ad8a5933e7790db2 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 13:34:30 -0700
Subject: [PATCH 4/4] Remove debug print

---
 flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index bdc8b32f9c1769..48620fbc585861 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -39,22 +39,15 @@ struct CUFAddConstructor
     auto funcTy =
         mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
 
-    llvm::errs() << "DBG1\n";
     // Symbol reference to CUFRegisterAllocator.
     builder.setInsertionPointToEnd(mod.getBody());
-    llvm::errs() << "DBG1.1\n";
     auto registerFuncOp = builder.create<mlir::LLVM::LLVMFuncOp>(
         loc, RTNAME_STRING(CUFRegisterAllocator), funcTy);
-    llvm::errs() << "DBG1.2\n";
     registerFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
     auto cufRegisterAllocatorRef = mlir::SymbolRefAttr::get(
         mod.getContext(), RTNAME_STRING(CUFRegisterAllocator));
-    llvm::errs() << "DBG1.3\n";
     builder.setInsertionPointToEnd(mod.getBody());
-    // mod.push_back(registerFuncOp);
-    llvm::errs() << "DBG1.4\n";
 
-    llvm::errs() << "DBG2\n";
     // Create the constructor function that calls CUFRegisterAllocator.
     builder.setInsertionPointToEnd(mod.getBody());
     auto func = builder.create<mlir::LLVM::LLVMFuncOp>(loc, cudaFortranCtorName,
@@ -63,9 +56,7 @@ struct CUFAddConstructor
     builder.setInsertionPointToStart(func.addEntryBlock(builder));
     builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
     builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
-    // mod.push_back(func);
 
-    llvm::errs() << "DBG3\n";
     // Create the llvm.global_ctor with the function.
     // TODO: We might want to have a utility that retrieve it if already created
     // and adds new functions.



More information about the flang-commits mailing list