[flang-commits] [flang] [flang][cuda] Flag globals used in device function (PR #109460)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Fri Sep 20 12:40:50 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109460
>From ee0d59b043d5999d62159aabec7f5dd074328429 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 19 Sep 2024 14:03:46 -0700
Subject: [PATCH 1/3] [flang][cuda] Flag globals used in device function
---
.../flang/Optimizer/Transforms/Passes.h | 1 +
.../flang/Optimizer/Transforms/Passes.td | 8 ++
flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 +
.../Transforms/CufImplicitDeviceGlobal.cpp | 73 +++++++++++++++++++
.../Fir/CUDA/cuda-implicit-device-global.f90 | 49 +++++++++++++
5 files changed, 132 insertions(+)
create mode 100644 flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
create mode 100644 flang/test/Fir/CUDA/cuda-implicit-device-global.f90
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 59266a6adfe464..fcfb8677951a2d 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -39,6 +39,7 @@ namespace fir {
#define GEN_PASS_DECL_ASSUMEDRANKOPCONVERSION
#define GEN_PASS_DECL_CHARACTERCONVERSION
#define GEN_PASS_DECL_CFGCONVERSION
+#define GEN_PASS_DECL_CUFIMPLICITDEVICEGLOBAL
#define GEN_PASS_DECL_CUFOPCONVERSION
#define GEN_PASS_DECL_EXTERNALNAMECONVERSION
#define GEN_PASS_DECL_MEMREFDATAFLOWOPT
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 925ada0f9d3507..ab98591c911cdf 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -428,4 +428,12 @@ def CufOpConversion : Pass<"cuf-convert", "mlir::ModuleOp"> {
];
}
+def CufImplicitDeviceGlobal :
+ Pass<"cuf-implicit-device-global", "mlir::ModuleOp"> {
+ let summary = "Flag globals used in device function with data attribute";
+ let dependentDialects = [
+ "cuf::CUFDialect"
+ ];
+}
+
#endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index b32f2ef86fca44..b68e3d68b9b83e 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
CompilerGeneratedNames.cpp
ConstantArgumentGlobalisation.cpp
ControlFlowConverter.cpp
+ CufImplicitDeviceGlobal.cpp
CufOpConversion.cpp
ArrayValueCopy.cpp
ExternalNameConversion.cpp
diff --git a/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
new file mode 100644
index 00000000000000..5f78bf8f005765
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
@@ -0,0 +1,73 @@
+//===-- CufImplicitDeviceGlobal.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Common/Fortran.h"
+#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/allocatable.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace fir {
+#define GEN_PASS_DEF_CUFIMPLICITDEVICEGLOBAL
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+
+static fir::GlobalOp getGlobalOpFromValue(mlir::Value v) {
+ if (auto addrOfOp{mlir::dyn_cast_or_null<fir::AddrOfOp>(v.getDefiningOp())}) {
+ auto sym{mlir::SymbolTable::lookupNearestSymbolFrom(
+ addrOfOp, addrOfOp.getSymbolAttr())};
+ return mlir::dyn_cast_or_null<fir::GlobalOp>(sym);
+ }
+ return nullptr;
+}
+
+static void prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp,
+ bool onlyConstant = true) {
+ auto cudaProcAttr{
+ funcOp->getAttrOfType<cuf::ProcAttributeAttr>(cuf::getProcAttrName())};
+ if (!cudaProcAttr || cudaProcAttr.getValue() == cuf::ProcAttribute::Host)
+ return;
+ for (auto addrOfOp : funcOp.getBody().getOps<fir::AddrOfOp>()) {
+ if (auto globalOp{getGlobalOpFromValue(addrOfOp.getResult())}) {
+ bool isCandidate{(onlyConstant ? globalOp.getConstant() : true) &&
+ !globalOp.getDataAttr()};
+ if (isCandidate)
+ globalOp.setDataAttrAttr(cuf::DataAttributeAttr::get(
+ funcOp.getContext(), globalOp.getConstant()
+ ? cuf::DataAttribute::Constant
+ : cuf::DataAttribute::Device));
+ }
+ }
+}
+
+class CufImplicitDeviceGlobal
+ : public fir::impl::CufImplicitDeviceGlobalBase<CufImplicitDeviceGlobal> {
+public:
+ void runOnOperation() override {
+ auto *ctx = &getContext();
+ mlir::RewritePatternSet patterns(ctx);
+ mlir::ConversionTarget target(*ctx);
+
+ mlir::Operation *op = getOperation();
+ mlir::ModuleOp module = mlir::dyn_cast<mlir::ModuleOp>(op);
+ if (!module)
+ return signalPassFailure();
+
+ module.walk([&](mlir::func::FuncOp funcOp) {
+ prepareImplicitDeviceGlobals(funcOp);
+ return mlir::WalkResult::advance();
+ });
+ }
+};
+} // namespace
diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
new file mode 100644
index 00000000000000..c8bee3c62e6443
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90
@@ -0,0 +1,49 @@
+// RUN: fir-opt --split-input-file --cuf-implicit-device-global %s | FileCheck %s
+
+// Test that globals used in device functions are flagged with the correct
+// attribute.
+
+func.func @_QMdataPsetvalue() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
+ %c6_i32 = arith.constant 6 : i32
+ %21 = fir.address_of(@_QQclX6995815537abaf90e86ce166af128f3a) : !fir.ref<!fir.char<1,32>>
+ %22 = fir.convert %21 : (!fir.ref<!fir.char<1,32>>) -> !fir.ref<i8>
+ %c14_i32 = arith.constant 14 : i32
+ %23 = fir.call @_FortranAioBeginExternalListOutput(%c6_i32, %22, %c14_i32) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
+ return
+}
+
+func.func private @_FortranAioBeginExternalListOutput(i32, !fir.ref<i8>, i32) -> !fir.ref<i8> attributes {fir.io, fir.runtime}
+fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a constant : !fir.char<1,32> {
+ %0 = fir.string_lit "cuda-implicit-device-global.fir\00"(32) : !fir.char<1,32>
+ fir.has_value %0 : !fir.char<1,32>
+}
+
+// CHECK-LABEL: func.func @_QMdataPsetvalue() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+
+// CHECK: %[[GLOBAL:.*]] = fir.address_of(@_QQcl[[SYMBOL:.*]]) : !fir.ref<!fir.char<1,32>>
+// CHECK: %[[CONV:.*]] = fir.convert %[[GLOBAL]] : (!fir.ref<!fir.char<1,32>>) -> !fir.ref<i8>
+// CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
+// CHECK: fir.global linkonce @_QQcl[[SYMBOL]] {data_attr = #cuf.cuda<constant>} constant : !fir.char<1,32>
+
+// -----
+
+func.func @_QMdataPsetvalue() {
+ %c6_i32 = arith.constant 6 : i32
+ %21 = fir.address_of(@_QQclX6995815537abaf90e86ce166af128f3a) : !fir.ref<!fir.char<1,32>>
+ %22 = fir.convert %21 : (!fir.ref<!fir.char<1,32>>) -> !fir.ref<i8>
+ %c14_i32 = arith.constant 14 : i32
+ %23 = fir.call @_FortranAioBeginExternalListOutput(%c6_i32, %22, %c14_i32) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
+ return
+}
+
+func.func private @_FortranAioBeginExternalListOutput(i32, !fir.ref<i8>, i32) -> !fir.ref<i8> attributes {fir.io, fir.runtime}
+fir.global linkonce @_QQclX6995815537abaf90e86ce166af128f3a constant : !fir.char<1,32> {
+ %0 = fir.string_lit "cuda-implicit-device-global.fir\00"(32) : !fir.char<1,32>
+ fir.has_value %0 : !fir.char<1,32>
+}
+
+// CHECK-LABEL: func.func @_QMdataPsetvalue()
+// CHECK: %[[GLOBAL:.*]] = fir.address_of(@_QQcl[[SYMBOL:.*]]) : !fir.ref<!fir.char<1,32>>
+// CHECK: %[[CONV:.*]] = fir.convert %[[GLOBAL]] : (!fir.ref<!fir.char<1,32>>) -> !fir.ref<i8>
+// CHECK: fir.call @_FortranAioBeginExternalListOutput(%{{.*}}, %[[CONV]], %{{.*}}) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
+// CHECK: fir.global linkonce @_QQcl[[SYMBOL]] constant : !fir.char<1,32>
>From 0e23112b187eb6f054de20ef8d4cf7c05f556e26 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 20 Sep 2024 12:28:58 -0700
Subject: [PATCH 2/3] Cleanup
---
flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
index 5f78bf8f005765..9b19c2c48451d2 100644
--- a/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
@@ -55,10 +55,6 @@ class CufImplicitDeviceGlobal
: public fir::impl::CufImplicitDeviceGlobalBase<CufImplicitDeviceGlobal> {
public:
void runOnOperation() override {
- auto *ctx = &getContext();
- mlir::RewritePatternSet patterns(ctx);
- mlir::ConversionTarget target(*ctx);
-
mlir::Operation *op = getOperation();
mlir::ModuleOp module = mlir::dyn_cast<mlir::ModuleOp>(op);
if (!module)
>From 9737a8dbcec38eae7617ce015382fc3c74ef657d Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 20 Sep 2024 12:40:22 -0700
Subject: [PATCH 3/3] Use SymbolTable
---
.../Transforms/CufImplicitDeviceGlobal.cpp | 23 ++++++++-----------
1 file changed, 9 insertions(+), 14 deletions(-)
diff --git a/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
index 9b19c2c48451d2..206400c2ef8e53 100644
--- a/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CufImplicitDeviceGlobal.cpp
@@ -13,6 +13,7 @@
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Runtime/CUDA/common.h"
#include "flang/Runtime/allocatable.h"
+#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"
@@ -23,23 +24,16 @@ namespace fir {
namespace {
-static fir::GlobalOp getGlobalOpFromValue(mlir::Value v) {
- if (auto addrOfOp{mlir::dyn_cast_or_null<fir::AddrOfOp>(v.getDefiningOp())}) {
- auto sym{mlir::SymbolTable::lookupNearestSymbolFrom(
- addrOfOp, addrOfOp.getSymbolAttr())};
- return mlir::dyn_cast_or_null<fir::GlobalOp>(sym);
- }
- return nullptr;
-}
-
static void prepareImplicitDeviceGlobals(mlir::func::FuncOp funcOp,
+ mlir::SymbolTable &symbolTable,
bool onlyConstant = true) {
auto cudaProcAttr{
funcOp->getAttrOfType<cuf::ProcAttributeAttr>(cuf::getProcAttrName())};
if (!cudaProcAttr || cudaProcAttr.getValue() == cuf::ProcAttribute::Host)
return;
for (auto addrOfOp : funcOp.getBody().getOps<fir::AddrOfOp>()) {
- if (auto globalOp{getGlobalOpFromValue(addrOfOp.getResult())}) {
+ if (auto globalOp = symbolTable.lookup<fir::GlobalOp>(
+ addrOfOp.getSymbol().getRootReference().getValue())) {
bool isCandidate{(onlyConstant ? globalOp.getConstant() : true) &&
!globalOp.getDataAttr()};
if (isCandidate)
@@ -56,12 +50,13 @@ class CufImplicitDeviceGlobal
public:
void runOnOperation() override {
mlir::Operation *op = getOperation();
- mlir::ModuleOp module = mlir::dyn_cast<mlir::ModuleOp>(op);
- if (!module)
+ mlir::ModuleOp mod = mlir::dyn_cast<mlir::ModuleOp>(op);
+ if (!mod)
return signalPassFailure();
- module.walk([&](mlir::func::FuncOp funcOp) {
- prepareImplicitDeviceGlobals(funcOp);
+ mlir::SymbolTable symTable(mod);
+ mod.walk([&](mlir::func::FuncOp funcOp) {
+ prepareImplicitDeviceGlobals(funcOp, symTable);
return mlir::WalkResult::advance();
});
}
More information about the flang-commits
mailing list