[flang-commits] [flang] [flang] Add reductions for CUF Kernels: Lowering (PR #95184)
Iman Hosseini via flang-commits
flang-commits at lists.llvm.org
Tue Jun 11 18:30:51 PDT 2024
https://github.com/ImanHosseini updated https://github.com/llvm/llvm-project/pull/95184
>From d19e4cbb86e1f979de67a644e0de0850b5c02fce Mon Sep 17 00:00:00 2001
From: Iman Hosseini <imanh at nvidia.com>
Date: Tue, 11 Jun 2024 17:05:40 -0700
Subject: [PATCH 1/4] Support reductions in CUF Kernels: Lowering
---
.../flang/Optimizer/Dialect/CUF/CUFOps.td | 23 +++++++++++-
flang/lib/Lower/Bridge.cpp | 34 ++++++++++++++++-
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 4 +-
flang/test/Lower/cuf_kernel_do_reduction.f90 | 37 +++++++++++++++++++
4 files changed, 94 insertions(+), 4 deletions(-)
create mode 100644 flang/test/Lower/cuf_kernel_do_reduction.f90
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 37b8da0181955..5c27b2e7f2938 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -17,6 +17,7 @@
include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
+include "flang/Optimizer/Dialect/FIRAttr.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/IR/BuiltinAttributes.td"
@@ -249,7 +250,9 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
Variadic<Index>:$lowerbound,
Variadic<Index>:$upperbound,
Variadic<Index>:$step,
- OptionalAttr<I64Attr>:$n
+ OptionalAttr<I64Attr>:$n,
+ Variadic<AnyType>:$reduceOperands,
+ OptionalAttr<ArrayAttr>:$reduceAttrs
);
let regions = (region AnyRegion:$region);
@@ -260,9 +263,27 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
( `,` `stream` `=` $stream^ )? `>` `>` `>`
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
$upperbound, type($upperbound), $step, type($step))
+ `reduce_oprnds` $reduceOperands `:` type($reduceOperands)
attr-dict
}];
+ let extraClassDeclaration = [{
+ /// Get the number of operands in the operand segment at index `idx`.
+ unsigned getNumOperands(unsigned idx) {
+ auto segments = (*this)->getAttrOfType<mlir::DenseI32ArrayAttr>(
+ getOperandSegmentSizeAttr());
+ return static_cast<unsigned>(segments[idx]);
+ }
+ /// Get the number of reduction operands.
+ unsigned getNumReduceOperands() {
+ return getNumOperands(7);
+ }
+ /// Does the operation hold operands for reduction variables
+ bool hasReduceOperands() {
+ return getNumReduceOperands() > 0;
+ }
+ }];
+
let hasVerifier = 1;
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 202efa57d4a36..27c80bf3788b3 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2669,6 +2669,35 @@ class FirConverter : public Fortran::lower::AbstractConverter {
std::get<2>(dir.t);
const std::optional<Fortran::parser::ScalarIntExpr> &stream =
std::get<3>(dir.t);
+ const std::list<Fortran::parser::CUFReduction> &cufreds =
+ std::get<4>(dir.t);
+
+ llvm::SmallVector<mlir::Value> reduceOperands;
+ llvm::SmallVector<mlir::Attribute> reduceAttrs;
+
+ for (const Fortran::parser::CUFReduction &cufred : cufreds) {
+ fir::ReduceOperationEnum redOpEnum = getReduceOperationEnum(
+ std::get<Fortran::parser::ReductionOperator>(cufred.t));
+ const std::list<Fortran::parser::Scalar<Fortran::parser::Variable>>
+ &scalarvars = std::get<1>(cufred.t);
+ for (const Fortran::parser::Scalar<Fortran::parser::Variable> &scalarvar :
+ scalarvars) {
+ auto reduce_attr =
+ fir::ReduceAttr::get(builder->getContext(), redOpEnum);
+ reduceAttrs.push_back(reduce_attr);
+ const Fortran::parser::Variable &var = scalarvar.thing;
+ if (const auto *iDesignator = std::get_if<
+ Fortran::common::Indirection<Fortran::parser::Designator>>(
+ &var.u)) {
+ const Fortran::parser::Designator &designator = iDesignator->value();
+ if (const auto *name =
+ Fortran::semantics::getDesignatorNameIfDataRef(designator)) {
+ auto val = getSymbolAddress(*name->symbol);
+ reduceOperands.push_back(val);
+ }
+ }
+ }
+ }
auto isOnlyStars =
[&](const std::list<Fortran::parser::CUFKernelDoConstruct::StarOrExpr>
@@ -2771,8 +2800,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
}
- auto op = builder->create<cuf::KernelOp>(loc, gridValues, blockValues,
- streamValue, lbs, ubs, steps, n);
+ auto op = builder->create<cuf::KernelOp>(
+ loc, gridValues, blockValues, streamValue, lbs, ubs, steps, n,
+ mlir::ValueRange(reduceOperands), builder->getArrayAttr(reduceAttrs));
builder->createBlock(&op.getRegion(), op.getRegion().end(), ivTypes,
ivLocs);
mlir::Block &b = op.getRegion().back();
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 2c0c4c2cfae34..a807e21def27a 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -227,7 +227,9 @@ mlir::LogicalResult cuf::KernelOp::verify() {
getLowerbound().size() != getStep().size())
return emitOpError(
"expect same number of values in lowerbound, upperbound and step");
-
+ if (getReduceOperands().size() != getReduceAttrs()->size())
+ return emitOpError("expect same number of values in reduce operands and "
+ "reduce attributes");
return mlir::success();
}
diff --git a/flang/test/Lower/cuf_kernel_do_reduction.f90 b/flang/test/Lower/cuf_kernel_do_reduction.f90
new file mode 100644
index 0000000000000..7088c1df6cae3
--- /dev/null
+++ b/flang/test/Lower/cuf_kernel_do_reduction.f90
@@ -0,0 +1,37 @@
+! Test CUDA Fortran kernel do reduction
+! RUN: bbc -emit-fir -fcuda -o - %s | FileCheck %s
+
+module mod1
+contains
+ subroutine host_sub()
+ integer, parameter :: asize = 4
+ integer, device :: adev(asize)
+ integer :: ahost(asize)
+ integer :: q
+ integer, device :: add_reduce_var
+ integer, device :: mul_reduce_var
+ ! CHECK: %[[VAL_0:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEadd_reduce_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ ! CHECK: %[[VAL_1:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEmul_reduce_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ do i = 1, asize
+ ahost(i) = i
+ enddo
+ adev = ahost
+ add_reduce_var = 0.0
+ mul_reduce_var = 1.0
+ ! CHECK: } reduce_oprnds %[[VAL_0:.*]], %[[VAL_1:.*]] : !fir.ref<i32>, !fir.ref<i32> {reduceAttrs = [#fir.reduce_attr<add>, #fir.reduce_attr<multiply>]}
+ !$cuf kernel do <<< *, * >>> reduce(+:add_reduce_var) reduce(*:mul_reduce_var)
+ do i = 1, asize
+ add_reduce_var = add_reduce_var + adev(i)
+ mul_reduce_var = mul_reduce_var * adev(i)
+ end do
+ q = rsum
+ ahost = adev
+ print *, q
+ end
+end
+
+program test
+ use mod1
+ implicit none
+ call host_sub()
+end program test
\ No newline at end of file
>From 77ca88f4bdcd9fa2d7279cf7aef0b7f17fe6f9de Mon Sep 17 00:00:00 2001
From: Iman Hosseini <hosseini.iman at yahoo.com>
Date: Wed, 12 Jun 2024 01:48:28 +0100
Subject: [PATCH 2/4] Update
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
---
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 5c27b2e7f2938..0418115bacddf 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -263,7 +263,7 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
( `,` `stream` `=` $stream^ )? `>` `>` `>`
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
$upperbound, type($upperbound), $step, type($step))
- `reduce_oprnds` $reduceOperands `:` type($reduceOperands)
+ `reduce` `(` $reduceOperands `:` type($reduceOperands) `)`
attr-dict
}];
>From 66d96aaaa76dd92dd466f020560ac831099d9587 Mon Sep 17 00:00:00 2001
From: Iman Hosseini <imanh at nvidia.com>
Date: Tue, 11 Jun 2024 18:24:52 -0700
Subject: [PATCH 3/4] Applying comments: change format. Add attr verification.
---
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 15 ++++++++++++---
flang/test/Lower/cuf_kernel_do_reduction.f90 | 2 +-
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index a807e21def27a..d32439c090779 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -14,6 +14,7 @@
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
@@ -227,9 +228,17 @@ mlir::LogicalResult cuf::KernelOp::verify() {
getLowerbound().size() != getStep().size())
return emitOpError(
"expect same number of values in lowerbound, upperbound and step");
- if (getReduceOperands().size() != getReduceAttrs()->size())
- return emitOpError("expect same number of values in reduce operands and "
- "reduce attributes");
+ auto reduceAttrs = getReduceAttrs();
+ std::size_t reduceAttrsSize = reduceAttrs ? reduceAttrs->size() : 0;
+ if (getReduceOperands().size() != reduceAttrsSize)
+ return emitOpError("expect same number of values in reduce operands and "
+ "reduce attributes");
+ if (reduceAttrs) {
+ for (const auto &attr : reduceAttrs.value()) {
+ if (!mlir::isa<fir::ReduceAttr>(attr))
+ return emitOpError("expect reduce attributes to be ReduceAttr");
+ }
+ }
return mlir::success();
}
diff --git a/flang/test/Lower/cuf_kernel_do_reduction.f90 b/flang/test/Lower/cuf_kernel_do_reduction.f90
index 7088c1df6cae3..c0dddfe06031c 100644
--- a/flang/test/Lower/cuf_kernel_do_reduction.f90
+++ b/flang/test/Lower/cuf_kernel_do_reduction.f90
@@ -18,7 +18,7 @@ subroutine host_sub()
adev = ahost
add_reduce_var = 0.0
mul_reduce_var = 1.0
- ! CHECK: } reduce_oprnds %[[VAL_0:.*]], %[[VAL_1:.*]] : !fir.ref<i32>, !fir.ref<i32> {reduceAttrs = [#fir.reduce_attr<add>, #fir.reduce_attr<multiply>]}
+ ! CHECK: } reduce(%[[VAL_0:.*]], %[[VAL_1:.*]] : !fir.ref<i32>, !fir.ref<i32>) {reduceAttrs = [#fir.reduce_attr<add>, #fir.reduce_attr<multiply>]}
!$cuf kernel do <<< *, * >>> reduce(+:add_reduce_var) reduce(*:mul_reduce_var)
do i = 1, asize
add_reduce_var = add_reduce_var + adev(i)
>From 6f66e354387074c98abdc0e007172832ae6edc3b Mon Sep 17 00:00:00 2001
From: Iman Hosseini <imanh at nvidia.com>
Date: Tue, 11 Jun 2024 18:30:32 -0700
Subject: [PATCH 4/4] clang-format
---
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index d32439c090779..00b706fec903f 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -13,8 +13,8 @@
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
-#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/FIRAttr.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinOps.h"
@@ -231,13 +231,13 @@ mlir::LogicalResult cuf::KernelOp::verify() {
auto reduceAttrs = getReduceAttrs();
std::size_t reduceAttrsSize = reduceAttrs ? reduceAttrs->size() : 0;
if (getReduceOperands().size() != reduceAttrsSize)
- return emitOpError("expect same number of values in reduce operands and "
- "reduce attributes");
+ return emitOpError("expect same number of values in reduce operands and "
+ "reduce attributes");
if (reduceAttrs) {
- for (const auto &attr : reduceAttrs.value()) {
- if (!mlir::isa<fir::ReduceAttr>(attr))
- return emitOpError("expect reduce attributes to be ReduceAttr");
- }
+ for (const auto &attr : reduceAttrs.value()) {
+ if (!mlir::isa<fir::ReduceAttr>(attr))
+ return emitOpError("expect reduce attributes to be ReduceAttr");
+ }
}
return mlir::success();
}
More information about the flang-commits
mailing list