[flang-commits] [flang] 7665d3d - [flang] Add reductions for CUF Kernels: Lowering (#95184)
via flang-commits
flang-commits at lists.llvm.org
Wed Jun 12 11:18:45 PDT 2024
Author: Iman Hosseini
Date: 2024-06-12T19:18:41+01:00
New Revision: 7665d3d90da7f32e56cb57eb192dc8f189730686
URL: https://github.com/llvm/llvm-project/commit/7665d3d90da7f32e56cb57eb192dc8f189730686
DIFF: https://github.com/llvm/llvm-project/commit/7665d3d90da7f32e56cb57eb192dc8f189730686.diff
LOG: [flang] Add reductions for CUF Kernels: Lowering (#95184)
* Add reduceOperands and reduceAttrs to cuf's KernelOp.
* Parsing is already working and the tree has the info: here I make the
Bridge emit the updated KernelOp with reduction information added.
* Check in the verifier that |reduceAttrs| = |reduceOperands| and that every reduce attribute is a ReduceAttr
* Add a test
@clementval @vzakhari
---------
Co-authored-by: Iman Hosseini <imanh at nvidia.com>
Co-authored-by: Valentin Clement (バレンタイン クレメン) <clementval at gmail.com>
Added:
flang/test/Lower/CUDA/cuda-kernel-do-reduction.cuf
Modified:
flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
flang/lib/Lower/Bridge.cpp
flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
index 37b8da0181955..b6e08d32a6ac2 100644
--- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
+++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td
@@ -17,6 +17,7 @@
include "flang/Optimizer/Dialect/CUF/CUFDialect.td"
include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td"
include "flang/Optimizer/Dialect/FIRTypes.td"
+include "flang/Optimizer/Dialect/FIRAttr.td"
include "mlir/Interfaces/LoopLikeInterface.td"
include "mlir/IR/BuiltinAttributes.td"
@@ -249,7 +250,9 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
Variadic<Index>:$lowerbound,
Variadic<Index>:$upperbound,
Variadic<Index>:$step,
- OptionalAttr<I64Attr>:$n
+ OptionalAttr<I64Attr>:$n,
+ Variadic<AnyType>:$reduceOperands,
+ OptionalAttr<ArrayAttr>:$reduceAttrs
);
let regions = (region AnyRegion:$region);
@@ -258,11 +261,29 @@ def cuf_KernelOp : cuf_Op<"kernel", [AttrSizedOperandSegments,
`<` `<` `<` custom<CUFKernelValues>($grid, type($grid)) `,`
custom<CUFKernelValues>($block, type($block))
( `,` `stream` `=` $stream^ )? `>` `>` `>`
+ ( `reduce` `(` $reduceOperands^ `:` type($reduceOperands) `:` $reduceAttrs `)` )?
custom<CUFKernelLoopControl>($region, $lowerbound, type($lowerbound),
$upperbound, type($upperbound), $step, type($step))
attr-dict
}];
+  let extraClassDeclaration = [{
+    /// Return the number of operands held by the variadic operand segment
+    /// `idx`, read from the operand segment sizes attribute. `idx` is not
+    /// range-checked here; callers must pass a valid segment index.
+    unsigned getNumOperands(unsigned idx) {
+      auto segments = (*this)->getAttrOfType<mlir::DenseI32ArrayAttr>(
+        getOperandSegmentSizeAttr());
+      return static_cast<unsigned>(segments[idx]);
+    }
+    /// Return the number of reduction operands.
+    /// Uses the generated `getReduceOperands()` accessor instead of a
+    /// hard-coded segment index, so it stays correct if operand groups are
+    /// added or reordered.
+    /// NOTE(review): the builder call passes seven operand groups (grid,
+    /// block, stream, lowerbound, upperbound, step, reduceOperands), which
+    /// would make the reduce segment index 6, not 7 - confirm against the
+    /// full argument list.
+    unsigned getNumReduceOperands() {
+      return static_cast<unsigned>(getReduceOperands().size());
+    }
+    /// Does the operation hold operands for reduction variables
+    bool hasReduceOperands() {
+      return getNumReduceOperands() > 0;
+    }
+  }];
+
let hasVerifier = 1;
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 9ecbbc73dce07..4dd0b7eb2a05f 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2675,6 +2675,35 @@ class FirConverter : public Fortran::lower::AbstractConverter {
std::get<2>(dir.t);
const std::optional<Fortran::parser::ScalarIntExpr> &stream =
std::get<3>(dir.t);
+ const std::list<Fortran::parser::CUFReduction> &cufreds =
+ std::get<4>(dir.t);
+
+ llvm::SmallVector<mlir::Value> reduceOperands;
+ llvm::SmallVector<mlir::Attribute> reduceAttrs;
+
+      // Walk every reduction clause of the directive and record, for each
+      // variable it names, the reduction attribute and the variable address.
+      // NOTE(review): the attribute is appended for every listed variable,
+      // while the operand is appended only when the variable is a designator
+      // that resolves to a data-ref name. If that lookup ever fails the two
+      // lists differ in length and the KernelOp verifier rejects the op -
+      // confirm whether such variables can reach this point.
+      for (const Fortran::parser::CUFReduction &reduction : cufreds) {
+        const auto &reductionOperator =
+            std::get<Fortran::parser::ReductionOperator>(reduction.t);
+        fir::ReduceOperationEnum reduceKind =
+            getReduceOperationEnum(reductionOperator);
+        for (const Fortran::parser::Scalar<Fortran::parser::Variable> &entry :
+             std::get<1>(reduction.t)) {
+          reduceAttrs.push_back(
+              fir::ReduceAttr::get(builder->getContext(), reduceKind));
+          // Only variables written as designators are considered.
+          const auto *designatorPtr = std::get_if<
+              Fortran::common::Indirection<Fortran::parser::Designator>>(
+              &entry.thing.u);
+          if (!designatorPtr)
+            continue;
+          const auto *name = Fortran::semantics::getDesignatorNameIfDataRef(
+              designatorPtr->value());
+          if (!name)
+            continue;
+          reduceOperands.push_back(getSymbolAddress(*name->symbol));
+        }
+      }
auto isOnlyStars =
[&](const std::list<Fortran::parser::CUFKernelDoConstruct::StarOrExpr>
@@ -2777,8 +2806,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
loopEval = &*std::next(loopEval->getNestedEvaluations().begin());
}
- auto op = builder->create<cuf::KernelOp>(loc, gridValues, blockValues,
- streamValue, lbs, ubs, steps, n);
+ auto op = builder->create<cuf::KernelOp>(
+ loc, gridValues, blockValues, streamValue, lbs, ubs, steps, n,
+ mlir::ValueRange(reduceOperands), builder->getArrayAttr(reduceAttrs));
builder->createBlock(&op.getRegion(), op.getRegion().end(), ivTypes,
ivLocs);
mlir::Block &b = op.getRegion().back();
diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
index 2c0c4c2cfae34..00b706fec903f 100644
--- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
+++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp
@@ -13,6 +13,7 @@
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.h"
#include "flang/Optimizer/Dialect/CUF/CUFDialect.h"
+#include "flang/Optimizer/Dialect/FIRAttr.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/BuiltinAttributes.h"
@@ -227,7 +228,17 @@ mlir::LogicalResult cuf::KernelOp::verify() {
getLowerbound().size() != getStep().size())
return emitOpError(
"expect same number of values in lowerbound, upperbound and step");
-
+  // Reductions: the attribute array is optional, so a missing array counts as
+  // zero entries when compared against the number of reduce operands.
+  const auto attrs = getReduceAttrs();
+  const std::size_t numAttrs = attrs ? attrs->size() : 0;
+  if (numAttrs != getReduceOperands().size())
+    return emitOpError("expect same number of values in reduce operands and "
+                       "reduce attributes");
+  // Every entry of the array must be a fir::ReduceAttr.
+  if (attrs)
+    for (mlir::Attribute attr : *attrs)
+      if (!mlir::isa<fir::ReduceAttr>(attr))
+        return emitOpError("expect reduce attributes to be ReduceAttr");
return mlir::success();
}
diff --git a/flang/test/Lower/CUDA/cuda-kernel-do-reduction.cuf b/flang/test/Lower/CUDA/cuda-kernel-do-reduction.cuf
new file mode 100644
index 0000000000000..94a269e9aa359
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-kernel-do-reduction.cuf
@@ -0,0 +1,37 @@
+! Test CUDA Fortran kernel do reduction
+! RUN: bbc -emit-fir -fcuda -o - %s | FileCheck %s
+
+module mod1
+contains
+  ! Host routine that fills a device array and runs a kernel-do loop with one
+  ! additive and one multiplicative reduction over device scalars.
+  subroutine host_sub()
+    integer, parameter :: asize = 4
+    integer, device :: adev(asize)
+    integer :: ahost(asize)
+    integer :: q
+    integer, device :: add_reduce_var
+    integer, device :: mul_reduce_var
+ ! CHECK: %[[VAL_0:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEadd_reduce_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ ! CHECK: %[[VAL_1:.*]] = fir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEmul_reduce_var"} : (!fir.ref<i32>) -> !fir.ref<i32>
+    do i = 1, asize
+      ahost(i) = i
+    enddo
+    adev = ahost
+    ! The reduction variables are integers, so use integer identity values.
+    add_reduce_var = 0
+    mul_reduce_var = 1
+ ! CHECK: {{.*}} reduce(%[[VAL_0:.*]], %[[VAL_1:.*]] : !fir.ref<i32>, !fir.ref<i32> : [#fir.reduce_attr<add>, #fir.reduce_attr<multiply>]) {{.*}}
+    !$cuf kernel do <<< *, * >>> reduce(+:add_reduce_var) reduce(*:mul_reduce_var)
+    do i = 1, asize
+      add_reduce_var = add_reduce_var + adev(i)
+      mul_reduce_var = mul_reduce_var * adev(i)
+    end do
+    ! Read back the additive reduction result (previously an undeclared,
+    ! never-assigned variable was read here).
+    q = add_reduce_var
+    ahost = adev
+    print *, q
+  end
+end
+
+! Driver program: calls host_sub from module mod1.
+program test
+ use mod1
+ implicit none
+ call host_sub()
+end program test
More information about the flang-commits
mailing list