[flang-commits] [flang] [flang][openacc] Lower DO CONCURRENT with acc loop (PR #79223)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Jan 23 17:02:38 PST 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/79223
>From f77ef7890586c84bff05e5ba68e84ee15b72847c Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 23 Jan 2024 13:31:52 -0800
Subject: [PATCH 1/2] [flang][openacc] Lower DO CONCURRENT with acc loop
Lower basic DO CONCURRENT with acc loop construct. The DO CONCURRENT
is lowered to an acc.loop operation.
This does not currently cover the DO CONCURRENT with locality specs.
---
flang/lib/Lower/OpenACC.cpp | 170 +++++++++++++++++---------
flang/test/Lower/OpenACC/acc-loop.f90 | 20 +++
2 files changed, 135 insertions(+), 55 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index d619d47fc235971..4f06436d38739c2 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1607,6 +1607,48 @@ mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder,
return builder.getIntegerType(ivTypeSize * 8);
}
+static void privatizeIv(Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym,
+ mlir::Location loc,
+ llvm::SmallVector<mlir::Type> &ivTypes,
+ llvm::SmallVector<mlir::Location> &ivLocs,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<mlir::Value> &ivPrivate,
+ llvm::SmallVector<mlir::Attribute> &privatizations,
+ bool isDoConcurrent = false) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+ mlir::Type ivTy = getTypeFromIvTypeSize(builder, sym);
+ ivTypes.push_back(ivTy);
+ ivLocs.push_back(loc);
+ mlir::Value ivValue = converter.getSymbolAddress(sym);
+ if (!ivValue && isDoConcurrent) {
+ // DO CONCURRENT induction variables are not mapped yet since they are local
+ // to the DO CONCURRENT scope.
+ mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(builder.getAllocaBlock());
+ ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
+ builder.restoreInsertionPoint(insPt);
+ }
+
+ std::string recipeName =
+ fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
+ Fortran::lower::privatizationRecipePrefix);
+ auto recipe = Fortran::lower::createOrGetPrivateRecipe(
+ builder, recipeName, loc, ivValue.getType());
+
+ std::stringstream asFortran;
+ auto op = createDataEntryOp<mlir::acc::PrivateOp>(
+ builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
+ mlir::acc::DataClause::acc_private, ivValue.getType());
+
+ privateOperands.push_back(op.getAccPtr());
+ ivPrivate.push_back(op.getAccPtr());
+ privatizations.push_back(mlir::SymbolRefAttr::get(builder.getContext(),
+ recipe.getSymName().str()));
+ converter.bindSymbol(sym, op.getAccPtr());
+}
+
static mlir::acc::LoopOp
createLoopOp(Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
@@ -1640,68 +1682,86 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
llvm::SmallVector<mlir::Location> ivLocs;
llvm::SmallVector<bool> inclusiveBounds;
- if (outerDoConstruct.IsDoConcurrent())
- TODO(currentLocation, "OpenACC loop with DO CONCURRENT");
-
llvm::SmallVector<mlir::Location> locs;
locs.push_back(currentLocation); // Location of the directive
-
- int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
- for (unsigned i = 0; i < collapseValue; ++i) {
- const Fortran::parser::LoopControl *loopControl;
- if (i == 0) {
- loopControl = &*outerDoConstruct.GetLoopControl();
+ bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
+ if (isDoConcurrent) {
+ const Fortran::parser::LoopControl *loopControl =
+ &*outerDoConstruct.GetLoopControl();
+ const auto &concurrent =
+ std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
+ if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
+ .empty())
+ TODO(currentLocation, "DO CONCURRENT with locality spec");
+
+ const auto &concurrentHeader =
+ std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
+ const auto &controls =
+ std::get<std::list<Fortran::parser::ConcurrentControl>>(
+ concurrentHeader.t);
+ for (const auto &control : controls) {
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
- } else {
- auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
- assert(doCons && "expect do construct");
- loopControl = &*doCons->GetLoopControl();
- locs.push_back(
- converter.genLocation(Fortran::parser::FindSourceLocation(*doCons)));
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
+ if (const auto &expr =
+ std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
+ control.t))
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+ const auto &name = std::get<Fortran::parser::Name>(control.t);
+ privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizations, isDoConcurrent);
+
+ inclusiveBounds.push_back(true);
}
+ } else {
+ int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
+ for (unsigned i = 0; i < collapseValue; ++i) {
+ const Fortran::parser::LoopControl *loopControl;
+ if (i == 0) {
+ loopControl = &*outerDoConstruct.GetLoopControl();
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(outerDoConstruct)));
+ } else {
+ auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
+ assert(doCons && "expect do construct");
+ loopControl = &*doCons->GetLoopControl();
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(*doCons)));
+ }
- const Fortran::parser::LoopControl::Bounds *bounds =
- std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
- assert(bounds && "Expected bounds on the loop construct");
- lowerbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
- upperbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
- if (bounds->step)
- steps.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
- else // If `step` is not present, assume it as `1`.
- steps.push_back(builder.createIntegerConstant(
- currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
-
- Fortran::semantics::Symbol &ivSym =
- bounds->name.thing.symbol->GetUltimate();
-
- mlir::Type ivTy = getTypeFromIvTypeSize(builder, ivSym);
- mlir::Value ivValue = converter.getSymbolAddress(ivSym);
- ivTypes.push_back(ivTy);
- ivLocs.push_back(currentLocation);
- std::string recipeName =
- fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
- Fortran::lower::privatizationRecipePrefix);
- auto recipe = Fortran::lower::createOrGetPrivateRecipe(
- builder, recipeName, currentLocation, ivValue.getType());
- std::stringstream asFortran;
- auto op = createDataEntryOp<mlir::acc::PrivateOp>(
- builder, currentLocation, ivValue, asFortran, {}, true,
- /*implicit=*/true, mlir::acc::DataClause::acc_private,
- ivValue.getType());
-
- privateOperands.push_back(op.getAccPtr());
- ivPrivate.push_back(op.getAccPtr());
- privatizations.push_back(mlir::SymbolRefAttr::get(
- builder.getContext(), recipe.getSymName().str()));
- inclusiveBounds.push_back(true);
- converter.bindSymbol(ivSym, op.getAccPtr());
- if (i < collapseValue - 1)
- crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+ const Fortran::parser::LoopControl::Bounds *bounds =
+ std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
+ assert(bounds && "Expected bounds on the loop construct");
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
+ if (bounds->step)
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+ Fortran::semantics::Symbol &ivSym =
+ bounds->name.thing.symbol->GetUltimate();
+ privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizations);
+
+ inclusiveBounds.push_back(true);
+
+ if (i < collapseValue - 1)
+ crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+ }
}
for (const Fortran::parser::AccClause &clause : accClauseList.v) {
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index 6bd4dd61bd893b4..28da9f99282ceda 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -306,3 +306,23 @@ program acc_loop
! CHECK: acc.loop gang([#acc.device_type<nvidia>, #acc.device_type<default>])
end program
+
+subroutine sub1(i, j, k)
+ integer :: i,j,k
+ integer :: a(i,j,k)
+ !$acc parallel loop
+ do concurrent (i=1:10,j=1:100,k=1:200)
+ a(i,j,k) = a(i,j,k) + 1
+ end do
+end subroutine
+
+! CHECK: func.func @_QPsub1
+! CHECK: %[[DC_K:.*]] = fir.alloca i32 {bindc_name = "k"}
+! CHECK: %[[DC_J:.*]] = fir.alloca i32 {bindc_name = "j"}
+! CHECK: %[[DC_I:.*]] = fir.alloca i32 {bindc_name = "i"}
+! CHECK: acc.parallel
+! CHECK: %[[P_I:.*]] = acc.private varPtr(%[[DC_I]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = ""}
+! CHECK: %[[P_J:.*]] = acc.private varPtr(%[[DC_J]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = ""}
+! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = ""}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[P_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[P_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[P_K]] : !fir.ref<i32>) (%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%c10{{.*}}, %c100{{.*}}, %c200{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true, true, true>}
>From 5df7f6f7c8b917fc9fc45d76fcd98c7e69e361f8 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 23 Jan 2024 17:02:26 -0800
Subject: [PATCH 2/2] Move where location of do concurrent is added
---
flang/lib/Lower/OpenACC.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 4f06436d38739c2..422803270df20d0 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1687,6 +1687,8 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
if (isDoConcurrent) {
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(outerDoConstruct)));
const Fortran::parser::LoopControl *loopControl =
&*outerDoConstruct.GetLoopControl();
const auto &concurrent =
@@ -1701,8 +1703,6 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
std::get<std::list<Fortran::parser::ConcurrentControl>>(
concurrentHeader.t);
for (const auto &control : controls) {
- locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(outerDoConstruct)));
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
More information about the flang-commits
mailing list