[flang-commits] [flang] 3c3fb6a - [flang][OpenACC] Support DO CONCURRENT locality specs inside ACC constructs (#190406)
via flang-commits
flang-commits at lists.llvm.org
Wed Apr 8 11:01:04 PDT 2026
Author: khaki3
Date: 2026-04-08T11:00:58-07:00
New Revision: 3c3fb6ab30192ac9a5c1923927fa10ec78c5ea29
URL: https://github.com/llvm/llvm-project/commit/3c3fb6ab30192ac9a5c1923927fa10ec78c5ea29
DIFF: https://github.com/llvm/llvm-project/commit/3c3fb6ab30192ac9a5c1923927fa10ec78c5ea29.diff
LOG: [flang][OpenACC] Support DO CONCURRENT locality specs inside ACC constructs (#190406)
- Lower DO CONCURRENT locality specs (REDUCE, LOCAL, LOCAL_INIT) that
appear inside OpenACC compute constructs and combined directives.
- Previously, any locality spec on DO CONCURRENT inside ACC hit a `TODO`
and aborted. This resolves that limitation.
- Per OpenACC 2.17.2, DO CONCURRENT without a loop construct in a
kernels construct is treated as `loop auto`; in a parallel construct it
is treated as `loop independent`. Both cases are covered.
## Mapping
| Locality Spec | ACC Operation |
|---|---|
| `REDUCE(op:vars)` | `acc.reduction` with reduction recipe |
| `LOCAL(vars)` | `acc.private` with privatization recipe |
| `LOCAL_INIT(vars)` | `acc.firstprivate` with firstprivatization recipe |
| `SHARED` / `DEFAULT(NONE)` | No-op (variables already accessible) |
## Details
- Adds `processDoConcurrentLocalitySpecs` to convert locality specs into
the corresponding ACC data-entry operations and recipes.
- Handles the `HostAssoc` symbol indirection that DO CONCURRENT creates
for LOCAL/LOCAL_INIT variables: after `remapDataOperandSymbols` binds
the ultimate symbol inside the compute region, the binding is copied to
the `HostAssoc` symbol so that body references resolve correctly.
- Separates `firstprivateOperands` into its own operand segment on
`acc.loop` (previously hardcoded empty).
- Fixes pre-existing build errors where `createOrGetReductionRecipe`,
`createOrGetPrivateRecipe`, and `createOrGetFirstprivateRecipe` were
called with `mlir::Type` instead of `mlir::Value` after an upstream API
change. The original variable is passed (not the acc op result) to
preserve correct recipe names.
- New FileCheck test
`flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90` with 12 cases
covering REDUCE, LOCAL, LOCAL_INIT, mixed locality, and interop with
explicit ACC clauses — across kernels regions, parallel regions, and
combined directives.
Added:
flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90
Modified:
flang/lib/Lower/OpenACC.cpp
Removed:
################################################################################
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 2154f38dca568..5a7fe899b372f 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1553,6 +1553,125 @@ static void visitLoopControl(
}
}
+// Process DO CONCURRENT locality specs (REDUCE, LOCAL, LOCAL_INIT, SHARED)
+// that appear inside an ACC construct, converting them to the corresponding
+// ACC clauses. localSymPairs tracks (HostAssoc, ultimate) symbol pairs for
+// LOCAL/LOCAL_INIT so that the HostAssoc symbol can be bound after region
+// creation.
+static void processDoConcurrentLocalitySpecs(
+ Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+ fir::FirOpBuilder &builder,
+ const std::list<Fortran::parser::LocalitySpec> &localityList,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<mlir::Value> &firstprivateOperands,
+ llvm::SmallVector<mlir::Value> &reductionOperands, AccDataMap &dataMap,
+ llvm::SmallVector<
+ std::pair<Fortran::semantics::SymbolRef, Fortran::semantics::SymbolRef>>
+ &localSymPairs) {
+ for (const Fortran::parser::LocalitySpec &locSpec : localityList) {
+ if (const auto *reduceSpec =
+ std::get_if<Fortran::parser::LocalitySpec::Reduce>(&locSpec.u)) {
+ const auto &reduceOp =
+ std::get<Fortran::parser::ReductionOperator>(reduceSpec->t);
+ const auto &names =
+ std::get<std::list<Fortran::parser::Name>>(reduceSpec->t);
+ for (const Fortran::parser::Name &name : names) {
+ const Fortran::semantics::Symbol &sym = name.symbol->GetUltimate();
+ mlir::Value symAddr = converter.getSymbolAddress(sym);
+ assert(symAddr && "expected symbol to have an address");
+
+ mlir::Type reductionTy = fir::unwrapRefType(symAddr.getType());
+ if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(reductionTy))
+ reductionTy = seqTy.getEleTy();
+
+ if (!isSupportedReductionType(reductionTy))
+ TODO(loc, "DO CONCURRENT reduction with unsupported type");
+
+ mlir::acc::ReductionOperator mlirOp =
+ getReductionOperator(reduceOp, reductionTy, converter);
+
+ llvm::SmallVector<mlir::Value> bounds;
+ std::stringstream asFortran;
+ asFortran << Fortran::lower::mangle::demangleName(
+ toStringRef(sym.name()));
+ auto op = createDataEntryOp<mlir::acc::ReductionOp>(
+ builder, loc, symAddr, asFortran, bounds, /*structured=*/true,
+ /*implicit=*/false, mlir::acc::DataClause::acc_reduction,
+ symAddr.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::Attribute fastMathAttr;
+ if (builder.getFastMathFlags() != mlir::arith::FastMathFlags::none)
+ fastMathAttr = mlir::arith::FastMathFlagsAttr::get(
+ builder.getContext(), builder.getFastMathFlags());
+ mlir::SymbolRefAttr recipe = fir::acc::createOrGetReductionRecipe(
+ builder, loc, symAddr, mlirOp, bounds, fastMathAttr);
+ op.setRecipeAttr(recipe);
+ reductionOperands.push_back(op.getAccVar());
+ dataMap.emplaceSymbol(op.getAccVar(),
+ Fortran::semantics::SymbolRef(sym));
+ }
+ } else if (const auto *localSpec =
+ std::get_if<Fortran::parser::LocalitySpec::Local>(
+ &locSpec.u)) {
+ for (const Fortran::parser::Name &name : localSpec->v) {
+ const Fortran::semantics::Symbol &ultimateSym =
+ name.symbol->GetUltimate();
+ mlir::Value symAddr = converter.getSymbolAddress(ultimateSym);
+ assert(symAddr && "expected symbol to have an address");
+
+ llvm::SmallVector<mlir::Value> bounds;
+ std::stringstream asFortran;
+ asFortran << Fortran::lower::mangle::demangleName(
+ toStringRef(ultimateSym.name()));
+ auto op = createDataEntryOp<mlir::acc::PrivateOp>(
+ builder, loc, symAddr, asFortran, bounds, /*structured=*/true,
+ /*implicit=*/false, mlir::acc::DataClause::acc_private,
+ symAddr.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::SymbolRefAttr recipe =
+ fir::acc::createOrGetPrivateRecipe(builder, loc, symAddr, bounds);
+ op.setRecipeAttr(recipe);
+ privateOperands.push_back(op.getAccVar());
+ dataMap.emplaceSymbol(op.getAccVar(),
+ Fortran::semantics::SymbolRef(ultimateSym));
+ if (name.symbol->HasLocalLocality())
+ localSymPairs.emplace_back(
+ Fortran::semantics::SymbolRef(*name.symbol),
+ Fortran::semantics::SymbolRef(ultimateSym));
+ }
+ } else if (const auto *localInitSpec =
+ std::get_if<Fortran::parser::LocalitySpec::LocalInit>(
+ &locSpec.u)) {
+ for (const Fortran::parser::Name &name : localInitSpec->v) {
+ const Fortran::semantics::Symbol &ultimateSym =
+ name.symbol->GetUltimate();
+ mlir::Value symAddr = converter.getSymbolAddress(ultimateSym);
+ assert(symAddr && "expected symbol to have an address");
+
+ llvm::SmallVector<mlir::Value> bounds;
+ std::stringstream asFortran;
+ asFortran << Fortran::lower::mangle::demangleName(
+ toStringRef(ultimateSym.name()));
+ auto op = createDataEntryOp<mlir::acc::FirstprivateOp>(
+ builder, loc, symAddr, asFortran, bounds, /*structured=*/true,
+ /*implicit=*/false, mlir::acc::DataClause::acc_firstprivate,
+ symAddr.getType(),
+ /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ mlir::SymbolRefAttr recipe = fir::acc::createOrGetFirstprivateRecipe(
+ builder, loc, symAddr, bounds);
+ op.setRecipeAttr(recipe);
+ firstprivateOperands.push_back(op.getAccVar());
+ dataMap.emplaceSymbol(op.getAccVar(),
+ Fortran::semantics::SymbolRef(ultimateSym));
+ if (name.symbol->HasLocalLocality())
+ localSymPairs.emplace_back(
+ Fortran::semantics::SymbolRef(*name.symbol),
+ Fortran::semantics::SymbolRef(ultimateSym));
+ }
+ }
+ }
+}
+
// Extract loop bounds, steps, induction variables, and privatization info
// for both DO CONCURRENT and regular do loops
static void processDoLoopBounds(
@@ -1570,7 +1689,12 @@ static void processDoLoopBounds(
llvm::SmallVector<mlir::Type> &ivTypes,
llvm::SmallVector<mlir::Location> &ivLocs,
llvm::SmallVector<bool> &inclusiveBounds,
- llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
+ llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess,
+ llvm::SmallVector<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Value> &firstprivateOperands, AccDataMap &dataMap,
+ llvm::SmallVector<
+ std::pair<Fortran::semantics::SymbolRef, Fortran::semantics::SymbolRef>>
+ &localSymPairs) {
assert(loopsToProcess > 0 && "expect at least one loop");
locs.push_back(currentLocation); // Location of the directive
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
@@ -1582,9 +1706,13 @@ static void processDoLoopBounds(
&*outerDoConstruct.GetLoopControl();
const auto &concurrent =
std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
- if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
- .empty())
- TODO(currentLocation, "DO CONCURRENT with locality spec inside ACC");
+
+ const auto &localityList =
+ std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t);
+ if (!localityList.empty())
+ processDoConcurrentLocalitySpecs(
+ converter, currentLocation, builder, localityList, privateOperands,
+ firstprivateOperands, reductionOperands, dataMap, localSymPairs);
const auto &concurrentHeader =
std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
@@ -1832,15 +1960,20 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
llvm::SmallVector<bool> inclusiveBounds;
llvm::SmallVector<mlir::Location> locs;
llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps;
+ llvm::SmallVector<mlir::Value> firstprivateOperands;
+ llvm::SmallVector<
+ std::pair<Fortran::semantics::SymbolRef, Fortran::semantics::SymbolRef>>
+ localSymPairs;
// Look at the do/do concurrent loops to extract bounds information unless
// this loop is lowered in an unstructured fashion, in which case bounds are
// not represented on acc.loop and explicit control flow is used inside body.
if (!eval.lowerAsUnstructured()) {
- processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
- outerDoConstruct, eval, lowerbounds, upperbounds, steps,
- privateOperands, ivPrivate, ivTypes, ivLocs,
- inclusiveBounds, locs, loopsToProcess);
+ processDoLoopBounds(
+ converter, currentLocation, stmtCtx, builder, outerDoConstruct, eval,
+ lowerbounds, upperbounds, steps, privateOperands, ivPrivate, ivTypes,
+ ivLocs, inclusiveBounds, locs, loopsToProcess, reductionOperands,
+ firstprivateOperands, dataMap, localSymPairs);
} else {
// When the loop contains early exits, privatize induction variables, but do
// not create acc.loop bounds. The control flow of the loop will be
@@ -1860,9 +1993,7 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
addOperands(operands, operandSegments, tileOperands);
addOperands(operands, operandSegments, cacheOperands);
addOperands(operands, operandSegments, privateOperands);
- // fill empty firstprivate operands since they are not permitted
- // from OpenACC language perspective.
- addOperands(operands, operandSegments, {});
+ addOperands(operands, operandSegments, firstprivateOperands);
addOperands(operands, operandSegments, reductionOperands);
auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
@@ -1877,6 +2008,13 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
// Remap symbols from data clauses to use data operation results
dataMap.remapDataOperandSymbols(converter, builder, loopOp.getRegion());
+ // For DO CONCURRENT LOCAL/LOCAL_INIT variables, the body references the
+ // HostAssoc symbol (with LocalityLocal flag), not the ultimate symbol.
+ // Copy the binding from the ultimate to the HostAssoc symbol so lookups
+ // inside the region find the privatized variable.
+ for (auto &[hostAssocSym, ultimateSym] : localSymPairs)
+ converter.copySymbolBinding(ultimateSym, hostAssocSym);
+
if (!eval.lowerAsUnstructured()) {
for (auto [arg, iv] :
llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),
diff --git a/flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90 b/flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90
new file mode 100644
index 0000000000000..a93f3939c36c1
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90
@@ -0,0 +1,296 @@
+! Test lowering of DO CONCURRENT with locality specs inside ACC constructs.
+! Per OpenACC 2.17.2:
+! - DO CONCURRENT without a loop construct in a kernels construct is
+! treated as if annotated with loop auto.
+! - DO CONCURRENT in a parallel construct or accelerator routine is
+! treated as if annotated with loop independent.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! ---------------------------------------------------------------------------
+! REDUCE locality spec
+! ---------------------------------------------------------------------------
+
+! Scalar reduction in kernels region (no explicit loop → auto)
+! CHECK-LABEL: func.func @_QPreduce_kernels_region
+subroutine reduce_kernels_region()
+ real :: a(10), s
+ integer :: i
+ s = 0.
+ !$acc kernels
+ do concurrent(i=1:10) reduce(+:s)
+ s = s + a(i)
+ end do
+ !$acc end kernels
+end subroutine
+
+! CHECK: acc.kernels {
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! Scalar reduction in parallel region (no explicit loop → independent)
+! CHECK-LABEL: func.func @_QPreduce_parallel_region
+subroutine reduce_parallel_region()
+ real :: a(10), s
+ integer :: i
+ s = 0.
+ !$acc parallel
+ do concurrent(i=1:10) reduce(+:s)
+ s = s + a(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+! CHECK: acc.parallel {
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! Combined kernels loop with reduce (auto)
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop
+subroutine reduce_kernels_loop()
+ real :: a(16,16), b(16,16), s
+ integer :: i, j
+ s = 0.
+ !$acc kernels loop
+ do concurrent(i=1:16, j=1:16) reduce(+:s)
+ b(i,j) = a(i,j)**2
+ s = s + b(i,j)
+ end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true>}
+
+! Combined parallel loop with reduce (independent)
+! CHECK-LABEL: func.func @_QPreduce_parallel_loop
+subroutine reduce_parallel_loop()
+ real :: a(10), s
+ integer :: i
+ s = 0.
+ !$acc parallel loop
+ do concurrent(i=1:10) reduce(+:s)
+ s = s + a(i)
+ end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(parallel) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! Multiple reductions (add + multiply)
+! CHECK-LABEL: func.func @_QPmulti_reduce
+subroutine multi_reduce()
+ real :: a(10), s, p
+ integer :: i
+ s = 0.
+ p = 1.
+ !$acc parallel loop
+ do concurrent(i=1:10) reduce(+:s) reduce(*:p)
+ s = s + a(i)
+ p = p * a(i)
+ end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_mul{{.*}}) -> !fir.ref<f32> {name = "p"}
+! CHECK: acc.loop {{.*}}reduction(
+
+! Max/min reductions
+! CHECK-LABEL: func.func @_QPreduce_max_min
+subroutine reduce_max_min()
+ real :: a(10), mx, mn
+ integer :: i
+ mx = -huge(mx)
+ mn = huge(mn)
+ !$acc kernels loop
+ do concurrent(i=1:10) reduce(max:mx) reduce(min:mn)
+ mx = max(mx, a(i))
+ mn = min(mn, a(i))
+ end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_max{{.*}}) -> !fir.ref<f32> {name = "mx"}
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_min{{.*}}) -> !fir.ref<f32> {name = "mn"}
+! CHECK: acc.loop {{.*}}reduction(
+
+! Integer multiply reduction
+! CHECK-LABEL: func.func @_QPint_reduce
+subroutine int_reduce()
+ integer :: a(10), prod, i
+ prod = 1
+ !$acc kernels loop
+ do concurrent(i=1:10) reduce(*:prod)
+ prod = prod * a(i)
+ end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) recipe(@reduction_mul{{.*}}) -> !fir.ref<i32> {name = "prod"}
+
+! ---------------------------------------------------------------------------
+! LOCAL locality spec → acc.private
+! ---------------------------------------------------------------------------
+
+! LOCAL in kernels region (auto)
+! CHECK-LABEL: func.func @_QPlocal_kernels_region
+subroutine local_kernels_region()
+ real :: a(10), tmp
+ integer :: i
+ !$acc kernels
+ do concurrent(i=1:10) local(tmp)
+ tmp = a(i) * 2.0
+ a(i) = tmp + 1.0
+ end do
+ !$acc end kernels
+end subroutine
+
+! CHECK: acc.kernels {
+! CHECK: %[[PRIV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "tmp"}
+! CHECK: acc.loop private(%[[PRIV]],
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! LOCAL in parallel region (independent)
+! CHECK-LABEL: func.func @_QPlocal_parallel_region
+subroutine local_parallel_region()
+ real :: a(10), tmp
+ integer :: i
+ !$acc parallel
+ do concurrent(i=1:10) local(tmp)
+ tmp = a(i) * 2.0
+ a(i) = tmp + 1.0
+ end do
+ !$acc end parallel
+end subroutine
+
+! CHECK: acc.parallel {
+! CHECK: %[[PRIV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "tmp"}
+! CHECK: acc.loop private(%[[PRIV]],
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! ---------------------------------------------------------------------------
+! LOCAL_INIT locality spec → acc.firstprivate
+! ---------------------------------------------------------------------------
+
+! LOCAL_INIT in kernels region (auto)
+! CHECK-LABEL: func.func @_QPlocal_init_kernels_region
+subroutine local_init_kernels_region()
+ real :: a(10), scale
+ integer :: i
+ scale = 2.0
+ !$acc kernels
+ do concurrent(i=1:10) local_init(scale)
+ a(i) = a(i) * scale
+ end do
+ !$acc end kernels
+end subroutine
+
+! CHECK: acc.kernels {
+! CHECK: %[[FP:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<f32>) recipe(@firstprivatization_ref_f32) -> !fir.ref<f32> {name = "scale"}
+! CHECK: acc.loop {{.*}}firstprivate(%[[FP]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! ---------------------------------------------------------------------------
+! Mixed locality specs: REDUCE + LOCAL
+! ---------------------------------------------------------------------------
+
+! CHECK-LABEL: func.func @_QPmixed_locality
+subroutine mixed_locality()
+ real :: a(10), s, tmp
+ integer :: i
+ s = 0.
+ !$acc parallel loop
+ do concurrent(i=1:10) reduce(+:s) local(tmp)
+ tmp = a(i) * a(i)
+ s = s + tmp
+ end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK-DAG: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK-DAG: %[[PRIV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "tmp"}
+! CHECK: acc.loop {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! ---------------------------------------------------------------------------
+! Reduce combined with explicit ACC reduction clause
+! ---------------------------------------------------------------------------
+
+! CHECK-LABEL: func.func @_QPreduce_with_acc_clause
+subroutine reduce_with_acc_clause()
+ real :: a(10), s1, s2
+ integer :: i
+ s1 = 0.
+ s2 = 0.
+ !$acc parallel loop reduction(+:s1)
+ do concurrent(i=1:10) reduce(+:s2)
+ s1 = s1 + a(i)
+ s2 = s2 + a(i) * 2.0
+ end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s1"}
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s2"}
+! CHECK: acc.loop {{.*}}reduction(
+
+! ---------------------------------------------------------------------------
+! Explicit loop par mode clauses: auto, seq, independent
+! ---------------------------------------------------------------------------
+
+! kernels loop auto with reduce
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop_auto
+subroutine reduce_kernels_loop_auto()
+ real :: a(10), s
+ integer :: i
+ s = 0.
+ !$acc kernels loop auto
+ do concurrent(i=1:10) reduce(+:s)
+ s = s + a(i)
+ end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! kernels loop seq with reduce
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop_seq
+subroutine reduce_kernels_loop_seq()
+ real :: a(10), s
+ integer :: i
+ s = 0.
+ !$acc kernels loop seq
+ do concurrent(i=1:10) reduce(+:s)
+ s = s + a(i)
+ end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]}
+
+! kernels loop independent with reduce
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop_independent
+subroutine reduce_kernels_loop_independent()
+ real :: a(10), s
+ integer :: i
+ s = 0.
+ !$acc kernels loop independent
+ do concurrent(i=1:10) reduce(+:s)
+ s = s + a(i)
+ end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
More information about the flang-commits
mailing list