[flang-commits] [flang] 3c3fb6a - [flang][OpenACC] Support DO CONCURRENT locality specs inside ACC constructs (#190406)

via flang-commits flang-commits at lists.llvm.org
Wed Apr 8 11:01:04 PDT 2026


Author: khaki3
Date: 2026-04-08T11:00:58-07:00
New Revision: 3c3fb6ab30192ac9a5c1923927fa10ec78c5ea29

URL: https://github.com/llvm/llvm-project/commit/3c3fb6ab30192ac9a5c1923927fa10ec78c5ea29
DIFF: https://github.com/llvm/llvm-project/commit/3c3fb6ab30192ac9a5c1923927fa10ec78c5ea29.diff

LOG: [flang][OpenACC] Support DO CONCURRENT locality specs inside ACC constructs (#190406)

- Lower DO CONCURRENT locality specs (REDUCE, LOCAL, LOCAL\_INIT) that
appear inside OpenACC compute constructs and combined directives.
- Previously, any locality spec on DO CONCURRENT inside ACC hit a `TODO`
and aborted. This resolves that limitation.
- Per OpenACC 2.17.2, DO CONCURRENT without a loop construct in a
kernels construct is treated as `loop auto`; in a parallel construct it
is treated as `loop independent`. Both cases are covered.

## Mapping

| Locality Spec | ACC Operation |
|---|---|
| `REDUCE(op:vars)` | `acc.reduction` with reduction recipe |
| `LOCAL(vars)` | `acc.private` with privatization recipe |
| `LOCAL_INIT(vars)` | `acc.firstprivate` with firstprivatization recipe |
| `SHARED` / `DEFAULT(NONE)` | No-op (variables already accessible) |

## Details

- Adds `processDoConcurrentLocalitySpecs` to convert locality specs into
the corresponding ACC data-entry operations and recipes.
- Handles the `HostAssoc` symbol indirection that DO CONCURRENT creates
for LOCAL/LOCAL\_INIT variables: after `remapDataOperandSymbols` binds
the ultimate symbol inside the compute region, the binding is copied to
the `HostAssoc` symbol so that body references resolve correctly.
- Separates `firstprivateOperands` into its own operand segment on
`acc.loop` (previously hardcoded empty).
- Fixes pre-existing build errors where `createOrGetReductionRecipe`,
`createOrGetPrivateRecipe`, and `createOrGetFirstprivateRecipe` were
called with `mlir::Type` instead of `mlir::Value` after an upstream API
change. The original variable is passed (not the acc op result) to
preserve correct recipe names.
- New FileCheck test
`flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90` with 15 cases
covering REDUCE, LOCAL, LOCAL\_INIT, mixed locality, interop with
explicit ACC clauses, and explicit `auto`/`seq`/`independent` loop
clauses — across kernels regions, parallel regions, and combined
directives.

Added: 
    flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90

Modified: 
    flang/lib/Lower/OpenACC.cpp

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 2154f38dca568..5a7fe899b372f 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1553,6 +1553,125 @@ static void visitLoopControl(
   }
 }
 
+// Convert DO CONCURRENT locality specs that appear inside an ACC construct
+// into ACC data-entry ops: REDUCE -> acc.reduction, LOCAL -> acc.private,
+// LOCAL_INIT -> acc.firstprivate; other specs (e.g. SHARED) emit nothing.
+// localSymPairs collects (HostAssoc, ultimate) symbol pairs for
+// LOCAL/LOCAL_INIT so the HostAssoc symbol can be bound after region creation.
+static void processDoConcurrentLocalitySpecs(
+    Fortran::lower::AbstractConverter &converter, mlir::Location loc,
+    fir::FirOpBuilder &builder,
+    const std::list<Fortran::parser::LocalitySpec> &localityList,
+    llvm::SmallVector<mlir::Value> &privateOperands,
+    llvm::SmallVector<mlir::Value> &firstprivateOperands,
+    llvm::SmallVector<mlir::Value> &reductionOperands, AccDataMap &dataMap,
+    llvm::SmallVector<
+        std::pair<Fortran::semantics::SymbolRef, Fortran::semantics::SymbolRef>>
+        &localSymPairs) {
+  for (const Fortran::parser::LocalitySpec &locSpec : localityList) {
+    if (const auto *reduceSpec =
+            std::get_if<Fortran::parser::LocalitySpec::Reduce>(&locSpec.u)) {
+      const auto &reduceOp =
+          std::get<Fortran::parser::ReductionOperator>(reduceSpec->t);
+      const auto &names =
+          std::get<std::list<Fortran::parser::Name>>(reduceSpec->t);
+      for (const Fortran::parser::Name &name : names) {
+        const Fortran::semantics::Symbol &sym = name.symbol->GetUltimate();
+        mlir::Value symAddr = converter.getSymbolAddress(sym);
+        assert(symAddr && "expected symbol to have an address");
+
+        mlir::Type reductionTy = fir::unwrapRefType(symAddr.getType());
+        if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(reductionTy))
+          reductionTy = seqTy.getEleTy(); // sequence type: use its element type
+
+        if (!isSupportedReductionType(reductionTy))
+          TODO(loc, "DO CONCURRENT reduction with unsupported type");
+
+        mlir::acc::ReductionOperator mlirOp =
+            getReductionOperator(reduceOp, reductionTy, converter);
+
+        llvm::SmallVector<mlir::Value> bounds;
+        std::stringstream asFortran;
+        asFortran << Fortran::lower::mangle::demangleName(
+            toStringRef(sym.name()));
+        auto op = createDataEntryOp<mlir::acc::ReductionOp>(
+            builder, loc, symAddr, asFortran, bounds, /*structured=*/true,
+            /*implicit=*/false, mlir::acc::DataClause::acc_reduction,
+            symAddr.getType(),
+            /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+        mlir::Attribute fastMathAttr; // only set when flags are not none
+        if (builder.getFastMathFlags() != mlir::arith::FastMathFlags::none)
+          fastMathAttr = mlir::arith::FastMathFlagsAttr::get(
+              builder.getContext(), builder.getFastMathFlags());
+        mlir::SymbolRefAttr recipe = fir::acc::createOrGetReductionRecipe(
+            builder, loc, symAddr, mlirOp, bounds, fastMathAttr);
+        op.setRecipeAttr(recipe);
+        reductionOperands.push_back(op.getAccVar());
+        dataMap.emplaceSymbol(op.getAccVar(),
+                              Fortran::semantics::SymbolRef(sym));
+      }
+    } else if (const auto *localSpec =
+                   std::get_if<Fortran::parser::LocalitySpec::Local>(
+                       &locSpec.u)) {
+      for (const Fortran::parser::Name &name : localSpec->v) {
+        const Fortran::semantics::Symbol &ultimateSym =
+            name.symbol->GetUltimate();
+        mlir::Value symAddr = converter.getSymbolAddress(ultimateSym);
+        assert(symAddr && "expected symbol to have an address");
+
+        llvm::SmallVector<mlir::Value> bounds;
+        std::stringstream asFortran;
+        asFortran << Fortran::lower::mangle::demangleName(
+            toStringRef(ultimateSym.name()));
+        auto op = createDataEntryOp<mlir::acc::PrivateOp>(
+            builder, loc, symAddr, asFortran, bounds, /*structured=*/true,
+            /*implicit=*/false, mlir::acc::DataClause::acc_private,
+            symAddr.getType(),
+            /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+        mlir::SymbolRefAttr recipe =
+            fir::acc::createOrGetPrivateRecipe(builder, loc, symAddr, bounds);
+        op.setRecipeAttr(recipe);
+        privateOperands.push_back(op.getAccVar());
+        dataMap.emplaceSymbol(op.getAccVar(),
+                              Fortran::semantics::SymbolRef(ultimateSym));
+        if (name.symbol->HasLocalLocality())
+          localSymPairs.emplace_back(
+              Fortran::semantics::SymbolRef(*name.symbol),
+              Fortran::semantics::SymbolRef(ultimateSym));
+      }
+    } else if (const auto *localInitSpec =
+                   std::get_if<Fortran::parser::LocalitySpec::LocalInit>(
+                       &locSpec.u)) {
+      for (const Fortran::parser::Name &name : localInitSpec->v) {
+        const Fortran::semantics::Symbol &ultimateSym =
+            name.symbol->GetUltimate();
+        mlir::Value symAddr = converter.getSymbolAddress(ultimateSym);
+        assert(symAddr && "expected symbol to have an address");
+
+        llvm::SmallVector<mlir::Value> bounds;
+        std::stringstream asFortran;
+        asFortran << Fortran::lower::mangle::demangleName(
+            toStringRef(ultimateSym.name()));
+        auto op = createDataEntryOp<mlir::acc::FirstprivateOp>(
+            builder, loc, symAddr, asFortran, bounds, /*structured=*/true,
+            /*implicit=*/false, mlir::acc::DataClause::acc_firstprivate,
+            symAddr.getType(),
+            /*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+        mlir::SymbolRefAttr recipe = fir::acc::createOrGetFirstprivateRecipe(
+            builder, loc, symAddr, bounds);
+        op.setRecipeAttr(recipe);
+        firstprivateOperands.push_back(op.getAccVar());
+        dataMap.emplaceSymbol(op.getAccVar(),
+                              Fortran::semantics::SymbolRef(ultimateSym));
+        if (name.symbol->HasLocalLocality())
+          localSymPairs.emplace_back(
+              Fortran::semantics::SymbolRef(*name.symbol),
+              Fortran::semantics::SymbolRef(ultimateSym));
+      }
+    } // any other locality spec adds no ACC operation here
+  }
+}
+
 // Extract loop bounds, steps, induction variables, and privatization info
 // for both DO CONCURRENT and regular do loops
 static void processDoLoopBounds(
@@ -1570,7 +1689,12 @@ static void processDoLoopBounds(
     llvm::SmallVector<mlir::Type> &ivTypes,
     llvm::SmallVector<mlir::Location> &ivLocs,
     llvm::SmallVector<bool> &inclusiveBounds,
-    llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
+    llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess,
+    llvm::SmallVector<mlir::Value> &reductionOperands,
+    llvm::SmallVector<mlir::Value> &firstprivateOperands, AccDataMap &dataMap,
+    llvm::SmallVector<
+        std::pair<Fortran::semantics::SymbolRef, Fortran::semantics::SymbolRef>>
+        &localSymPairs) {
   assert(loopsToProcess > 0 && "expect at least one loop");
   locs.push_back(currentLocation); // Location of the directive
   bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
@@ -1582,9 +1706,13 @@ static void processDoLoopBounds(
         &*outerDoConstruct.GetLoopControl();
     const auto &concurrent =
         std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
-    if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
-             .empty())
-      TODO(currentLocation, "DO CONCURRENT with locality spec inside ACC");
+
+    const auto &localityList =
+        std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t);
+    if (!localityList.empty())
+      processDoConcurrentLocalitySpecs(
+          converter, currentLocation, builder, localityList, privateOperands,
+          firstprivateOperands, reductionOperands, dataMap, localSymPairs);
 
     const auto &concurrentHeader =
         std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
@@ -1832,15 +1960,20 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
   llvm::SmallVector<bool> inclusiveBounds;
   llvm::SmallVector<mlir::Location> locs;
   llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps;
+  llvm::SmallVector<mlir::Value> firstprivateOperands;
+  llvm::SmallVector<
+      std::pair<Fortran::semantics::SymbolRef, Fortran::semantics::SymbolRef>>
+      localSymPairs;
 
   // Look at the do/do concurrent loops to extract bounds information unless
   // this loop is lowered in an unstructured fashion, in which case bounds are
   // not represented on acc.loop and explicit control flow is used inside body.
   if (!eval.lowerAsUnstructured()) {
-    processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
-                        outerDoConstruct, eval, lowerbounds, upperbounds, steps,
-                        privateOperands, ivPrivate, ivTypes, ivLocs,
-                        inclusiveBounds, locs, loopsToProcess);
+    processDoLoopBounds(
+        converter, currentLocation, stmtCtx, builder, outerDoConstruct, eval,
+        lowerbounds, upperbounds, steps, privateOperands, ivPrivate, ivTypes,
+        ivLocs, inclusiveBounds, locs, loopsToProcess, reductionOperands,
+        firstprivateOperands, dataMap, localSymPairs);
   } else {
     // When the loop contains early exits, privatize induction variables, but do
     // not create acc.loop bounds. The control flow of the loop will be
@@ -1860,9 +1993,7 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
   addOperands(operands, operandSegments, tileOperands);
   addOperands(operands, operandSegments, cacheOperands);
   addOperands(operands, operandSegments, privateOperands);
-  // fill empty firstprivate operands since they are not permitted
-  // from OpenACC language perspective.
-  addOperands(operands, operandSegments, {});
+  addOperands(operands, operandSegments, firstprivateOperands);
   addOperands(operands, operandSegments, reductionOperands);
 
   auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(
@@ -1877,6 +2008,13 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
   // Remap symbols from data clauses to use data operation results
   dataMap.remapDataOperandSymbols(converter, builder, loopOp.getRegion());
 
+  // For DO CONCURRENT LOCAL/LOCAL_INIT variables, the body references the
+  // HostAssoc symbol (with LocalityLocal flag), not the ultimate symbol.
+  // Copy the binding from the ultimate to the HostAssoc symbol so lookups
+  // inside the region find the privatized variable.
+  for (auto &[hostAssocSym, ultimateSym] : localSymPairs)
+    converter.copySymbolBinding(ultimateSym, hostAssocSym);
+
   if (!eval.lowerAsUnstructured()) {
     for (auto [arg, iv] :
          llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),

diff  --git a/flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90 b/flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90
new file mode 100644
index 0000000000000..a93f3939c36c1
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-do-concurrent-locality.f90
@@ -0,0 +1,296 @@
+! Test lowering of DO CONCURRENT with locality specs inside ACC constructs.
+! Per OpenACC 2.17.2:
+!   - DO CONCURRENT without a loop construct in a kernels construct is
+!     treated as if annotated with loop auto.
+!   - DO CONCURRENT in a parallel construct or accelerator routine is
+!     treated as if annotated with loop independent.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! ---------------------------------------------------------------------------
+! REDUCE locality spec
+! ---------------------------------------------------------------------------
+
+! Scalar reduction in kernels region (no explicit loop → auto)
+! CHECK-LABEL: func.func @_QPreduce_kernels_region
+subroutine reduce_kernels_region()
+  real :: a(10), s
+  integer :: i
+  s = 0.
+  !$acc kernels
+  do concurrent(i=1:10) reduce(+:s)
+    s = s + a(i)
+  end do
+  !$acc end kernels
+end subroutine
+
+! CHECK: acc.kernels {
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! Scalar reduction in parallel region (no explicit loop → independent)
+! CHECK-LABEL: func.func @_QPreduce_parallel_region
+subroutine reduce_parallel_region()
+  real :: a(10), s
+  integer :: i
+  s = 0.
+  !$acc parallel
+  do concurrent(i=1:10) reduce(+:s)
+    s = s + a(i)
+  end do
+  !$acc end parallel
+end subroutine
+
+! CHECK: acc.parallel {
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! Combined kernels loop with reduce (auto)
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop
+subroutine reduce_kernels_loop()
+  real :: a(16,16), b(16,16), s
+  integer :: i, j
+  s = 0.
+  !$acc kernels loop
+  do concurrent(i=1:16, j=1:16) reduce(+:s)
+    b(i,j) = a(i,j)**2
+    s = s + b(i,j)
+  end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true>}
+
+! Combined parallel loop with reduce (independent)
+! CHECK-LABEL: func.func @_QPreduce_parallel_loop
+subroutine reduce_parallel_loop()
+  real :: a(10), s
+  integer :: i
+  s = 0.
+  !$acc parallel loop
+  do concurrent(i=1:10) reduce(+:s)
+    s = s + a(i)
+  end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(parallel) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! Multiple reductions (add + multiply)
+! CHECK-LABEL: func.func @_QPmulti_reduce
+subroutine multi_reduce()
+  real :: a(10), s, p
+  integer :: i
+  s = 0.
+  p = 1.
+  !$acc parallel loop
+  do concurrent(i=1:10) reduce(+:s) reduce(*:p)
+    s = s + a(i)
+    p = p * a(i)
+  end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_mul{{.*}}) -> !fir.ref<f32> {name = "p"}
+! CHECK: acc.loop {{.*}}reduction(
+
+! Max/min reductions
+! CHECK-LABEL: func.func @_QPreduce_max_min
+subroutine reduce_max_min()
+  real :: a(10), mx, mn
+  integer :: i
+  mx = -huge(mx)
+  mn = huge(mn)
+  !$acc kernels loop
+  do concurrent(i=1:10) reduce(max:mx) reduce(min:mn)
+    mx = max(mx, a(i))
+    mn = min(mn, a(i))
+  end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_max{{.*}}) -> !fir.ref<f32> {name = "mx"}
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_min{{.*}}) -> !fir.ref<f32> {name = "mn"}
+! CHECK: acc.loop {{.*}}reduction(
+
+! Integer multiply reduction
+! CHECK-LABEL: func.func @_QPint_reduce
+subroutine int_reduce()
+  integer :: a(10), prod, i
+  prod = 1
+  !$acc kernels loop
+  do concurrent(i=1:10) reduce(*:prod)
+    prod = prod * a(i)
+  end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) recipe(@reduction_mul{{.*}}) -> !fir.ref<i32> {name = "prod"}
+
+! ---------------------------------------------------------------------------
+! LOCAL locality spec → acc.private
+! ---------------------------------------------------------------------------
+
+! LOCAL in kernels region (auto)
+! CHECK-LABEL: func.func @_QPlocal_kernels_region
+subroutine local_kernels_region()
+  real :: a(10), tmp
+  integer :: i
+  !$acc kernels
+  do concurrent(i=1:10) local(tmp)
+    tmp = a(i) * 2.0
+    a(i) = tmp + 1.0
+  end do
+  !$acc end kernels
+end subroutine
+
+! CHECK: acc.kernels {
+! CHECK: %[[PRIV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "tmp"}
+! CHECK: acc.loop private(%[[PRIV]],
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! LOCAL in parallel region (independent)
+! CHECK-LABEL: func.func @_QPlocal_parallel_region
+subroutine local_parallel_region()
+  real :: a(10), tmp
+  integer :: i
+  !$acc parallel
+  do concurrent(i=1:10) local(tmp)
+    tmp = a(i) * 2.0
+    a(i) = tmp + 1.0
+  end do
+  !$acc end parallel
+end subroutine
+
+! CHECK: acc.parallel {
+! CHECK: %[[PRIV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "tmp"}
+! CHECK: acc.loop private(%[[PRIV]],
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! ---------------------------------------------------------------------------
+! LOCAL_INIT locality spec → acc.firstprivate
+! ---------------------------------------------------------------------------
+
+! LOCAL_INIT in kernels region (auto)
+! CHECK-LABEL: func.func @_QPlocal_init_kernels_region
+subroutine local_init_kernels_region()
+  real :: a(10), scale
+  integer :: i
+  scale = 2.0
+  !$acc kernels
+  do concurrent(i=1:10) local_init(scale)
+    a(i) = a(i) * scale
+  end do
+  !$acc end kernels
+end subroutine
+
+! CHECK: acc.kernels {
+! CHECK: %[[FP:.*]] = acc.firstprivate varPtr(%{{.*}} : !fir.ref<f32>) recipe(@firstprivatization_ref_f32) -> !fir.ref<f32> {name = "scale"}
+! CHECK: acc.loop {{.*}}firstprivate(%[[FP]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! ---------------------------------------------------------------------------
+! Mixed locality specs: REDUCE + LOCAL
+! ---------------------------------------------------------------------------
+
+! CHECK-LABEL: func.func @_QPmixed_locality
+subroutine mixed_locality()
+  real :: a(10), s, tmp
+  integer :: i
+  s = 0.
+  !$acc parallel loop
+  do concurrent(i=1:10) reduce(+:s) local(tmp)
+    tmp = a(i) * a(i)
+    s = s + tmp
+  end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK-DAG: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK-DAG: %[[PRIV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<f32>) recipe(@privatization_ref_f32) -> !fir.ref<f32> {name = "tmp"}
+! CHECK: acc.loop {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+
+! ---------------------------------------------------------------------------
+! Reduce combined with explicit ACC reduction clause
+! ---------------------------------------------------------------------------
+
+! CHECK-LABEL: func.func @_QPreduce_with_acc_clause
+subroutine reduce_with_acc_clause()
+  real :: a(10), s1, s2
+  integer :: i
+  s1 = 0.
+  s2 = 0.
+  !$acc parallel loop reduction(+:s1)
+  do concurrent(i=1:10) reduce(+:s2)
+    s1 = s1 + a(i)
+    s2 = s2 + a(i) * 2.0
+  end do
+end subroutine
+
+! CHECK: acc.parallel combined(loop)
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s1"}
+! CHECK-DAG: acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s2"}
+! CHECK: acc.loop {{.*}}reduction(
+
+! ---------------------------------------------------------------------------
+! Explicit loop par mode clauses: auto, seq, independent
+! ---------------------------------------------------------------------------
+
+! kernels loop auto with reduce
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop_auto
+subroutine reduce_kernels_loop_auto()
+  real :: a(10), s
+  integer :: i
+  s = 0.
+  !$acc kernels loop auto
+  do concurrent(i=1:10) reduce(+:s)
+    s = s + a(i)
+  end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {auto_ = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>}
+
+! kernels loop seq with reduce
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop_seq
+subroutine reduce_kernels_loop_seq()
+  real :: a(10), s
+  integer :: i
+  s = 0.
+  !$acc kernels loop seq
+  do concurrent(i=1:10) reduce(+:s)
+    s = s + a(i)
+  end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]}
+
+! kernels loop independent with reduce
+! CHECK-LABEL: func.func @_QPreduce_kernels_loop_independent
+subroutine reduce_kernels_loop_independent()
+  real :: a(10), s
+  integer :: i
+  s = 0.
+  !$acc kernels loop independent
+  do concurrent(i=1:10) reduce(+:s)
+    s = s + a(i)
+  end do
+end subroutine
+
+! CHECK: acc.kernels combined(loop)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<f32>) recipe(@reduction_add{{.*}}) -> !fir.ref<f32> {name = "s"}
+! CHECK: acc.loop combined(kernels) {{.*}}reduction(%[[RED]] : !fir.ref<f32>)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}


        


More information about the flang-commits mailing list