[flang-commits] [flang] [flang][openacc] Add implicit copy for reduction in combined construct (PR #70148)

Razvan Lupusoru via flang-commits flang-commits at lists.llvm.org
Tue Oct 24 17:25:49 PDT 2023


https://github.com/razvanlupusoru created https://github.com/llvm/llvm-project/pull/70148

After PR#69417, lowering for combined constructs was updated to adhere to OpenACC 3.3, section 2.11: `A private or reduction clause on a combined construct is treated as if it appeared on the loop construct.`

However, the second part of that paragraph notes `In addition, a reduction clause on a combined construct implies a copy clause`. Since the acc dialect decomposes combined constructs, it is important to distinguish between the case where an explicit data clause is required (as noted in section 2.6.2) and the case where an implicit data action must be generated by compiler.

>From d4c1f5050aac70537a34c6bd060eb5813a6b0395 Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 24 Oct 2023 17:24:38 -0700
Subject: [PATCH] [flang][openacc] Add implicit copy for reduction in combined
 construct

After PR#69417, lowering for combined constructs was updated to adhere
to OpenACC 3.3, section 2.11: `A private or reduction clause on a
combined construct is treated as if it appeared on the loop construct.`

However, the second part of that paragraph notes `In addition, a
reduction clause on a combined construct implies a copy clause`. Since
the acc dialect decomposes combined constructs, it is important to
distinguish between the case where an explicit data clause is required
(as noted in section 2.6.2) and the case where an implicit data action
must be generated by compiler.
---
 flang/lib/Lower/OpenACC.cpp                   | 56 +++++++++++--------
 flang/test/Lower/OpenACC/acc-kernels-loop.f90 |  6 +-
 .../test/Lower/OpenACC/acc-parallel-loop.f90  |  6 +-
 flang/test/Lower/OpenACC/acc-serial-loop.f90  |  6 +-
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp       |  6 +-
 5 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 90644279d9e78ce..5c29c781417301b 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -336,7 +336,7 @@ static void genDeclareDataOperandOperationsWithModifier(
 template <typename EntryOp, typename ExitOp>
 static void genDataExitOperations(fir::FirOpBuilder &builder,
                                   llvm::SmallVector<mlir::Value> operands,
-                                  bool structured, bool implicit) {
+                                  bool structured) {
   for (mlir::Value operand : operands) {
     auto entryOp = mlir::dyn_cast_or_null<EntryOp>(operand.getDefiningOp());
     assert(entryOp && "data entry op expected");
@@ -346,7 +346,7 @@ static void genDataExitOperations(fir::FirOpBuilder &builder,
       varPtr = entryOp.getVarPtr();
     builder.create<ExitOp>(entryOp.getLoc(), entryOp.getAccPtr(), varPtr,
                            entryOp.getBounds(), entryOp.getDataClause(),
-                           structured, implicit,
+                           structured, entryOp.getImplicit(),
                            builder.getStringAttr(*entryOp.getName()));
   }
 }
@@ -1872,9 +1872,24 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
     } else if (const auto *reductionClause =
                    std::get_if<Fortran::parser::AccClause::Reduction>(
                        &clause.u)) {
-      if (!outerCombined)
+      // A reduction clause on a combined construct is treated as if it appeared
+      // on the loop construct. So don't generate a reduction clause when it is
+      // combined - delay it to the loop. However, a reduction clause on a
+      // combined construct implies a copy clause so issue an implicit copy
+      // instead.
+      if (!outerCombined) {
         genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
                       reductionOperands, reductionRecipes);
+      } else {
+        auto crtDataStart = dataClauseOperands.size();
+        genDataOperandOperations<mlir::acc::CopyinOp>(
+            std::get<Fortran::parser::AccObjectList>(reductionClause->v.t),
+            converter, semanticsContext, stmtCtx, dataClauseOperands,
+            mlir::acc::DataClause::acc_reduction,
+            /*structured=*/true, /*implicit=*/true);
+        copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
+                                 dataClauseOperands.end());
+      }
     } else if (const auto *defaultClause =
                    std::get_if<Fortran::parser::AccClause::Default>(
                        &clause.u)) {
@@ -1944,13 +1959,13 @@ createComputeOp(Fortran::lower::AbstractConverter &converter,
 
   // Create the exit operations after the region.
   genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
-      builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, copyEntryOperands, /*structured=*/true);
   genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
-      builder, copyoutEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, copyoutEntryOperands, /*structured=*/true);
   genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
-      builder, attachEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, attachEntryOperands, /*structured=*/true);
   genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
-      builder, createEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, createEntryOperands, /*structured=*/true);
 
   builder.restoreInsertionPoint(insPt);
   return computeOp;
@@ -2099,13 +2114,13 @@ static void genACCDataOp(Fortran::lower::AbstractConverter &converter,
 
   // Create the exit operations after the region.
   genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
-      builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, copyEntryOperands, /*structured=*/true);
   genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
-      builder, copyoutEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, copyoutEntryOperands, /*structured=*/true);
   genDataExitOperations<mlir::acc::AttachOp, mlir::acc::DetachOp>(
-      builder, attachEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, attachEntryOperands, /*structured=*/true);
   genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
-      builder, createEntryOperands, /*structured=*/true, /*implicit=*/false);
+      builder, createEntryOperands, /*structured=*/true);
 
   builder.restoreInsertionPoint(insPt);
 }
@@ -2415,11 +2430,11 @@ genACCExitDataOp(Fortran::lower::AbstractConverter &converter,
     exitDataOp.setFinalizeAttr(builder.getUnitAttr());
 
   genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::CopyoutOp>(
-      builder, copyoutOperands, /*structured=*/false, /*implicit=*/false);
+      builder, copyoutOperands, /*structured=*/false);
   genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DeleteOp>(
-      builder, deleteOperands, /*structured=*/false, /*implicit=*/false);
+      builder, deleteOperands, /*structured=*/false);
   genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::DetachOp>(
-      builder, detachOperands, /*structured=*/false, /*implicit=*/false);
+      builder, detachOperands, /*structured=*/false);
 }
 
 template <typename Op>
@@ -2594,7 +2609,7 @@ genACCUpdateOp(Fortran::lower::AbstractConverter &converter,
       builder, currentLocation, operands, operandSegments);
 
   genDataExitOperations<mlir::acc::GetDevicePtrOp, mlir::acc::UpdateHostOp>(
-      builder, updateHostOperands, /*structured=*/false, /*implicit=*/false);
+      builder, updateHostOperands, /*structured=*/false);
 
   if (addAsyncAttr)
     updateOp.setAsyncAttr(builder.getUnitAttr());
@@ -3054,17 +3069,14 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter,
       builder.setInsertionPointAfter(declareOp);
     }
     genDataExitOperations<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
-        builder, createEntryOperands, /*structured=*/true,
-        /*implicit=*/false);
+        builder, createEntryOperands, /*structured=*/true);
     genDataExitOperations<mlir::acc::DeclareDeviceResidentOp,
                           mlir::acc::DeleteOp>(
-        builder, deviceResidentEntryOperands, /*structured=*/true,
-        /*implicit=*/false);
+        builder, deviceResidentEntryOperands, /*structured=*/true);
     genDataExitOperations<mlir::acc::CopyinOp, mlir::acc::CopyoutOp>(
-        builder, copyEntryOperands, /*structured=*/true, /*implicit=*/false);
+        builder, copyEntryOperands, /*structured=*/true);
     genDataExitOperations<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
-        builder, copyoutEntryOperands, /*structured=*/true,
-        /*implicit=*/false);
+        builder, copyoutEntryOperands, /*structured=*/true);
   });
 }
 
diff --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
index e708ca65e7c14ae..8679e5f6ccd5f24 100644
--- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -751,12 +751,16 @@ subroutine acc_kernels_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.kernels {
+! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
+! CHECK:      acc.kernels dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
 ! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
 ! CHECK-NEXT: }{{$}}
+! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 
 end subroutine
diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index eea4950b6d38f92..e0a29273bd783e7 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -770,13 +770,17 @@ subroutine acc_parallel_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.parallel {
+! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
+! CHECK:      acc.parallel dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
 ! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
+! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 
   !$acc parallel loop
   do 10 i=0, n
diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index fb7e3615b698c1c..c94b5a577350a99 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -705,12 +705,16 @@ subroutine acc_serial_loop
     reduction_i = 1
   end do
 
-! CHECK:      acc.serial {
+! CHECK:      %[[COPYINREDR:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK:      %[[COPYINREDI:.*]] = acc.copyin varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
+! CHECK:      acc.serial dataOperands(%[[COPYINREDR]], %[[COPYINREDI]] : !fir.ref<f32>, !fir.ref<i32>) {
 ! CHECK:        acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref<f32>, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
 ! CHECK:          fir.do_loop
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
 ! CHECK-NEXT: }{{$}}
+! CHECK:      acc.copyout accPtr(%[[COPYINREDR]] : !fir.ref<f32>) to varPtr(%{{.*}} : !fir.ref<f32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_r"}
+! CHECK:      acc.copyout accPtr(%[[COPYINREDI]] : !fir.ref<i32>) to varPtr(%{{.*}} : !fir.ref<i32>) {dataClause = #acc<data_clause acc_reduction>, implicit = true, name = "reduction_i"}
 
 end subroutine acc_serial_loop
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index b7e2aec6a4e6a92..98c800033cbe91c 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -131,7 +131,8 @@ LogicalResult acc::CopyinOp::verify() {
   // Test for all clauses this operation can be decomposed from:
   if (!getImplicit() && getDataClause() != acc::DataClause::acc_copyin &&
       getDataClause() != acc::DataClause::acc_copyin_readonly &&
-      getDataClause() != acc::DataClause::acc_copy)
+      getDataClause() != acc::DataClause::acc_copy &&
+      getDataClause() != acc::DataClause::acc_reduction)
     return emitError(
         "data clause associated with copyin operation must match its intent"
         " or specify original clause this operation was decomposed from");
@@ -212,7 +213,8 @@ LogicalResult acc::CopyoutOp::verify() {
   // Test for all clauses this operation can be decomposed from:
   if (getDataClause() != acc::DataClause::acc_copyout &&
       getDataClause() != acc::DataClause::acc_copyout_zero &&
-      getDataClause() != acc::DataClause::acc_copy)
+      getDataClause() != acc::DataClause::acc_copy &&
+      getDataClause() != acc::DataClause::acc_reduction)
     return emitError(
         "data clause associated with copyout operation must match its intent"
         " or specify original clause this operation was decomposed from");



More information about the flang-commits mailing list