[flang-commits] [flang] [flang][acc] remap symbol appearing in reduction clause (PR #168876)
via flang-commits
flang-commits at lists.llvm.org
Thu Nov 20 06:09:36 PST 2025
https://github.com/jeanPerier created https://github.com/llvm/llvm-project/pull/168876
This patch is a follow-up of #162306 for the reduction clause.
Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region will use this SSA value as the starting point instead of the SSA value for the host address.
>From 901d0cf65f8dbc09e795c72d96915c56547d424e Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier at nvidia.com>
Date: Thu, 20 Nov 2025 06:02:40 -0800
Subject: [PATCH] [flang][acc] remap symbol appearing in reduction clause
---
flang/lib/Lower/OpenACC.cpp | 35 ++--
.../Lower/OpenACC/acc-reduction-remapping.f90 | 160 ++++++++++++++++++
2 files changed, 183 insertions(+), 12 deletions(-)
create mode 100644 flang/test/Lower/OpenACC/acc-reduction-remapping.f90
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 98a3aced3f528..d10169e236471 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1622,16 +1622,18 @@ static bool isSupportedReductionType(mlir::Type ty) {
return fir::isa_trivial(ty);
}
-static void
-genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
- Fortran::lower::AbstractConverter &converter,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::StatementContext &stmtCtx,
- llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
- llvm::SmallVector<mlir::Attribute> &reductionRecipes,
- llvm::ArrayRef<mlir::Value> async,
- llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
- llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
+static void genReductions(
+ const Fortran::parser::AccObjectListWithReduction &objectList,
+ Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Attribute> &reductionRecipes,
+ llvm::ArrayRef<mlir::Value> async,
+ llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
+ llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
@@ -1644,6 +1646,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
[&](auto &&s) { return ea.Analyze(s); }, accObject.u);
+ bool isWholeSymbol =
+ !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
fir::factory::AddrAndBoundsInfo info =
Fortran::lower::gatherDataOperandAddrAndBounds<
mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
@@ -1680,6 +1684,11 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
reductionRecipes.push_back(mlir::SymbolRefAttr::get(
builder.getContext(), recipe.getSymName().str()));
reductionOperands.push_back(op.getAccVar());
+ // Track the symbol and its corresponding mlir::Value if requested so that
+ // accesses inside the compute/loop regions use the acc.reduction variable.
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
}
}
@@ -2545,7 +2554,8 @@ static mlir::acc::LoopOp createLoopOp(
&clause.u)) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, /*async=*/{},
- /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
+ &dataOperandSymbolPairs);
} else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
for (auto crtDeviceTypeAttr : crtDeviceTypes)
seqDeviceTypes.push_back(crtDeviceTypeAttr);
@@ -2995,7 +3005,8 @@ static Op createComputeOp(
if (!combinedConstructs) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, async,
- asyncDeviceTypes, asyncOnlyDeviceTypes);
+ asyncDeviceTypes, asyncOnlyDeviceTypes,
+ &dataOperandSymbolPairs);
} else {
auto crtDataStart = dataClauseOperands.size();
genDataOperandOperations<mlir::acc::CopyinOp>(
diff --git a/flang/test/Lower/OpenACC/acc-reduction-remapping.f90 b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
new file mode 100644
index 0000000000000..6ee365f278678
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
@@ -0,0 +1,160 @@
+! Test remapping of variables appearing in OpenACC reduction clause
+! to the related acc dialect data operation result.
+
+! This tests checks how the hlfir.declare is recreated and used inside
+! the acc compute region.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+subroutine scalar_combined(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine scalar_split(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_combined(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_split(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPscalar_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: acc.parallel {{.*}} reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED_PAR:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFscalar_combinedEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_0:.*]] to %[[DECLARE_I]]#0 : !fir.ref<i32>
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED_PAR]]#0 : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.ref<!fir.array<100xf32>>, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED_PAR]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+!
+!
+! CHECK-LABEL: func.func @_QPscalar_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[CREATE_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y_PAR]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {{.*}} {
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED]]#0 : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 3 {uniq_name = "_QFarray_combinedEn"} : (!fir.ref<i64>, !fir.dscope) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_Y:.*]] = fir.box_addr %[[CREATE_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_Y]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y_PAR]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {{.*}} {
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_LOOP_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
More information about the flang-commits
mailing list