[flang-commits] [flang] 077a280 - [flang][acc] remap symbol appearing in reduction clause (#168876)
via flang-commits
flang-commits at lists.llvm.org
Tue Nov 25 06:25:19 PST 2025
Author: jeanPerier
Date: 2025-11-25T15:25:15+01:00
New Revision: 077a280cf586b29c6aa37a17637bcb6b91dc121c
URL: https://github.com/llvm/llvm-project/commit/077a280cf586b29c6aa37a17637bcb6b91dc121c
DIFF: https://github.com/llvm/llvm-project/commit/077a280cf586b29c6aa37a17637bcb6b91dc121c.diff
LOG: [flang][acc] remap symbol appearing in reduction clause (#168876)
This patch is a follow-up of #162306 for the reduction clause.
Inside the compute region that carries the reduction clause, a new
hlfir.declare is generated for symbol appearing in the reduction clause.
The input of this hlfir.declare is the acc.reduction result. The related
semantics::Symbol is remapped to the hlfir.declare result so that any
reference to the symbol inside the compute region will use this SSA
value as the starting point instead of the SSA value for the host
address.
Added:
flang/test/Lower/OpenACC/acc-reduction-remapping.f90
Modified:
flang/lib/Lower/OpenACC.cpp
Removed:
################################################################################
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 98a3aced3f528..d10169e236471 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1622,16 +1622,18 @@ static bool isSupportedReductionType(mlir::Type ty) {
return fir::isa_trivial(ty);
}
-static void
-genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
- Fortran::lower::AbstractConverter &converter,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::StatementContext &stmtCtx,
- llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
- llvm::SmallVector<mlir::Attribute> &reductionRecipes,
- llvm::ArrayRef<mlir::Value> async,
- llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
- llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
+static void genReductions(
+ const Fortran::parser::AccObjectListWithReduction &objectList,
+ Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Attribute> &reductionRecipes,
+ llvm::ArrayRef<mlir::Value> async,
+ llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
+ llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
@@ -1644,6 +1646,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
[&](auto &&s) { return ea.Analyze(s); }, accObject.u);
+ bool isWholeSymbol =
+ !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
fir::factory::AddrAndBoundsInfo info =
Fortran::lower::gatherDataOperandAddrAndBounds<
mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
@@ -1680,6 +1684,11 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
reductionRecipes.push_back(mlir::SymbolRefAttr::get(
builder.getContext(), recipe.getSymName().str()));
reductionOperands.push_back(op.getAccVar());
+ // Track the symbol and its corresponding mlir::Value if requested so that
+ // accesses inside the compute/loop regions use the acc.reduction variable.
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
}
}
@@ -2545,7 +2554,8 @@ static mlir::acc::LoopOp createLoopOp(
&clause.u)) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, /*async=*/{},
- /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
+ &dataOperandSymbolPairs);
} else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
for (auto crtDeviceTypeAttr : crtDeviceTypes)
seqDeviceTypes.push_back(crtDeviceTypeAttr);
@@ -2995,7 +3005,8 @@ static Op createComputeOp(
if (!combinedConstructs) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, async,
- asyncDeviceTypes, asyncOnlyDeviceTypes);
+ asyncDeviceTypes, asyncOnlyDeviceTypes,
+ &dataOperandSymbolPairs);
} else {
auto crtDataStart = dataClauseOperands.size();
genDataOperandOperations<mlir::acc::CopyinOp>(
diff --git a/flang/test/Lower/OpenACC/acc-reduction-remapping.f90 b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
new file mode 100644
index 0000000000000..6ee365f278678
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
@@ -0,0 +1,160 @@
+! Test remapping of variables appearing in OpenACC reduction clause
+! to the related acc dialect data operation result.
+
+! This tests checks how the hlfir.declare is recreated and used inside
+! the acc compute region.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+subroutine scalar_combined(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine scalar_split(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_combined(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_split(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPscalar_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: acc.parallel {{.*}} reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED_PAR:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFscalar_combinedEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_0:.*]] to %[[DECLARE_I]]#0 : !fir.ref<i32>
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED_PAR]]#0 : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.ref<!fir.array<100xf32>>, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED_PAR]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+!
+!
+! CHECK-LABEL: func.func @_QPscalar_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[CREATE_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y_PAR]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {{.*}} {
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED]]#0 : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 3 {uniq_name = "_QFarray_combinedEn"} : (!fir.ref<i64>, !fir.dscope) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_Y:.*]] = fir.box_addr %[[CREATE_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_Y]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y_PAR]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {{.*}} {
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_LOOP_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
More information about the flang-commits
mailing list