[flang-commits] [flang] [flang][acc] remap symbol appearing in reduction clause (PR #168876)
via flang-commits
flang-commits at lists.llvm.org
Thu Nov 20 06:10:11 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-openacc
Author: None (jeanPerier)
<details>
<summary>Changes</summary>
This patch is a follow-up of #<!-- -->162306 for the reduction clause.
Inside the compute region that carries the reduction clause, a new hlfir.declare is generated for symbol appearing in the reduction clause. The input of this hlfir.declare is the acc.reduction result. The related semantics::Symbol is remapped to the hlfir.declare result so that any reference to the symbol inside the compute region will use this SSA value as the starting point instead of the SSA value for the host address.
---
Full diff: https://github.com/llvm/llvm-project/pull/168876.diff
2 Files Affected:
- (modified) flang/lib/Lower/OpenACC.cpp (+23-12)
- (added) flang/test/Lower/OpenACC/acc-reduction-remapping.f90 (+160)
``````````diff
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 98a3aced3f528..d10169e236471 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1622,16 +1622,18 @@ static bool isSupportedReductionType(mlir::Type ty) {
return fir::isa_trivial(ty);
}
-static void
-genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
- Fortran::lower::AbstractConverter &converter,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::StatementContext &stmtCtx,
- llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
- llvm::SmallVector<mlir::Attribute> &reductionRecipes,
- llvm::ArrayRef<mlir::Value> async,
- llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
- llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
+static void genReductions(
+ const Fortran::parser::AccObjectListWithReduction &objectList,
+ Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ llvm::SmallVectorImpl<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Attribute> &reductionRecipes,
+ llvm::ArrayRef<mlir::Value> async,
+ llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
+ llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
const auto &objects = std::get<Fortran::parser::AccObjectList>(objectList.t);
const auto &op = std::get<Fortran::parser::ReductionOperator>(objectList.t);
@@ -1644,6 +1646,8 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
Fortran::semantics::Symbol &symbol = getSymbolFromAccObject(accObject);
Fortran::semantics::MaybeExpr designator = Fortran::common::visit(
[&](auto &&s) { return ea.Analyze(s); }, accObject.u);
+ bool isWholeSymbol =
+ !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
fir::factory::AddrAndBoundsInfo info =
Fortran::lower::gatherDataOperandAddrAndBounds<
mlir::acc::DataBoundsOp, mlir::acc::DataBoundsType>(
@@ -1680,6 +1684,11 @@ genReductions(const Fortran::parser::AccObjectListWithReduction &objectList,
reductionRecipes.push_back(mlir::SymbolRefAttr::get(
builder.getContext(), recipe.getSymName().str()));
reductionOperands.push_back(op.getAccVar());
+ // Track the symbol and its corresponding mlir::Value if requested so that
+ // accesses inside the compute/loop regions use the acc.reduction variable.
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
}
}
@@ -2545,7 +2554,8 @@ static mlir::acc::LoopOp createLoopOp(
&clause.u)) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, /*async=*/{},
- /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
+ &dataOperandSymbolPairs);
} else if (std::get_if<Fortran::parser::AccClause::Seq>(&clause.u)) {
for (auto crtDeviceTypeAttr : crtDeviceTypes)
seqDeviceTypes.push_back(crtDeviceTypeAttr);
@@ -2995,7 +3005,8 @@ static Op createComputeOp(
if (!combinedConstructs) {
genReductions(reductionClause->v, converter, semanticsContext, stmtCtx,
reductionOperands, reductionRecipes, async,
- asyncDeviceTypes, asyncOnlyDeviceTypes);
+ asyncDeviceTypes, asyncOnlyDeviceTypes,
+ &dataOperandSymbolPairs);
} else {
auto crtDataStart = dataClauseOperands.size();
genDataOperandOperations<mlir::acc::CopyinOp>(
diff --git a/flang/test/Lower/OpenACC/acc-reduction-remapping.f90 b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
new file mode 100644
index 0000000000000..6ee365f278678
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-reduction-remapping.f90
@@ -0,0 +1,160 @@
+! Test remapping of variables appearing in OpenACC reduction clause
+! to the related acc dialect data operation result.
+
+! This tests checks how the hlfir.declare is recreated and used inside
+! the acc compute region.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+subroutine scalar_combined(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine scalar_split(x, y)
+ real :: y, x(100)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do i=1,100
+ y = y + x(i)
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_combined(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+subroutine array_split(x, y, n)
+ integer(8) :: n
+ real :: y(n), x(100, n)
+ !$acc parallel copyin(x) copyout(y)
+ !$acc loop reduction(+:y)
+ do j=1,n
+ do i=1,100
+ y(j) = y(j) + x(i, j)
+ end do
+ end do
+ !$acc end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPscalar_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: acc.parallel {{.*}} reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED_PAR:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_combinedEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DECLARE_I:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFscalar_combinedEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_0:.*]] to %[[DECLARE_I]]#0 : !fir.ref<i32>
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED_PAR]]#0 : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.ref<!fir.array<100xf32>>, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED_PAR]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+!
+!
+! CHECK-LABEL: func.func @_QPscalar_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create varPtr(%[[DECLARE_Y]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[CREATE_Y]] dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%[[DECLARE_Y_PAR]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {name = "y"}
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_f32 -> %[[REDUCTION_Y]] : !fir.ref<f32>) {{.*}} {
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_RED:.*]]:2 = hlfir.declare %[[REDUCTION_Y]] dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFscalar_splitEy"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[LOAD_RED:.*]] = fir.load %[[DECLARE_RED]]#0 : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_RED]], {{.*}} {{.*}}: f32
+! CHECK: hlfir.assign %[[ADDF]] to %[[DECLARE_RED]]#0 : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_combined(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_N:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %[[DUMMY_SCOPE_0]] arg 3 {uniq_name = "_QFarray_combinedEn"} : (!fir.ref<i64>, !fir.dscope) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_combinedEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
+
+
+! CHECK-LABEL: func.func @_QParray_split(
+! CHECK: %[[DUMMY_SCOPE_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y:.*]]:2 = hlfir.declare %{{.*}}({{.*}}) dummy_scope %[[DUMMY_SCOPE_0]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[CREATE_Y:.*]] = acc.create var(%[[DECLARE_Y]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copyout>, name = "y"}
+! CHECK: acc.parallel dataOperands({{.*}}%[[CREATE_Y]] : {{.*}}) {
+! CHECK: %[[DUMMY_SCOPE_1:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[BOX_ADDR_Y:.*]] = fir.box_addr %[[CREATE_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DECLARE_Y_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_Y]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_1]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction var(%[[DECLARE_Y_PAR]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "y"}
+! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "j"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) reduction(@reduction_add_box_Uxf32 -> %[[REDUCTION_Y]] : !fir.box<!fir.array<?xf32>>) {{.*}} {
+! CHECK: %[[BOX_ADDR_RED:.*]] = fir.box_addr %[[REDUCTION_Y]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[DUMMY_SCOPE_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[DECLARE_Y_LOOP_PAR:.*]]:2 = hlfir.declare %[[BOX_ADDR_RED]]({{.*}}) dummy_scope %[[DUMMY_SCOPE_2]] arg 2 {uniq_name = "_QFarray_splitEy"} : (!fir.ref<!fir.array<?xf32>>, {{.*}}, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr({{.*}}) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) {{.*}} {
+! CHECK: %[[DESIGNATE_RED:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: %[[LOAD_OLD:.*]] = fir.load %[[DESIGNATE_RED]] : !fir.ref<f32>
+! CHECK: {{.*}} = hlfir.designate {{.*}} : (!fir.box<!fir.array<100x?xf32>>, i64, i64) -> !fir.ref<f32>
+! CHECK: {{.*}} = fir.load {{.*}} : !fir.ref<f32>
+! CHECK: %[[ADDF:.*]] = arith.addf %[[LOAD_OLD]], {{.*}} {{.*}}: f32
+! CHECK: %[[DESIGNATE_RED2:.*]] = hlfir.designate %[[DECLARE_Y_LOOP_PAR]]#0 ({{.*}}) : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[ADDF]] to %[[DESIGNATE_RED2]] : f32, !fir.ref<f32>
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: return
+! CHECK: }
``````````
</details>
https://github.com/llvm/llvm-project/pull/168876
More information about the flang-commits
mailing list