[flang-commits] [flang] [flang] Support `do concurrent ... reduce` for associating names (PR #148597)
via flang-commits
flang-commits at lists.llvm.org
Mon Jul 14 02:27:36 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: Kareem Ergawy (ergawy)
<details>
<summary>Changes</summary>
Extends reduction support for `do concurrent`, in particular, for associating names. Consider the following input:
```fortran
subroutine dc_associate_reduce
integer :: i
real, allocatable, dimension(:) :: x
associate(x_associate => x)
do concurrent (i = 1:10) reduce(+: x_associate)
end do
end associate
end subroutine
```
The declaration of `x_associate` is emitted as follows:
```mlir
%13:2 = hlfir.declare %10(%12) {uniq_name = "...."} : (!fir.heap<!fir.array<?xf32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.heap<!fir.array<?xf32>>)
```
where the HLFIR base type is an array descriptor (i.e. the allocatable/heap attribute is dropped as stipulated by the spec; section 11.1.3.3).
The problem here is that `declare_reduction` ops accept only reference types. This restriction is already partially handled for `fir::BaseBoxType`s by allocating a stack slot for the descriptor and storing the box in that stack allocation. We have to modify this a little bit for `associate` since the HLFIR and FIR base types are different (unlike most scenarios).
---
Full diff: https://github.com/llvm/llvm-project/pull/148597.diff
7 Files Affected:
- (modified) flang/lib/Lower/Support/ReductionProcessor.cpp (+28-6)
- (modified) flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90 (+1-1)
- (modified) flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 (+2-2)
- (modified) flang/test/Lower/OpenMP/sections-array-reduction.f90 (+1-1)
- (modified) flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 (+1-1)
- (modified) flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 (+1-1)
- (added) flang/test/Lower/do_concurrent_reduce_associate.f90 (+20)
``````````diff
diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp
index 14b2c9836748f..ddcecd8c1bb02 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -633,13 +633,25 @@ void ReductionProcessor::processReductionArguments(
}
}
- fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
-
// Reduction variable processing common to both intrinsic operators and
// procedure designators
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+ mlir::OpBuilder::InsertPoint dcIP;
+ constexpr bool isDoConcurrent =
+ std::is_same_v<OpType, fir::DeclareReductionOp>;
+
+ if (isDoConcurrent) {
+ dcIP = builder.saveInsertionPoint();
+ builder.setInsertionPoint(
+ builder.getRegion().getParentOfType<fir::DoConcurrentOp>());
+ }
+
for (const semantics::Symbol *symbol : reductionSymbols) {
mlir::Value symVal = converter.getSymbolAddress(*symbol);
+
+ if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
+ symVal = declOp.getBase();
+
mlir::Type eleType;
auto refType = mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType());
if (refType)
@@ -667,13 +679,20 @@ void ReductionProcessor::processReductionArguments(
// boxed arrays are passed as values not by reference. Unfortunately,
// we can't pass a box by value to omp.redution_declare, so turn it
// into a reference
+ auto oldIP = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(builder.getAllocaBlock());
+ auto alloca =
+ builder.create<fir::AllocaOp>(currentLocation, symVal.getType());
+ builder.restoreInsertionPoint(oldIP);
+ builder.create<fir::StoreOp>(currentLocation, symVal, alloca);
+ symVal = alloca;
+ }
+ if (mlir::isa<fir::BaseBoxType>(symVal.getType())) {
auto alloca =
builder.create<fir::AllocaOp>(currentLocation, symVal.getType());
builder.create<fir::StoreOp>(currentLocation, symVal, alloca);
symVal = alloca;
- } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) {
- symVal = declOp.getBase();
}
// this isn't the same as the by-val and by-ref passing later in the
@@ -693,7 +712,7 @@ void ReductionProcessor::processReductionArguments(
unsigned idx = 0;
for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
- const auto &kindMap = firOpBuilder.getKindMap();
+ const auto &kindMap = builder.getKindMap();
std::string reductionName;
ReductionIdentifier redId;
@@ -745,9 +764,12 @@ void ReductionProcessor::processReductionArguments(
OpType decl = createDeclareReduction<OpType>(
converter, reductionName, redId, redType, currentLocation, isByRef);
reductionDeclSymbols.push_back(
- mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName()));
+ mlir::SymbolRefAttr::get(builder.getContext(), decl.getSymName()));
++idx;
}
+
+ if (isDoConcurrent)
+ builder.restoreInsertionPoint(dcIP);
}
const semantics::SourceName
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90
index a5710fcf5352b..ec54294c7104f 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-array-lb.f90
@@ -69,6 +69,7 @@ program reduce
! CHECK: }
! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} {
+! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<3x2xi32>>
! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref<!fir.array<3x2xi32>>
! CHECK: %[[VAL_1:.*]] = arith.constant 2 : index
! CHECK: %[[VAL_2:.*]] = arith.constant 3 : index
@@ -76,7 +77,6 @@ program reduce
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : index
! CHECK: %[[VAL_5:.*]] = fir.shape_shift %[[VAL_1]], %[[VAL_2]], %[[VAL_3]], %[[VAL_4]] : (index, index, index, index) -> !fir.shapeshift<2>
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_5]]) {uniq_name = "_QFEi"} : (!fir.ref<!fir.array<3x2xi32>>, !fir.shapeshift<2>) -> (!fir.box<!fir.array<3x2xi32>>, !fir.ref<!fir.array<3x2xi32>>)
-! CHECK: %[[VAL_7:.*]] = fir.alloca !fir.box<!fir.array<3x2xi32>>
! CHECK: fir.store %[[VAL_6]]#0 to %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<3x2xi32>>>
! CHECK: omp.parallel reduction(byref @add_reduction_byref_box_3x2xi32 %[[VAL_7]] -> %[[VAL_8:.*]] : !fir.ref<!fir.box<!fir.array<3x2xi32>>>) {
! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {uniq_name = "_QFEi"} : (!fir.ref<!fir.box<!fir.array<3x2xi32>>>) -> (!fir.ref<!fir.box<!fir.array<3x2xi32>>>, !fir.ref<!fir.box<!fir.array<3x2xi32>>>)
diff --git a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
index 0cf88cf889868..104904497745d 100644
--- a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
+++ b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
@@ -64,11 +64,11 @@ subroutine max_array_reduction(l, r)
! CHECK-LABEL: func.func @_QPmax_array_reduction(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "l"},
! CHECK-SAME: %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "r"}) {
+! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFmax_array_reductionEl"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFmax_array_reductionEr"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
-! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_3]]#0 to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: omp.parallel reduction(byref @max_byref_box_Uxi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmax_array_reductionEl"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
diff --git a/flang/test/Lower/OpenMP/sections-array-reduction.f90 b/flang/test/Lower/OpenMP/sections-array-reduction.f90
index 91e0680692637..2f2808cebfc0c 100644
--- a/flang/test/Lower/OpenMP/sections-array-reduction.f90
+++ b/flang/test/Lower/OpenMP/sections-array-reduction.f90
@@ -34,7 +34,7 @@ subroutine sectionsReduction(x)
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFsectionsreductionEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
! CHECK: omp.parallel {
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
-! CHECK: fir.store %[[VAL_2]]#1 to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+! CHECK: fir.store %[[VAL_2]]#0 to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
! CHECK: omp.sections reduction(byref @add_reduction_byref_box_Uxf32 %[[VAL_3]] -> %[[VAL_4:.*]] : !fir.ref<!fir.box<!fir.array<?xf32>>>) {
! CHECK: omp.section {
! CHECK: ^bb0(%[[VAL_5:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>):
diff --git a/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90 b/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90
index 18d45217272fc..18a4f75b86309 100644
--- a/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90
+++ b/flang/test/Lower/OpenMP/taskgroup-task-array-reduction.f90
@@ -22,7 +22,7 @@
! CHECK-SAME: {uniq_name = "_QFtask_reductionEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
! CHECK: omp.parallel {
! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
-! CHECK: fir.store %[[VAL_2]]#1 to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+! CHECK: fir.store %[[VAL_2]]#0 to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
! CHECK: omp.taskgroup task_reduction(byref @add_reduction_byref_box_Uxf32 %[[VAL_3]] -> %[[VAL_4:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>) {
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]]
! CHECK-SAME: {uniq_name = "_QFtask_reductionEx"} : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> (!fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>)
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
index 290f9e1981361..d8c5706b912a5 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
@@ -81,7 +81,7 @@ subroutine reduce(r)
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %{{[0-9]+}} {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box<!fir.array<?xf64>>, !fir.dscope) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
! CHECK: omp.parallel {
! CHECK: %[[VAL_4:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
-! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_4]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK: fir.store %[[VAL_3]]#0 to %[[VAL_4]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
! CHECK: %[[VAL_7:.*]] = arith.constant 0 : i32
! CHECK: %[[VAL_8:.*]] = arith.constant 10 : i32
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
diff --git a/flang/test/Lower/do_concurrent_reduce_associate.f90 b/flang/test/Lower/do_concurrent_reduce_associate.f90
new file mode 100644
index 0000000000000..828bd89e75c7b
--- /dev/null
+++ b/flang/test/Lower/do_concurrent_reduce_associate.f90
@@ -0,0 +1,20 @@
+! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s
+
+subroutine dc_associate_reduce
+ integer :: i
+ real, allocatable, dimension(:) :: x
+
+ associate(x_associate => x)
+ do concurrent (i = 1:10) reduce(+: x_associate)
+ end do
+ end associate
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdc_associate_reduce() {
+! CHECK: %[[BOX_ALLOC:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
+! CHECK: %[[ASSOC_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "{{.*}}x_associate"}
+! CHECK: fir.store %[[ASSOC_DECL]]#0 to %[[BOX_ALLOC]]
+! CHECK-NEXT: fir.do_concurrent {
+! CHECK: fir.do_concurrent.loop {{.*}} reduce(byref @{{.*}} #fir.reduce_attr<add> %[[BOX_ALLOC]] -> %{{.*}} : !{{.*}}) {
+! CHECK: }
+! CHECK: }
``````````
</details>
https://github.com/llvm/llvm-project/pull/148597
More information about the flang-commits
mailing list